# Linux
## Install ollama locally

In [None]:
!curl https://ollama.ai/install.sh | sh

### Downloand prebuild gguf model

In [None]:
!ollama pull llama2
!ollama run llama2

### Alternately, provision your own preexisting model

In [None]:
!ln -s /mnt/data/ws/llama.cpp/models/ggml-llama-7b-q4_0.bin /usr/share/ollama/.ollama/models/

In [None]:
!echo "FROM /mnt/data/ws/llama.cpp/models/ggml-llama-7b-q4_0.bin"  > llama.Modelfile

In [None]:
!ollama create codellama -f llama.Modelfile

In [None]:
!ollama run chatllama

### Run Llama2 via langchain

In [5]:
# a llm
from langchain_community.llms import Ollama
llm = Ollama(model="chatllama")

In [6]:
# a chat model
from langchain_community.chat_models import ChatOllama
chat_model = ChatOllama()

In [7]:
# difference between a llm and a chatmodel
from langchain.schema import HumanMessage

text = "What would be a good company name for a company that makes colorful socks?"
messages = [HumanMessage(content=text)]

llm.invoke(text)
# >> Feetful of Fun

chat_model.invoke(messages)
# >> AIMessage(content="Socks O'Color")

OllamaEndpointNotFoundError: Ollama call failed with status code 404. Maybe your model is not found and you should pull the model with `ollama pull llama2`.

In [None]:
#invoke with a prompt
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are world class programmer, specializing in writing maintainable java code."),
    ("user", "{input}")
])
chain = prompt | llm 

In [None]:
# steam the answer
query = {"input": "Is drinking tea late night good for health?"}
for chunk in chain.stream(query):
    print(chunk, end="", flush=True) 

### Answering based on localized context

Vectorize the context document.
- use a loader
- and a splitter

In [None]:
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
docs = loader.load()

In [None]:
from langchain_community.embeddings import OllamaEmbeddings
embeddings = OllamaEmbeddings(model="chatllama")

In [None]:
# (optional) if docs are not yet vectorized - takes 8 mins
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
print("split_documents done")
vector = FAISS.from_documents(documents, embeddings)
print("vector done")

vdb_path="/mnt/data/ws/langchain-assist/smith.vdb"
vector.save_local(vdb_path)

In [None]:
from langchain_community.vectorstores import FAISS
vdb_path="/mnt/data/ws/langchain-assist/smith.vdb"
vdb = FAISS.load_local(vdb_path, embeddings)
print("vdb:" , vdb)

Create a prompt template and mixin with the chain

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain

prompt_custom = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

document_chain = create_stuff_documents_chain(llm, prompt_custom)

In [None]:
from langchain_core.documents import Document

query = {
    "input": "how can langsmith help with testing?",
    "context": [Document(page_content="langsmith can let you visualize test results")]
}

for chunk in document_chain.stream(query):
    print(chunk, end="", flush=True) # steam the answer

Documents come from a retreiver
https://python.langchain.com/docs/modules/data_connection

In [None]:
from langchain.chains import create_retrieval_chain

retriever = vdb.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [None]:
query={"input": "how can langsmith help with testing?"}

# for chunk in retrieval_chain.stream(query):
#     print(chunk, end="", flush=True) # steam the answer

response = retrieval_chain.invoke(query)
print(response["answer"])