#-----------
# Install Dependencies
#-----------
!pip install langchain langchain-community faiss-cpu sentence-transformers transformers torch python-dotenv tqdm requests


In [1]:
import os

os.makedirs("data", exist_ok=True)

# Small fixture document for the RAG pipeline; a later cell loads it from data/.
# NOTE(review): "Olllama" / "Olama" look like misspellings of "Ollama". The text is
# left untouched because the recorded outputs further down quote it verbatim.
sample_text = """
LangChain + Olllama RAG Test Document

This document discusses embeddings , retrieval,  LangChain text splitters, and running local LLMs such as phi3, mistral, and llama3.1 via Olama.
"""

# f.write() raises on failure instead of returning a falsy value, so there is no
# success/failure branch to test. Reporting after the `with` block guarantees the
# file has been flushed and closed before success is announced.
with open("data/sample.txt", "w", encoding="utf-8") as f:
    f.write(sample_text)

print("sample.txt created!")


sample.txt created!


In [2]:
# Load every .txt file found in data/ as a document.
from langchain_community.document_loaders import TextLoader, DirectoryLoader

# sample.txt is written as UTF-8, so read it back as UTF-8 explicitly rather than
# relying on whatever the platform's default text encoding happens to be.
loader = DirectoryLoader(
    "data/",
    glob="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"},
)
docs = loader.load()
print("Documents:", docs)

Documents: [Document(metadata={'source': 'data\\sample.txt'}, page_content='\nLangChain + Olllama RAG Test Document\n\nThis document discusses embeddings , retrieval,  LangChain text splitters, and running local LLMs such as phi3, mistral, and llama3.1 via Olama.\n')]


In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the loaded documents into chunks (size 500, overlap 50) and report how many
# chunks were produced.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
chunks = splitter.split_documents(docs)
print(f"chunks: {len(chunks)}")

chunks: 1


In [4]:
# HuggingFaceEmbeddings comes from langchain_huggingface here; the earlier
# alternative was `from langchain_community.embeddings import HuggingFaceEmbeddings`.
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Build the FAISS index from the chunks and persist it under "faiss_index".
faiss_index = FAISS.from_documents(chunks, embeddings)
faiss_index.save_local("faiss_index")

# k=3 is forwarded as a search kwarg (presumably the number of hits per query;
# a plain faiss_index.as_retriever() would use the library default instead).
search_options = {"k": 3}
retriever = faiss_index.as_retriever(search_kwargs=search_options)

print("FAISS index saved.")

FAISS index saved.


In [5]:
# Earlier alternative import: `from langchain_community.llms import Ollama`.
from langchain_ollama import OllamaLLM
from langchain_openai import ChatOpenAI

# Model used by the chain. Alternatives noted by the author:
#   OllamaLLM(model="mistral"), OllamaLLM(model="llama3.1"),
#   ChatOpenAI(model="gpt-4.1-mini")
model_name = "phi3"
llm = OllamaLLM(model=model_name)

#-----------
# Create the RAG Chain (LCEL)
#-----------

In [6]:
from langchain_core.prompts import PromptTemplate
#from langchain_community.llms import Ollama
from langchain_core.runnables import RunnableMap, RunnableSequence, RunnableLambda
# NOTE: the string parser is StrOutputParser from langchain_core.output_parsers; langchain_core.outputs does not expose a StringOutputParser.
from langchain_core.output_parsers import StrOutputParser

In [7]:
# Prompt with two placeholders: {context} (retrieved text) and {question}.
prompt = PromptTemplate.from_template(
    "Use the following context to answer the question.\n\n"
    "Context:\n{context}\n\n"
    "Question:\n{question}"
)

In [8]:
def rag_chain(retriever):
    """Compose the retrieval-augmented pipeline around ``retriever``.

    The returned chain is invoked with a dict holding a ``"question"`` key. It
    fills the prompt's ``context`` with the page contents of the documents
    returned by ``retriever.invoke(question)`` (joined by blank lines) and its
    ``question`` with the question itself, then pipes the result through the
    module-level ``prompt``, ``llm`` and a ``StrOutputParser``.

    Args:
        retriever: Object whose ``invoke(question)`` returns documents that
            expose a ``page_content`` attribute.

    Returns:
        The piped chain ``inputs | prompt | llm | StrOutputParser()``.
    """

    def _build_context(payload):
        # Look up documents for the question and flatten them into one string.
        hits = retriever.invoke(payload["question"])
        return "\n\n".join(hit.page_content for hit in hits)

    def _pass_question(payload):
        return payload["question"]

    prompt_inputs = {
        "context": _build_context,
        "question": _pass_question,
    }
    return prompt_inputs | prompt | llm | StrOutputParser()

#-----------
# Build the chain
#-----------

In [9]:
# Assemble the RAG chain around the retriever obtained from the FAISS index.
chain = rag_chain(retriever)
# (Evaluate `chain` on its own line to inspect the composed pipeline.)

In [10]:
# Query 1: ask the chain what the sample document says about LangChain.
question = "What does this document say about LangChain?"
response = chain.invoke({"question": question})
print(response)

The document highlights that the embeddings are based on Olllama RAG (Retrieval-Augmented Generation) technology for efficient text processing with large language models, using tools like Olama. It also mentions various aspects of LangChain such as its role in splitting texts and facilitating local runs of LLMs including phi3, mistral, and llama3.1 through Olala interface on a user's device or server without the need for internet connectivity during inference time. The aim is to improve speed by offloading computation from cloud infrastructure where possible. Moreover, it talks about using tools like LangChain text splitters in conjunction with Olllama and how they can help optimize large language model computations locally on smaller hardware setups or even mobile devices when internet access may not be available during inference time. This document seems to emphasize the potential of these technologies for efficient local processing, especially where connectivity is a concern while r

In [11]:
# Query 2: ask the chain which models the sample document lists as runnable locally.
question = "Which models can I run locally with Ollama?"
response = chain.invoke({"question": question})
print(response)

According to the original text "This document discusses embeddings , retrieval,  LangChain text splitters, and running local LLMs such as phi3, mistral, and llama3.1 via Olama." You can run the Phi-Phi (phi3), Mistral, and Llama models locally using Ollama platform. These include three versions of large language models like LLama 3.1 specifically designed to work on local machines with low hardware requirements compared to cloud-based services.


In [12]:
import langchain_core.outputs

# Debugging aid: list the names reported by dir() for langchain_core.outputs.
listed_names = dir(langchain_core.outputs)
print(listed_names)

['ChatGeneration', 'ChatGenerationChunk', 'ChatResult', 'Generation', 'GenerationChunk', 'LLMResult', 'RunInfo']
