In [7]:
from dotenv import load_dotenv
import os
import pprint
# Load environment variables from ../.env before any client is created
# (presumably credentials for the hosted, commented-out OpenAI options — TODO confirm).
load_dotenv("../.env")

from rich import print

# Character Text Splitting Algorithm

In [8]:
from langchain_core.documents import Document
text = """
The astronauts on the International Space Station conducted a spacewalk to repair a malfunctioning solar panel. 
The aroma of freshly baked croissants wafted through the charming French bakery. 
The new policy aimed to reduce carbon emissions by 50% within the next decade. 
The ancient Egyptian pharaohs were known for their elaborate headdresses and ornate jewelry."""

# Manual Splitting
# Cut the text into consecutive windows of `chunk_size` characters, ignoring word
# and sentence boundaries; only the final window can be shorter.
chunk_size = 35 # Characters
chunks = [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
# Wrap each raw chunk in a Document tagged with a "local" source, then display them.
documents = []
for piece in chunks:
    documents.append(Document(page_content=piece, metadata={"source": "local"}))
print(documents)

# Character Text Splitting Library

In [9]:
from langchain.text_splitter import CharacterTextSplitter
# Library version of the manual example: same 35-character budget and no overlap.
# The separator is the empty string and whitespace stripping is turned off.
text_splitter = CharacterTextSplitter(
    separator='',
    chunk_size=35,
    chunk_overlap=0,
    strip_whitespace=False,
)
documents = text_splitter.create_documents([text])
print(documents)

# Recursive Text Splitting

In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Recursive splitter with the same 35-character budget and no overlap;
# every other option is left at the library default.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=35,
    chunk_overlap=0,
)
recursive_documents = text_splitter.create_documents([text])
print(recursive_documents)


# Semantic Chunking

In [43]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.embeddings.ollama import OllamaEmbeddings

# Percentile strategy: the differences between sentences are all calculated, and a
# split is made wherever a difference is greater than the X-th percentile.

# Local embedding model served by Ollama; the hosted alternative is kept below.
embed_function = OllamaEmbeddings(model="nomic-embed-text")
#embed_function = OpenAIEmbeddings()

text_splitter = SemanticChunker(
    embeddings=embed_function,
    # other supported values: "standard_deviation", "interquartile"
    breakpoint_threshold_type="percentile",
    breakpoint_threshold_amount=60,
)
documents = text_splitter.create_documents([text])
print(documents)


# AI Split

In [24]:
from langchain_community.chat_models.ollama import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate


# Prompt for the "AI split" experiment. It has two placeholders, {context} and
# {question}; the extra instructions tell the model to leave the sentences of the
# context untouched (no rewriting, inventing or summarizing).
prompt_template="""Based on the following context anwer the question:
=============================
Context: {context}
==============================
Question: {question}

Extra instructions: 
1. DO NOT ALTER ANY SENTENCE IN THE CONTEXT. 
2. CONSIDER ALL THE SENTENCES IN THE CONTEXT. 
3. DO NOT INVENT ANY NEW CONTEXT
4. DO NOT SUMMARIZE THE CONTEXT WHEN PROVIDING A REPLY
"""
# Build the chat prompt object from the template string above.
prompt:ChatPromptTemplate = ChatPromptTemplate.from_template(prompt_template)

# Chat model served by the local Ollama instance (phi3, temperature 0, verbose on).
local_llm = ChatOllama(
    model="phi3",
    base_url="http://127.0.0.1:11434",
    temperature=0,
    verbose=True,
)
# Alternative: a paid hosted model, which is much faster.
#local_llm = ChatOpenAI(temperature=0)

# The chain is invoked with a dict that already holds "context" and "question",
# which is exactly the input the prompt template expects, so the dict is piped
# straight into the prompt. Mapping each key to a bare RunnablePassthrough() would
# instead hand the *entire* input dict to every placeholder, so {context} and
# {question} would both be rendered as the whole dict.
chain = prompt | local_llm | StrOutputParser()

# Ask the model to re-split the current documents and to reply in the same schema.
split_instruction = """
        Split the page_content based on semantic differences if you find necessary
        return your reply in the same schema of the context received"""
result = chain.invoke({"question": split_instruction, "context": documents})
print(result)

#Expected execution time: 51s

# Function Calling

Function calling is a terrible name! It should be called "formatted output", or something else that better conveys what it actually does: given a formatted prompt instruction, an LLM returns its response in a specific JSON format based on a schema.


In [44]:
from langchain_community.chat_models.ollama import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Output description sent to the model inside the prompt: an object whose
# "contexts" entry holds a single string field named "context".
output_spec = {"type": "object", "contexts": {"context": {"type": "string"}}}
schema = {"type": "OlamaFunction", "name": "myFunction", "output": output_spec}

# Prompt for the schema-constrained run. It has three placeholders:
# {context}, {question} and {schema} (the output description the reply should follow).
prompt_template="""Based on the following context, anwer the question in the provided schema:
=============================
Context: {context}
==============================
Question: {question}
==============================
Schema: {schema}
"""
# Build the chat prompt object from the template string above.
prompt = ChatPromptTemplate.from_template(prompt_template)

# Same local phi3 model as before, now with format="json" requested from Ollama
# and verbose output switched off.
local_llm = ChatOllama(
    model="phi3",
    base_url="http://127.0.0.1:11434",
    temperature=0,
    format="json",
    verbose=False,
)

# The chain is invoked with a dict carrying "context", "question" and "schema",
# which is exactly what the prompt template expects, so the dict goes straight
# into the prompt. Mapping each key to a bare RunnablePassthrough() would instead
# pass the *entire* input dict to every placeholder, so all three slots would be
# rendered as the whole dict.
chain = prompt | local_llm | StrOutputParser()


In [45]:
#Rag input
# Reduce every Document to its text. Entries that are already plain strings are
# kept unchanged, so re-running this cell (which rebinds `documents`) no longer
# fails with AttributeError on `page_content`.
documents = [getattr(document, "page_content", document) for document in documents]
print(documents)

In [46]:
# Run the schema-constrained chain and show the reply without surrounding whitespace.
split_request = {
    "question":"Split the page_content based on semantic differences if you find it necessary",
    "context": documents,
    "schema": schema
}
result = chain.invoke(split_request)

print(result.strip())
#Expected execution time: 1m30s

# Testing a Simple RAG

In [67]:
from langchain_community.chat_models.ollama import ChatOllama
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores.faiss import FAISS


def rag(question:str, chunks:list):
    """Answer `question` with the local phi3 model, retrieving context from `chunks`.

    The chunks are embedded with nomic-embed-text into a FAISS index, a retriever
    built on that index fills the prompt's {context}, and the model's reply is
    printed.

    Args:
        question: Question text; used both as the retrieval query and in the prompt.
        chunks: Documents to index (handed to FAISS.from_documents).

    Returns:
        None. The answer is printed.
    """
    local_llm = ChatOllama(model="phi3", base_url="http://127.0.0.1:11434", temperature=0)
    # FAISS is an in-memory vector store
    vectorstore = FAISS.from_documents(
        documents=chunks,
        embedding=OllamaEmbeddings(model="nomic-embed-text")
    )
    retriever = vectorstore.as_retriever()
    prompt_template="""Answer the question based only on the following context:
    ==============================
    {context}
    ==============================
    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(prompt_template)

    # Both branches of the mapping receive the chain input unchanged: the retriever
    # uses it as the search query and RunnablePassthrough forwards it to {question}.
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | local_llm
        | StrOutputParser()
    )
    # The input therefore has to be the bare question string. Invoking with
    # {"question": question} would send a dict to the retriever as its query and
    # render that dict in the {question} slot.
    result = chain.invoke(question)
    print(result)

In [56]:
import json
# Parse the model reply from JSON text into Python objects.
# NOTE(review): `result` is expected to be the string produced by the
# format="json" chain; this depends on cell execution order — confirm.
output = json.loads(result)
print(output)


In [68]:
# Rebuild Document chunks from the parsed model output and query them through rag().
# NOTE(review): assumes `output` holds a "context" list whose items carry a "text"
# key — confirm against the actual model reply.
parsed_entries = output["context"]
chunks = [Document(page_content=entry['text']) for entry in parsed_entries]
rag("What the astrounauts did ?", chunks)
#Expected execution time: 15s

In [17]:
# Now without the RAG to compare the time differences

from langchain_community.llms import (
    Ollama,
)  # Did you notice we´re using Ollama from another library ? :)
from langchain_core.runnables import (
    RunnablePassthrough,
)  # This just says: The parameter will be provided later
from langchain_core.output_parsers import (
    StrOutputParser,
)  # gets rid of chunk['message']['content'] (lesson 4 stream)
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
)

# phi3 is a small model from Microsoft; run `ollama pull phi3` first if you have not
# downloaded it already.
llm = Ollama(
    model="phi3",
    temperature=0,
    base_url="http://127.0.0.1:11434",
)

# Two-message prompt: the system message carries {context}, the human message {question}.
system_message = SystemMessagePromptTemplate.from_template(
    "You are a helpful assistant that give an aswer only based on the provided {context}."
)
human_message = HumanMessagePromptTemplate.from_template("Question: {question}")
prompt = ChatPromptTemplate.from_messages([system_message, human_message])

# The chain is invoked with a dict that already holds "context" and "question",
# which is exactly the input the prompt expects, so the dict is piped straight
# into it. Mapping each key to a bare RunnablePassthrough() would instead pass the
# *entire* input dict to every placeholder, so the system and human messages
# would both be rendered with the whole dict.
chain = prompt | llm | StrOutputParser()

text = """
The astronauts on the International Space Station conducted a spacewalk to repair a malfunctioning solar panel. 
The aroma of freshly baked croissants wafted through the charming French bakery. 
The new policy aimed to reduce carbon emissions by 50% within the next decade. 
The ancient Egyptian pharaohs were known for their elaborate headdresses and ornate jewelry.
"""

# No retrieval step here: the full text is supplied as the context.
direct_request = {"question": "What the astronauts did ?", "context": text}
result = chain.invoke(direct_request)
print(result)
# Expected execution time: 24s