In [None]:
import os
from dotenv import load_dotenv

# Read key/value pairs from a .env file into the process environment.
load_dotenv()

# None when the variable is not defined.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Name of the model used throughout the notebook.
# Other candidate values:
#   "gpt-3.5-turbo", "gpt-4o-mini", "text-embedding-3-small", "mixtral:8x7b"
MODEL = "llama3.2"

In [21]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain_core.output_parsers import StrOutputParser
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings

# Select the LLM and its embeddings backend from MODEL: names starting with
# "gpt" go to OpenAI, every other name goes to Ollama.
if MODEL.startswith("gpt"):
    model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=MODEL)
    # Pass the same key as the chat model so both OpenAI clients are
    # configured from one place instead of one relying on ambient env state.
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
else:
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model=MODEL)


# Appended to every chain so the final result is a plain string.
parser = StrOutputParser()

# Smoke test: send one prompt through the model and print the reply.
chain = model | parser
print(chain.invoke("Tell me a joke"))

Why don't scientists trust atoms? 

Because they make up everything!


# PDF

In [40]:
from langchain_community.document_loaders import PyPDFLoader

# Document to index. Alternative input: "../data/Eliott Legendre CV.pdf"
loader = PyPDFLoader(file_path="../data/CDI Eliott LEGENDRE.pdf")

# Load the PDF and split it into a list of Document chunks.
pages = loader.load_and_split()

# Display the first chunk as a quick sanity check.
pages[0]

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 28 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong pointing object 38 0 (offset 0)
Ignoring wrong pointing object 46 0 (offset 0)


Document(metadata={'source': '../data/CDI Eliott LEGENDRE.pdf', 'page': 0}, page_content='1  \n CONTRAT DE TRAVAIL A DUREE INDETERMINEE        ENTRE-LES SOUSSIGNES   OPENVALUE, société par actions simplifiée au capital de 179 297,20 euros, dont le siège social est 58 Avenue Charles de Gaulle 92200 Neuilly Sur Seine, immatriculée au Registre du Commerce et des sociétés de Nanterre sous le numéro 804 998 045  Représentée par son Président, Monsieur Guillaume LEBOUCHER  Ci-après «la Société"  D’UNE PART      ET  Monsieur Eliott LEGENDRE,     Ci-après "le Salarié"     D\'AUTRE PART,')

# Prompt

In [30]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders, {context} and {question}; it tells the
# model to reply "I don't know" when it cannot answer.
template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)

# Render the template with placeholder values to check the formatting.
example_inputs = {"context": "Here is some context", "question": "Here is a question"}
print(prompt.format(**example_inputs))


Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: Here is some context

Question: Here is a question



In [34]:
# Pipeline: fill the prompt template, call the model, parse the reply.
chain = prompt | model | parser

# Try the chain with a hand-written context and question.
name_query = {
    "context": "My parents named me Santiago",
    "question": "What's your name'?",
}
chain.invoke(name_query)

"I don't know."

# RAG

In [None]:
from langchain_community.vectorstores import DocArrayInMemorySearch

# Embed every PDF chunk and keep the vectors in an in-memory store.
vectorstore = DocArrayInMemorySearch.from_documents(
    documents=pages,
    embedding=embeddings,
)

# Retriever over the store with default search settings.
# TODO: choose how many top-k chunks to retrieve (e.g. via `search_kwargs={"k": ...}`).
retriever = vectorstore.as_retriever()

In [42]:
from operator import itemgetter

def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's ``{context}`` slot is filled with the
    ``repr`` of a list of ``Document`` objects (metadata dicts, escaped
    newlines), which is noise the model has to read around.

    Args:
        docs: iterable of objects exposing a ``page_content`` string, as
            returned by the retriever.

    Returns:
        The page contents separated by blank lines; an empty string when
        nothing was retrieved.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG chain: takes {"question": str} and returns the model's answer as a string.
chain = (
    {
        # Look up the chunks most relevant to the question, then flatten
        # them to plain text for the prompt.
        "context": itemgetter("question") | retriever | _format_docs,
        # Forward the question unchanged.
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)

In [45]:
# Questions to run through the RAG chain, in order.
questions = [
    # "Which schools did this person attend and for how long?",
    "What type of document is this?",
    "What is the name of the person?",
    "What is the starting salary?",
    "Is there any evolution planned fo compensation?",
    "Is there any evolution planned fo salary?",
    "What else is important to know from this contract?",
]

# Ask each question and print the question/answer pair followed by a blank line.
for question in questions:
    print(f"Question: {question}")
    answer = chain.invoke({"question": question})
    print(f"Answer: {answer}")
    print()

Question: What type of document is this?
Answer: This document is a contract of employment (contrat de travail) for an indefinite duration (durée indéterminée).

Question: What is the name of the person?
Answer: The name of the person is Eliott LEGENDRE.

Question: What is the starting salary?
Answer: The starting salary is a fixed base monthly gross remuneration of 5,000.00 euros.

Question: Is there any evolution planned fo compensation?
Answer: I don't know.

Question: Is there any evolution planned fo salary?
Answer: Yes, there is an evolution planned for the salary. After a period of 6 months, the Salarié will receive a fixed base salary of 5,166.66 euros per month, up from the initial salary of 5,000.00 euros.

Question: What else is important to know from this contract?
Answer: I don't know.



In [46]:
# TODO: explore LangChain's batching and streaming options (e.g. `chain.batch`, `chain.stream`).