In [124]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.schema import HumanMessage
# Load variables from a local .env file into the process environment so that
# values such as OPENAI_API_KEY can be read with os.getenv further down.
load_dotenv()

True

In [125]:
# Chat model that answers the questions; the API key comes from the environment.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Location of the sentence-embedding model (a relative path, judging by the "./").
model_path = "./sentence-camembert-large"

# Embedding settings: run on CPU and leave the vectors un-normalized.
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=False)

# Embedding model shared by index construction and index loading below.
embedding_model = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_path,
)

In [126]:
def split_paragraphs(rawText, chunk_size=300, chunk_overlap=100):
    """Split raw text into overlapping chunks with ``CharacterTextSplitter``.

    Args:
        rawText: The full text to split.
        chunk_size: Chunk size handed to the splitter. Defaults to 300, the
            value that used to be hard-coded here.
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter. Defaults to 100, the value that used to be hard-coded.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``rawText``
        (the chunks, which the caller collects into a list).
    """
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(rawText)

def load_txt_file(txt_path, encoding=None):
    """Load a plain-text file and return its content as a flat list of chunks.

    The file is read with ``TextLoader`` and the ``page_content`` of every
    loaded document is split with ``split_paragraphs``.

    Args:
        txt_path: Path of the text file to load.
        encoding: Optional text encoding forwarded to ``TextLoader``. ``None``
            (the default) keeps the loader's own default, i.e. the previous
            behaviour; pass e.g. ``"utf-8"`` explicitly for files containing
            accented characters.

    Returns:
        A single list holding the chunks of all loaded documents, in order.
    """
    loader = TextLoader(txt_path, encoding=encoding)
    text_chunks = []
    for doc in loader.load():
        text_chunks.extend(split_paragraphs(doc.page_content))
    return text_chunks

In [None]:
# Text file to index (relative path, resolved against the working directory).
txt_path = "base_competence.txt"
base_chunks = load_txt_file(txt_path)

# Fail fast: with no chunks there is nothing to embed or index downstream.
assert base_chunks, f"No text chunks were produced from {txt_path!r}"

In [128]:
# Embed every chunk and build a FAISS vector store from the result.
store = FAISS.from_texts(
    texts=base_chunks,
    embedding=embedding_model,
)

# Persist the store to the "faiss_index" folder so it can be reloaded later.
store.save_local(folder_path="faiss_index")

In [None]:
# Reload the persisted index.
# NOTE(security): allow_dangerous_deserialization=True permits unpickling the
# stored data. That is acceptable here only because "faiss_index" is written
# by this notebook itself; never enable it for an index from an untrusted source.
db = FAISS.load_local("faiss_index", embedding_model, allow_dangerous_deserialization=True)

query = "INRAE"
k = 4

# `k` is the parameter that sets how many results come back; `fetch_k` only
# sizes the candidate pool fetched before filtering, so it must not stand in for k.
docs = db.similarity_search_with_score(query, k=k)

# LangChain's FAISS store returns a distance as the score by default (L2:
# lower means more similar), so sort ascending to list the best match first.
sorted_docs = sorted(docs, key=lambda doc_and_score: doc_and_score[1])

# Iterate over what was actually returned: there may be fewer than k hits,
# in which case indexing with range(k) would raise IndexError.
for doc_and_score in sorted_docs:
    print(doc_and_score)

In [159]:
def custom_prompt(retriever, query):
    """Build the retrieval-augmented prompt for a recruiter question.

    Asks ``retriever.similarity_search`` for the 4 entries closest to
    ``query``, joins their ``page_content`` with newlines, and inserts that
    context together with the question into a fixed French instruction text.

    Args:
        retriever: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        query: The question to answer.

    Returns:
        The complete prompt as a single string.
    """
    # NOTE(review): the instruction text contains spelling mistakes
    # ("consis", "convainquand", "repondre"); it is kept verbatim here because
    # it is sent to the model as-is.
    template = (
        "Tu es un assistant et tu dois repondre aux questions qui te sont posées par un recruteur sur moi.\n"
        "Tu dois être consis et convainquand dans tes reponses. \n"
        "Voici le contexte dont tu dois te servir : \n"
        "\nContexte :\n{context}\n"
        "\nQuestion : \n{question}"
    )

    matches = retriever.similarity_search(query, k=4)
    context_parts = []
    for match in matches:
        context_parts.append(match.page_content)
    source_knowledge = "\n".join(context_parts)

    return template.format(context=source_knowledge, question=query)

In [None]:

# Question to ask; the text is passed to the model exactly as written.
query = "cites mois les langages que HTI utilise"

# Wrap the retrieval-augmented prompt in a single human message.
prompt = [
    HumanMessage(
        content=custom_prompt(retriever=db, query=query),
    ),
]

# Send the message to the chat model and show the text of its reply.
res = llm.invoke(prompt)
print(res.content)