## Set up environment variables

In [None]:
%pip install python-dotenv

In [None]:
import os
from dotenv import load_dotenv

# Load local settings/credentials from the dotenv file into the process env.
load_dotenv("./.env.local")

# Report only whether the key is present: printing its value would leak the
# credential into the notebook output (and into anything the notebook is
# committed or shared to).
print(f"AWS_ACCESS_KEY_ID set: {os.getenv('AWS_ACCESS_KEY_ID') is not None}")

# Switch on the LANGCHAIN_TRACING_V2 flag for the rest of the session.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'


## Clean ChromaDB

In [None]:
from langchain_community.vectorstores import Chroma

# Open the LangChain Chroma store located in ./chroma_db and drop its
# collection so that later indexing starts from an empty store.
vectorstore = Chroma(persist_directory="./chroma_db")
vectorstore.delete_collection()
# NOTE(review): persist() is presumably deprecated / a no-op with recent
# Chroma versions that persist automatically — confirm before relying on it.
vectorstore.persist()

In [None]:
from chromadb import PersistentClient

# Reset the raw ChromaDB "documents" collection stored in ./chroma_db.
persist_directory = "./chroma_db"
client = PersistentClient(path=persist_directory)

# Fetch-or-create first so the delete below never targets a missing collection.
collection = client.get_or_create_collection(name="documents")
client.delete_collection(name=collection.name)
print("Collection 'documents' supprimée.")

Collection 'documents' supprimée.


In [None]:
import subprocess

def pull_model(model="mistral:7b"):
    """Download an Ollama model through the ``ollama pull`` command.

    Args:
        model: Ollama model tag to pull. Defaults to ``"mistral:7b"``, the
            tag the RAG functions of this notebook chat with. It used to be
            read from a global ``model`` that no visible cell defines.

    Raises:
        subprocess.CalledProcessError: If ``ollama pull`` exits with a
            non-zero status (``check=True``).
        FileNotFoundError: If the ``ollama`` executable cannot be found.
    """
    print(f"--- Pulling {model} model ---")
    subprocess.run(["ollama", "pull", model], check=True)
    print(f"Model {model} pulled successfully.")

def run_model(model="mistral:7b"):
    """Start an Ollama model through the ``ollama run`` command.

    Args:
        model: Ollama model tag to run. Defaults to ``"mistral:7b"``, the
            tag the RAG functions of this notebook chat with. It used to be
            read from a global ``model`` that no visible cell defines.

    Raises:
        subprocess.CalledProcessError: If ``ollama run`` exits with a
            non-zero status (``check=True``).
        FileNotFoundError: If the ``ollama`` executable cannot be found.
    """
    print(f"--- Running {model} model ---")
    # NOTE(review): subprocess.run waits for the command to finish, so the
    # message below is only printed once `ollama run` has exited — confirm
    # this is the intended behaviour for a command that may be interactive.
    subprocess.run(["ollama", "run", model], check=True)
    print(f"Model {model} is now running.")

# Download the model as soon as this cell is executed.
pull_model()

In [None]:
import os
import ollama
import chromadb
from sentence_transformers import SentenceTransformer

#### INDEXING ####

# Load the source document
print("--- Step 1 : Upload document ---")
# Read the whole text file into memory as a single UTF-8 decoded string.
with open('./assets/ressources/base.txt', 'r', encoding='utf-8') as fichier:
    doc_content = fichier.read()

# Split the document into overlapping chunks
print("--- Step 2 : Chunking document ---")
def chunk_text(text, chunk_size=1000, chunk_overlap=200):
    """Split ``text`` into fixed-size character chunks that overlap.

    Consecutive chunks start ``chunk_size - chunk_overlap`` characters apart,
    so each chunk begins with the last ``chunk_overlap`` characters of the
    previous one. Splitting stops as soon as a chunk reaches the end of the
    text, so no trailing chunk that is entirely contained in the previous
    chunk is emitted.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        chunk_overlap: Number of characters shared by two consecutive chunks;
            must satisfy ``0 <= chunk_overlap < chunk_size``.

    Returns:
        The chunks in document order; an empty list when ``text`` is empty.

    Raises:
        ValueError: If ``chunk_size`` or ``chunk_overlap`` is out of range.
            An overlap that is not smaller than the chunk size would
            otherwise make the window stop advancing and loop forever.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= chunk_overlap < chunk_size:
        raise ValueError(
            "chunk_overlap must satisfy 0 <= chunk_overlap < chunk_size"
        )

    step = chunk_size - chunk_overlap
    chunks = []
    for start in range(0, len(text), step):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            # The text is fully covered; any further window would only
            # repeat the tail of the chunk just emitted.
            break
    return chunks

# Chunk the loaded document with chunk_text's default size and overlap.
splits = chunk_text(doc_content)

print(f"✅ Document découpé en {len(splits)} chunks.")

# Embeddings model
print("--- Step 3 : Upload embeddings model and Creating embeddings ---")
# Load the sentence-transformers MiniLM model, encode every chunk in one
# call, and convert the result with .tolist() before it is stored.
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = embedding_model.encode(splits).tolist()

print(f"✅ {len(embeddings)} embeddings générés.")


# Create (or reopen) the persistent ChromaDB collection
print("--- Step 4 : Create ChromaDB database ---")
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(name="documents")

# Add the chunks and their embeddings to the database.
# upsert (rather than add) because the ids are just the chunk positions:
# when this cell is re-run on a modified document, records stored under the
# same ids must be replaced instead of keeping the stale content.
print("--- Step 5 : Storing embeddings in ChromaDB ---")
collection.upsert(
    ids=[str(i) for i in range(len(splits))],
    documents=splits,
    embeddings=embeddings,
)
print("✅ Documents enregistrés dans ChromaDB.")


def retrieve_documents(query, top_k=3):
    """Fetch the stored chunks that best match ``query``.

    Args:
        query: The question text to search with.
        top_k: Number of results requested from the collection.

    Returns:
        The ``documents`` entry of the query result for this single query.
    """
    # encode() works on a batch, so the lone query is wrapped in a list; the
    # resulting one-row batch is passed to the collection as is.
    query_batch = embedding_model.encode([query]).tolist()
    matches = collection.query(query_embeddings=query_batch, n_results=top_k)

    # Results are grouped per query embedding; only one was sent.
    return matches["documents"][0]

def generate_response(context, question):
    """Ask the ``mistral:7b`` Ollama model to answer ``question`` from ``context``.

    Args:
        context: Text placed in the ``Context:`` section of the prompt.
        question: Text placed in the ``Question:`` section of the prompt.

    Returns:
        The ``content`` field of the message returned by ``ollama.chat``.
    """
    user_message = {
        "role": "user",
        "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:",
    }
    # Non-streaming call: the complete reply comes back in one response.
    reply = ollama.chat(model="mistral:7b", messages=[user_message], stream=False)
    return reply["message"]["content"]


def rag_pipeline(question):
    """Answer ``question`` by retrieving chunks and prompting the LLM with them.

    Args:
        question: The user question.

    Returns:
        The answer produced by ``generate_response``.
    """
    # Retrieval: the matching chunks become one blank-line separated context.
    print("\n--- Récupération des documents ---")
    chunks = retrieve_documents(question)
    joined_context = "\n\n".join(chunks)
    print(f"Documents récupérés : {chunks}\n")

    # Generation
    print("\n--- Génération de réponse ---")
    return generate_response(joined_context, question)


# Run the pipeline once on a sample question and show the answer.
question = "Can you list the team members of M-Motors?"
response = rag_pipeline(question)

print("\n--- Réponse finale ---")
print(response)


In [None]:
from ollama import chat
from langchain.llms import Ollama
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

#### INDEXING ####

# Load the source document
print("--- Step 1 : Upload document ---")
# Read the whole text file into memory as a single UTF-8 decoded string.
with open('./assets/ressources/base.txt', 'r', encoding='utf-8') as fichier:
    doc_content = fichier.read()

# Wrap the raw text in a LangChain Document so the splitter can consume it
print("--- Step 2 : Convert document in Langchain document format ---")
docs = [Document(page_content=doc_content)]

# Split - chunking: 1000-character chunks with a 200-character overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embeddings model: MiniLM on CPU, with normalized embeddings requested
print("--- Step 3 : Upload embeddings model ---")
model_name = "sentence-transformers/all-MiniLM-L6-v2" 
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)
print(f"Upload finish : {model_name}")

# Create the ChromaDB store from the chunks, persisted under ./chroma_db
# NOTE(review): re-running this cell presumably adds the same chunks again
# unless the store was cleaned first — confirm.
print("--- Step 4 : Create ChromaDB database ---")
vectorstore = Chroma.from_documents(splits, embedding=embeddings, persist_directory="./chroma_db")

# Retriever over the vector store, with its default search settings
print("--- Step 5 : Create retriever ---")
retriever = vectorstore.as_retriever()

# Persist data in ChromaDB
# NOTE(review): persist() is presumably deprecated / a no-op with recent
# Chroma versions that persist automatically — confirm.
print("--- Step 6 : Persiste data in ChromaDB ---")
vectorstore.persist()

# Dump every intermediate object so each indexing step can be inspected.
print("\n--- Verifications ---")
print(f"Document content :\n{doc_content}\n")
print(f"Object document create :\n{docs}\n")
print(f"Number of chunks create : {len(splits)}")

# Chunks are numbered from 1 in the output.
for chunk_number, chunk in enumerate(splits, start=1):
    print(f"\nChunk {chunk_number}:")
    print(chunk)
print("\n")

print(f"Object Embeddings : \n{embeddings}\n")
print(f"Object VectorStore : \n{vectorstore}\n")
print(f"Object Retriever : \n{retriever}\n")


# Define the prompt template: retrieved context first, then the question.
print("--- Step 1: Defining prompt model ---")
prompt_template = PromptTemplate(
    template="Context:\n{context}\n\nQuestion: {question}\nAnswer:",
    input_variables=["context", "question"]
)
print(f"Prompt model : \n{prompt_template}\n")

# Load the LLM model.
# The model tag is named explicitly here: no visible cell assigns the global
# `model` this code used to read, so the cell stopped with a NameError.
# "mistral:7b" is the tag the chat calls of this notebook use.
print("\n--- Step 2: Loading LLM model ---")
llm_model_name = "mistral:7b"
llm = Ollama(model=llm_model_name)
print(f"{llm_model_name} model successfully loaded.")

# Format docs
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        One string holding all page contents; empty when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# RAG chaine : Retrieval + Generation
def rag_chain(question, retriever, model="mistral:7b"):
    """Answer ``question`` with retrieval followed by generation.

    The documents returned by ``retriever`` are merged into a context, the
    module-level ``prompt_template`` is filled with that context and the
    question, and the prompt is sent to an Ollama model. Every intermediate
    value is printed for inspection.

    Args:
        question: The user question.
        retriever: Object whose ``invoke(question)`` returns the documents to
            use as context.
        model: Ollama model tag to chat with. Defaults to ``"mistral:7b"``,
            the value that was previously hard-coded.

    Returns:
        The text content of the model's reply.
    """
    print("\n--- Executing RAG Pipeline ---")

    # Retrieval
    docs = retriever.invoke(question)
    context = format_docs(docs)

    print(f"Retrieved {len(docs)} documents.")
    print("Retrieved documents :")
    for position, doc in enumerate(docs, start=1):
        # "\n" here: the former "\D" was an invalid escape sequence that
        # printed a literal backslash instead of starting a new line.
        print(f"\nDoc {position}:")
        print(doc)
    print("\n")

    print(f"Context : \n{context}\n")

    # Build the prompt
    full_prompt = prompt_template.format(question=question, context=context)
    print(f"Prompt : \n{full_prompt}\n")

    # Generation: non-streaming call, the whole reply comes back at once.
    response = chat(
        model=model,
        messages=[{'role': 'user', 'content': full_prompt}],
        stream=False
    )

    return StrOutputParser().parse(response["message"]["content"])

# Run the RAG chain once on a sample question and show the answer.
question = "Can you list the team members of M-Motors?"
print("\n--- Testing RAG Query ---")
response = rag_chain(question, retriever)

print("\n--- Final Response ---")
print(response)