In [12]:
from dotenv import load_dotenv

# Load API KEY information
load_dotenv(override=True)

from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate


In [13]:
#Parameters and ChatMistral object creation


# Create the ChatMistralAI object
llm = ChatMistralAI(
    temperature=1,  # Low temperature for more focused responses
    model="mistral-small-latest", 
)

#If we want to understand pictures, we should use this model : "pixtral-12b-2409"

# Loading data

In [14]:
# Step 1: Load Documents
loader = PyMuPDFLoader("Data/Atlas.pdf")
docs = loader.load()
print(f"Number of pages in the document: {len(docs)}")



Number of pages in the document: 308


In [15]:
# Step 2: Split Documents
custom_separators = [
    "\n \n",        # paragraphs
    "\n",         # lines
    ". ",         # sentence-ish boundary
    "; ",         # clause boundary
    ", ",         # phrase boundary
    " ",          # words
    ""            # fallback: characters
]
text_splitter = RecursiveCharacterTextSplitter(separators = custom_separators, chunk_size=500, chunk_overlap=50)      #Paramètre à modifier par la suite pour de meilleur performance
split_documents = text_splitter.split_documents(docs)
print(f"Number of split chunks: {len(split_documents)}")

Number of split chunks: 696


In [16]:
# Step 3: Generate Embeddings
embeddings = MistralAIEmbeddings(model="mistral-embed")

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
# Step 4: Create and Save the Database
# Create a vector store
vectorstore = FAISS.from_documents(documents=split_documents, embedding=embeddings)
print("Vector store created successfully!")

Vector store created successfully!


In [18]:
# Step 5: Create Retriever
# Search and retrieve information contained in the documents
retriever = vectorstore.as_retriever()

In [19]:
# Step 6: Create Prompt


prompt = PromptTemplate.from_template(
    """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 

#Context: 
{context}

#Question:
{question}

#Answer:"""
)

In [20]:
# Step 7: Setup LLM
llm = ChatMistralAI(model="mistral-small-latest", temperature=0)

In [21]:
# Step 8: Create Chain
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [22]:
#Exemple

# Run Chain
# Input a query about the document and print the response
question = "Qui a gagné le ballon d'or en 2009"
response = chain.invoke(question)
print(response)
print("-----")
question = "Quel pays a légalisé l'avortement en premier dans le monde ?"
response = chain.invoke(question)
print(response)


Je ne sais pas.
-----
D'après le contexte fourni, l'Islande est le premier pays à avoir légalisé l'avortement, dès 1934.


# Prompt engineering

Here we setup a basic template for our prompt engineering.
In our case, the LLM will be a specialist in geography, in secondary school.

The student will interact with the LLM in two different ways :
    -He can ask any type of question about any topic in the course.
    -He can ask to have his knowledge tested (he will then receive a score on his answer and a feedback)

In [23]:
# Persona prompt, specific for when the student has a question about a specific part of the course

persona_template = (
    "Act as a supportive but rigorous geography teacher.\n" \
    "Your tone should be constructive, specific, and pedagogical.\n" \
        """Tu es un professeur de géographie avec 20 ans d'expérience, et ton but est de répondre aux questions d'un élève en difficulté.
            Tu es encourageant, mais tout de fois rigoureux quant à la précision de tes réponses.

    CONTRAINTES:
    1. Utilise UNIQUEMENT le contexte fourni qui vient d .
    2. Cite chaque fait avec la page sous forme [Page X].
    3. Ne fabrique rien.

    Format attendu:
    Réponse concise en français.
    CITES: Page: X,Y,... (liste unique de pages utilisées)

    Question: {question}

    Contexte:
    {context}
    """
)

scores = """
- Pertinence : Est-ce que l'étudiant répond bien à la question posé et non pas à autre chose  /30;
- Faits non correctes: Est-ce qu'il y'a des faits qui ne sont pas correctes dans la réponse  /30;
- Faits manquants : Est-ce que tous les faits attendus sont bien présent dans la réponse  /30;
- Stucture : Est-ce que la réponse est bien stucturée /10;
"""

test_template = (    f"Act as a supportive but rigorous history teacher.\n"
    "Your goal is to generate a question based on the course."             
    "The student gives you an answer and your goal is to evaluate it.\n"
    "Assignment requirement: {task_description}\n"
    "Grading rubric: {grading_rubric}\n"
    "Return ONLY a JSON object with these keys:\n"
    "- Section: the general theme of the question\n"
    "- Question: the question you asked the student\n"
    "- Answer: The answer the student gave\n"
    "- grade: number (0-100), must equal sum of all scores\n"
    "- scores: f{scores} \n"
    "- advice: array of short, actionable improvement suggestions\n"
    "Constraints: grade MUST equal Pertinence+Faits non correctes + Faits manquants + Structure. No extra text outside the JSON.\n\n"
    )

In [24]:
from langchain_mistralai import ChatMistralAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableBranch, RunnableLambda

# -------------------------
# LLM SETUP
# -------------------------

# Standard text model
llm = ChatMistralAI(
    temperature=1,
    model="mistral-small-latest",
)

# If you ever need vision:
# llm = ChatMistralAI(
#     temperature=1,
#     model="pixtral-12b-2409"
# )

parser = StrOutputParser()

# -------------------------
# PROMPTS
# -------------------------

persona_template = PromptTemplate(
    input_variables=["question", "context"],
    template=(
        "Act as a supportive but rigorous geography teacher.\n"
        "Ton but est de répondre aux questions d'un élève en difficulté.\n"
        "Tu es encourageant, mais rigoureux dans la précision.\n\n"
        "CONTRAINTES:\n"
        "1. Utilise UNIQUEMENT le contexte fourni.\n"
        "2. Cite chaque fait avec la page sous forme [Page X].\n"
        "3. Ne fabrique rien.\n\n"
        "Format attendu:\n"
        "Réponse concise en français.\n"
        "CITES: Page: X,Y,...\n\n"
        "Question: {question}\n\n"
        "Contexte:\n"
        "{context}"
    )
)

scores = """
- Pertinence : ... /30;
- Faits non correctes : ... /30;
- Faits manquants : ... /30;
- Stucture : ... /10;
"""

test_template = PromptTemplate(
    input_variables=["task_description", "grading_rubric", "question", "answer", "scores_text"],
    template=(
        "Act as a supportive but rigorous history teacher.\n"
        "Your goal is to evaluate the student's answer.\n"
        "Assignment requirement: {task_description}\n"
        "Grading rubric: {grading_rubric}\n"
        "Return ONLY a JSON object with these keys:\n"
        "- Section\n"
        "- Question\n"
        "- Answer\n"
        "- grade\n"
        "- scores: {scores_text}\n"
        "- advice\n"
        "Constraints: grade MUST equal Pertinence + Faits non correctes + Faits manquants + Structure.\n"
    )
)



# -------------------------
# CHAINS
# -------------------------

persona_chain = (
    {
        "question": lambda inp: inp["question"],
        "context": lambda inp: retriever.get_relevant_documents(inp["question"]),
    }
    | RunnableLambda(
        lambda d: {
            "question": d["question"],
            "context": "\n\n".join([doc.page_content for doc in d["context"]])
        }
    )
    | persona_template
    | llm
    | StrOutputParser()
)

test_chain = (
    test_template
    | llm
    | StrOutputParser()
)

# -------------------------
# ROUTING LOGIC
# -------------------------

def route_to_persona(inputs):
    # If the user does NOT provide "answer", it's a question → persona chain
    return "answer" not in inputs

conditional_chain = RunnableBranch(
    (lambda inputs: route_to_persona(inputs), persona_chain),
    (lambda inputs: not route_to_persona(inputs), test_chain),
    test_chain  # default
)


# -------------------------
# MAIN ENTRYPOINT
# -------------------------

def respond(inputs):
    if "scores_text" not in inputs:
        inputs["scores_text"] = scores
    return conditional_chain.invoke(inputs)

# -------------------------
# EXAMPLES
# -------------------------

# Persona mode
result1 = respond({
    "question": "Quelle est la différence entre climat continental et climat océanique ?"
})
print(result1)


# Test mode
result2 = respond({
    "task_description": "Définir la Révolution industrielle.",
    "grading_rubric": "Les 4 critères.",
    "question": "Explique la Révolution industrielle.",
    "answer": "Elle commence en Angleterre grâce à la machine à vapeur."
})
print(result2)



AttributeError: 'VectorStoreRetriever' object has no attribute 'get_relevant_documents'

In [None]:
def interactive_test():
    print("=== Conditional Chain Tester ===\n")
    print("Choose mode:")
    print("1. Ask a question about the course (auto-retrieval)")
    print("2. Submit a student's answer for grading\n")

    mode = input("Your choice (1/2): ").strip()

    if mode == "1":
        question = input("\nEnter your question: ")
        inputs = {"question": question}

        print("\n--- Response ---")
        print(respond(inputs))
        print("----------------\n")

    elif mode == "2":
        task_description = input("\nTask description: ")
        grading_rubric = input("Grading rubric: ")
        question = input("The question asked to the student: ")
        answer = input("Student answer: ")

        inputs = {
            "task_description": task_description,
            "grading_rubric": grading_rubric,
            "question": question,
            "answer": answer,
            "scores_text": scores,
        }

        print("\n--- Correction ---")
        print(respond(inputs))
        print("------------------\n")

    else:
        print("Invalid option.\n")


# Launch interactive mode
interactive_test()


=== Conditional Chain Tester ===

Choose mode:
1. Ask a course question (persona prompt)
2. Submit an answer for grading (test prompt)


--- Response ---
Voici une réponse concise et rigoureuse basée uniquement sur le contexte fourni :

"Le contexte ne fournit pas d'informations précises sur le début de la légalisation de l'avortement. Pour une réponse complète, il faudrait des sources supplémentaires comme des dates, des pays ou des lois spécifiques."

CITES : Aucune page mentionnée dans le contexte fourni.

*Je reste à votre disposition pour approfondir le sujet si vous fournissez des détails supplémentaires (livre, pages, etc.).*
----------------



In [30]:
# ============================================================
# FULL WORKING VERSION — RAG + PERSONA CHAIN + GRADING CHAIN
# ============================================================

from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableBranch


# ============================================================
# 1. LLM CONFIG
# ============================================================

llm = ChatMistralAI(
    model="mistral-small-latest",   # Vision model would be: pixtral-12b-2409
    temperature=1
)

# OPTIONAL: if you want vision, replace above with:
# llm = ChatMistralAI(model="pixtral-12b-2409", temperature=1)



# ============================================================
# 2. LOAD DOCUMENTS (PDF)
# ============================================================

loader = PyMuPDFLoader("Data/Atlas.pdf")
docs = loader.load()



# ============================================================
# 3. SPLIT DOCUMENTS
# ============================================================

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    separators=[
        "\nCHAPITRE", "\n##", "\n###", "\nSection", "\n\n", "\n", ".", " ", ""
    ]
)

split_documents = text_splitter.split_documents(docs)



# ============================================================
# 4. EMBEDDINGS + VECTORSTORE + RETRIEVER
# ============================================================

embeddings = MistralAIEmbeddings(model="mistral-embed")

vectorstore = FAISS.from_documents(split_documents, embeddings)

retriever = vectorstore.as_retriever()



# ============================================================
# 5. PERSONA CHAIN (Teacher mode + RAG)
# ============================================================

persona_template = ChatPromptTemplate.from_messages([
    ("system",
     "Tu es un professeur bienveillant. Explique simplement mais sans infantiliser. "
     "Appuie-toi uniquement sur le contexte fourni."),
    ("human",
     "Question: {question}\n\n"
     "Contexte issu des documents:\n{context}")
])

persona_chain = (
    persona_template
    | llm
)



# ============================================================
# 6. GRADING CHAIN (Automatic evaluation)
# ============================================================

test_template = ChatPromptTemplate.from_messages([
    ("system",
     "Tu es un correcteur automatique. Évalue la réponse de l'élève selon les critères fournis."),
    ("human",
     "Instruction donnée à l'élève : {task_description}\n"
     "Barème : {grading_rubric}\n"
     "Question : {question}\n"
     "Réponse de l'élève : {answer}\n\n"
     "Donne une note sur 20 + justification.")
])

test_chain = (
    test_template
    | llm
)



# ============================================================
# 7. RAG WRAPPER — run retrieval only if needed
# ============================================================

def rag_logic(inputs):
    query = inputs.get("question", "")
    if not query:
        inputs["context"] = ""
        return inputs

    docs = retriever.invoke(query)
    ctx = "\n\n".join(doc.page_content for doc in docs)

    inputs["context"] = ctx
    return inputs


rag_chain = RunnableLambda(rag_logic)



# ============================================================
# 8. ROUTING — decide whether to use Persona or Test chain
# ============================================================

def route(inputs):
    """
    If 'answer' is provided → grading mode.
    Otherwise → persona/teacher mode.
    """
    return "answer" not in inputs


conditional_chain = RunnableBranch(
    # condition → persona mode
    (
        lambda inputs: route(inputs),
        rag_chain | persona_chain
    ),
    # fallback → grading mode
    test_template | llm
)



# ============================================================
# 9. MAIN ENTRYPOINT
# ============================================================

def respond(inputs):
    return conditional_chain.invoke(inputs)



# ============================================================
# 10. INTERACTIVE TESTING WITH input()
# ============================================================

if __name__ == "__main__":

    print("=== Tutor System Running ===")
    print("Ask a question to the teacher, or type 'test' to grade an answer.\n")

    mode = input("Mode (teach/test): ").strip().lower()

    if mode == "teach":
        question = input("Your question: ")
        result = respond({"question": question})
        print("\n--- Teacher answer ---")
        print(result)

    elif mode == "test":
        task = input("Description de la consigne : ")
        rubric = input("Barème : ")
        question = input("Question posée : ")
        answer = input("Réponse de l'élève : ")

        result = respond({
            "task_description": task,
            "grading_rubric": rubric,
            "question": question,
            "answer": answer
        })

        print("\n--- Grading result ---")
        print(result)

    else:
        print("Unknown mode.")



=== Tutor System Running ===
Ask a question to the teacher, or type 'test' to grade an answer.


--- Grading result ---
content="**Note : 20/20**\n\n**Justification :**\nLa réponse de l'élève est correcte et précise. L'URSS est effectivement le premier pays à avoir légalisé l'avortement en 1920. La réponse est concise et directe, répondant parfaitement à la question posée. Il n'y a pas d'erreurs factuelles ou grammaticales à relever. La réponse mérite donc la note maximale." additional_kwargs={} response_metadata={'token_usage': {'prompt_tokens': 87, 'total_tokens': 176, 'completion_tokens': 89}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'stop', 'model_provider': 'mistralai'} id='lc_run--caf55dd5-76fe-4609-8e1d-6fb7cfb417b2-0' usage_metadata={'input_tokens': 87, 'output_tokens': 89, 'total_tokens': 176}
