In [1]:
from dotenv import load_dotenv

# Load API KEY information
load_dotenv(override=True)

from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate


In [2]:
# Chat model configuration.
#
# NOTE: temperature=1 is not a low sampling temperature. `llm` is re-created
# with temperature=0 in "Step 7: Setup LLM" before the first chain is built,
# so this instance is not the one used for answering.
llm = ChatMistralAI(model="mistral-small-latest", temperature=1)

# For image understanding the vision model "pixtral-12b-2409" would be used instead.

# Loading data

In [3]:
# Step 1: Load Documents
# The loader result is a list; its length is reported below as the page count.
# The path is relative to the notebook's working directory.
loader = PyMuPDFLoader("Data/Atlas.pdf")
docs = loader.load()
print(f"Number of pages in the document: {len(docs)}")



Number of pages in the document: 308


In [4]:
# Step 2: Split Documents
# Separators are listed from the coarsest boundary to the finest one.
custom_separators = [
    "\n \n",  # paragraphs
    "\n",     # lines
    ". ",     # sentence-ish boundary
    "; ",     # clause boundary
    ", ",     # phrase boundary
    " ",      # words
    "",       # fallback: characters
]

# chunk_size / chunk_overlap are provisional values, to be tuned later for
# better performance.
text_splitter = RecursiveCharacterTextSplitter(
    separators=custom_separators,
    chunk_size=500,
    chunk_overlap=50,
)
split_documents = text_splitter.split_documents(docs)
print(f"Number of split chunks: {len(split_documents)}")

Number of split chunks: 696


In [5]:
# Step 3: Generate Embeddings
# Embedding client for the "mistral-embed" model; it is handed to the FAISS
# vector store in the next step.
embeddings = MistralAIEmbeddings(model="mistral-embed")

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Step 4: Create and Save the Database
# Create a vector store
# NOTE: the store is only built in memory here; nothing in this cell writes it to disk.
vectorstore = FAISS.from_documents(documents=split_documents, embedding=embeddings)
print("Vector store created successfully!")

Vector store created successfully!


In [7]:
# Step 5: Create Retriever
# Search and retrieve information contained in the documents
# No search_kwargs are passed, so the retriever runs with its default settings.
retriever = vectorstore.as_retriever()

In [8]:
# Step 6: Create Prompt
# Question-answering prompt with two placeholders: {context} (retrieved text)
# and {question} (the user's query). The model is told to admit when it does
# not know the answer.
prompt = PromptTemplate.from_template(
    """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 

#Context: 
{context}

#Question:
{question}

#Answer:"""
)

In [9]:
# Step 7: Setup LLM
# Rebinds `llm` (first created in the parameters cell with temperature=1),
# this time with temperature=0; the chain below uses this instance.
llm = ChatMistralAI(model="mistral-small-latest", temperature=0)

In [10]:
# Step 8: Create Chain
def _format_docs(retrieved_docs):
    """Join the text of the retrieved documents into a single context string."""
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# The retriever output is flattened to plain text before it reaches the prompt.
# Piping the retriever straight into {context} would insert the repr of a list
# of Document objects (metadata included) rather than the passages themselves.
# The input string is forwarded unchanged as {question}.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [11]:
# Example
#
# Run the chain on two sample questions about the document and print each
# answer, with a separator line between them.
example_questions = [
    "Qui a gagn√© le ballon d'or en 2009",
    "Quel pays a l√©galis√© l'avortement en premier dans le monde ?",
]

for position, question in enumerate(example_questions):
    if position > 0:
        print("-----")
    response = chain.invoke(question)
    print(response)


Je ne sais pas.
-----
Selon le contexte fourni, l'Islande est le premier pays √† avoir l√©galis√© l'avortement en 1934.


# Prompt engineering

Here we set up a basic template for our prompt engineering.
In our case, the LLM acts as a geography specialist at secondary-school level.

The student can interact with the LLM in two different ways:
- He can ask any type of question about any topic in the course.
- He can ask to have his knowledge tested (he then receives a score for his answer, together with feedback).

In [12]:
# Persona prompt, used when the student has a question about a specific part
# of the course. Placeholders: {question} and {context}.
persona_template = (
    "Act as a supportive but rigorous geography teacher.\n"
    "Your tone should be constructive, specific, and pedagogical.\n"
        """Tu es un professeur de g√©ographie avec 20 ans d'exp√©rience, et ton but est de r√©pondre aux questions d'un √©l√®ve en difficult√©.
            Tu es encourageant, mais tout de fois rigoureux quant √† la pr√©cision de tes r√©ponses.

    CONTRAINTES:
    1. Utilise UNIQUEMENT le contexte fourni.
    2. Cite chaque fait avec la page sous forme [Page X].
    3. Ne fabrique rien.

    Format attendu:
    R√©ponse concise en fran√ßais.
    CITES: Page: X,Y,... (liste unique de pages utilis√©es)

    Question: {question}

    Contexte:
    {context}
    """
)

# Per-criterion rubric (30 + 30 + 30 + 10 = 100 points). The criterion names
# must match the ones listed in the "Constraints" line of `test_template`.
scores = """
- Pertinence : Est-ce que l'√©tudiant r√©pond bien √† la question pos√© et non pas √† autre chose  /30;
- Faits non correctes: Est-ce qu'il y'a des faits qui ne sont pas correctes dans la r√©ponse  /30;
- Faits manquants : Est-ce que tous les faits attendus sont bien pr√©sent dans la r√©ponse  /30;
- Structure : Est-ce que la r√©ponse est bien structur√©e /10;
"""

# Grading prompt for test mode. Placeholders left for later formatting:
# {task_description} and {grading_rubric}.
# Only the "scores" line is an f-string: the rubric is inserted right here,
# while the other literals keep their braces for the later .format() call.
# `scores` contains no braces, so it does not introduce new placeholders.
test_template = (
    "Act as a supportive but rigorous history teacher.\n"
    "Your goal is to generate a question based on the course.\n"
    "The student gives you an answer and your goal is to evaluate it.\n"
    "Assignment requirement: {task_description}\n"
    "Grading rubric: {grading_rubric}\n"
    "Return ONLY a JSON object with these keys:\n"
    "- Section: the general theme of the question\n"
    "- Question: the question you asked the student\n"
    "- Answer: The answer the student gave\n"
    "- grade: number (0-100), must equal sum of all scores\n"
    f"- scores: {scores} \n"
    "- advice: array of short, actionable improvement suggestions\n"
    "Constraints: grade MUST equal Pertinence+Faits non correctes + Faits manquants + Structure. No extra text outside the JSON.\n\n"
    )

# Data base

In [13]:
import sqlite3
import json

# -------------------------------
# 1. Create the SQLite database
# -------------------------------
# One row per graded answer; `scores` holds the per-criterion breakdown as a
# JSON string.
CREATE_RESULTS_TABLE_SQL = '''
CREATE TABLE IF NOT EXISTS student_results (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    student_name TEXT,
    question TEXT,
    answer TEXT,
    grade REAL,
    scores TEXT,        -- we'll store JSON as a string
    advice TEXT
)
'''

# Opening the connection creates the database file on disk.
conn = sqlite3.connect('student_results.db')
cursor = conn.cursor()

cursor.execute(CREATE_RESULTS_TABLE_SQL)
conn.commit()
print("Database 'student_results.db' ready")

# -------------------------------
# 2. Function to save a result
# -------------------------------
def save_result(student_name, question, answer, grading_json, connection=None):
    """
    Store one graded answer in the `student_results` table.

    Parameters
    ----------
    student_name, question, answer : str
        Written unchanged to the columns of the same name.
    grading_json : dict
        Dictionary returned by the LLM in test mode. The keys "grade"
        (default 0), "scores" (default {}) and "advice" (default "") are read.
    connection : sqlite3.Connection, optional
        Database to write to. When omitted, the notebook-level `conn` is used.

    Notes
    -----
    "scores" is always stored as a JSON string. "advice" is stored unchanged
    when it is a string or None; any other value (the grading prompt asks for
    an array of suggestions) is JSON-encoded first, because sqlite3 cannot
    bind a list or dict to a query parameter.
    """
    db = conn if connection is None else connection

    # Convert the 'scores' dict to a JSON string
    scores_str = json.dumps(grading_json.get("scores", {}), ensure_ascii=False)

    advice = grading_json.get("advice", "")
    if advice is not None and not isinstance(advice, str):
        advice = json.dumps(advice, ensure_ascii=False)

    db.execute(
        '''
        INSERT INTO student_results (student_name, question, answer, grade, scores, advice)
        VALUES (?, ?, ?, ?, ?, ?)
        ''',
        (
            student_name,
            question,
            answer,
            grading_json.get("grade", 0),
            scores_str,
            advice,
        ),
    )
    db.commit()
    print(f"Result for {student_name} saved successfully!")

# -------------------------------
# 3. Example usage
# -------------------------------

# Suppose we have a grading result from the LLM (already parsed as JSON)
# NOTE: the sample values are illustrative only. The four scores add up to 90
# while "grade" is 40, and "Faits manquants" (40) is above the /30 maximum of
# the rubric, so this payload does not satisfy the "grade equals the sum of
# the scores" constraint stated in the grading prompt.
example_grading_json = {
    "Section": "Histoire",
    "Question": "Quels furent les principaux √©v√©nements qui ont marqu√© le d√©but de la Seconde Guerre mondiale en Europe ?",
    "Answer": "L'invasion de la Pologne par l'Allemagne, et le fait qu'il y'avait une crise √©conomique assez forte",
    "grade": 40,
    "scores": {
        "Pertinence": 20,
        "Faits non correctes": 20,
        "Faits manquants": 40,
        "Structure": 10
    },
    "advice": "La r√©ponse mentionne correctement l'invasion de la Pologne, qui est un √©v√©nement cl√©. Cependant, la crise √©conomique, bien que pertinente, n'est pas un √©v√©nement marquant du d√©but de la guerre..."
}

# Disabled on purpose: uncomment to insert the sample row into the database.
#save_result("Edin", example_grading_json["Question"], example_grading_json["Answer"], example_grading_json)




Database 'student_results.db' ready


# Vector embeddings

This only needs to be run once to build the vectors. They are stored in a folder called "faiss_index".

In [None]:
"""
PDF Chapter & Theme Extractor using RAG
Extracts all chapters and themes from a PDF document
"""

from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import json

# ============================================================
# 1. SETUP LLM
# ============================================================

# Rebinds the notebook-level `llm`; main() below passes it to the extraction
# functions.
llm = ChatMistralAI(
    model="mistral-small-latest",
    temperature=0  # Low temperature for structured extraction
)

# ============================================================
# 2. LOAD PDF
# ============================================================

# Reload the PDF; this rebinds the notebook-level `loader` and `docs`.
print("Loading PDF...")
loader = PyMuPDFLoader("Data/Atlas.pdf")
docs = loader.load()
print(f"‚úÖ Loaded {len(docs)} pages")

# ============================================================
# 3. EXTRACT TABLE OF CONTENTS & STRUCTURE
# ============================================================

def extract_chapters_and_themes(docs, llm, max_pages=20, max_chars=15000):
    """
    Ask the LLM for the document's chapter structure, based on its opening pages.

    The text of the first `max_pages` pages is concatenated, cut to `max_chars`
    characters and sent to `llm` with a prompt that requests a JSON description
    of the title, chapters and subsections. No retrieval step is involved.

    Parameters
    ----------
    docs : list
        Page documents; only `page_content` is read.
    llm :
        Chat model placed between the prompt and the string output parser.
    max_pages : int, optional
        Number of leading pages to read (the table of contents and the
        introduction are usually found there). Defaults to 20.
    max_chars : int, optional
        Upper bound on the characters sent to the model, to stay within token
        limits. Defaults to 15000.

    Returns
    -------
    str
        The model reply as plain text. It is not parsed or validated here and
        may be wrapped in a Markdown code fence.
    """
    first_pages_text = "\n\n".join(doc.page_content for doc in docs[:max_pages])

    # Prompt to extract TOC
    toc_prompt = ChatPromptTemplate.from_messages([
        ("system", """You are an expert at analyzing document structure. 
Extract the table of contents, chapters, and main sections from the document.

Return a JSON structure like this:
{{
  "title": "Document Title",
  "chapters": [
    {{
      "number": "1",
      "title": "Chapter Title",
      "page": 10,
      "subsections": ["Subsection 1", "Subsection 2"]
    }}
  ]
}}

If no clear chapter structure exists, identify the main sections and themes."""),
        ("user", "Here are the first pages of the document:\n\n{text}\n\nExtract the structure:")
    ])

    print("\nüîç Analyzing document structure...")
    chain = toc_prompt | llm | StrOutputParser()
    return chain.invoke({"text": first_pages_text[:max_chars]})

# ============================================================
# 4. EXTRACT THEMES FROM FULL DOCUMENT
# ============================================================

def extract_themes_from_full_doc(docs, llm):
    """
    Ask the LLM for the main themes, based on a sample of pages.

    Up to five pages are sampled (first, one quarter, middle, three quarters,
    last). Each sampled page is prefixed with a "[Page N]" tag taken from its
    metadata, and the combined text is cut to 15000 characters before it is
    sent to `llm`.

    Parameters
    ----------
    docs : list
        Page documents; `page_content` and `metadata["page"]` are read.
    llm :
        Chat model placed between the prompt and the string output parser.

    Returns
    -------
    str
        The model reply as plain text (not parsed here).

    Raises
    ------
    ValueError
        If `docs` is empty.
    """
    page_count = len(docs)
    if page_count == 0:
        raise ValueError("docs is empty: there are no pages to sample")

    # Sample pages throughout the document. On short documents several of the
    # five positions coincide; dict.fromkeys drops the repeats while keeping
    # the order, so no page is sent twice.
    sample_indices = list(dict.fromkeys([
        0,
        page_count // 4,
        page_count // 2,
        3 * page_count // 4,
        page_count - 1,
    ]))
    sample_text = "\n\n".join(
        f"[Page {docs[i].metadata.get('page', '?')}]\n{docs[i].page_content}"
        for i in sample_indices
    )

    themes_prompt = ChatPromptTemplate.from_messages([
        ("system", """You are an expert at thematic analysis. 
Analyze the document and identify the main themes, topics, and key concepts.

Return a JSON structure like this:
{{
  "main_themes": ["Theme 1", "Theme 2", "Theme 3"],
  "key_topics": ["Topic 1", "Topic 2"],
  "document_type": "textbook/manual/report/etc",
  "summary": "Brief overview of what the document covers"
}}"""),
        ("user", "Here are sample pages from throughout the document:\n\n{text}\n\nExtract the themes:")
    ])

    print("\nüéØ Extracting themes from document...")
    chain = themes_prompt | llm | StrOutputParser()
    return chain.invoke({"text": sample_text[:15000]})

# ============================================================
# 5. CREATE RAG SYSTEM FOR DETAILED QUERIES
# ============================================================

def create_rag_system(docs):
    """
    Build a retriever over `docs`.

    The documents are split (chunk_size=1000, chunk_overlap=100), embedded
    with the "mistral-embed" model and indexed in a FAISS store. The returned
    retriever is configured with k=5.
    """
    print("\nüìö Building vector store...")

    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    ).split_documents(docs)
    print(f"‚úÖ Created {len(chunks)} chunks")

    index = FAISS.from_documents(chunks, MistralAIEmbeddings(model="mistral-embed"))
    return index.as_retriever(search_kwargs={"k": 5})

# ============================================================
# 6. QUERY SPECIFIC CHAPTERS
# ============================================================

def query_chapter_details(retriever, llm, chapter_name):
    """
    Summarise one chapter (or theme) from retrieved passages.

    The retriever is queried with "What is covered in <chapter_name>?"; the
    text of the hits is joined and handed to `llm`, which is asked for the
    main topics, key concepts and important details. Returns the reply as a
    string.
    """
    detail_prompt = ChatPromptTemplate.from_messages([
        ("system", "You are analyzing a document chapter. Provide a detailed summary of the content."),
        ("user", """Based on this context about {chapter}:

        {context}

        Provide:
        1. Main topics covered
        2. Key concepts
        3. Important details or findings""")
    ])
    summary_chain = detail_prompt | llm | StrOutputParser()

    # Retrieve the relevant chunks and flatten them into one context string.
    hits = retriever.invoke(f"What is covered in {chapter_name}?")
    passages = "\n\n".join(hit.page_content for hit in hits)

    return summary_chain.invoke({"chapter": chapter_name, "context": passages})

# ============================================================
# 7. MAIN EXECUTION
# ============================================================

def main():
    """
    Run the full extraction: structure, then themes, then the retriever.

    Reads the notebook-level `docs` and `llm`, prints each result under a
    banner and returns the tuple (structure, themes, retriever).
    """
    rule = "=" * 60

    def announce(title):
        # Three-line banner: blank line + rule, title, rule.
        print("\n" + rule)
        print(title)
        print(rule)

    structure = extract_chapters_and_themes(docs, llm)
    announce("üìñ DOCUMENT STRUCTURE")
    print(structure)

    themes = extract_themes_from_full_doc(docs, llm)
    announce("üé® MAIN THEMES")
    print(themes)

    # Retriever for follow-up questions about individual chapters.
    retriever = create_rag_system(docs)
    announce("‚úÖ RAG SYSTEM READY")
    print("\nYou can now query specific chapters for more details!")

    # Example (disabled): query the first chapter.
    # announce("üìù EXAMPLE: Chapter 1 Details")
    # print(query_chapter_details(retriever, llm, "Chapter 1"))

    return structure, themes, retriever

# ============================================================
# RUN
# ============================================================

if __name__ == "__main__":
    structure, themes, retriever = main()

    rule = "=" * 60

    # Optional: keep both raw LLM replies in a text report.
    with open("document_analysis.txt", "w", encoding="utf-8") as report:
        report.write(
            "DOCUMENT STRUCTURE\n" + rule + "\n" + structure + "\n\n"
            + "MAIN THEMES\n" + rule + "\n" + themes + "\n"
        )

    print("\nüíæ Results saved to 'document_analysis.txt'")

    # Usage hints for querying specific chapters. These lines are only
    # printed; the calls shown in them are not executed.
    print("\n" + rule)
    print("USAGE EXAMPLES")
    print(rule)
    print("\n# Query a specific chapter:")
    print("details = query_chapter_details(retriever, llm, 'Introduction')")
    print("\n# Query a theme:")
    print("details = query_chapter_details(retriever, llm, 'Risk Management')")

Loading PDF...
‚úÖ Loaded 308 pages

üîç Analyzing document structure...

üìñ DOCUMENT STRUCTURE
```json
{
  "title": "Grand Atlas 2024",
  "chapters": [
    {
      "number": "1",
      "title": "Points chauds",
      "subsections": [
        "La guerre en Ukraine : la Russie peut-elle gagner ?",
        "L‚ÄôIndopacifique, th√©√¢tre des rivalit√©s mondiales",
        "Ta√Øwan : des √©lections √† haut risque",
        "Turquie : la victoire d‚ÄôErdogan",
        "Le Caucase sous tensions",
        "Syrie, une guerre inachev√©e ?",
        "Le Kosovo : un anniversaire sous tension",
        "Un conflit isra√©lo-palestinien loin d‚Äô√™tre marginalis√©",
        "Iran, entre r√©pression et isolement",
        "L‚Äôinstabilit√© gagne-t-elle l‚ÄôAfrique ?",
        "Un arc de crise sah√©lien"
      ]
    },
    {
      "number": "2",
      "title": "Les grands enjeux de 2024",
      "subsections": [
        "L‚ÄôOTAN, le retour, 75 ans apr√®s sa naissance",
        "Les √âtats-Unis, de r

In [19]:
# ============================================================
# FULL WORKING VERSION ‚Äî RAG + PERSONA CHAIN + GRADING CHAIN
# ============================================================

from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableBranch
import fitz  # PyMuPDF
from langchain_community.docstore.document import Document
import base64
from langchain_core.messages import HumanMessage


# ============================================================
# 1. LLM CONFIG
# ============================================================

# Text model used by the teacher and grading chains. temperature=1 is set
# here, whereas the vision model below uses temperature=0.
llm = ChatMistralAI(
    model="mistral-small-latest",   # Vision model would be: pixtral-12b-2409
    temperature=1
)

# Vision model; caption_images_from_pdf() reads this notebook-level name.
vision_llm = ChatMistralAI(
    model="pixtral-12b-2409",
    temperature=0
)

# OPTIONAL: if you want vision, replace above with:
# llm = ChatMistralAI(model="pixtral-12b-2409", temperature=1)



# ============================================================
# 2. LOAD DOCUMENTS (PDF)
# ============================================================

# Rebinds the notebook-level `loader` and `docs` with a fresh load of the PDF.
loader = PyMuPDFLoader("Data/Atlas.pdf")
docs = loader.load()



# ============================================================
# 3. SPLIT DOCUMENTS
# ============================================================

# Same chunk size and overlap as the first splitter in this notebook, but with
# the splitter's default separators (the custom list below is disabled).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    #separators=["\nCHAPITRE", "\n##", "\n###", "\nSection", "\n\n", "\n", ".", " ", ""]
)

# Text chunks; they are combined with the image captions further down.
text_docs = text_splitter.split_documents(docs)

def caption_images_from_pdf(pdf_path):
    """
    Caption the images embedded in a PDF with the vision model.

    Every image reference found on every page is turned into a `Document`
    whose `page_content` is the caption produced by the notebook-level
    `vision_llm`. The image is sent as a base64 data URL together with a fixed
    "describe this image" instruction.

    Parameters
    ----------
    pdf_path : str
        Path of the PDF to open with PyMuPDF.

    Returns
    -------
    list
        One `Document` per image occurrence, with metadata keys "type"
        ("image_caption"), "page" (page index + 1), "image_index" (position
        of the image on its page) and "source" (`pdf_path`).

    Notes
    -----
    - An image that appears on several pages (same xref) is captioned once and
      the caption is reused, which avoids repeated calls to the vision model.
    - The PDF handle is closed even when a model call fails or is interrupted.
    - NOTE(review): "page" is 1-based here; confirm it uses the same base as
      the page numbers stored on the text chunks before mixing them.
    """
    mime_by_ext = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
    }
    captions_by_xref = {}
    image_docs = []

    pdf = fitz.open(pdf_path)
    try:
        for page_num in range(len(pdf)):
            page = pdf.load_page(page_num)

            for img_index, img_info in enumerate(page.get_images(full=True)):
                xref = img_info[0]

                if xref not in captions_by_xref:
                    image_data = pdf.extract_image(xref)
                    ext = image_data.get("ext", "png")
                    # Unknown extensions are declared as PNG.
                    mime = mime_by_ext.get(ext.lower(), "image/png")

                    raw_b64 = base64.b64encode(image_data["image"]).decode("utf-8")
                    image_url = f"data:{mime};base64,{raw_b64}"

                    # Multimodal message: one text part and one image part.
                    message = HumanMessage(
                        content=[
                            {"type": "text", "text": "Describe this image in detailed natural language."},
                            {
                                "type": "image_url",
                                "image_url": {"url": image_url}
                            }
                        ]
                    )
                    captions_by_xref[xref] = vision_llm.invoke([message]).content

                image_docs.append(
                    Document(
                        page_content=captions_by_xref[xref],
                        metadata={
                            "type": "image_caption",
                            "page": page_num + 1,
                            "image_index": img_index,
                            "source": pdf_path
                        }
                    )
                )
    finally:
        pdf.close()

    return image_docs


# Captions every image in the PDF: this calls the vision model and can take a
# long time (the recorded run of this cell shows a read-timeout retry and was
# interrupted).
image_docs = caption_images_from_pdf("Data/Atlas.pdf")
# Text chunks and image captions are indexed together.
all_docs = text_docs + image_docs




# ============================================================
# 4. EMBEDDINGS + VECTORSTORE + RETRIEVER
# ============================================================

# Embed text chunks and image captions into one FAISS store.
embeddings = MistralAIEmbeddings(model="mistral-embed")

vectorstore = FAISS.from_documents(all_docs, embeddings)

retriever = vectorstore.as_retriever()

# Save to disk
# The folder "faiss_index" is what the interface cell later reloads with
# FAISS.load_local.
vectorstore.save_local("faiss_index")
print("Vectorstore saved to 'faiss_index/'")

Retrying langchain_mistralai.chat_models.ChatMistralAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised ReadTimeout: The read operation timed out.


KeyboardInterrupt: 

# Interface

There are two ways the student can interact with the LLM. In "teaching" mode, the student asks questions about the course in order to improve his knowledge and understanding of the subject. In "test" mode, the LLM generates a question based on the course and the student is graded on the quality of his answer. The grade is then stored in a local database to track the student's progress.

In [16]:
from langchain_community.vectorstores import FAISS
from langchain_mistralai import MistralAIEmbeddings

# Initialize embeddings
# The same embedding model ("mistral-embed") is used as when the index was built.
embeddings = MistralAIEmbeddings(model="mistral-embed")

# Load the vectorstore (allow unsafe deserialization since you created it)
# SECURITY: allow_dangerous_deserialization=True must only be used for index
# folders produced by this notebook; never point it at files from an untrusted
# source.
vectorstore = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)

# Default retriever settings (no search_kwargs).
retriever = vectorstore.as_retriever()
print("Vectorstore loaded successfully")



# ============================================================
# 5. PERSONA CHAIN (Teacher mode + RAG)
# ============================================================

# Teacher-mode prompt: the system message fixes the persona, the human message
# carries the student's question followed by the retrieved context.
# NOTE: this rebinds `persona_template`, which an earlier cell defines as a
# plain string. `llm` and `ChatPromptTemplate` are not defined in this cell,
# so an earlier cell must have been run first.
persona_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Tu es un professeur bienveillant. Explique simplement mais sans infantiliser. "
            "Appuie-toi uniquement sur le contexte fourni.",
        ),
        (
            "human",
            "Question: {question}\n\nContexte issu des documents:\n{context}",
        ),
    ]
)

# The chain ends at the chat model, so invoking it yields a message object
# rather than a plain string.
persona_chain = persona_template | llm



# ============================================================
# 6. GRADING CHAIN (Automatic evaluation)
# ============================================================

# Grading prompt with placeholders {grading_rubric}, {question} and {answer}.
# NOTE: this prompt asks for a mark out of 20, whereas the JSON grading prompt
# built in the interactive section at the end of this cell uses a 0-100 grade.
# It also rebinds `test_template`, which an earlier cell defines as a plain
# string.
test_template = ChatPromptTemplate.from_messages([
    ("system",
     "Tu es un correcteur automatique. √âvalue la r√©ponse de l'√©l√®ve selon les crit√®res fournis."),
    ("human",
     # Disabled placeholder for the instruction given to the student:
     #"Instruction donn√©e √† l'√©l√®ve : {task_description}\n"
     "Bar√®me : {grading_rubric}\n"
     "Question : {question}\n"
     "R√©ponse de l'√©l√®ve : {answer}\n\n"
     "Donne une note sur 20 + justification.")
])

# Prompt piped into the chat model; the result is a message object.
test_chain = (
    test_template
    | llm
)



# ============================================================
# 7. RAG WRAPPER ‚Äî run retrieval only if needed
# ============================================================

def rag_logic(inputs):
    """
    Return a copy of `inputs` with a "context" entry added.

    When `inputs` has a non-empty "question", the notebook-level `retriever`
    is queried with it and the text of the hits is joined with blank lines.
    When the question is missing or empty, no retrieval happens and the
    context is the empty string.

    The caller's dictionary is left untouched: a new dict is returned, so the
    same input can safely be reused for several calls.
    """
    query = inputs.get("question", "")
    if not query:
        return {**inputs, "context": ""}

    retrieved = retriever.invoke(query)
    context = "\n\n".join(doc.page_content for doc in retrieved)
    return {**inputs, "context": context}


# Wrap rag_logic as a runnable so it can be piped in front of persona_chain.
rag_chain = RunnableLambda(rag_logic)



# ============================================================
# 8. ROUTING ‚Äî decide whether to use Persona or Test chain
# ============================================================

def route(inputs):
    """
    Tell the branch which mode applies to `inputs`.

    Returns True (persona/teacher mode) when `inputs` has no "answer" key and
    False (grading mode) when the key is present, whatever its value.
    """
    has_answer = "answer" in inputs
    return not has_answer


# Two-way branch:
#   - route(inputs) is true  -> retrieval followed by the teacher persona chain
#   - otherwise (fallback)   -> grading prompt piped into the chat model
conditional_chain = RunnableBranch(
    (route, rag_chain | persona_chain),
    test_template | llm,
)

def generate_test_question(criteria):
    """
    Produce a test question from document context.

    `criteria` (the subject the student wishes to be tested on) is used twice:
    as the retrieval query against the notebook-level `retriever`, and as the
    "Instructions" part of the prompt. Returns the model's reply text with
    surrounding whitespace removed.
    """
    question_gen_prompt = ChatPromptTemplate.from_messages([
        ("system",
         "Tu es un professeur bienveillant et rigoureux. "
         "√Ä partir du contexte fourni, g√©n√®re une question pertinente pour un √©l√®ve."),
        ("human",
         "Instructions : {criteria}\n\nContexte : {context}")
    ])
    question_gen_chain = question_gen_prompt | llm

    # Step 1: retrieve context for the requested subject.
    hits = retriever.invoke(criteria)
    context_text = "\n\n".join(hit.page_content for hit in hits)

    # Step 2: let the model write the question from that context.
    reply = question_gen_chain.invoke({
        "criteria": criteria,
        "context": context_text,
    })
    return reply.content.strip()



# ============================================================
# 9. MAIN ENTRYPOINT
# ============================================================

def respond(inputs):
    """
    Send `inputs` through `conditional_chain` and return its result.

    A dict without an "answer" key is routed to the teacher branch (retrieval
    + persona prompt); otherwise the grading prompt is used (see `route`).
    """
    return conditional_chain.invoke(inputs)



# ============================================================
# 10. INTERACTIVE TESTING WITH input()
# ============================================================

import json


def _parse_grading_json(raw_output):
    """
    Decode the grader's reply into a dict, tolerating a Markdown code fence.

    A leading fence line (``` or ```json) is removed; the last line is removed
    only when it is a closing fence, so an unterminated fence does not cost a
    line of JSON. Returns None when the text is not a JSON object.
    """
    text = raw_output.strip()
    if text.startswith("```"):
        lines = text.split("\n")[1:]
        if lines and lines[-1].strip().startswith("```"):
            lines = lines[:-1]
        text = "\n".join(lines)

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


if __name__ == "__main__":

    print("=== Tutor System Running ===")
    print("Ask a question to the teacher, or type 'test' to grade an answer.\n")

    mode = input("Mode (teach/test): ").strip().lower()

    if mode == "teach":
        question = input("Your question: ")
        result = respond({"question": question})
        print("\n--- Teacher answer ---")
        # The teacher chain ends at the chat model, so `result` is a message
        # object; print its text rather than the object's repr.
        print(getattr(result, "content", result))

    elif mode == "test":
        # The student only provides the rubric and, later, an answer.
        # The rubric text is also used as the retrieval query for the question.
        rubric = input("Bar√®me : ")

        # --- Step 1: generate a question from retrieved context ---
        generated_question = generate_test_question(rubric)
        print("\n--- Question g√©n√©r√©e automatiquement ---")
        print(generated_question)

        # --- Step 2: the student answers ---
        answer = input("\nR√©ponse de l'√©l√®ve : ")

        # --- Step 3: grading prompt (JSON output) ---
        scores_text = (
            "- Pertinence : ... /30;\n"
            "- Faits non correctes : ... /30;\n"
            "- Faits manquants : ... /30;\n"
            "- Structure : ... /10;"
        )

        test_prompt_template = ChatPromptTemplate.from_messages([
            ("system",
            "Act as a supportive but rigorous history teacher.\n"
            "Your goal is to evaluate the student's answer and return ONLY a JSON object."),
            ("human",
            "Grading rubric: {grading_rubric}\n"
            "Question: {question}\n"
            "Answer: {answer}\n"
            "Scores template: {scores_text}\n"
            "Constraints: grade MUST equal sum of all scores.\n"
            "Return a JSON object with keys:\n"
            "- Section\n"
            "- Question\n"
            "- Answer\n"
            "- grade (0-100)\n"
            "- scores\n"
            "- advice\n"
            "No extra text or Markdown, ONLY JSON.")
        ])

        # NOTE: this rebinds the notebook-level `test_chain`.
        test_chain = test_prompt_template | llm

        grading_result = test_chain.invoke({
            "grading_rubric": rubric,
            "question": generated_question,
            "answer": answer,
            "scores_text": scores_text
        })

        # --- Step 4: turn the reply text into a Python dictionary ---
        grading_json = _parse_grading_json(grading_result.content)

        # --- Step 5: show the result ---
        if grading_json is None:
            print("Erreur : le LLM n'a pas retourn√© un JSON valide")
        else:
            print("\n--- JSON Grading Result ---")
            print(grading_json)
            # .get() keeps the display working when the model omits a key.
            print("\nNote :", grading_json.get("grade"))
            print("Conseils :", grading_json.get("advice"))
            # Disabled: persist the result (only meaningful when parsing succeeded).
            #save_result("Edin", grading_json["Question"], grading_json["Answer"], grading_json)

    else:
        print("Unknown mode.")



Vectorstore loaded successfully
=== Tutor System Running ===
Ask a question to the teacher, or type 'test' to grade an answer.


--- Question g√©n√©r√©e automatiquement ---
**Question :** En vous basant sur le contexte fourni, expliquez comment la course √† l'espace, initi√©e par le lancement du premier satellite artificiel sovi√©tique (Spoutnik) en 1957, a influenc√© les dynamiques de la Guerre froide. Quels √©taient les enjeux politiques, technologiques et id√©ologiques de cette comp√©tition spatiale entre les √âtats-Unis et l'Union sovi√©tique ?

--- JSON Grading Result ---
{'Section': 'Guerre froide', 'Question': "En vous basant sur le contexte fourni, expliquez comment la course √† l'espace, initi√©e par le lancement du premier satellite artificiel sovi√©tique (Spoutnik) en 1957, a influenc√© les dynamiques de la Guerre froide. Quels √©taient les enjeux politiques, technologiques et id√©ologiques de cette comp√©tition spatiale entre les √âtats-Unis et l'Union sovi√©tique ?", 'Answ

In [11]:
# -------------------------------
# 4. Query the database
# -------------------------------
# Fetch every stored result and print one tuple per line.
rows = cursor.execute("SELECT * FROM student_results").fetchall()
for row in rows:
    print(row)

# Close connection when done. After this, `conn` and `cursor` are unusable
# until the database cell is run again.
conn.close()

(1, 'Edin', 'Quelles √©taient les principales causes de la Premi√®re Guerre mondiale et comment ont-elles conduit √† son d√©clenchement en 1914 ?', "L'assassination de l'archeduc Fran√ßois ferdinant", 20.0, '{"Pertinence": 10, "Faits non correctes": 20, "Faits manquants": 20, "Structure": 0}', "Votre r√©ponse est incompl√®te et manque de structure. Vous avez mentionn√© un √©v√©nement important, mais vous devez inclure d'autres causes majeures comme le nationalisme, l'imp√©rialisme, le militarisme et les alliances. De plus, vous devez expliquer comment ces facteurs ont conduit √† la guerre en 1914. Travaillez sur la clart√© et l'organisation de votre r√©ponse.")


In [8]:
# Shows the per-criterion scores of the most recent grading. `grading_json` is
# set by the interactive test mode, so this fails with a NameError if that
# mode has not been run, and with a TypeError if the last reply could not be
# parsed (grading_json is None).
print(grading_json['scores'])


{'Pertinence': 0, 'Faits non correctes': 30, 'Faits manquants': 30, 'Structure': 10}
