In [22]:
import os
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from typing import TypedDict, Optional
from langgraph.graph import StateGraph
from retriever import load_hybrid_retriever
from langchain_core.messages import HumanMessage
import warnings
warnings.filterwarnings("ignore")


In [4]:
def load_llm(model: str = "gpt-4o", temperature: float = 0.1):
    """Build the OpenAI chat model used by the notebook.

    Args:
        model: Name of the OpenAI chat model to use. Defaults to "gpt-4o".
        temperature: Sampling temperature passed to the model. Defaults to 0.1.

    Returns:
        A ``ChatOpenAI`` instance whose API key is read from the
        ``OPENAI_API_KEY`` environment variable.
    """
    return ChatOpenAI(
        api_key=os.getenv("OPENAI_API_KEY"),
        model=model,
        temperature=temperature,
    )

In [37]:
# Instantiate the chat model and the retriever that the graph cells below rely on.
llm = load_llm()
retriever = load_hybrid_retriever(
    index_path="../vectorstore/faiss_vectorestore",
    pickle_path="../vectorstore/documents.pkl",
    model_path="../multilingual-e5-large",
)

In [94]:
from typing import Optional, TypedDict
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chains import LLMChain
from langchain.chains import StuffDocumentsChain

from langchain_core.messages import HumanMessage
from langgraph.graph import StateGraph
import yfinance as yf
import matplotlib.pyplot as plt
import io
import base64
import pandas as pd
import re


# Graph state
class GraphState(TypedDict):
    """State dictionary that is passed between the LangGraph nodes."""
    question: str  # user question, read by the router and RAG nodes
    route: Optional[str]  # routing label written by router_node (and echoed by the RAG node)
    rag_result: Optional[str]  # text produced by the RAG node
    stock_data: Optional[dict]  # per-date closing prices written by finance_node
    graph_base64: Optional[str]  # PNG chart as a data URI, written by plot_node
    final_response: Optional[str]  # final answer written by response_node


# Build the RAG chain
def build_rag_chain(retriever, llm):
    """Assemble the document-stuffing chain used to answer from retrieved context.

    Args:
        retriever: Retriever object; it is returned untouched alongside the chain.
        llm: Language model that the chain prompts.

    Returns:
        A ``(retriever, stuff_chain)`` tuple, where ``stuff_chain`` injects the
        documents into the ``context`` variable of the prompt and also expects
        ``question`` and ``instruction`` inputs.
    """
    # Kept verbatim (including indentation): this text is sent to the model as-is.
    template_text = """
        Tu es un expert fiable et professionnel sur l'entreprise Renault.
        {instruction}
        ---
        Contexte: {context}
        Question: {question}
        Réponse :
        ---
        """

    qa_prompt = PromptTemplate(
        input_variables=["context", "question", "instruction"],
        template=template_text,
    )

    documents_chain = StuffDocumentsChain(
        document_variable_name="context",
        llm_chain=LLMChain(prompt=qa_prompt, llm=llm),
    )

    return retriever, documents_chain


# RAG node with an instruction that depends on the route
def rag_node_factory(retriever, stuff_chain):
    """Create the graph node that answers the question from retrieved documents.

    Args:
        retriever: Object exposing ``get_relevant_documents(query)``.
        stuff_chain: Chain exposing
            ``run(input_documents=..., question=..., instruction=...)``.

    Returns:
        A ``rag_node(state)`` function. It reads ``question`` and ``route`` from
        the state and returns ``{"rag_result": <chain output>, "route": <route>}``.
    """
    def rag_node(state: GraphState) -> GraphState:
        query = state["question"]
        # `or` also covers a route that is present but None, which a plain
        # .get() default would let through.
        route = state.get("route") or "rag_only"
        print(f"Current route: {route}")  # Debugging information

        if route == "graph_flow":
            # The second fragment ends with a space so the two sentences are not
            # glued together ("corrélation).Si ...") once the literals are joined.
            instruction = (
                "Prépare une réponse qui extrait les dates clés, les chiffres de vente ou les éléments comparables "
                "utiles pour tracer un graphe (évolution, comparaison, corrélation). "
                "Si on te demande de creer un graph, contente toi juste d'extraire à partir du contexte les données nécessaire sous la forme : yyyy-01-01:vente1, yyyy-01-01:vente2 etc"
            )
        elif route == "finance_only":
            instruction = (
                "Fournis uniquement les dates d'annonce des résultats ou les périodes précises nécessaires à l'analyse boursière."
            )
        else:
            instruction = (
                "Fournis une réponse directe et claire à la question en utilisant uniquement le contexte fourni."
            )

        print(f"Instruction: {instruction}")  # Debugging information

        # Fetch the documents relevant to the question.
        docs = retriever.get_relevant_documents(query)

        # Run the chain with every variable the prompt template requires.
        result = stuff_chain.run(
            input_documents=docs,
            question=query,
            instruction=instruction
        )

        return {"rag_result": result, "route": route}

    return rag_node


# LLM router
def router_node(state: GraphState) -> GraphState:
    """Ask the LLM to classify the question into one of the graph routes.

    Uses the module-level ``llm`` object. The first of the labels ``rag_only``,
    ``finance_only`` or ``graph_flow`` found in the model answer is used; when
    none is found the route falls back to ``rag_only``.

    Args:
        state: Graph state; only ``question`` is read.

    Returns:
        ``{"route": <label>}``.
    """
    question = state["question"]

    system_prompt = (
        "Tu es un routeur intelligent. Ta tâche est de classer une question dans l'une des catégories suivantes :\n"
        "- 'rag_only' : si la question porte uniquement sur des informations à extraire des documents (comme ventes, résumé, plan).\n"
        "- 'finance_only' : si la question porte uniquement sur la bourse, les cours d'action, CAC40, ou les données financières.\n"
        "- 'graph_flow' : si la question demande une comparaison, une corrélation ou un graphe entre plusieurs variables (ex: stock et ventes).\n"
        "Répond uniquement par : rag_only, finance_only, ou graph_flow."
    )

    response = llm.invoke([
        HumanMessage(content=system_prompt),
        HumanMessage(content=f"Question : {question}")
    ])

    # The prompt shows the labels in quotes, so the model may answer
    # "'graph_flow'" or "graph_flow." — extract the label instead of requiring
    # an exact match, which would silently downgrade such answers to rag_only.
    raw_answer = str(response.content).lower()
    label_match = re.search(r"\b(rag_only|finance_only|graph_flow)\b", raw_answer)
    decision = label_match.group(1) if label_match else "rag_only"

    print(decision)

    return {"route": decision}


# Fetch stock prices for the dates found in the RAG output
def finance_node(state: GraphState) -> GraphState:
    """Look up Renault and CAC40 closing prices for dates cited in ``rag_result``.

    Every ``YYYY-MM-DD`` substring of ``rag_result`` is treated as a date. For
    each one, the "Close" value is read from the ``RNO.PA`` and ``^FCHI`` price
    histories fetched for 2020-01-01 .. 2025-01-01; dates with no matching row
    are skipped.

    Args:
        state: Graph state; only ``rag_result`` is read.

    Returns:
        The unchanged state when no date is found, otherwise
        ``{"stock_data": {date: {"Renault": close, "CAC40": close}}}``.
    """
    # `or ""` also covers a rag_result that is present but None, which would
    # otherwise make re.findall raise a TypeError.
    rag_context = state.get("rag_result") or ""
    # Sorted so the resulting dict has a deterministic order.
    dates = sorted(set(re.findall(r"\d{4}-\d{2}-\d{2}", rag_context)))

    if not dates:
        return state

    ticker_renault = yf.Ticker("RNO.PA").history(start="2020-01-01", end="2025-01-01")
    ticker_cac = yf.Ticker("^FCHI").history(start="2020-01-01", end="2025-01-01")

    stock_data = {}
    for date in dates:
        try:
            renault_price = ticker_renault.loc[date]["Close"]
            cac_price = ticker_cac.loc[date]["Close"]
            stock_data[date] = {
                "Renault": round(renault_price, 2),
                "CAC40": round(cac_price, 2)
            }
        except (KeyError, ValueError, TypeError):
            # No usable row for this date in one of the histories (or the
            # matched text is not a valid date): skip it. Narrower than a bare
            # `except`, so KeyboardInterrupt/SystemExit are no longer swallowed.
            continue

    return {"stock_data": stock_data}


# Price chart
def plot_node(state: GraphState) -> GraphState:
    """Render ``stock_data`` as a PNG line chart encoded as a base64 data URI.

    Args:
        state: Graph state; only ``stock_data`` is read. It is expected to map
            date strings to ``{series name: value}`` dictionaries.

    Returns:
        The unchanged state when ``stock_data`` is missing or empty, otherwise
        ``{"graph_base64": "data:image/png;base64,..."}``.
    """
    stock_data = state.get("stock_data")
    if not stock_data:
        return state

    df = pd.DataFrame.from_dict(stock_data, orient="index")
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    # Explicit figure/axes instead of the implicit pyplot state, so exactly
    # this figure is saved and closed.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        for col in df.columns:
            # Markers keep a single-date series visible; a one-point line
            # without a marker draws nothing.
            ax.plot(df.index, df[col], marker="o", label=col)

        ax.set_title("Évolution des cours Renault vs CAC40 (Jours de publication)")
        ax.set_xlabel("Date")
        ax.set_ylabel("Cours (€)")
        ax.legend()
        ax.grid(True)

        buffer = io.BytesIO()
        fig.savefig(buffer, format="png")
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")

    return {"graph_base64": f"data:image/png;base64,{image_base64}"}


# Final answer
def response_node(state: GraphState) -> GraphState:
    """Pick the final answer from whatever the previous nodes produced.

    Precedence: chart (``graph_base64``), then ``stock_data``, then
    ``rag_result``; a fixed fallback message is used when all are empty.

    Args:
        state: Graph state.

    Returns:
        ``{"final_response": <text>}``.
    """
    chart = state.get("graph_base64")
    if chart:
        return {"final_response": f"Voici le graphe demandé : {chart}"}

    prices = state.get("stock_data")
    if prices:
        return {"final_response": f"Données boursières extraites :\n{prices}"}

    rag_text = state.get("rag_result")
    if rag_text:
        return {"final_response": rag_text}

    return {"final_response": "Je n'ai pas pu générer de réponse avec les éléments disponibles."}


# Build the LangGraph workflow
def build_langgraph_rag(retriever, llm):
    """Assemble and compile the routing graph.

    Flow:
        router -> rag_only     -> response
        router -> finance_only -> finance_node -> response
        router -> graph_flow   -> finance_node -> plot_node -> response

    The three route nodes all run the RAG node; its instruction depends on the
    ``route`` stored in the state.

    Args:
        retriever: Retriever handed to the RAG node.
        llm: Language model handed to the RAG chain.

    Returns:
        The compiled graph, with "router" as entry point and "response" as
        finish point.
    """
    retriever_obj, stuff_chain = build_rag_chain(retriever, llm)
    rag_node = rag_node_factory(retriever_obj, stuff_chain)

    def _after_finance(state: GraphState) -> str:
        # Only the graph_flow route asks for a chart.
        return "plot_node" if state.get("route") == "graph_flow" else "response"

    graph = StateGraph(GraphState)

    graph.add_node("router", router_node)
    graph.add_node("rag_only", rag_node)
    # finance_node extracts its dates from rag_result, so the finance_only
    # route must go through the RAG node first. Wiring this route directly to
    # finance_node left rag_result empty and always produced the fallback answer.
    graph.add_node("finance_only", rag_node)
    graph.add_node("graph_flow", rag_node)
    graph.add_node("finance_node", finance_node)
    graph.add_node("plot_node", plot_node)
    graph.add_node("response", response_node)

    graph.add_conditional_edges(
        "router",
        lambda state: state["route"],
        {
            "rag_only": "rag_only",
            "finance_only": "finance_only",
            "graph_flow": "graph_flow"
        }
    )

    graph.add_edge("rag_only", "response")
    graph.add_edge("finance_only", "finance_node")
    graph.add_edge("graph_flow", "finance_node")
    graph.add_conditional_edges(
        "finance_node",
        _after_finance,
        {
            "plot_node": "plot_node",
            "response": "response"
        }
    )
    graph.add_edge("plot_node", "response")

    graph.set_entry_point("router")
    graph.set_finish_point("response")

    return graph.compile()

In [95]:
# Compile the workflow once; the cells below reuse it for every question.
compiled_graph = build_langgraph_rag(retriever=retriever, llm=llm)

In [96]:
# Run the graph on a sample question and show the final answer.
result = compiled_graph.invoke(
    {"question": "Summarize the Renaultion plan report when it’s announced in 2021.."}
)

print(result["final_response"])

rag_only
Current route: rag_only
Instruction: Fournis une réponse directe et claire à la question en utilisant uniquement le contexte fourni.
The Renaulution plan, announced by Renault Group in January 2021, is an ambitious strategic transformation aimed at shifting the company's focus from volume to value. The plan kick-started a significant change in Renault's approach, emphasizing capital efficiency and strategic agility to address the ongoing automotive transition. The Renaulution plan is structured in phases, with the third phase, "Revolution," announced in late 2022, focusing on transforming Renault into a next-generation automotive company. The plan includes significant investments, such as €3 billion by 2027 to launch new models outside Europe, and aims to achieve carbon neutrality in Europe by 2040 and worldwide by 2050. Additionally, the plan involves reducing production costs and establishing Ampere, a company dedicated to electric vehicles and software, to make Renault full