In [None]:
# Wenwen Aufgaben:
# 1). Build a Conversational RAG Retrieval QA Chain with proper citations, like [1][2] with article title, pages and context
# (RAG_QA_Cita-3.ipynb) is the conversational QA_App, answer questions based on given PDFs.

# 2). Build a Multi-Vector RAG, which can make summary of text and tables from a PDF
# (Multi_Modal_RAG-v2.ipynb) is the Multi_vector_Model, which can make summary of text and tables from a PDF.

# 3). Build a Multi-Modal RAG Retrieval QA Chain with proper citations, like [1][2] with article title, pages and context
# (Multi_RAG_QA_Cita-v4.ipynb) is the combination with (RAG_QA_Cita-3.ipynb) and (Multi_Modal_RAG-v2.ipynb), so that my App can make dialog with me, based on the text and tables from given PDFs.

# 4). In the end, this (Multi_RAG_Agent.ipynb) is the final version of the app, 
# which can make dialog with me, based on the text and tables from given PDFs, 
# and also can make a summary of the text and tables from a PDF, with proper citation style.

# 5). combine all Agents (Multi_RAG_Agent from Wenwen, Web_Search_Agent and Data_Science_Agent from Hanna) with Supervisor Agent (from Wenwen)
# 6). create a Gradio chat interface
# 7). create a Huggingface Space for presentation (https://huggingface.co/spaces/hussamalafandi/test_space)

In [None]:
# step 4: build a supervisor_Agent, to control the RAG_Agent from me and Website_Agent & Data_Science_Agent from Hanna
# Create supervisor with langgraph-supervisor
# https://langchain-ai.github.io/langgraph/tutorials/multi_agent/agent_supervisor/#2-create-supervisor-with-langgraph-supervisor 


In [None]:
# "Multi_RAG_Agent.ipynb" from Wenwen
# 1. use LangSmith
import getpass
import os

# Configure environment to connect to LangSmith.
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGSMITH_PROJECT"] = "KI_multi-modal-RAG"

# Prompt only when the key is missing, so re-running the cell (or running it in
# an environment that already exports the key) does not ask again. This is the
# same guard the Gemini key uses in section 2.1.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter API key for LangSmith: ")

# 2. Components
# 2.1 Select chat model: Google Gemini
import getpass
import os

# Prompt for the Gemini key only when the environment does not already provide one.
if not os.environ.get("GOOGLE_API_KEY"):
  os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

from langchain.chat_models import init_chat_model
# `llm` is the chat model that query_or_respond and generate call further down.
llm = init_chat_model("gemini-2.0-flash", model_provider="google_genai")

# 2.2 Select embedding model: HuggingFace
from langchain_huggingface import HuggingFaceEmbeddings
# The same embedding function is handed to every Chroma store created in this cell.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# 2.3 Select vector store: Chroma (install and upgrade langchain_chroma)
from langchain_chroma import Chroma

# First store; it receives the PDF chunks in section 3.3.
# Note: the name `vector_store` is rebound further down to a store under ./chroma_db.
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)

# 3. index our documents:

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 3.1 Load PDF files from a folder
import os
# NOTE(review): absolute, machine-specific path; adjust it (or make it relative) before running elsewhere.
folder_path = "D:/4-IntoCode/16_LangChain/AgilProjekt_multiModel/Raw_Data/Apple1/"  # company folder, use / instead of \

# Sorted so that the processing order is deterministic; the loop variable `file`
# is read again further down (section 4.4) and therefore ends up as the last PDF.
pdf_files = sorted(name for name in os.listdir(folder_path) if name.lower().endswith(".pdf"))
if not pdf_files:
    raise FileNotFoundError(f"No PDF files found in {folder_path}")

all_docs = []
for file in pdf_files:
    loader = PyPDFLoader(os.path.join(folder_path, file))
    # load() yields one Document per PDF page. load_and_split() would already run
    # a default text splitter, so the "pages" count printed below would really be
    # a chunk count and the text would be split twice.
    pages = loader.load()
    all_docs.extend(pages)

# 3.2 Split into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = splitter.split_documents(all_docs)
# Count only the PDFs that were actually loaded, not every entry in the folder.
print(f"Loaded {len(docs)} chunks from {len(all_docs)} pages across {len(pdf_files)} PDF files.")
# Result of an earlier run (made with load_and_split): "Loaded 4419 chunks from 1347 pages across 22 PDF files."

# 3.3 Index chunks
_ = vector_store.add_documents(documents=docs)

# Add Logging:
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Validate Your Data:
logger.info(f"Loaded {len(docs)} documents")
if docs:  # docs[0] would raise IndexError on an empty corpus
    logger.info(f"Sample document: {docs[0]}")
    logger.info(f"Sample metadata: {docs[0].metadata}")

# Check 1: Are the documents actually in the vectorstore?
# vector_store.get() returns a dict of parallel lists ('ids', 'documents', 'metadatas', ...).
# len() of that dict is the number of keys, which is where the previously recorded
# "Total documents in ChromaDB: 7" came from. It did not mean that add_documents()
# had failed. Count the stored ids instead.
print(f"Total documents in ChromaDB: {len(vector_store.get()['ids'])}")

print(f"# of docs to add: {len(docs)}")

# Rebuild the store under ./chroma_db; the rest of the notebook uses this store.
# 1. Delete any previous copy
import shutil
shutil.rmtree("./chroma_db", ignore_errors=True)
# 2. Re-initialize Chroma with persist directory
from langchain_chroma import Chroma

vector_store = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embeddings
)
# 3. Add all chunks.
# rmtree above ignores errors (for example a locked file), and in that case the old
# entries survive. A recorded run showed 774 stored entries for 387 chunks. Stable,
# position-based ids make a repeated add overwrite the same entries instead of
# appending duplicates.
chunk_ids = [f"chunk-{position}" for position in range(len(docs))]
print(f"Adding {len(docs)} docs")
vector_store.add_documents(docs, ids=chunk_ids)
# 4.  Verify: should equal the number of chunks printed above
print("Total documents in ChromaDB:", len(vector_store.get()['ids']))

# 4. Multi_RAG application: reconstruct the Q&A app with citations
# Conversational RAG: additional tool-calling features of chat models to cite document IDs;
# Multi-Vector RAG: use multiple vector stores to retrieve text and tables from a PDF

from langchain_core.messages import SystemMessage, AIMessage
from langgraph.graph import MessagesState
from langgraph.prebuilt import ToolNode
from typing import List
from langchain_core.documents import Document

# 4.1 Define state for application (modified)
class State(MessagesState):
    # Extra state key next to the inherited `messages`: `generate` returns the
    # retrieved Document objects under this key.
    context: List[Document] # change 1

# 4.2 load a retriever and construct our prompt:
# Combine_Step_1: use our own MultiVectorRetriever from (Multi_Modal_RAG-v2.ipynb)
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore

# The in-memory docstore holds the original full content; the vector store holds
# what is searched. The metadata key "doc_id" links the two.
store = InMemoryStore()
retriever = MultiVectorRetriever(
    id_key="doc_id",  # Keep track of original full content
    docstore=store,
    vectorstore=vector_store,
    search_kwargs={"k": 4},  # number of documents to retrieve
)

# 4.3 Define the tool
from langchain_core.tools import tool

# Combine_Step_3: Update the Tool to Use Multi-Vector Retrieval and Store Metadata
@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query.

    Args:
        query: Natural-language search string.

    Returns:
        A two-tuple ``(content, artifact)``, the shape required by
        ``response_format="content_and_artifact"``. ``content`` is the
        serialized text shown to the model; ``artifact`` is the list of
        Document objects behind it. On failure or when nothing is found,
        ``content`` holds the message and ``artifact`` is an empty list.
    """
    try:
        # Search the vector store directly. retriever.invoke() returns the values
        # held in the docstore (plain strings in this notebook), so the metadata
        # needed for citations (source, page, doc_id) would be lost.
        hits = retriever.vectorstore.similarity_search(query, **retriever.search_kwargs)

        full_docs = []
        seen_parent_ids = set()
        for hit in hits:
            doc_id = hit.metadata.get(retriever.id_key)
            if doc_id is None:
                # Plain PDF chunk indexed without a parent entry: use it as it is.
                full_docs.append(hit)
                continue
            if doc_id in seen_parent_ids:
                continue  # the same parent was already added through another hit
            seen_parent_ids.add(doc_id)

            # Swap the summary for the original full content, keeping the metadata.
            parent = retriever.docstore.mget([doc_id])[0]
            if parent is None:
                # The docstore is in-memory only, so it can be empty while the
                # vector store still holds summaries; fall back to the summary.
                full_docs.append(hit)
                continue
            full_text = parent.page_content if isinstance(parent, Document) else str(parent)
            full_docs.append(Document(page_content=full_text, metadata=hit.metadata))

        if not full_docs:
            return "No relevant documents found", []

        serialized = "\n\n".join(
            f"Source: {doc.metadata.get('source', 'Unknown')}\n"
            f"Page: {doc.metadata.get('page', 'N/A')}\n"
            f"Content: {doc.page_content}"
            for doc in full_docs
        )
        return serialized, full_docs
    except Exception as e:
        # Report the failure to the model instead of aborting the graph run.
        return f"Retrieval error: {str(e)}", []
    

# Step 1: Generate an AIMessage that may include a tool-call to be sent.
def query_or_respond(state: State):
    """Let the chat model either answer directly or request a `retrieve` call.

    The reply is returned as a one-element list because MessagesState appends
    returned messages to the existing ones rather than replacing them.
    """
    tool_aware_llm = llm.bind_tools([retrieve])
    return {"messages": [tool_aware_llm.invoke(state["messages"])]}


# Step 2: Execute the retrieval.
# ToolNode wraps the `retrieve` tool; it is added to the graph as a node in section 4.9.
# Note: later cells rebind the name `tools` to plain lists of Tool objects.
tools = ToolNode([retrieve])

# 4.4 Combine_Step_2: Summarize Text + Tables and Load into MultiVectorRetriever
# Use your partition_pdf + summary chain:
from unstructured.partition.pdf import partition_pdf
from typing import Any
from pydantic import BaseModel

# `file` is whatever the indexing loop in section 3.1 saw last, which is not
# necessarily a PDF. Fall back to the last PDF in the folder so partition_pdf
# never receives some other file. `file` is also stored as the "source" of the
# summaries further down, so it must name the document that was partitioned.
if not file.lower().endswith(".pdf"):
    pdf_names = sorted(name for name in os.listdir(folder_path) if name.lower().endswith(".pdf"))
    if not pdf_names:
        raise FileNotFoundError(f"No PDF files found in {folder_path}")
    file = pdf_names[-1]

# Use unstructured to extract
raw_pdf_elements = partition_pdf(
    filename=os.path.join(folder_path, file),  # also correct when folder_path has no trailing slash
    extract_images_in_pdf=True,
    infer_table_structure=True,
    chunking_strategy="by_title",
)

class Element(BaseModel):
    """One extracted PDF element reduced to its kind ("table" or "text") and its text."""
    type: str
    text: Any


def _element_kind(raw_element):
    """Map a raw element to "table" / "text" by its class path, or None for other kinds."""
    class_path = str(type(raw_element))
    if "unstructured.documents.elements.Table" in class_path:
        return "table"
    if "unstructured.documents.elements.CompositeElement" in class_path:
        return "text"
    return None


# Categorize by type (elements of any other kind are left out)
categorized_elements = []
for raw_element in raw_pdf_elements:
    kind = _element_kind(raw_element)
    if kind is not None:
        categorized_elements.append(Element(type=kind, text=str(raw_element)))

# Separate into text and table
text_elements = [item for item in categorized_elements if item.type == "text"]
table_elements = [item for item in categorized_elements if item.type == "table"]

# 4.5 Text and Table summaries
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Prompt: one template serves both text chunks and tables
prompt_text = """You are an assistant tasked with summarizing tables and text. \
Give a concise and essential summary of the table or text. 
Each summary should not longer than 10 sentences. Please keep it as short as possible. \
Table or text chunk: {element} """
prompt = ChatPromptTemplate.from_template(prompt_text)

# 4.6 Summary chain
import getpass
import os

# Ask for the Gemini key only if it has not been provided yet.
if not os.environ.get("GOOGLE_API_KEY"):
  os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ") # use Google Gemini instead of OpenAI

from langchain_google_genai import ChatGoogleGenerativeAI
# use "gemma-3-27b-it" instead of gemini-2.0-flash or 1.5
model = ChatGoogleGenerativeAI(temperature=0, model="gemma-3-27b-it")

# raw element text -> prompt -> model -> plain string
summarize_chain = {"element": lambda x: x} | prompt | model | StrOutputParser()

# Summarize each, one request at a time
text_inputs = [item.text for item in text_elements]
table_inputs = [item.text for item in table_elements]
text_summaries = summarize_chain.batch(text_inputs, {"max_concurrency": 1})
table_summaries = summarize_chain.batch(table_inputs, {"max_concurrency": 1})

# 4.7 Add to retriever
from langchain_core.documents import Document
import uuid

# Store original full text in memory, summaries in vectorstore.
# Before adding summaries to the vectorstore, add document title & page metadata.

_TABLE_CLASS_PATH = "unstructured.documents.elements.Table"
_TEXT_CLASS_PATH = "unstructured.documents.elements.CompositeElement"

# text_elements / table_elements keep only the text of each element, so the raw
# elements are filtered again with the same rule and in the same order. Position
# i of each list then lines up with position i of the summaries. Indexing
# raw_pdf_elements with a position inside text_elements would pick the metadata
# of an unrelated element.
raw_table_elements = [raw for raw in raw_pdf_elements if _TABLE_CLASS_PATH in str(type(raw))]
raw_text_elements = [
    raw for raw in raw_pdf_elements
    if _TABLE_CLASS_PATH not in str(type(raw)) and _TEXT_CLASS_PATH in str(type(raw))
]


def _index_summaries(elements, summaries, raw_elements, element_type):
    """Index summaries in the vector store and their full texts in the docstore.

    Args:
        elements: Element objects whose `.text` is the original content.
        summaries: One summary string per element, in the same order.
        raw_elements: The raw PDF elements the `elements` were built from, in the same order.
        element_type: Label stored in the metadata ("text" or "table").

    Returns:
        The generated doc ids, one per element.

    Raises:
        ValueError: If the three sequences do not have the same length.
    """
    if not (len(elements) == len(summaries) == len(raw_elements)):
        raise ValueError(
            f"Mismatched {element_type} inputs: {len(elements)} elements, "
            f"{len(summaries)} summaries, {len(raw_elements)} raw elements"
        )

    doc_ids = [str(uuid.uuid4()) for _ in elements]
    summary_docs = []
    for summary, doc_id, raw in zip(summaries, doc_ids, raw_elements):
        page = getattr(raw.metadata, "page_number", None)
        summary_docs.append(
            Document(
                page_content=summary,
                metadata={
                    "doc_id": doc_id,
                    "source": file,
                    "page": -1 if page is None else page,  # keep the -1 sentinel when no page is known
                    "element_type": element_type,  # distinguishes tables from text
                },
            )
        )

    if summary_docs:  # nothing to add when the PDF has no element of this kind
        retriever.vectorstore.add_documents(summary_docs)
        retriever.docstore.mset(list(zip(doc_ids, [item.text for item in elements])))
    return doc_ids


text_ids = _index_summaries(text_elements, text_summaries, raw_text_elements, "text")

# Same for tables, using the table elements and table summaries. Re-using the
# text lists here would index the text summaries a second time and leave the
# table summaries unsearchable.
table_ids = _index_summaries(table_elements, table_summaries, raw_table_elements, "table")


# 4.8 Step 3: Generate a response using the retrieved content.
def generate(state: State):
    """Generate the final answer from the documents retrieved for the current question.

    Args:
        state: Graph state; `state["messages"]` is the conversation so far,
            ending with the tool message(s) produced by the retrieval step.

    Returns:
        A dict with `messages` (one AIMessage holding the answer followed by a
        numbered "Sources:" list) and `context` (the Document objects the
        answer was based on). If nothing was retrieved or an error occurs, the
        AIMessage says so and `context` is an empty list.
    """
    try:
        # Only the trailing run of tool messages belongs to the current question.
        # Taking the first tool message of the whole history would answer every
        # later turn of a conversation from the context of the first retrieval.
        recent_tool_messages = []
        for msg in reversed(state["messages"]):
            if msg.type != "tool":
                break
            recent_tool_messages.append(msg)
        recent_tool_messages.reverse()

        retrieved_docs = [
            doc
            for msg in recent_tool_messages
            for doc in (getattr(msg, "artifact", None) or [])
        ]
        if not retrieved_docs:
            return {
                "messages": [AIMessage(content="I couldn't retrieve any relevant information.")],
                "context": []
            }

        # Format context; the numbering matches the source list appended below.
        docs_content = "\n\n".join(
            f"Document {i+1} (Page {doc.metadata.get('page', 'N/A')}):\n{doc.page_content}"
            for i, doc in enumerate(retrieved_docs)
        )

        system_message = SystemMessage(
            content=f"""You are an assistant for question-answering tasks. 
            Use the following retrieved context to answer the question. 
            Cite sources like [1][2] and list them at the end.
            If unsure, say 'I don't know'.\n\n{docs_content}"""
        )

        # Drop tool traffic: keep human/system turns and AI turns without tool calls.
        conversation = [
            msg for msg in state["messages"]
            if msg.type in ("human", "system") or
            (msg.type == "ai" and not msg.tool_calls)
        ]

        response = llm.invoke([system_message] + conversation)

        # Add citations
        answer = response.content
        answer += "\n\nSources:"
        for i, doc in enumerate(retrieved_docs, 1):
            source = doc.metadata.get('source', 'Unknown document')
            page = doc.metadata.get('page', 'N/A')
            answer += f"\n[{i}] {source}, page {page}"

        return {
            "messages": [AIMessage(content=answer)],
            "context": retrieved_docs
        }
    except Exception as e:
        # Surface the failure as a chat reply rather than aborting the graph run.
        return {
            "messages": [AIMessage(content=f"Error generating response: {str(e)}")],
            "context": []
        }


# 4.9 compile the application:
from langgraph.graph import StateGraph
from langgraph.graph import END
from langgraph.prebuilt import tools_condition


# Build the graph from State (messages + context), not from plain MessagesState.
# `generate` returns the retrieved documents under "context", and that key has to
# be part of the graph's declared state to be readable from the results.
graph_builder = StateGraph(State)

graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)
graph_builder.add_node(generate)

graph_builder.set_entry_point("query_or_respond")
# Either finish right away (the model answered directly) or run the retrieval tool.
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END: END, "tools": "tools"},
)
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)

graph = graph_builder.compile()

from IPython.display import Image, display
display(Image(graph.get_graph().draw_mermaid_png()))

# 4.10 Invoking our application, the retrieved Document objects are accessible from the application state.
# Question 1 is about text, question 2 about a table; both answers should involve a ToolMessage.
demo_questions = (
    "What is iPhone net sales in the year of 2020?",
    "tell me about table, which shows net sales by category for 2022, 2021 and 2020?",
)

for input_message in demo_questions:
    initial_state = {"messages": [{"role": "user", "content": input_message}]}
    # stream_mode="values" yields the full state after every step; print the newest message.
    for step in graph.stream(initial_state, stream_mode="values"):
        step["messages"][-1].pretty_print()

# 5. make a Multi_RAG_Agent (after combining the conversation memory and retriever-multi_vector: text, tables)
from langgraph.prebuilt import create_react_agent
# The agent gets an explicit, unique name: the supervisor built later addresses
# its worker agents by name, and its prompt refers to this one as "Multi_RAG_Agent".
Multi_RAG_Agent = create_react_agent(llm, [retrieve], name="Multi_RAG_Agent")

# inspect the graph:
display(Image(Multi_RAG_Agent.get_graph().draw_mermaid_png()))

# give a question that would typically require an iterative sequence of retrieval steps to answer:
config = {"configurable": {"thread_id": "def234"}}

input_message = (
    "What is the Total net sales in the Year 2020?\n\n"
    "Once you get the answer, look up Net sales by category, "
    "which products were included and how much of each share was."
)

for event in Multi_RAG_Agent.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
    config=config,
):
    event["messages"][-1].pretty_print()

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2


Loaded 387 chunks from 114 pages across 1 PDF files.


INFO:__main__:Loaded 387 documents
INFO:__main__:Sample document: page_content='UNITED STATES
SECURITIES AND EXCHANGE COMMISSION
Washington, D.C. 20549
FORM 10-K
(Mark One)
☒    ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934
For the fiscal year ended September 24, 2022
or
☐    TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934
For the transition period from              to             .
Commission File Number: 001-36743
Apple Inc.
(Exact name of Registrant as specified in its charter)
California 94-2404110
(State or other jurisdiction
of incorporation or organization)
(I.R.S. Employer Identification No.)
One Apple Park Way
Cupertino, California 95014
(Address of principal executive offices) (Zip Code)
(408) 996-1010
(Registrant’s telephone number, including area code)
Securities registered pursuant to Section 12(b) of the Act:
Title of each class
Trading 
symbol(s) Name of each exchange on which registered
Comm

Total documents in ChromaDB: 7
# of docs to add: 387
Adding 387 docs


INFO:pikepdf._core:pikepdf C++ to Python logger bridge initialized


Total documents in ChromaDB: 774


INFO:unstructured_inference:Reading PDF for file: D:/4-IntoCode/16_LangChain/AgilProjekt_multiModel/Raw_Data/Apple1/10-K-2022.pdf ...
INFO:unstructured_inference:Loading the Table agent ...
INFO:unstructured_inference:Loading the table structure model ...
INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/resnet18.a1_in1k)
INFO:timm.models._hub:[timm/resnet18.a1_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
INFO:timm.models._builder:Missing keys (fc.weight, fc.bias) discovered while loading pretrained weights. This is expected if model is being adapted.


In [None]:
# "Web_Search_Agent.ipynb" from Hanna
#!pip install gradio
#!pip install openai requests beautifulsoup4
#!pip install langgraph langchain openai gradio beautifulsoup4
#!pip install tavily-python
#!from tavily import TavilyClient
#!pip install langchain tavily-python openai
#!pip install python-dotenv
from tavily import TavilyClient
#!pip install langchain_community
#!pip install --upgrade langchain langchain_community

from langchain.llms import HuggingFaceHub
#!pip install --upgrade huggingface_hub
from huggingface_hub import InferenceClient
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, Tool, AgentType

import os
from dotenv import load_dotenv
import openai
import requests
import gradio as gr
from bs4 import BeautifulSoup
# The upload widget exists only inside Google Colab. Everywhere else the import
# fails, so skip the upload there instead of aborting the whole cell.
try:
    from google.colab import files
except ImportError:
    files = None
    uploaded = {}
else:
    uploaded = files.upload()

# NewsAPI endpoint configuration
NEWS_API_URL = "https://newsapi.org/v2/everything"

class Config:
    """Environment bootstrap for the web-search agent."""

    @staticmethod
    def setup():
        """Load `.env` and export the settings under the names LangChain reads.

        API keys keep their own names; a key that is missing is exported as an
        empty string so that later `os.environ[...]` lookups do not raise
        KeyError. The LangSmith endpoint and project are mirrored to their
        LANGCHAIN_* names only when they are set. Tracing is switched on when
        LANGSMITH_TRACING is "true".
        """
        # Load variables from the .env file
        load_dotenv()

        # Environment variables for LangChain, Tavily, OpenAI and Hugging Face
        for key in (
            "LANGCHAIN_API_KEY",
            "TAVILY_API_KEY",
            "OPENAI_API_KEY",
            "HUGGINGFACEHUB_API_TOKEN",
        ):
            os.environ.setdefault(key, "")

        # Do not export a blank endpoint / project: a variable that is set but
        # empty is not the same as one that is absent.
        for target, source in (
            ("LANGCHAIN_ENDPOINT", "LANGSMITH_ENDPOINT"),
            ("LANGCHAIN_PROJECT", "LANGSMITH_PROJECT"),
        ):
            value = os.getenv(source)
            if value:
                os.environ[target] = value

        if os.getenv("LANGSMITH_TRACING", "false").lower() == "true":
            os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Run the setup first, so that values from .env are loaded before they are read,
# then initialise the client.
Config.setup()

# `or None`: an empty token string is passed on as "no token given".
client = InferenceClient(token=os.getenv("HUGGINGFACEHUB_API_TOKEN") or None)

# Call the model
response = client.text_generation(
    "Hi! Tell me something interesting..",
    model="mistralai/Mistral-7B-Instruct-v0.1",
    temperature=0.7,
    max_new_tokens=100,
)

print(response)

# NOTE(review): this rebinds the global `llm`. Functions defined earlier that look
# up `llm` at call time (query_or_respond, generate) will use this model from now on.
llm = HuggingFaceHub(repo_id="google/flan-t5-base", model_kwargs={"temperature":0, "max_length":512})

print(llm.invoke("Hi! Tell me something interesting.."))

# 🧠 Tavily tool
def tavily_search_tool(query: str) -> str:
    """Search the web with Tavily and return text excerpts of the top hits.

    Args:
        query: Search string.

    Returns:
        Up to three excerpts (at most 500 characters each, taken from the
        paragraph text of each result page), separated by blank lines. A page
        that cannot be fetched contributes a bracketed error note instead.
    """
    tavily = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
    results = tavily.search(query=query, search_depth="basic")

    summaries = []
    for res in results.get('results', [])[:3]:
        try:
            # Without a timeout a single unresponsive site would block the agent indefinitely.
            page = requests.get(res['url'], timeout=10)
            # Treat HTTP errors as failures instead of summarising an error page.
            page.raise_for_status()
            soup = BeautifulSoup(page.content, 'html.parser')
            text = ' '.join([p.get_text() for p in soup.find_all('p')])
            summaries.append(text[:500])
        except Exception as e:
            summaries.append(f"[Fehler {res['url']}: {e}]")
    return '\n\n'.join(summaries)


# ✅ Register the LangChain tool
tools = [
    Tool(
        name="Tavily Web Search",
        func=tavily_search_tool,
        # Single pair of quotes: the doubled quotes used before were a syntax error.
        description="Searches for up-to-date market information on the Internet at the user's request"
    )
]

# 🤖 Agent initialisation
# Zero-shot ReAct agent over the tool list above, driven by `llm`.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# 🎯 Request handler
def agent_handler(user_input):
    """Run the agent on `user_input`; return its answer, or an error text if it raises."""
    try:
        return agent.run(user_input)
    except Exception as err:
        return f"❌ Agent Error: {str(err)}"

# 🎛 Gradio interface
# One text box in, one text box out; every submission is passed to agent_handler.
# NOTE(review): the title and description mention OpenAI / GPT, while the agent
# above is given the HuggingFaceHub model bound to `llm`. Confirm the wording.
gr.Interface(
    fn=agent_handler,
    inputs=gr.Textbox(label="Your query (eg: What news is impacting Google today?))"),
    outputs=gr.Textbox(label="Agent's response"),
    title="🧠 Web search agent based onTavily + OpenAI",
    description="This agent searches for fresh news and information using Tavily and analyzes it using GPT."
).launch()

from langchain.chat_models import init_chat_model
from langgraph.prebuilt import create_react_agent

# The web-search agent must search the web, so it gets the Tavily function, not
# the PDF `retrieve` tool. The Tool name is a plain identifier so it can be sent
# to the chat model as a function name.
web_search_tool = Tool(
    name="tavily_web_search",
    func=tavily_search_tool,
    description="Searches for up-to-date market information on the Internet at the user's request",
)

# A ReAct agent needs a tool-calling chat model. At this point `llm` is a plain
# text-completion model, so a Gemini chat model is created explicitly. The
# explicit name lets the supervisor address this agent.
Web_Search_Agent = create_react_agent(
    init_chat_model("gemini-2.0-flash", model_provider="google_genai"),
    [web_search_tool],
    name="Web_Search_Agent",
)

In [None]:
# "Data_Science_Agent.ipynb" from Hanna 
# Agent analitic plotlib5

#!pip install huggingface_hub
#!pip install langchain huggingface_hub transformers
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
#!pip install huggingface_hub[hf_xet]
#!pip install hf_xet
#!pip install --upgrade transformers
from transformers import GPT2LMHeadModel, GPT2Tokenizer
#!pip install torch
#!pip install plotly


#!pip install --upgrade transformers langchain huggingface_hub torch


#hf_pipeline = pipeline("text-generation", model="distilgpt2", max_new_tokens=100)

#!pip install huggingface_hub[hf_xet]
from langchain.agents import Tool, initialize_agent, AgentType
from langchain.tools import BaseTool
#from langchain.llms import HuggingFace
from transformers import pipeline
import pandas as pd
import glob
import os
from prophet import Prophet
import plotly.graph_objects as go
from typing import List, Tuple
import time
import logging
from dotenv import load_dotenv

from langchain.chat_models import ChatOpenAI
from langchain.agents import Tool, initialize_agent, AgentType
from langchain.tools import BaseTool
from transformers import pipeline

# Load the keys from .env
load_dotenv(".env")

# Setting up the logger
logging.basicConfig(level=logging.INFO)

# CSV forecasting tool built on Prophet
class CSVForecastTool(BaseTool):
    """Forecast closing prices from the CSV files stored for one company."""
    name: str = "CSVForecast"
    description: str = "Predicts time series from CSV files (columns Date и Close)"

    def _run(self, company_name: str) -> List[Tuple[str, go.Figure]]:
        """Fit one Prophet model per CSV file of the company and forecast 90 periods ahead.

        Args:
            company_name: Name of the sub-folder of `parsed/` that holds the CSV files.

        Returns:
            A list of `(text, figure)` tuples, one per processed file. `text`
            summarises the forecast trend and `figure` is the Plotly chart. On
            errors (missing folder, no CSV files, failure while processing a
            file) the tuple holds an error text and `None`. Files without both
            a "Date" and a "Close" column are skipped.
        """
     #   company_folder = f"parsed/{company_name.lower()}/"
        company_folder = f"parsed/{company_name}/"

        if not os.path.exists(company_folder):
            return [(f"Error: Folder for company{company_name} not found.", None)]

        csv_files = glob.glob(os.path.join(company_folder, "*.csv"))
        if not csv_files:
            return [(f"Error: No CSV files for company{company_name}.", None)]

        results = []
        for file in csv_files:
            try:
                df = pd.read_csv(file)
                if "Date" not in df.columns or "Close" not in df.columns:
                    continue

                # Data transformation for Prophet: it expects the columns "ds" and "y"
                df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
                df = df.dropna(subset=["Date", "Close"])
                df = df.rename(columns={"Date": "ds", "Close": "y"})

                # Forecasting with Prophet
                model = Prophet()
                model.fit(df)
                future = model.make_future_dataframe(periods=90)
                forecast = model.predict(future)

                # Build the chart
                fig = go.Figure()
                fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat'], mode='lines', name='Forecast'))
                fig.add_trace(go.Scatter(x=df['ds'], y=df['y'], mode='markers', name='Historical data'))
                # The x-axis title was missing its opening quote, which was a syntax error.
                fig.update_layout(title=f"Forecast fur {company_name}", xaxis_title="Datum", yaxis_title="Closing price")

                # Text generation: compare the last forecast value with the one 90 rows earlier
                change = forecast['yhat'].iloc[-1] - forecast['yhat'].iloc[-91]
                change_pct = (change / forecast['yhat'].iloc[-91]) * 100
                trend = "Height" if change > 0 else "Fall"
                result_text = f"{company_name}: Прогноз на следующий квартал: {trend} ~{abs(change_pct):.2f}%"
                results.append((result_text, fig))

            except Exception as e:
                # Keep going with the remaining files; report this one as failed.
                results.append((f"error while processing {file}: {str(e)}", None))

        return results

# Calculator tool
import ast
import operator

# SECURITY: the query comes from the agent, i.e. ultimately from model output and
# user or web text. eval() on it would execute arbitrary Python. It is replaced
# by a small evaluator that accepts only numeric literals and the arithmetic
# operators listed here.
_CALC_BINARY_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.FloorDiv: operator.floordiv,
    ast.Mod: operator.mod,
    ast.Pow: operator.pow,
}
_CALC_UNARY_OPS = {ast.UAdd: operator.pos, ast.USub: operator.neg}
_CALC_MAX_EXPONENT = 1000  # keeps inputs like 9**9**9 from hanging the kernel


def _evaluate_arithmetic(expression: str):
    """Evaluate a purely arithmetic expression; raise ValueError for anything else."""

    def _eval(node):
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant):
            # bool is a subclass of int; reject it together with strings etc.
            if isinstance(node.value, (int, float)) and not isinstance(node.value, bool):
                return node.value
            raise ValueError(f"Unsupported literal: {node.value!r}")
        if isinstance(node, ast.BinOp) and type(node.op) in _CALC_BINARY_OPS:
            left, right = _eval(node.left), _eval(node.right)
            if isinstance(node.op, ast.Pow) and abs(right) > _CALC_MAX_EXPONENT:
                raise ValueError("Exponent too large")
            return _CALC_BINARY_OPS[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in _CALC_UNARY_OPS:
            return _CALC_UNARY_OPS[type(node.op)](_eval(node.operand))
        raise ValueError(f"Unsupported expression element: {type(node).__name__}")

    return _eval(ast.parse(expression.strip(), mode="eval"))


class CalculatorTool(BaseTool):
    """Evaluate an arithmetic expression given as text."""
    name: str = "Calculator"
    description: str = "Выполняет математические вычисления"

    def _run(self, query: str) -> str:
        """Return the result of `query` as a string, or an error text if it cannot be evaluated.

        Supported: numbers, parentheses, + - * / // % ** and unary +/-.
        """
        try:
            return str(_evaluate_arithmetic(query))
        except Exception as e:
            return f"Ошибка вычислений: {str(e)}"

# Create the tools
csv_forecast_tool = CSVForecastTool()
calculator_tool = CalculatorTool()

# Each BaseTool instance is exposed to the agent through a plain Tool that calls
# its _run method directly.
tools = [
    Tool(name="CSVForecast", func=csv_forecast_tool._run, description="Прогноз по CSV для компании"),
    Tool(name="Calculator", func=calculator_tool._run, description="Математический калькулятор"),
   # Tool(name=csv_forecast_tool.name, func=csv_forecast_tool._run, description=csv_forecast_tool.description)
]

# Initialise the LLM with Hugging Face
# Load the model and the tokenizer
# NOTE(review): `model` and `tokenizer` are not passed to the pipeline below, which
# is given the model name "gpt2" instead. Confirm they are still needed.
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

hf_pipeline = pipeline("text-generation", model="gpt2", max_new_tokens=100)

# Text generation (the same prompt is run twice: once printing only the text, once the raw output)
output = hf_pipeline("Hello, how are you?")
print(output[0]['generated_text'])

output = hf_pipeline("Hello, how are you?")
print(output)
#llm_hf = pipeline("text-generation", model="gpt2")                     #HuggingFace(pipeline=hf_pipeline)

# Wrap the pipeline in a LangChain-compatible llm
# Note: this rebinds the global name `llm`.
llm = HuggingFacePipeline(pipeline=hf_pipeline)
# Agent initialisation
# The parameter that selects the agent type is called `agent` (as in the
# web-search cell), not `agent_type`.
agent = initialize_agent(
    tools=tools,
    llm=llm,                            #llm_hf,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Example agent call
query = "Проанализируй Nvidia и построй график"
response = agent.run(query)

# Second query; its result replaces `response` from the first call.
query = "Проанализируй Nvidia за 6 месяцев, покажи график и спрогнозируй на следующий квартал."
response = agent.run(query)

# Check and display: a list of (text, figure) tuples is unpacked, anything else is printed as is
if isinstance(response, list) and isinstance(response[0], tuple):
    text, fig = response[0]
    print(text)
    if fig:
        fig.show()
else:
    print("Ответ агента:", response)

#if isinstance(response, list):
#    for result in response:
 #       if isinstance(result, tuple):
  #          text, figure = result
  #          print(text)
   #         if figure:
    #            figure.show()  # Отображение графика
    #    else:
    #        print(result)  # Если это просто ошибка или информация
#else:
 #   print("Ответ от агента не является списком. Полученный ответ:", response)

# Example query
query = "Сколько будет 2 + 2?"
response = agent.run(query)
print(response)

# Show the result: a list is walked item by item, anything else is reported as not being a list
if not isinstance(response, list):
    print("Ответ от агента не является списком. Полученный ответ:", response)
else:
    for result in response:
        if not isinstance(result, tuple):
            print(result)  # plain error or info entry
            continue
        text, figure = result
        print(text)
        if figure:
            figure.show()  # display the chart




from langchain.chat_models import init_chat_model
from langgraph.prebuilt import create_react_agent

# The data-science agent gets this cell's own tools (CSVForecast, Calculator),
# not the PDF `retrieve` tool. A ReAct agent needs a tool-calling chat model; at
# this point `llm` is the local gpt2 text-generation pipeline, so a Gemini chat
# model is created explicitly. The explicit name lets the supervisor address
# this agent.
Data_Science_Agent = create_react_agent(
    init_chat_model("gemini-2.0-flash", model_provider="google_genai"),
    tools,
    name="Data_Science_Agent",
)

In [None]:
# Supervisor_Agent from Wenwen
from langgraph_supervisor import create_supervisor
from langchain.chat_models import init_chat_model

# The supervisor only routes: it hands each task to exactly one of the three
# worker agents and does not answer on its own.
supervisor = create_supervisor(
    model=init_chat_model("gemini-2.0-flash", model_provider="google_genai"), # use Google Gemini instead of OpenAI
    agents=[Multi_RAG_Agent, Web_Search_Agent, Data_Science_Agent],
    prompt=(
        # The count must match the list below; it used to say "two".
        "You are a supervisor managing three agents:\n"
        "- Multi_RAG_Agent. Assign tasks related to text and table analysis from PDFs to this agent\n"
        "- Web_Search_Agent. Assign web search tasks to this agent\n"
        "- Data_Science_Agent. Assign data science-related tasks to this agent\n"
        "Assign work to one agent at a time, do not call agents in parallel.\n"
        "Do not do any work yourself."
    ),
    add_handoff_back_messages=True,
    output_mode="full_history",
).compile()

In [None]:
# create a Gradio chat interface using a LangChain chat model, from Wenwen
import gradio as gr
from langchain_core.messages import HumanMessage, AIMessage
from langchain_google_genai import ChatGoogleGenerativeAI
import os


# The compiled supervisor graph answers the chat turns; `respond` below calls it through this name.
model = supervisor

def respond(
    message: str,
    history: list[list[str]],  # Gradio's history format: [[user_msg, ai_msg], ...]
) -> str:
    """
    Respond to user input using the model.

    Args:
        message: The new user message.
        history: Earlier turns as supplied by Gradio. Both layouts are accepted:
            pairs ``[user_msg, ai_msg]`` and message dicts
            ``{"role": ..., "content": ...}``.

    Returns:
        The text of the last message produced by the model.
    """
    # Convert Gradio history to LangChain message format
    chat_history = []
    for turn in history:
        if isinstance(turn, dict):
            # "messages" layout: one dict per message
            role, content = turn.get("role"), turn.get("content")
            if not content:
                continue
            if role == "user":
                chat_history.append(HumanMessage(content=content))
            elif role == "assistant":
                chat_history.append(AIMessage(content=content))
        else:
            # pair layout; either side can be None (e.g. a turn that is not answered yet)
            human_msg, ai_msg = turn
            if human_msg:
                chat_history.append(HumanMessage(content=human_msg))
            if ai_msg:
                chat_history.append(AIMessage(content=ai_msg))

    # Add the new user message
    chat_history.append(HumanMessage(content=message))

    # Get the AI's response
    response = model.invoke({'messages': chat_history}, config={"configurable": {"thread_id": "thread_123"}})

    reply = response["messages"][-1].content
    if isinstance(reply, str):
        return reply
    # Message content can also be a list of parts; join their text so the
    # function always returns a string, as its signature promises.
    return "".join(
        part if isinstance(part, str)
        else str(part.get("text", "")) if isinstance(part, dict)
        else str(part)
        for part in reply
    )

# Chat UI: every submitted message is passed to `respond` together with the chat history.
demo = gr.ChatInterface(
    fn=respond,
    # examples=["Hello", "What's AI?", "Tell me a joke"],
    title="Gemini Chat",
)

# Start the Gradio app.
demo.launch()
