In [1]:
# %pip install langgraph
# %pip install langchain
# %pip install langchain-openai
# %pip install langchain-cohere
# %pip install pinecone-client
# %pip install langchain-pinecone
# %pip install python-dotenv
# %pip install pandas 
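# %pip install langchain_core
# %pip install langchain-community  # provides langchain_community.tools.tavily_search (imported below)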
# %pip install langgraph-checkpoint-sqlite

In [2]:
from langgraph.graph import StateGraph, END
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
from langchain_core.tools import tool
from langchain_cohere import ChatCohere
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.tools.tavily_search import TavilySearchResults
from langgraph.checkpoint.sqlite import SqliteSaver
from typing import TypedDict, Annotated
from dotenv import load_dotenv
from pinecone import Pinecone
import operator
import pandas as pd
import numpy as np
import os

import uuid
from contextlib import ExitStack

  from tqdm.autonotebook import tqdm


In [3]:
# Load variables from a local .env file into the process environment
# (presumably the OpenAI / Tavily API keys -- confirm against your .env).
# This must run before the clients below are constructed.
load_dotenv()

# Chat model shared by both graph nodes; the Cohere alternative is kept for reference.
# model = ChatCohere(model="command-r-plus")
model = ChatOpenAI(model="gpt-4o-mini")

# Web-search tool; it is handed to the agent but is not bound to the model in this notebook.
search_tool = TavilySearchResults(max_results=4) 

# SqliteSaver.from_conn_string is entered as a context manager; the ExitStack keeps
# it open across cells. Nothing visible here closes it -- call stack.close() when
# finished. ":memory:" means checkpoints are not persisted to disk.
stack = ExitStack()
memory = stack.enter_context(SqliteSaver.from_conn_string(":memory:"))

In [4]:
class AnalysisState(TypedDict):
    """State passed between the nodes of the analysis graph."""
    # Message history. LangGraph uses the second Annotated argument as the
    # reducer for this key, so with operator.add the list a node returns is
    # concatenated onto the existing list rather than replacing it.
    messages: Annotated[list[AnyMessage], operator.add]

In [5]:
# System prompt for the analysis step.
# The worked example must agree with its own input: the model copies the style
# AND the reasoning of a few-shot example, so a wrong example (e.g. naming the
# wrong region as the top seller) produces wrong analyses.
analysis_prompt = """You are an expert in data analysis. \
You are given a dataset and you must analyze it to identify patterns and anomalies.

Example input:
data = { "sales": [120, 150, 80, 200, 90], "customer_feedback": ["great", "poor", "great", "medium", "poor"], "region": ["north", "south", "north", "east", "west"] }

Example output:
Sales fluctuate rather than follow a steady trend: they drop to 80 in the third entry, \
peak at 200 in the fourth entry and fall back to 90 in the fifth. \
Customer feedback is mixed, with two "great", one "medium" and two "poor" ratings. \
The highest single sales figure (200) comes from the east region.
"""

# System prompt for the report step.
# The sentinel word Insufficient is what the graph's conditional edge looks for
# to decide whether to loop back to the analysis step, so it must be returned
# on its own.
report_prompt = """You are an expert in writing reports. \
You are given the results of a data analysis and must write a report on it.

Return the report as a markdown file.

If there is insufficient data to draw conclusions, ONLY RETURN the single word Insufficient, without quotes or punctuation.

DO NOT return anything else.
ANY OTHER OUTPUT WILL BE PENALIZED.
"""


In [28]:
class DataAnalysisAgent:
    """Two-step LangGraph workflow: analyse a dataset, then write a report on it.

    Graph layout::

        analysis -> report -> END
                      |
                      +-- (report is "Insufficient") --> analysis

    Args:
        model: Chat model exposing ``invoke(messages)``; used by both nodes.
        tools: Iterable of tools, indexed by ``name`` in ``self.tools``. They
            are stored only; no node in this class binds or calls them.
        checkpointer: LangGraph checkpointer passed to ``graph.compile``.
        analysis_prompt: Optional system prompt prepended in the analysis node.
        report_prompt: Optional system prompt prepended in the report node.

    Note:
        The retry edge has no counter of its own. If the model keeps answering
        "Insufficient", the loop is presumably stopped only by LangGraph's
        recursion limit -- confirm the limit configured for your run.
    """

    def __init__(self, model, tools, checkpointer, analysis_prompt="", report_prompt=""):
        self.analysis_prompt = analysis_prompt
        self.report_prompt = report_prompt
        self.model = model
        self.tools = {t.name: t for t in tools}

        graph = StateGraph(AnalysisState)
        graph.add_node("analysis", self.analysis)
        graph.add_node("report", self.report)
        graph.add_edge("analysis", "report")
        # `insufficient` returns a bool; the mapping turns it into the next node.
        graph.add_conditional_edges("report", self.insufficient, {True: "analysis", False: END})
        graph.set_entry_point("analysis")

        self.graph = graph.compile(checkpointer=checkpointer)

    def analysis(self, state: AnalysisState):
        """Run the model over the message history with the analysis prompt.

        Returns:
            A state update ``{'messages': [reply]}``.
        """
        messages = state['messages']
        if self.analysis_prompt:
            messages = [SystemMessage(content=self.analysis_prompt)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

    def report(self, state: AnalysisState):
        """Run the model over the message history with the report prompt.

        Returns:
            A state update ``{'messages': [reply]}``. Returning the update
            (rather than a bool) is what makes the report part of the state,
            so that ``insufficient`` inspects the report and not the analysis.
        """
        messages = state['messages']
        if self.report_prompt:
            messages = [SystemMessage(content=self.report_prompt)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

    def insufficient(self, state: AnalysisState):
        """Return True when the latest message is the "Insufficient" sentinel.

        Surrounding whitespace and double quotes are ignored, because the
        model may echo the sentinel with either. Non-string content never
        matches.
        """
        content = state['messages'][-1].content
        if not isinstance(content, str):
            return False
        return content.strip().strip('"').strip() == "Insufficient"

In [29]:
# Sample dataset, passed to the model verbatim as the text of a single human message.
data = """{ "sales": [120, 150, 80, 200, 90], "customer_feedback": ["great", "poor", "great", "medium", "poor"], "region": ["north", "south", "north", "east", "west"] }"""
user_input = [HumanMessage(content=data)]

# Build the graph once. `analysis_agent` is kept as an alias so that any cell
# still using that name keeps working without compiling a second, identical graph.
agent = DataAnalysisAgent(model, [search_tool], memory, analysis_prompt, report_prompt)
analysis_agent = agent

# The checkpointer stores state per thread_id; a fresh UUID starts a new conversation.
user_uuid = str(uuid.uuid4())
thread = {"configurable": {"thread_id": user_uuid}}

In [30]:
# Total number of tries before giving up on the run.
MAX_ATTEMPTS = 3

for attempt in range(1, MAX_ATTEMPTS + 1):
    try:
        # Each streamed event maps a node name to the state update it produced.
        for event in agent.graph.stream({"messages": user_input}, thread):
            for update in event.values():
                print(update['messages'])
        break
    except Exception as e:
        # Deliberately broad: any failure triggers a retry. KeyboardInterrupt is
        # not an Exception subclass, so interrupting the cell still stops it.
        print('Error: ', e)
        # Retry on a fresh thread so the next attempt does not continue from
        # whatever the failed run left in the checkpointer.
        thread = {"configurable": {"thread_id": str(uuid.uuid4())}}
else:
    # Reached only when no attempt hit `break`, i.e. every attempt failed.
    print(f"Failed to run the graph {MAX_ATTEMPTS} times")

[AIMessage(content='The sales data shows a fluctuating trend. Starting at 120, there is an increase to 150, followed by a significant drop to 80 in the third quarter. Sales then rebound dramatically to 200 in the fourth quarter, before dropping again to 90. This indicates some volatility in sales performance, particularly in the third quarter.\n\nCustomer feedback varies across the dataset, with two instances of "great" feedback, one "medium," and two "poor" responses. This suggests a mixed sentiment among customers, implying that while some customers are satisfied, there are notable concerns that could impact overall customer satisfaction.\n\nIn terms of regional performance, the south region stands out as having the highest sales figure at 150. The north region also shows a strong presence with sales figures of 120 and 80, while the east and west regions have more variable results. Overall, while there are positive aspects in sales and customer feedback, the patterns indicate areas t

KeyboardInterrupt: 