In [1]:
from pydantic import BaseModel, ConfigDict

# Pydantic base class whose configuration permits fields of arbitrary (non-pydantic) types.
# NOTE(review): nothing visible in this notebook subclasses it — confirm it is still needed.
class CustomBaseModel(BaseModel):
    # arbitrary_types_allowed=True is the only setting overridden here.
    model_config = ConfigDict(arbitrary_types_allowed=True)

In [2]:
import os
import re

import pandas as pd
import pymysql
from neo4j import GraphDatabase
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.chains import MultiPromptChain
from langchain.prompts import ChatPromptTemplate

In [3]:
# Read the Groq API key from the environment so the secret never lives in the
# notebook. The key that used to be hard-coded on this line has been exposed
# and should be revoked.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    # Warn instead of raising so the remaining definition cells can still be run.
    print("Warning: GROQ_API_KEY is not set; Groq requests will fail until it is exported.")

In [4]:
# Chat model client shared by every LLM call in this notebook
# (Groq model id "llama3-8b-8192", authenticated with groq_api_key).
llm = ChatGroq(
    model_name="llama3-8b-8192",
    groq_api_key=groq_api_key,
)

In [5]:
def load_ticker_mapping(csv_path="../pre-analysis/datasets/stock_name_ticker.csv"):
    """Load the company-name -> ticker lookup table from a CSV file.

    The CSV must contain the columns ``company_name`` and ``ticker``.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        dict mapping the lower-cased, whitespace-stripped company name to its
        whitespace-stripped ticker. Rows missing either value are skipped.
        An empty dict is returned (and the error printed) when the file cannot
        be read or lacks the expected columns.
    """
    try:
        df = pd.read_csv(csv_path)
        # A row without a name or a ticker can never be looked up; keeping it
        # would put NaN keys/values into the mapping.
        df = df.dropna(subset=["company_name", "ticker"])
        # Strip stray whitespace so " Alphabet Inc. " and "Alphabet Inc." match.
        names = df["company_name"].astype(str).str.strip().str.lower()
        tickers = df["ticker"].astype(str).str.strip()
        return dict(zip(names, tickers))
    except Exception as e:
        # Best effort: the notebook keeps running with an empty mapping.
        print(f"Error loading ticker mapping: {e}")
        return {}

In [6]:
# Build the lookup once with the default CSV path; get_ticker_from_name reads this module-level dict.
ticker_mapping = load_ticker_mapping()

In [7]:
def get_ticker_from_name(company_name):
    """Map a company name to its ticker using the module-level ``ticker_mapping``.

    The lookup ignores case and surrounding whitespace.

    Args:
        company_name: Company name to look up.

    Returns:
        The ticker stored for that name, or None when the name is unknown or
        is not a string (e.g. None).
    """
    # Non-string input previously raised AttributeError on .lower().
    if not isinstance(company_name, str):
        return None
    return ticker_mapping.get(company_name.strip().lower())

In [8]:
# Open the MySQL connection and the Neo4j driver used by the query helpers.
# Both names are pre-set to None so that a failed connection leaves them
# defined (as None) instead of undefined.
# Connection settings can be overridden through environment variables; the
# fallbacks are the local-development values this notebook has always used.
mysql_conn = None
neo4j_driver = None
try:
    mysql_conn = pymysql.connect(
        host=os.environ.get("MYSQL_HOST", "localhost"),
        user=os.environ.get("MYSQL_USER", "root"),
        password=os.environ.get("MYSQL_PASSWORD", "root"),
        database=os.environ.get("MYSQL_DATABASE", "Stock_Agent_RAG"),
    )
    neo4j_driver = GraphDatabase.driver(
        os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
        auth=(
            os.environ.get("NEO4J_USER", "neo4j"),
            os.environ.get("NEO4J_PASSWORD", "password"),
        ),
    )
    # Creating the driver does not by itself prove the server is reachable;
    # check explicitly so the success message below is truthful.
    neo4j_driver.verify_connectivity()
    print("Connected to MySQL and Neo4j databases successfully.")
except Exception as e:
    # Best effort: report the problem and let the remaining cells be defined.
    print(f"Database connection error: {e}")

Connected to MySQL and Neo4j databases successfully.


In [9]:
def execute_mysql_query(query, params=None):
    """Run a SQL statement on the module-level ``mysql_conn`` and fetch all rows.

    NOTE(review): run_pipeline passes LLM-generated SQL here verbatim. Use
    ``params`` for any value that comes from user input, and consider running
    this connection under a read-only database account.

    Args:
        query: SQL text to execute.
        params: Optional sequence or mapping of values bound to the
            placeholders in ``query`` (forwarded to ``cursor.execute``).

    Returns:
        The rows returned by ``cursor.fetchall()``, or a string starting with
        ``"MySQL error:"`` when the query is empty or the database reports an
        error.
    """
    # Do not send an empty or non-string statement to the server.
    if not isinstance(query, str) or not query.strip():
        return "MySQL error: empty query"
    try:
        with mysql_conn.cursor() as cursor:
            cursor.execute(query, params)
            return cursor.fetchall()
    except pymysql.MySQLError as e:
        # Errors are returned as text (not raised) so callers can show them.
        return f"MySQL error: {e}"

In [10]:
def execute_neo4j_query(query, parameters=None):
    """Run a Cypher statement through the module-level ``neo4j_driver``.

    NOTE(review): run_pipeline passes LLM-generated Cypher here verbatim. Use
    ``parameters`` for any value that comes from user input.

    Args:
        query: Cypher text to execute.
        parameters: Optional dict of query parameters forwarded to
            ``session.run``.

    Returns:
        A list with ``record.data()`` for every returned record, or a string
        starting with ``"Neo4j error:"`` when the query is empty or anything
        fails while running it.
    """
    # Do not open a session for an empty or non-string statement.
    if not isinstance(query, str) or not query.strip():
        return "Neo4j error: empty query"
    try:
        with neo4j_driver.session() as session:
            result = session.run(query, parameters)
            # Materialize inside the session; the result is consumed lazily.
            return [record.data() for record in result]
    except Exception as e:
        # Errors are returned as text (not raised) so callers can show them.
        return f"Neo4j error: {e}"

In [11]:
# Conversation chain that keeps context across turns: the buffer memory fills
# {chat_history} in the prompt and the current user message fills {input}.
_CONVERSATION_TEMPLATE = """
Assistant is a financial data assistant with access to stock data and sentiment analysis.

Chat history:
{chat_history}

User's question: {input}

Assistant's response:
"""
conversation_prompt = ChatPromptTemplate.from_template(_CONVERSATION_TEMPLATE)
conversation_memory = ConversationBufferMemory(memory_key="chat_history")
conversation_chain = ConversationChain(
    prompt=conversation_prompt,
    memory=conversation_memory,
    llm=llm,
)

  conversation_memory = ConversationBufferMemory(memory_key="chat_history")
  conversation_chain = ConversationChain(


In [12]:
# Custom function to rephrase queries
def rephrase_query(user_query):
    """Ask the module-level ``llm`` to rephrase a user query for clarity.

    Args:
        user_query: The question as typed by the user.

    Returns:
        The model's reply text with surrounding whitespace removed.
    """
    prompt = f"Rephrase the following query for clarity:\n\n'{user_query}'"
    # `llm.predict` is deprecated on LangChain chat models (it emits a
    # deprecation warning); `invoke` is the replacement and returns a message
    # object whose text is in `.content`.
    reply = llm.invoke(prompt)
    text = getattr(reply, "content", reply)
    return str(text).strip()

In [13]:
# Prompts that ask the LLM to translate a natural-language request (substituted
# for {input}) into a SQL query or a Cypher query respectively.
_SQL_REQUEST_TEXT = "Translate the following natural language request into an SQL query:\n\n'{input}'"
_CYPHER_REQUEST_TEXT = "Translate the following natural language request into a Cypher query:\n\n'{input}'"

mysql_prompt_template = ChatPromptTemplate.from_template(_SQL_REQUEST_TEXT)
neo4j_prompt_template = ChatPromptTemplate.from_template(_CYPHER_REQUEST_TEXT)

In [14]:
# Function to select appropriate prompt based on input
def select_prompt_template(inputs):
    """Pick the query-generation prompt for a request.

    Args:
        inputs: Mapping whose ``"input"`` entry holds the request text.

    Returns:
        ``neo4j_prompt_template`` when the text mentions "sentiment" or
        "analysis" (case-insensitive), otherwise ``mysql_prompt_template``.
    """
    text = inputs["input"].lower()
    wants_graph = any(keyword in text for keyword in ("sentiment", "analysis"))
    return neo4j_prompt_template if wants_graph else mysql_prompt_template

In [15]:
# Custom routing function to select the prompt template
def _extract_query_text(llm_text):
    """Return the body of the first ``` fenced block in llm_text, else the stripped text."""
    # The optional group consumes a language tag line such as "sql\n" after
    # the opening fence so it does not end up inside the query.
    fenced = re.search(r"```(?:[A-Za-z]+[ \t]*\n)?(.*?)```", llm_text, re.DOTALL)
    return (fenced.group(1) if fenced else llm_text).strip()


def select_and_generate_query(input_text):
    """Select the SQL or Cypher prompt for ``input_text`` and generate a query.

    Routing is delegated to ``select_prompt_template`` so the keyword rule
    lives in one place.

    Args:
        input_text: Natural-language request to translate.

    Returns:
        The query text produced by the module-level ``llm``. If the reply
        wraps the query in a ``` code fence only the fenced content is
        returned; otherwise the whole reply, stripped of surrounding
        whitespace.
    """
    prompt_template = select_prompt_template({"input": input_text})
    # Send the prompt as chat messages rather than flattening it to a string,
    # and use `invoke` because `predict` is deprecated on chat models.
    messages = prompt_template.format_messages(input=input_text)
    reply = llm.invoke(messages)
    # Callers execute the returned text directly, so drop markdown fences and
    # any commentary around them.
    return _extract_query_text(str(getattr(reply, "content", reply)))

In [16]:
def transform_query(user_query, structured_query, base_limit=30, expanded_limit=100):
    """Raise the row limit of a query when the user asks for more data.

    Args:
        user_query: The user's message; only acted on when it contains
            "more details" or "more data" (case-insensitive).
        structured_query: Query text that may contain ``LIMIT <base_limit>``.
        base_limit: Limit value to look for (default 30).
        expanded_limit: Limit value to substitute (default 100).

    Returns:
        ``structured_query`` with every ``LIMIT <base_limit>`` replaced by
        ``LIMIT <expanded_limit>``, or unchanged when the user did not ask for
        more or the query has no such limit.
    """
    lowered = user_query.lower()
    if "more details" in lowered or "more data" in lowered:
        # Match the whole number only: a plain substring replace would turn
        # "LIMIT 300" into "LIMIT 1000".
        pattern = rf"\bLIMIT {int(base_limit)}(?!\d)"
        structured_query = re.sub(pattern, f"LIMIT {int(expanded_limit)}", structured_query)
    return structured_query

In [17]:
# Query reconstruction: modifies query based on follow-up questions
def reconstruct_query(user_query, last_query, last_result):
    """Derive the query for a follow-up question from the previous turn.

    Args:
        user_query: The follow-up message from the user.
        last_query: The query text executed in the previous turn.
        last_result: The previous turn's result; only a list or tuple of dict
            records is inspected for ``"date"`` values.

    Returns:
        * a new LLM-generated query when the user asks about "specific dates"
          and the previous records contain dates;
        * ``transform_query(user_query, last_query)`` when the user asks for
          "more details" or "more data";
        * ``last_query`` unchanged otherwise.
    """
    lowered = user_query.lower()
    if "specific dates" in lowered and isinstance(last_result, (list, tuple)):
        # Only dict records can carry a "date" key; checking `"date" in record`
        # on a string or tuple row would be a containment test instead.
        dates = [
            record["date"]
            for record in last_result
            if isinstance(record, dict) and "date" in record
        ]
        if dates:
            # NOTE(review): the prompt does not include last_query, so the model
            # has no table/label context — consider adding it.
            prompt = (
                f"Based on the following dates {dates}, create a query that retrieves information"
                f" for these specific dates from the database."
            )
            # `predict` is deprecated on LangChain chat models; use `invoke`.
            reply = llm.invoke(prompt)
            return str(getattr(reply, "content", reply)).strip()
    elif "more details" in lowered or "more data" in lowered:
        # Same trigger phrases that transform_query itself recognises.
        return transform_query(user_query, last_query)
    return last_query  # Default to the last query if no specific pattern is found


In [18]:
# Module-level context carried between run_pipeline calls; both slots start empty (None).
custom_memory = dict.fromkeys(("company_name", "ticker"))

In [19]:
def update_custom_memory(company_name=None, ticker=None):
    """Store the given company name and/or ticker in ``custom_memory``.

    Falsy arguments (None, empty string) leave the corresponding entry as is.
    """
    for slot, value in (("company_name", company_name), ("ticker", ticker)):
        if value:
            custom_memory[slot] = value

In [20]:
# Revised run_pipeline function with follow-up question handling
def run_pipeline(user_query, company_name=None):
    """Answer a user question by generating, running and narrating a data query.

    Steps: resolve the company (explicit argument, else the one remembered in
    ``custom_memory``), build a SQL or Cypher query (reusing the previous one
    for recognised follow-ups), execute it, record the outcome, and let
    ``conversation_chain`` phrase the final answer.

    Args:
        user_query: The user's question.
        company_name: Company the question is about; when omitted, the name
            remembered from an earlier call is used.

    Returns:
        The text returned by ``conversation_chain.predict``, or an explanatory
        message when no company is known or its ticker cannot be found.
    """
    # Check for company_name; if missing, use custom memory
    if not company_name:
        company_name = custom_memory.get("company_name")
        if not company_name:
            return "Please specify a company name."

    # NOTE(review): the ticker only validates the company and is remembered;
    # it is not passed into query generation — confirm whether it should be.
    ticker = get_ticker_from_name(company_name)
    if not ticker:
        return f"Ticker not found for company '{company_name}'. Please check the company name."

    update_custom_memory(company_name=company_name, ticker=ticker)

    # The previous query/result are kept in custom_memory. They cannot be read
    # back from conversation_memory: load_memory_variables() only returns the
    # chat history, so those lookups were always None and the follow-up branch
    # below could never run.
    last_query = custom_memory.get("last_query")
    last_result = custom_memory.get("last_result")

    # Match "it" as a whole word; a substring test also fires on words such as
    # "credit" or "with".
    lowered = user_query.lower()
    words = {token.strip(".,!?;:'\"()") for token in lowered.split()}
    is_follow_up = "follow up" in lowered or "it" in words

    structured_query = None
    if last_query and last_result and is_follow_up:
        candidate = reconstruct_query(user_query, last_query, last_result)
        # An unchanged query means no refinement was recognised (e.g. "what is
        # the sentiment around it?"); generate a fresh query for that question
        # instead of silently re-running the previous one.
        if candidate and candidate != last_query:
            structured_query = candidate
    if structured_query is None:
        refined_query = rephrase_query(user_query)
        structured_query = select_and_generate_query(refined_query)

    # Determine the execution context based on the structured query
    upper_query = structured_query.upper()
    if "SELECT" in upper_query:
        result = execute_mysql_query(structured_query)  # Execute SQL query in MySQL
    elif "MATCH" in upper_query:
        result = execute_neo4j_query(structured_query)  # Execute Cypher query in Neo4j
    else:
        result = "Unable to determine the appropriate data source."

    # Remember this turn so the next call can treat its question as a follow-up.
    custom_memory["last_query"] = structured_query
    custom_memory["last_result"] = result

    # Save exactly one output key, as text: the buffer memory stores a single
    # message per side, and query results may be tuples/lists rather than str.
    conversation_memory.save_context({"input": user_query}, {"output": str(result)})

    # NOTE(review): the chain presumably records this input/response pair in
    # the same memory again, so the question appears twice in the history.
    response = conversation_chain.predict(input=user_query)
    return response

In [21]:
# First turn: the company name is passed explicitly so run_pipeline can resolve
# its ticker and remember the company for later turns.
question = "What is the recent performance of Google?"
company_name="Alphabet Inc."
response = run_pipeline(question, company_name)
print(response)

  rephrased_query = llm.predict(prompt)


I'm happy to help! However, it looks like I encountered an issue with processing your question. It seems like there was a miscommunication with my natural language processing system. Let me try again!

To answer your question, I can check the recent performance of Google's stock. As of my knowledge cutoff, the stock has been performing well. Here's a brief summary:

* Stock price: Google's stock price has been steadily increasing over the past quarter, with a current price of around $2,350 per share.
* Sentiment analysis: The overall sentiment towards Google's stock is positive, with many analysts and investors viewing the company as a strong performer in the tech industry.
* Recent news: Google has recently announced several new products and services, including a new smartwatch and a major update to its Google Lens feature. These developments have been generally well-received by the market.

Please keep in mind that stock prices can fluctuate rapidly, and this information may not refl

In [22]:
# Follow-up turn: no company_name argument is given, so run_pipeline falls back
# to the company stored in custom_memory by the previous call.
follow_up_question = "What is the sentiment around it?"
follow_up_response = run_pipeline(follow_up_question)  # company_name is taken from custom_memory
print(follow_up_response)



I'm having some technical difficulties again! It looks like I encountered another issue with processing your question. Don't worry, I'll try to resolve the issue and provide the answer you're looking for.

To answer your question, I can analyze the sentiment around Google's stock. As of my knowledge cutoff, the sentiment is overwhelmingly positive. Here's a breakdown of the sentiment analysis:

* News sentiment: The news sentiment around Google is mostly positive, with many news articles and analysts praising the company's recent product launches, financial performance, and strategic moves.
* Social media sentiment: On social media, the sentiment towards Google is also predominantly positive, with many users and followers expressing enthusiasm and support for the company's products and services.
* Analyst sentiment: The sentiment among financial analysts is also positive, with many analysts maintaining a "buy" or "outperform" rating on Google's stock.

Please note that sentiment analys

In [23]:
# Another turn without company_name; the remembered company is used again.
# Note: this rebinds follow_up_question / follow_up_response from the previous cell.
follow_up_question = "Can you show me the closing prices for Google last recorded?"
follow_up_response = run_pipeline(follow_up_question)
print(follow_up_response)



I apologize for the errors earlier. It seems like I'm having some technical difficulties with processing your questions. Don't worry, I'll try to resolve the issue and provide the answer you're looking for.

To answer your question, I can retrieve the closing prices for Google's stock. As of my knowledge cutoff, the closing prices for Google's stock are available in my database. Here's the information:

* Closing prices: The closing prices for Google's stock for the last recorded period are as follows:
	+ Date | Closing Price
	+ --- | ---
	+ March 10, 2023 | $2,345.50
	+ March 9, 2023 | $2,342.00
	+ March 8, 2023 | $2,340.50
	+ ...
* Note: The closing prices are subject to change and may not reflect the current market situation. If you'd like more up-to-date information or specific data, feel free to ask!

I hope this information is helpful. Please let me know if you have any further questions or if there's anything else I can assist you with.
