<a href="https://colab.research.google.com/github/Rizwankaka/Agentic-AI-/blob/main/Langchain/assistant_api_sql_db.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Salary Analysis Assistant using OpenAI's Assistants API

## Introduction
This notebook demonstrates the implementation of an intelligent salary analysis system using OpenAI's Assistants API and Thread management. The system is designed to process and analyze employee salary data stored in a SQLite database, providing automated responses to salary-related queries through natural language interaction.

## Goals
- Set up a specialized Assistant for salary data analysis using OpenAI's API
- Implement thread-based conversation management for continuous interaction
- Create and manage database connections for salary data access
- Provide automated responses to salary-related queries using custom helper functions
- Demonstrate real-time function calling and response handling

## Prerequisites
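- OpenAI API key (stored as the Colab secret `OPENAI_API_KEY`)
- Required Python packages: langchain, openai, pandas, SQLAlchemy
- The salary data file `salaries_2023.csv`, which the notebook loads into a local SQLite database, and the `helpers.py` module that defines the query functions and tool schemas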

This notebook serves as a practical example of combining OpenAI's latest Assistants API with database operations to create an intelligent data analysis system.

In [8]:
# Quietly (-q) install or upgrade (-U) the LangChain packages, SQLAlchemy and
# supporting libraries into the kernel's environment. Versions are not pinned,
# so a fresh run may pull newer releases than the ones that produced the
# outputs saved in this notebook.
%pip install -qU pyodbc tabulate langchain langchain-community langchain-core langchain-experimental langchain-openai SQLAlchemy

In [9]:
# Expose the OpenAI key to the SDKs through the environment. The key is read
# from Colab's secret store rather than being written into the notebook.
import os

from google.colab import userdata

os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")

In [10]:
# Upload the local files this notebook depends on. The following cell imports
# `helpers` and reads "/content/salaries_2023.csv", so both helpers.py and
# salaries_2023.csv must be present in the Colab working directory.
# NOTE(review): re-running this cell while helpers.py already exists makes
# Colab save the new copy under a different name (see the recorded output
# "helpers (1).py"), so `import helpers` presumably still loads the older
# file -- remove the old copy before re-uploading.
from google.colab import files
uploaded = files.upload()

Saving helpers.py to helpers (1).py


In [13]:
import json
from langchain.schema import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from openai import OpenAI
import pandas as pd

from sqlalchemy import create_engine
import numpy as np
from sqlalchemy import text


import helpers
from helpers import (
    get_avg_salary_and_female_count_for_division,
    get_total_overtime_pay_for_department,
    get_total_longevity_pay_for_grade,
    get_employee_count_by_gender_in_department,
    get_employees_with_overtime_above,
)


# Model name shared by the LangChain chat wrapper and the Assistants API call.
llm_name = "gpt-3.5-turbo"
model = ChatOpenAI(model=llm_name)

# Native OpenAI client used for the assistant, thread and run requests.
client = OpenAI()

# Where the SQLite database lives and where the source CSV is expected.
database_file_path = "./db/salary.db"
file_url = "/content/salaries_2023.csv"

# A SQLite URL is just "sqlite:///" followed by the path to the database file.
engine = create_engine(f"sqlite:///{database_file_path}")

# Ensure the database's parent directory exists before the table is written.
os.makedirs(os.path.dirname(database_file_path), exist_ok=True)

# Load the CSV, replace missing values with 0, and (re)create the
# "salaries_2023" table from it on every run.
df = pd.read_csv(file_url)
df = df.fillna(value=0)
df.to_sql("salaries_2023", con=engine, if_exists="replace", index=False)

# Step 1: register an assistant whose tools are the definitions exported by
# helpers.tools_sql.
assistant = client.beta.assistants.create(
    model=llm_name,
    name="Salary Assistant",
    description="Assistant to help with salary data",
    tools=helpers.tools_sql,
)

# Step 2: open a conversation thread and show its id.
thread = client.beta.threads.create()
print(thread.id)

# Step 3: post the user's question to the thread.
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="What is the total overtime pay for the Alcohol Beverage Services department?",
)

# Show what the thread contains before the assistant runs.
messages = client.beta.threads.messages.list(thread_id=thread.id)
print(messages)

# Step 4: start a run of the assistant against the thread.
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

import time

# Poll the run until it reaches a terminal state. Whenever the run pauses with
# "requires_action", execute every tool call it asks for locally and hand all
# of the results back in a single submit_tool_outputs request.

# Tool name requested by the assistant -> local Python implementation.
available_functions = {
    "get_avg_salary_and_female_count_for_division": get_avg_salary_and_female_count_for_division,
    "get_total_overtime_pay_for_department": get_total_overtime_pay_for_department,
    "get_total_longevity_pay_for_grade": get_total_longevity_pay_for_grade,
    "get_employee_count_by_gender_in_department": get_employee_count_by_gender_in_department,
    "get_employees_with_overtime_above": get_employees_with_overtime_above,
}

# Statuses after which the run makes no further progress. "incomplete" is
# included so a run that ends that way does not keep the loop polling forever.
terminal_statuses = ["completed", "cancelled", "expired", "failed", "incomplete"]

start_time = time.time()

status = run.status

while status not in terminal_statuses:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)

    # Read the clock once so minutes and seconds come from the same instant.
    elapsed_minutes, elapsed_seconds = divmod(int(time.time() - start_time), 60)
    print(
        "Elapsed time: {} minutes {} seconds".format(elapsed_minutes, elapsed_seconds)
    )
    status = run.status
    print(f"Status: {status}")

    if status == "requires_action":
        tool_outputs = []

        for tool_call in run.required_action.submit_tool_outputs.tool_calls:
            function_name = tool_call.function.name
            function_to_call = available_functions[function_name]
            function_args = json.loads(tool_call.function.arguments)

            # These two helpers are called with only their single named
            # argument; any other keys the model supplied are dropped.
            if function_name == "get_employees_with_overtime_above":
                function_response = function_to_call(amount=function_args.get("amount"))
            elif function_name == "get_total_longevity_pay_for_grade":
                function_response = function_to_call(grade=function_args.get("grade"))
            else:
                function_response = function_to_call(**function_args)

            print(f"Function response: {function_response}")
            print(tool_call.id)

            tool_outputs.append(
                {"tool_call_id": tool_call.id, "output": str(function_response)}
            )

        # Submit once, after every requested tool call has been answered.
        # Submitting inside the loop would send a partial list when the run
        # asks for several calls, and would then retry against a run that has
        # already left "requires_action".
        run = client.beta.threads.runs.submit_tool_outputs(
            thread_id=thread.id,
            run_id=run.id,
            tool_outputs=tool_outputs,
        )

# Re-read the thread once polling has stopped and pretty-print every message
# as indented JSON.
messages = client.beta.threads.messages.list(
    thread_id=thread.id,
)
print(messages.model_dump_json(indent=2))

thread_fu8OeM4OjBTudCIHVat0aDDy
SyncCursorPage[Message](data=[Message(id='msg_lB9sC6w06cGoMsavIrvo9NwK', assistant_id=None, attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='What is the total overtime pay for the Alcohol Beverage Services department?'), type='text')], created_at=1737038943, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='user', run_id=None, status=None, thread_id='thread_fu8OeM4OjBTudCIHVat0aDDy')], object='list', first_id='msg_lB9sC6w06cGoMsavIrvo9NwK', last_id='msg_lB9sC6w06cGoMsavIrvo9NwK', has_more=False)
Elapsed time: 0 minutes 5 seconds
Status: requires_action
Function response: {'total_overtime_pay': 954175.3599999996}
call_GcGv4BwmxCBEnWme8gSWcZwy
Elapsed time: 0 minutes 12 seconds
Status: completed
{
  "data": [
    {
      "id": "msg_RXmZffvzX4WMS7Ej7GyWgQEZ",
      "assistant_id": "asst_oNTnePn3azxV1SjuMilBeu9P",
      "attachments": [],
      "completed_at": null,
      "co