In [2]:
from google.cloud import bigquery
from sqlalchemy import *
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *
import os
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.llms.openai import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.agents import AgentExecutor
import json
from packages.paths import DS_LLM_PATHS

import pyarrow
import streamlit as st

In [3]:
def _load_json(path):
    """Open ``path`` and return its contents parsed as JSON."""
    with open(path) as handle:
        return json.load(handle)


# Project-defined helper object; its attributes are the credential file paths read below.
PATHS = DS_LLM_PATHS()

# OpenAI credentials file: only the API key entry is used.
openai_keys = _load_json(PATHS.OPEN_AI)
OPENAI_API_KEY = openai_keys['OPENAI_API_KEY']

# BigQuery settings file: values stored under the project / dataset / table keys.
bq_keys = _load_json(PATHS.EXTRACT_KEYS)
project_id = bq_keys['project_id_gcp']
dataset_bq = bq_keys['dataset_bq']
table_bq = bq_keys['table_bq']

In [4]:
# Connection URL: BigQuery project and dataset, plus the service-account key file
# passed as the `credentials_path` query parameter.
sqlalchemy_url = (
    f'bigquery://{project_id}/{dataset_bq}'
    f'?credentials_path={PATHS.GCP_KEYS}'
)

In [5]:
# Build the LangChain SQLDatabase wrapper from the BigQuery connection URL.
db = SQLDatabase.from_uri(sqlalchemy_url)

# GPT-4 chat model at temperature 0; the same instance is shared by the toolkit and the agent.
llm = ChatOpenAI(
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
    model_name="gpt-4",
)

toolkit = SQLDatabaseToolkit(llm=llm, db=db)

# NOTE(review): top_k is forwarded to create_sql_agent; presumably it caps the number of
# rows the agent requests per generated query — confirm 1000 is intended.
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    top_k=1000,
    verbose=True,
)

In [6]:
# Sanity check: ask the agent a question that only requires listing the tables.
table_count_question = "How many tables are in this dataset?"
agent_executor.run(table_count_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mmmm_budget_opt_endur, mmm_budget_opt_incrediwear, mmm_endur, mmm_incrediwear[0m
Thought:[32;1m[1;3mThere are four tables in this dataset.
Final Answer: There are 4 tables in this dataset.[0m

[1m> Finished chain.[0m


'There are 4 tables in this dataset.'

In [7]:
# Aggregation question restricted to a single channel value.
amazon_ads_sales_question = (
    "I want to know the net sales for Amazon for only the channel Amazon Ads"
)
agent_executor.run(amazon_ads_sales_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mmmm_budget_opt_endur, mmm_budget_opt_incrediwear, mmm_endur, mmm_incrediwear[0m
Thought:[32;1m[1;3mThe table names suggest that they are related to marketing mix modeling (mmm) for different brands or campaigns. The 'mmm_endur' and 'mmm_incrediwear' tables might contain data for different brands or campaigns. The 'mmm_budget_opt_endur' and 'mmm_budget_opt_incrediwear' tables might contain budget optimization data for the respective brands or campaigns. Since the question is about Amazon's net sales, I should check the schema of the 'mmm_endur' and 'mmm_incrediwear' tables to see if they contain relevant data.
Action: sql_db_schema
Action Input: "mmm_endur, mmm_incrediwear"[0m
Observation: [33;1m[1;3m
CREATE TABLE `mmm_endur` (
	`Username` STRING, 
	`Date` DATE, 
	`Channel` STRING, 
	`Campaign` STRING, 
	`Sales_web` FLOAT64, 
	`Amazon` FLOAT64,

"The total net sales for Amazon for the channel 'Amazon Ads' is approximately 4,832,412.78."

In [8]:
# Budget question; the prompt asks the agent to report the values as percentages.
incrediwear_budget_question = (
    "Which is the recommended budget for incrediwear? "
    "Provide the values as percentages form"
)
agent_executor.run(incrediwear_budget_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mmmm_budget_opt_endur, mmm_budget_opt_incrediwear, mmm_endur, mmm_incrediwear[0m
Thought:[32;1m[1;3mThe table 'mmm_budget_opt_incrediwear' seems to be the most relevant to the question as it likely contains budget recommendations for 'incrediwear'. I should check its schema to understand its structure and the data it contains.
Action: sql_db_schema
Action Input: "mmm_budget_opt_incrediwear"[0m
Observation: [33;1m[1;3m
CREATE TABLE `mmm_budget_opt_incrediwear` (
	`Username` STRING, 
	`Channel` STRING, 
	`Current_budget` FLOAT64, 
	`Recommended_budget` FLOAT64
)

/*
3 rows from mmm_budget_opt_incrediwear table:
Username	Channel	Current_budget	Recommended_budget
incrediwear@dataspeaks.io	Facebook Ads	0.53	0.47
incrediwear@dataspeaks.io	Google Ads	0.12	0.14
incrediwear@dataspeaks.io	Bing Ads	0.01	0.01
*/[0m
Thought:[32;1m[1;3mThe 'Recommended_bu

"The recommended budget for 'incrediwear' is as follows: Facebook Ads: 47%, Amazon Ads: 39%, Google Ads: 14%, Bing Ads: 1%."