In [1]:
from google.cloud import bigquery
from sqlalchemy import *
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *
import os
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.llms.openai import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.agents import AgentExecutor
import json
from packages.paths import DS_LLM_PATHS

import pyarrow

In [2]:
# Resolve the project's credential / config file locations.
PATHS = DS_LLM_PATHS()

# OpenAI credentials: the JSON file must provide an 'OPENAI_API_KEY' entry.
with open(PATHS.OPEN_AI) as openai_file:
    openai_keys = json.load(openai_file)
OPENAI_API_KEY = openai_keys['OPENAI_API_KEY']

# BigQuery settings read from the extract-keys JSON file
# (entries 'project_id_gcp', 'dataset_bq' and 'table_bq').
with open(PATHS.EXTRACT_KEYS) as bq_file:
    bq_keys = json.load(bq_file)
project_id, dataset_bq, table_bq = (
    bq_keys[key] for key in ('project_id_gcp', 'dataset_bq', 'table_bq')
)

In [3]:
# SQLAlchemy connection URL for the BigQuery dataset; the service-account
# key file is passed through the `credentials_path` query parameter.
sqlalchemy_url = (
    f'bigquery://{project_id}/{dataset_bq}'
    f'?credentials_path={PATHS.GCP_KEYS}'
)

In [4]:
# Wrap the BigQuery dataset in LangChain's SQL database abstraction.
db = SQLDatabase.from_uri(sqlalchemy_url)

# GPT-4 chat model with temperature=0, shared by the toolkit and the agent.
llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-4",
    openai_api_key=OPENAI_API_KEY,
)

# Toolkit exposing the database to the agent.
toolkit = SQLDatabaseToolkit(llm=llm, db=db)

# Build the SQL agent. verbose=True prints the Action / Observation / Thought
# trace for every run.
# NOTE(review): top_k=1000 is presumably the row limit the agent prompt asks
# generated queries to respect -- confirm against the installed LangChain version.
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    top_k=1000,
    verbose=True,
)

In [22]:
# Smoke test: ask the agent a simple question about the connected dataset.
agent_executor.run("How many tables are in this dataset?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mmmm_budget_opt_endur, mmm_budget_opt_incrediwear, mmm_endur, mmm_incrediwear[0m
Thought:[32;1m[1;3mThere are four tables in this dataset.
Final Answer: There are 4 tables in this dataset.[0m

[1m> Finished chain.[0m


'There are 4 tables in this dataset.'

In [5]:
# Ask the agent for net sales restricted to a single channel (Amazon Ads).
agent_executor.run("I want to know the net sales for Amazon for only the channel Amazon Ads")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mmmm_budget_opt_endur, mmm_budget_opt_incrediwear, mmm_endur, mmm_incrediwear[0m
Thought:[32;1m[1;3mThe table names suggest that they are related to marketing budget optimization for two brands: Endur and Incrediwear. There is no table that seems to directly relate to Amazon sales data. However, it's possible that one of these tables contains the information we need. I should check the schema of these tables to see if they contain relevant columns.
Action: sql_db_schema
Action Input: "mmm_budget_opt_endur, mmm_budget_opt_incrediwear, mmm_endur, mmm_incrediwear"[0m
Observation: [33;1m[1;3m
CREATE TABLE `mmm_budget_opt_endur` (
	`Username` STRING, 
	`Channel` STRING, 
	`Current_budget` FLOAT64, 
	`Recommended_budget` FLOAT64
)

/*
3 rows from mmm_budget_opt_endur table:
Username	Channel	Current_budget	Recommended_budget
endur@dataspeaks.io	Facebo

"I'm sorry, but the database does not contain the specific net sales data for Amazon Ads."

In [7]:
# Ask the agent for incrediwear's recommended budget, with the prompt
# instructing it to report percentages (e.g. 40%) rather than factors (0.4).
agent_executor.run("Which is the recommended budget for incrediwear? Provide the output in percetual form, not as factors like '0.4'. I want the result like 40%")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mmmm_budget_opt_endur, mmm_budget_opt_incrediwear, mmm_endur, mmm_incrediwear[0m
Thought:[32;1m[1;3mThe table 'mmm_budget_opt_incrediwear' seems to be the most relevant to the question as it likely contains budget optimization data for 'incrediwear'. I should check its schema to understand its structure and the data it contains.
Action: sql_db_schema
Action Input: "mmm_budget_opt_incrediwear"[0m
Observation: [33;1m[1;3m
CREATE TABLE `mmm_budget_opt_incrediwear` (
	`Username` STRING, 
	`Channel` STRING, 
	`Current_budget` FLOAT64, 
	`Recommended_budget` FLOAT64
)

/*
3 rows from mmm_budget_opt_incrediwear table:
Username	Channel	Current_budget	Recommended_budget
incrediwear@dataspeaks.io	Facebook Ads	0.53	0.47
incrediwear@dataspeaks.io	Google Ads	0.12	0.14
incrediwear@dataspeaks.io	Bing Ads	0.01	0.01
*/[0m
Thought:[32;1m[1;3mThe 'Recommended_

'The recommended budget for incrediwear is as follows: 47% for Facebook Ads, 14% for Google Ads, 1% for Bing Ads, and 39% for Amazon Ads.'