In [None]:
# Learning Box - Google Cloud - Colab
# Copyright 2023 Boris-Wilfried
#
# This Google Colab notebook is part of my "Learning Box" github repository, created for my learning purposes. 
# It is not designed for production use. 
# This notebook may serve as an inspiration or a starting point for your own explorations.
# For any queries, suggestions, or contributions, feel free to reach out to me on GitHub: https://github.com/bwnyasse

<a href="https://colab.research.google.com/github/bwnyasse/learning-box/blob/main/google-cloud/colab/langchain_bigquery/langchain_bigquery_chat_agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Using LangChain and GPT to chat with BigQuery data**

---

In this Colab, I want to demonstrate how to use LangChain and OpenAI GPT for querying data from Google BigQuery.

## 📒 Before I begin.








### Set up a Google Cloud project.

1.   Create a Cloud Platform project if you do not already have one.
2.   Enable billing for the project.
3.   Enable BigQuery APIs for the project.

### Authentication and service account.

1. Create a service account if you do not already have one, with the following BigQuery roles:

  *  BigQuery User
  *  BigQuery Data Viewer
  *  BigQuery Job User


2. Download the service account key JSON file and place it at `/content/sa-keys/sa-langchain-test-over-bigquery.json`.

## 🐍 Getting Started

### Installation of required libraries.

In [None]:
!pip install langchain openai chromadb tiktoken tabulate sqlalchemy sqlalchemy-bigquery google-cloud-bigquery &> /dev/null

### Importing libraries.

In [4]:
from google.colab import userdata
from google.cloud import bigquery
from sqlalchemy import *
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *
import os
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.llms.openai import OpenAI
from langchain.agents import AgentExecutor

## 🔐 Authenticate

### BigQuery Service Account : Option 1

In [5]:
# Option 1: For Personal Use (with Google Drive)
# Uncomment the following lines if you have stored your service account key in Google Drive.
# This is how I, the author, personally use it for demonstration purposes.
# from google.colab import drive
# drive.mount('/content/gdrive')
# service_account_file = "/content/gdrive/MyDrive/Colab Notebooks/sa-keys/sa-langchain-test-over-bigquery.json"

### BigQuery Service Account : Option 2

In [None]:
# Option 2: General User Setup
# Uncomment the following line if you have placed your service account key under /content/sa-keys in this Colab runtime.
# This is the recommended way for general users who are following the setup instructions above.
# Note: either Option 1 or Option 2 must be uncommented, because the configuration cell below reads `service_account_file`.
# service_account_file = "/content/sa-keys/sa-langchain-test-over-bigquery.json"

### OpenAI API Key

In order to interact with OpenAI's GPT models, you need to have an OpenAI API key. Here's how you can set it up:

1. **Generate an OpenAI API Key**: If you don't have one, you need to create an API key from your OpenAI account. Visit the OpenAI website, log in to your account, and navigate to the API section to generate a new key.

2. **Storing the OpenAI API Key**: Store your OpenAI API key in a secure location rather than in the notebook itself. This notebook reads it from Google Colab's Secrets panel under the name `openAiApiKey`, so add your key there with that name and grant the notebook access to it. Alternatively, you can store it in an environment variable or a secure file within the Colab file system and adapt the cell below accordingly.

In [8]:
from google.colab import auth

# Authenticate the current Colab user.
auth.authenticate_user()

# Read the OpenAI API key from the Colab secret named 'openAiApiKey'.
# Never paste the key itself into the notebook.
openai_key = userdata.get('openAiApiKey')

# Fail early on an empty secret instead of exporting a blank key that would
# only surface later as an authentication error.
if not openai_key:
    raise ValueError(
        "⚠️ The Colab secret 'openAiApiKey' is empty; please store your OpenAI API key in it."
    )

# Export the key as the OPENAI_API_KEY environment variable.
os.environ["OPENAI_API_KEY"] = openai_key

## 💻 SQLAlchemy Configuration

In [6]:
# Configuration: BigQuery project, dataset and table used by this notebook.
project = "learning-box-369917"
dataset = "langchain_test"
table = "churn_table"

# `service_account_file` is only defined once one of the service-account option
# cells has been uncommented and run. Fail fast with an actionable message
# rather than a bare NameError on the f-string below.
if "service_account_file" not in globals():
    raise NameError(
        "service_account_file is not defined: uncomment Option 1 or Option 2 "
        "in the 'Authenticate' section and run that cell first."
    )

# Check the key file now so a wrong path is reported here, not when the
# database connection is created.
if not os.path.isfile(service_account_file):
    raise FileNotFoundError(
        f"Service account key file not found: {service_account_file}"
    )

# SQLAlchemy connection URL for BigQuery, pointing at the service account key.
sqlalchemy_url = f'bigquery://{project}/{dataset}?credentials_path={service_account_file}'

## 🔗 Initialize LangChain

Set up LangChain with the configuration above to prepare for executing queries.


In [9]:
# Open the BigQuery dataset through the SQLAlchemy URL built in the configuration cell.
db = SQLDatabase.from_uri(sqlalchemy_url)

# "text-davinci-003" has been retired by OpenAI and requests to it fail;
# "gpt-3.5-turbo-instruct" is the completion-model replacement.
llm = OpenAI(temperature=0, model="gpt-3.5-turbo-instruct")

# The toolkit bundles the database and the LLM for the SQL agent.
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    verbose=True,  # print the agent's intermediate thoughts, actions and observations
    top_k=1000,  # NOTE(review): presumably the row limit the agent is told to apply to its queries; confirm
)

## 💬 Chat : Execute and Display Queries

Execute sample queries using the LangChain agent and display the responses.


In [11]:
#@markdown Please fill in the value below with your request.

# Natural-language question to send to the SQL agent (editable through the Colab form).
request = "How many users churned which had internet service?" #@param {type:"string"}

# Quick input validation: reject empty and whitespace-only requests.
assert request and request.strip(), "⚠️ Please provide a request"

# Pass the question as a plain string. `{request}` is a one-element set, whose
# repr (braces and quotes included) would be interpolated into the agent prompt.
agent_executor.run(request)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I should check the query before executing it.
Action: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mchurn_table[0m
Thought:[32;1m[1;3m I should query the schema of the churn_table
Action: sql_db_schema
Action Input: churn_table[0m
Observation: [33;1m[1;3m
CREATE TABLE `churn_table` (
	`customerID` STRING, 
	`gender` STRING, 
	`SeniorCitizen` INT64, 
	`Partner` BOOL, 
	`Dependents` BOOL, 
	`tenure` INT64, 
	`PhoneService` BOOL, 
	`MultipleLines` STRING, 
	`InternetService` STRING, 
	`OnlineSecurity` STRING, 
	`OnlineBackup` STRING, 
	`DeviceProtection` STRING, 
	`TechSupport` STRING, 
	`StreamingTV` STRING, 
	`StreamingMovies` STRING, 
	`Contract` STRING, 
	`PaperlessBilling` BOOL, 
	`PaymentMethod` STRING, 
	`MonthlyCharges` FLOAT64, 
	`TotalCharges` STRING, 
	`Churn` BOOL
)

/*
3 rows from churn_table table:
customerID	gender	SeniorCitizen	Partner	Dependents	tenure	PhoneService	Multiple

'1756 users churned which had internet service.'