## Load environment variables

In [18]:
# Load environment variables from the Streamlit secrets file so that later
# cells can read the Snowflake connection settings through os.getenv().
# NOTE(review): the file is a .toml but is parsed here with python-dotenv;
# this presumably works only while it contains flat `key = "value"` lines
# (no [sections]) — confirm against the actual secrets file.
from dotenv import load_dotenv
load_dotenv("../.streamlit/secrets.toml") 

True

In [19]:
import re

def get_sql(text):
    """Extract the first fenced ```sql code block from a markdown string.

    Args:
        text: Markdown text (e.g. an LLM answer) that may contain a
            ```sql ... ``` fenced block. May be None or empty.

    Returns:
        The SQL inside the first ```sql fence, without the fence lines,
        or None when `text` is empty/None or contains no such block.
    """
    if not text:
        return None
    # Non-greedy capture: stop at the FIRST closing fence. A greedy `.*` with
    # DOTALL would run to the last fence in the text and swallow any prose or
    # additional code blocks that follow the first query.
    sql_match = re.search(r"```sql\n(.*?)\n```", text, re.DOTALL)
    return sql_match.group(1) if sql_match else None

## Create chat

In [20]:
from langchain.prompts.prompt import PromptTemplate

# Prompt template for the conversation chain. It contains, in order:
#   1. behavioural instructions for the assistant (read-only Snowflake SQL,
#      markdown answers, how to handle thanks / unknown questions),
#   2. the CREATE TABLE statement and three sample rows for each of the five
#      tables (contact, employee, interaction, priority_contact, readership),
#   3. the identity of the current user,
#   4. the `{history}` and `{input}` placeholders filled in at run time.
# These two placeholders are the only brace expressions in the text, so the
# template can be formatted with exactly those two variables.
template_qa = """ 
You're an AI assistant specializing in data analysis with Snowflake SQL. When providing responses, strive to exhibit friendliness and adopt a conversational tone, similar to how a friend or tutor would communicate.
When asked about your capabilities, provide a general overview of your ability to assist with data analysis tasks using Snowflake SQL, instead of performing specific SQL queries. 
Based on the question provided, if it pertains to data analysis or SQL tasks, generate SQL code that is compatible with the Snowflake environment. Additionally, offer a brief explanation about how you arrived at the SQL code. If the required column isn't explicitly stated in the context, suggest an alternative using available columns, but do not assume the existence of any columns that are not mentioned. Also, do not modify the database in any way (no insert, update, or delete operations). You are only allowed to query the database. Refrain from using the information schema.
If the question or context does not clearly involve SQL or data analysis tasks, respond appropriately without generating SQL queries. 
When the user expresses gratitude or says "Thanks", interpret it as a signal to conclude the conversation. Respond with an appropriate closing statement without generating further SQL queries.
If you don't know the answer, simply state, "I'm sorry, I don't know the answer to your question."
Write your response in markdown format.


CREATE TABLE contact (
	contact_key DECIMAL(38, 0) NOT NULL, 
	contact_id VARCHAR(30), 
	contact_name VARCHAR(100), 
	client_id VARCHAR(30), 
	client_name VARCHAR(100), 
	tier VARCHAR(10), 
	region VARCHAR(50), 
	active_flag VARCHAR(1), 
	CONSTRAINT "SYS_CONSTRAINT_4bb18bf2-e5e8-4cf0-b4eb-ad7e20161aef" PRIMARY KEY (contact_key),
	FOREIGN KEY (contact_key) REFERENCES interaction(contact_key),
	FOREIGN KEY (contact_key) REFERENCES priority_contact(contact_key),
	FOREIGN KEY (contact_key) REFERENCES readership(contact_key)
)

/*
3 rows from contact table:
contact_key	contact_id	contact_name	client_id	client_name	tier	region	active_flag
1	CT101	John Doe	CL001	ABC Corp	Gold	North America	Y
2	CT102	Jane Smith	CL002	XYZ Corp	Silver	Europe	Y
3	CT103	Mike Johnson	CL003	123 Industries	Bronze	Asia	Y
*/


CREATE TABLE employee (
	employee_key DECIMAL(38, 0) NOT NULL, 
	emp_ms_code VARCHAR(10), 
	user_code VARCHAR(20), 
	employee_name VARCHAR(100), 
	region VARCHAR(50), 
	active_flag VARCHAR(1), 
	business_unit VARCHAR(50), 
	producer_flag VARCHAR(1), 
	CONSTRAINT "SYS_CONSTRAINT_73534a84-ffab-479a-9b33-b415af0645d0" PRIMARY KEY (employee_key),
	FOREIGN KEY (employee_key) REFERENCES interaction(employee_key),
	FOREIGN KEY (employee_key) REFERENCES priority_contact(employee_key),
	FOREIGN KEY (employee_key) REFERENCES readership(employee_key)
)

/*
3 rows from employee table:
employee_key	emp_ms_code	user_code	employee_name	region	active_flag	business_unit	producer_flag
1	MS001	UC001	John Doe	North America	Y	Sales	Y
2	MS002	UC002	Jane Smith	Europe	Y	Marketing	N
3	MS003	UC003	Mike Johnson	Asia	Y	Operations	Y
*/


CREATE TABLE interaction (
	interaction_id VARCHAR(30) NOT NULL, 
	interaction_start_date TIMESTAMP_NTZ NOT NULL, 
	interaction_duration DECIMAL(38, 0), 
	interaction_region VARCHAR(30), 
	live_interaction VARCHAR(1), 
	employee_key DECIMAL(38, 0) NOT NULL, 
	contact_key DECIMAL(38, 0) NOT NULL, 
	interaction_type VARCHAR(30), 
	CONSTRAINT "SYS_CONSTRAINT_d1a63a04-1ba3-4026-8fdf-4e78752f0ea2" PRIMARY KEY (contact_key, employee_key, interaction_id, interaction_start_date),
	FOREIGN KEY (contact_key) REFERENCES contact(contact_key),
	FOREIGN KEY (employee_key) REFERENCES employee(employee_key)
)

/*
3 rows from interaction table:
interaction_id	interaction_start_date	interaction_duration	interaction_region	live_interaction	employee_key	contact_key	interaction_type
INT0	2023-07-01 00:00:00	24	ASIA	Y	11	13	Phone-Call(1-M)
INT1	2023-06-08 00:00:00	26	NORTH AMERICA	Y	2	24	Phone-Call(1-M)
INT2	2023-06-14 00:00:00	18	ASIA	Y	12	9	Phone-Call(1-M)
*/


CREATE TABLE priority_contact (
	employee_key DECIMAL(38, 0) NOT NULL, 
	contact_key DECIMAL(38, 0) NOT NULL, 
	is_priority_flag VARCHAR(1), 
	CONSTRAINT "SYS_CONSTRAINT_473fd803-e6ca-449a-b28b-9690a79af894" PRIMARY KEY (contact_key, employee_key),
	FOREIGN KEY (contact_key) REFERENCES contact(contact_key),
	FOREIGN KEY (employee_key) REFERENCES employee(employee_key)
)

/*
3 rows from priority_contact table:
employee_key	contact_key	is_priority_flag
6	38	N
5	32	N
5	18	N
*/


CREATE TABLE readership (
	readership_key DECIMAL(38, 0) NOT NULL, 
	document_key DECIMAL(38, 0) NOT NULL, 
	contact_key DECIMAL(38, 0) NOT NULL, 
	doc_read_datetime TIMESTAMP_NTZ, 
	document_id VARCHAR(255), 
	headline VARCHAR(255), 
	published_date TIMESTAMP_NTZ, 
	application_name VARCHAR(255), 
	report_type VARCHAR(100), 
	employee_key DECIMAL(38, 0), 
	ticker VARCHAR(20), 
	document_region VARCHAR(200), 
	CONSTRAINT "SYS_CONSTRAINT_d22921d3-f6c8-47ce-8e17-fa284b914c90" PRIMARY KEY (readership_key),
	FOREIGN KEY (contact_key) REFERENCES contact(contact_key),
	FOREIGN KEY (employee_key) REFERENCES employee(employee_key)
)

/*
3 rows from readership table:
readership_key	document_key	contact_key	doc_read_datetime	document_id	headline	published_date	application_name	report_type	employee_key	ticker	document_region
0	6	5	2023-06-02 00:00:00	Doc-890	Cloud Computing and Security	2023-07-08 09:30:20	None	RESEARCH	4	TCK-006	Australia
1	2	25	2023-06-28 00:00:00	Doc-456	Machine Learning Basics	2023-06-30 14:45:10	None	RESEARCH	8	TCK-002	Europe
2	2	25	2023-07-12 00:00:00	Doc-456	Machine Learning Basics	2023-06-30 14:45:10	None	RESEARCH	14	TCK-002	Europe
*/

My employee_name is "John Doe" and my emp_ms_code is "MS001"

History: ```{history}```

Input: ```{input}```

Answer:
"""

# Wrap the raw template; `history` and `input` are the two placeholders it
# declares and the only variables the chain has to supply.
prompt_qa = PromptTemplate(
    input_variables=["history", "input"],
    template=template_qa,
)

from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain

# Chat model used to answer questions: low temperature, answers capped at
# 500 tokens.
q_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1, max_tokens=500)

# Conversation chain tying the model to the prompt above.
# NOTE(review): no memory is passed explicitly, so `{history}` is presumably
# filled by the chain's default memory — confirm against the LangChain docs.
conv_chain = ConversationChain(llm=q_llm, prompt=prompt_qa)

# Ask one sample question and show the raw markdown answer.
output = conv_chain.predict(input="top 5 contacts name who read most")
print(output)

To find the top 5 contacts who have read the most, we can use the `readership` table to count the number of times each contact appears and then sort them in descending order. Here's the SQL code to achieve that:

```sql
SELECT contact_name, COUNT(*) AS read_count
FROM contact c
JOIN readership r ON c.contact_key = r.contact_key
GROUP BY contact_name
ORDER BY read_count DESC
LIMIT 5;
```

This query joins the `contact` and `readership` tables on the `contact_key` column and groups the results by `contact_name`. It then counts the number of occurrences for each contact and sorts them in descending order. Finally, the `LIMIT 5` clause limits the result to the top 5 contacts.

Please note that this query assumes that the `contact_name` column in the `contact` table corresponds to the name of the contact. If the column name is different, please let me know so I can modify the query accordingly.

Let me know if there's anything else I can help you with!


## Execute the SQL in Snowflake and display the DataFrame

In [21]:
import os

from sqlalchemy import create_engine
from snowflake.sqlalchemy import URL
import pandas as pd 

# Build the Snowflake connection URL from the environment variables loaded
# earlier in the notebook. os.getenv() returns None for a missing variable,
# so an incomplete secrets file only surfaces when connecting.
uri_snow = URL(
    account=os.getenv("account"),
    user=os.getenv("user"),
    password=os.getenv("password"),
    database=os.getenv("database"),
    schema=os.getenv("schema"),
    warehouse=os.getenv("warehouse"),
    role=os.getenv("role"),
)

engine = create_engine(uri_snow)

# The model is allowed to answer without any SQL (e.g. for non-data
# questions), in which case get_sql() returns None. Fail with a clear message
# instead of handing None to pandas.
sql_query = get_sql(output)
if sql_query is None:
    raise ValueError(
        "No ```sql code block found in the model output; nothing to execute."
    )

# NOTE(review): this runs model-generated SQL as-is. The prompt asks for
# read-only queries, but nothing here enforces it — use a role with read-only
# privileges for this connection.
df = pd.read_sql(sql_query, engine)
df

Unnamed: 0,contact_name,read_count
0,Sophia Smith,306
1,James Lee,161
2,Olivia Kim,161
3,William Smith,160
4,Noah Johnson,159
