# Vanna.AI Test

https://vanna.ai/docs/snowflake-openai-vanna-vannadb.html

In [None]:
import os
import vanna
from vanna.remote import VannaDefault

In [None]:
# Snowflake credentials come from the environment so that no secrets are stored in the notebook.
# os.environ.get() yields None for any variable that is not set.
SNOW_ACCOUNT = os.environ.get("SNOW_ACCOUNT")
SNOW_USER = os.environ.get("SNOW_USER")
SNOW_PASS = os.environ.get("SNOW_PASS")

# Warehouse and database names are fixed for this project.
SNOW_WAREHOUSE = "DEADPOOL"
SNOW_DATABASE = "DEADPOOL"

# Read the Vanna API key from the environment, the same way as the Snowflake secrets.
# vanna.get_api_key() takes an e-mail address, not an environment-variable name, so calling it
# with "VANNA_API_KEY" only worked when that variable happened to be exported already.
api_key = os.environ.get("VANNA_API_KEY")

In [None]:
# Name of the Vanna model this notebook trains and queries.
vanna_model_name = "deadpool-test"

# Client object used by every later cell (connect, train, ask).
vn = VannaDefault(
    api_key=api_key,
    model=vanna_model_name,
)

In [None]:
# Connect the Vanna client to Snowflake using the settings defined in the configuration cell.
# The warehouse is passed explicitly: SNOW_WAREHOUSE was defined but never handed to the
# connection, leaving the session dependent on the user's default warehouse.
vn.connect_to_snowflake(
    account=SNOW_ACCOUNT,
    username=SNOW_USER,
    password=SNOW_PASS,
    database=SNOW_DATABASE,
    warehouse=SNOW_WAREHOUSE,
    role="ENGINEER",
)

In [None]:
# Pull column metadata for the PLAYERS and PICKS tables. The information schema query may need
# some tweaking depending on your database; this is a good starting point.
df_information_schema = vn.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 'PLAYERS' OR TABLE_NAME = 'PICKS'")

# Break the information schema into bite-sized chunks that can be referenced by the LLM.
plan = vn.get_training_plan_generic(df_information_schema)

# Train on the plan. This runs every time the cell is executed; to review the plan before
# training, comment this line out, inspect `plan`, then restore the line and re-run.
vn.train(plan=plan)

In [None]:
# Display the training plan built from the information schema.
plan

In [None]:
# Hand-written training data. Make sure you modify the examples to match your database.

# DDL statements are powerful because they specify table names, column names, types,
# and potentially relationships.
picks_ddl = """
    CREATE TABLE IF NOT EXISTS DEADPOOL.PROD.PICKS (
	NAME VARCHAR(256),
	BIRTH_DATE DATE,
	DEATH_DATE DATE,
	PICKED_BY VARCHAR(256),
	WIKI_PAGE VARCHAR(256),
	YEAR NUMBER(38,0),
	TIMESTAMP TIMESTAMP_NTZ(9),
	WIKI_ID VARCHAR(256),
	AGE NUMBER(10,1)
);
"""

players_ddl = """
    CREATE TABLE IF NOT EXISTS DEADPOOL.PROD.PLAYERS (
	FIRST_NAME VARCHAR(256),
	LAST_NAME VARCHAR(256),
	EMAIL VARCHAR(256),
	OPT_IN BOOLEAN,
	SMS VARCHAR(256),
	ID VARCHAR(36),
	PASSWORD VARCHAR(256),
	YEAR_ONE NUMBER(10,1),
	YEAR_TWO NUMBER(10,1)
);
"""

# Train on the table definitions in the same order as before: PICKS, then PLAYERS.
for table_ddl in (picks_ddl, players_ddl):
    vn.train(ddl=table_ddl)

# Free-text documentation covers business terminology or definitions.
business_context = "Deadpool app for the 2024 game"
vn.train(documentation=business_context)

# Existing SQL queries can be added as training data too; paste them in from your editor
# to give the model worked examples for generating new SQL.
picks_per_player_sql = """
select concat(first_name || ' ' || last_name), count(name) as total_picks 
from picks p
left join players pl 
on p.picked_by = pl.id
where year = 2024 
group by 1         
"""
vn.train(sql=picks_per_player_sql)

In [None]:
# At any time you can inspect what training data the package is able to reference
training_data = vn.get_training_data()
training_data

In [None]:
# Cleanup helper, intentionally disabled: uncomment to remove every entry listed in training_data.
# for training_id in training_data['id']:
#     vn.remove_training_data(training_id)

In [None]:
# Ask the trained model a natural-language question about the data.
vn.ask(question="How many players are there in the game?")

In [88]:
# Wrap the configured Vanna client in Vanna's Flask app and start it.
# NOTE(review): app.run() presumably blocks this cell until the server is stopped — confirm.
from vanna.flask import VannaFlaskApp
app = VannaFlaskApp(vn)
app.run()