In [1]:
import pandas as pd
import requests
import json

In [2]:
TRAIN_DATA_URL = "https://raw.githubusercontent.com/PolyAI-LDN/task-specific-datasets/master/banking_data/train.csv"
TEST_DATA_URL = "https://raw.githubusercontent.com/PolyAI-LDN/task-specific-datasets/master/banking_data/test.csv"
LABELS_URL = "https://raw.githubusercontent.com/PolyAI-LDN/task-specific-datasets/master/banking_data/categories.json"

In [3]:
response = requests.get(LABELS_URL)

In [5]:
if response.status_code == 200:
    data = json.loads(response.text)
    categories = list(data)
    # do something with my_list
else:
    print(f'Error downloading data. Status code: {response.status_code}')

In [6]:
train_df = pd.read_csv(TRAIN_DATA_URL)
test_df = pd.read_csv(TEST_DATA_URL)

In [7]:
train_df

Unnamed: 0,text,category
0,I am still waiting on my card?,card_arrival
1,What can I do if my card still hasn't arrived ...,card_arrival
2,I have been waiting over a week. Is the card s...,card_arrival
3,Can I track my card while it is in the process...,card_arrival
4,"How do I know if I will get my card, or if it ...",card_arrival
...,...,...
9998,You provide support in what countries?,country_support
9999,What countries are you supporting?,country_support
10000,What countries are getting support?,country_support
10001,Are cards available in the EU?,country_support


In [8]:
train_df['source'] = 'train'
test_df['source'] = 'test'

In [9]:
# concatenate the dataframes
combined_df = pd.concat([train_df, test_df])

In [10]:
combined_df

Unnamed: 0,text,category,source
0,I am still waiting on my card?,card_arrival,train
1,What can I do if my card still hasn't arrived ...,card_arrival,train
2,I have been waiting over a week. Is the card s...,card_arrival,train
3,Can I track my card while it is in the process...,card_arrival,train
4,"How do I know if I will get my card, or if it ...",card_arrival,train
...,...,...,...
3075,"If i'm not in the UK, can I still get a card?",country_support,test
3076,How many countries do you support?,country_support,test
3077,What countries do you do business in?,country_support,test
3078,What are the countries you operate in.,country_support,test


# Upload Data to Snowflake

In [21]:
import snowflake.connector
from dotenv import load_dotenv
import os

In [13]:
load_dotenv()

True

In [14]:
print(os.environ["SF_ACCOUNT"])

ylijtly-jsb79052


In [15]:
conn = snowflake.connector.connect(
    user=os.environ["SF_USER"],
    password=os.environ["SF_PWD"],
    account=os.environ["SF_ACCOUNT"],
    database=os.environ["SF_DB"],
    schema=os.environ["SF_SCHEMA"]
)


In [16]:
# create a cursor object
cursor = conn.cursor()

In [17]:
# create a table in Snowflake to store the data
create_table_query = 'CREATE TABLE intent_dataset (text STRING, category STRING, source STRING);'
cursor.execute(create_table_query)

<snowflake.connector.cursor.SnowflakeCursor at 0x12a45c880>

In [18]:
# insert the data from the Pandas dataframe into the Snowflake table
cursor.executemany('INSERT INTO intent_dataset (text, category, source) VALUES (%s, %s, %s)', combined_df.values.tolist())

<snowflake.connector.cursor.SnowflakeCursor at 0x12a45c880>

In [19]:
# commit the changes
conn.commit()

In [20]:
# close the cursor and connection
cursor.close()
conn.close()