# Setup database

In [1]:
import json
import zipfile
import pandas as pd
from snowflake.snowpark.session import Session
import sqlparse

In [2]:
# Create a Snowflake session from the connection parameters stored in ./creds.json.
# The file is opened in a context manager so its handle is closed deterministically
# (the previous `json.load(open(...))` form left the handle open until garbage
# collection). The parsed credentials are passed straight to the builder and are
# not bound to a notebook-level variable.
with open("./creds.json") as creds_file:
    session = Session.builder.configs(json.load(creds_file)).create()

## Execute SQL Script to Initialize Database

In [7]:
# Read the database initialization script.
with open("../sql/init_database.sql", "r") as f:
    init_sql = f.read()

# Split the SQL script into individual statements
statements = sqlparse.split(init_sql)

# Execute each statement on a best-effort basis: a failing statement is reported
# and recorded, then execution continues with the next statement.
failed_statements = []
for position, statement in enumerate(statements, start=1):
    sql_text = statement.strip() if statement else ""
    if not sql_text:
        # Skip empty fragments produced by the split.
        continue
    try:
        session.sql(sql_text).collect()
        print("Statement executed successfully")
    except Exception as e:
        failed_statements.append((position, sql_text))
        print(f"Error executing statement: {str(e)}")
        # Identify the failing statement so the error can be traced back to the script.
        print(f"  Statement #{position}: {sql_text[:200]}")

# Surface failures in one place so they are not lost among the success messages.
if failed_statements:
    print(f"{len(failed_statements)} statement(s) failed; review the errors above")

## Load train dataset

In [8]:
# Read TRAIN_DATASET.csv straight out of the zip archive into a pandas DataFrame.
with zipfile.ZipFile("../data/TRAIN_DATASET.zip") as archive, \
        archive.open("TRAIN_DATASET.csv") as csv_file:
    pandas_df = pd.read_csv(csv_file)

# Upload the frame into the TRAIN_DATASET table. The table is not auto-created
# here (auto_create_table=False); overwrite=True replaces its current contents.
session.write_pandas(
    pandas_df,
    "TRAIN_DATASET",
    auto_create_table=False,
    overwrite=True,
)


## Load test dataset

In [9]:
# Read TEST_DATASET.csv straight out of the zip archive into a pandas DataFrame.
with zipfile.ZipFile("../data/TEST_DATASET.zip") as archive, \
        archive.open("TEST_DATASET.csv") as csv_file:
    pandas_df = pd.read_csv(csv_file)

# Upload the frame into the TEST_DATASET table. The table is not auto-created
# here (auto_create_table=False); overwrite=True replaces its current contents.
session.write_pandas(
    pandas_df,
    "TEST_DATASET",
    auto_create_table=False,
    overwrite=True,
)


In [10]:
# Close the Snowflake session now that the setup steps in this notebook are done.
session.close()