## TPC Dataset Import
Import data for TPC test from GCS into Snowflake database

### Set Configs

In [1]:
import config, sf, datetime

# In "Dry Run" mode we generate and print the SQL queries but do not run them.
# You can run them manually in "workbench" if you want.
DRY_RUN = False
TEST = 'ds'  # benchmark to run: 'ds' selects TPC-DS (not TPC-H)
SIZE = '10000GB'  # dataset size to use in test

`Note: configuration data is read from config.py`

### Start Snowflake WAREHOUSE

In [2]:
# Create the SnowflakeHelper for the chosen test type and dataset size;
# the `config` module is handed over so the helper can read its settings.
sf_helper = sf.SnowflakeHelper(TEST, SIZE, config)

# Start the warehouse. verbose=True presumably enables the
# "running query: ..." log lines seen in the recorded output.
sf_helper.warehouse_start(verbose=True)

Preparing to open connection to Snowflake...
Connection opened.
['customer_address', 'customer_demographics', 'ship_mode', 'time_dim', 'reason', 'income_band', 'item', 'store', 'call_center', 'customer', 'web_site', 'store_returns', 'household_demographics', 'web_page', 'promotion', 'catalog_page', 'inventory', 'catalog_returns', 'web_returns', 'web_sales', 'catalog_sales', 'store_sales']
running query: USE ROLE ACCOUNTADMIN
running query: ALTER WAREHOUSE TEST1 RESUME;
running query: USE WAREHOUSE TEST1
running query: CREATE DATABASE IF NOT EXISTS test_concurrent_ds_10000GB
running query: USE DATABASE test_concurrent_ds_10000GB


### Set up STAGE: link to the GCS data source and stage files for uploading

In [None]:
# NOTE: the guard `if not sf_helper.is_integrated:` is disabled, so
# create_integration() is called on every run of this cell.
# Integrate Snowflake with GCS; DRY_RUN is forwarded as is_dry_run.
integration_id = sf_helper.create_integration(is_dry_run=DRY_RUN)
print(f'integrated with gcs: {integration_id}')

### Test STAGE

In [None]:
# Ask the helper which files are visible for each table through the
# integration, then print them together with how many are still missing.
db_files = sf_helper.list_integration(integration_id)
for table_name, staged_files in db_files.items():
    print(f'{table_name}:')
    for staged_file in staged_files:
        print(f'\t{staged_file}')
    # gcs_file_range is used here as the expected number of files per table
    missing_count = sf_helper.gcs_file_range - len(staged_files)
    print(f'\tmissing {missing_count} files\n\n')

### Create tables in Snowflake if needed

In [None]:
# NOTE: the guard `if not sf_helper.is_integrated():` is disabled, so
# create_schema() is called on every run of this cell.
# DRY_RUN is forwarded as is_dry_run.
sf_helper.create_schema(is_dry_run=DRY_RUN)

### Import Data from STAGE to target table

In [None]:
# Load every staged file into its target table, one file at a time,
# walking each table's files in sorted order and logging a completion time.
# NOTE(review): DRY_RUN is not consulted here - confirm import_data should always execute.
for target_table, staged_files in db_files.items():
    print(f'Starting to import table: {target_table}')
    ordered_files = sorted(staged_files)
    for staged_file in ordered_files:
        print(f'\timporting file: {staged_file}')
        sf_helper.import_data(target_table, staged_file, integration_id)
        finished_at = datetime.datetime.now().time()
        print(f'\tfinished @ {finished_at}')

### Suspend WAREHOUSE

In [None]:
# Suspend the warehouse now that the import cells have run.
# NOTE(review): this only executes if the cell is reached - if an earlier cell
# fails, the warehouse presumably stays resumed; suspend it manually.
sf_helper.warehouse_suspend()