## TPC Dataset Import
Import data for TPC test from GCS into Snowflake database

### Set Configs

In [1]:
import config
import sf
import datetime

# Dry-run switch: when enabled, the SQL queries are generated and printed
# but not run, so they can be executed by hand in the "workbench" instead.
DRY_RUN = False

# Benchmark to run: TPC-H.
TEST = sf.TEST_H

# Dataset size to use in the test.
SIZE = '1GB'

`Note: we use configuration data from config.py`

### Start Snowflake WAREHOUSE

In [2]:
# Create the SnowflakeHelper for the selected test type (TEST) and dataset
# size (SIZE), handing it the imported `config` module.
# NOTE(review): the helper's connection log line ("using config: ...") prints
# the configured password in clear text — mask it in the helper and clear this
# cell's output before sharing the notebook.
sf_helper = sf.SnowflakeHelper(TEST, SIZE, config)

# Start the warehouse. Per the output recorded for this cell, the two calls
# between them open the connection, switch role, resume and select the
# warehouse, and create/select the database for this test and size.
sf_helper.warehouse_start()

preparing to open connection to Snowflake
using config: user:dauren, pass: <redacted>, account: wja13212
connection opened
running query: USE ROLE ACCOUNTADMIN
result: Statement executed successfully.
running query: ALTER WAREHOUSE TEST1 RESUME;
warehouse start: Statement executed successfully.
running query: USE WAREHOUSE TEST1
result: Statement executed successfully.
running query: CREATE DATABASE IF NOT EXISTS h_1GB
result: Database H_1GB successfully created.
running query: USE DATABASE h_1GB
result: Statement executed successfully.


### Set up STAGE: link to the GCS data source and stage files for uploading

In [3]:
# Create the GCS storage integration only when the helper reports that none
# exists yet.
# NOTE(review): `integration_id` is assigned only inside this branch, yet the
# later listing and import cells read it. On a fresh kernel with the
# integration already in place this cell does nothing and those cells would
# raise NameError — confirm how the id should be recovered in that case.
if not sf_helper.is_integrated():
    # Integrate Snowflake with GCS.
    integration_id = sf_helper.create_integration(is_dry_run=DRY_RUN)
    print(f'integrated with gcs: {integration_id}')



--creating named file format: "@csv_file_format"
running query: create or replace file format csv_file_format
            type = csv
            field_delimiter = '|'
            skip_header = 1
            null_if = ('NULL', 'null')
            empty_field_as_null = true
            encoding = 'iso-8859-1' 
            compression = none;
result File format CSV_FILE_FORMAT successfully created.


--done creating named file format


--integrating "gcs_h_1GB_integration" ... 


--creating storage integration: "gcs_h_1GB_integration"
running query: CREATE STORAGE INTEGRATION gcs_h_1GB_integration TYPE=EXTERNAL_STAGE STORAGE_PROVIDER=GCS ENABLED=TRUE STORAGE_ALLOWED_LOCATIONS=('gcs://tpc-benchmark-5947/');
result Integration GCS_H_1GB_INTEGRATION successfully created.


--finished creating storage integration
running query: GRANT CREATE STAGE on schema public to ROLE ACCOUNTADMIN;
result Statement executed successfully.
running query: GRANT USAGE on INTEGRATION gcs_h_1GB_integration to 

### Test STAGE

In [4]:
# Ask the helper for the staged files, grouped by table, and print each
# group followed by its "missing" count.
db_files = sf_helper.list_integration(integration_id)
for table_name, staged in db_files.items():
    print(f'{table_name}:')
    for path in staged:
        print(f'\t{path}')
    # NOTE(review): the count is measured against sf_helper.gcs_file_range, so
    # a table that ships as a single file still reports many "missing" files —
    # presumably not a sign of a failed upload; confirm.
    missing = sf_helper.gcs_file_range - len(staged)
    print(f'\tmissing {missing} files\n\n')



--listing stage: "@gcs_h_1GB_integration_stage"
nation:
	gcs://tpc-benchmark-5947/h_1GB_nation.tbl
	missing 95 files


lineitem:
	gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.1
	gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.2
	gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.3
	gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.4
	gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.5
	gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.6
	gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.7
	gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.8
	gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.9
	missing 87 files


customer:
	gcs://tpc-benchmark-5947/h_1GB_customer.tbl.1
	gcs://tpc-benchmark-5947/h_1GB_customer.tbl.2
	gcs://tpc-benchmark-5947/h_1GB_customer.tbl.3
	gcs://tpc-benchmark-5947/h_1GB_customer.tbl.4
	gcs://tpc-benchmark-5947/h_1GB_customer.tbl.5
	gcs://tpc-benchmark-5947/h_1GB_customer.tbl.6
	gcs://tpc-benchmark-5947/h_1GB_customer.tbl.7
	gcs://tpc-benchmark-5947/h_1GB_customer.tbl.8
	gcs://tpc-benchmark-5947/h_1GB_customer

### Create tables in Snowflake if needed

In [5]:
# Push the table schema (the DDL file logged in this cell's output) when the
# helper reports that no integration exists.
# NOTE(review): the guard reuses is_integrated() instead of checking for
# existing tables, and the integration cell runs before this one — confirm
# this is the intended condition for creating the schema.
if not sf_helper.is_integrated():
    sf_helper.create_schema(is_dry_run=DRY_RUN)



--pushing schema: "/home/vagrant/bq_snowflake_benchmark/h/2.18.0_rc2/dbgen/dss.ddl"
running query: -- Sccsid:     @(#)dss.ddl	2.1.8.1
CREATE TABLE NATION  ( N_NATIONKEY  INTEGER NOT NULL,
                            N_NAME       CHAR(25) NOT NULL,
                            N_REGIONKEY  INTEGER NOT NULL,
                            N_COMMENT    VARCHAR(152));

result Table NATION successfully created.
running query: 
CREATE TABLE REGION  ( R_REGIONKEY  INTEGER NOT NULL,
                            R_NAME       CHAR(25) NOT NULL,
                            R_COMMENT    VARCHAR(152));

result Table REGION successfully created.
running query: 
CREATE TABLE PART  ( P_PARTKEY     INTEGER NOT NULL,
                          P_NAME        VARCHAR(55) NOT NULL,
                          P_MFGR        CHAR(25) NOT NULL,
                          P_BRAND       CHAR(10) NOT NULL,
                          P_TYPE        VARCHAR(25) NOT NULL,
                          P_SIZE        INTEGER NOT 

### Import Data from STAGE to target table

In [6]:
# Import every staged file into its target table, one import_data() call per
# file in sorted name order, printing the wall-clock time after each call.
# NOTE(review): unlike the integration and schema cells, DRY_RUN is not
# consulted here, so the copy appears to run even in dry-run mode — confirm
# whether import_data() should receive the flag.
for table_name, staged in db_files.items():
    print(f'Starting to import table: {table_name}')
    for path in sorted(staged):
        print(f'\timporting file: {path}')
        sf_helper.import_data(table_name, path, integration_id)
        finished_at = datetime.datetime.now().time()
        print(f'\tfinished @ {finished_at}')

Starting to import table: nation
	importing file: gcs://tpc-benchmark-5947/h_1GB_nation.tbl
running query: copy into nation from 'gcs://tpc-benchmark-5947/h_1GB_nation.tbl' storage_integration=gcs_h_1GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/h_1GB_nation.tbl
	finished @ 15:09:16.716946
Starting to import table: lineitem
	importing file: gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.1
running query: copy into lineitem from 'gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.1' storage_integration=gcs_h_1GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.1
	finished @ 15:09:45.420766
	importing file: gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.2
running query: copy into lineitem from 'gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.2' storage_integration=gcs_h_1GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/h_1GB_lineitem.tbl.2
	finished @ 15:10:06.22002

result Copy executed with 0 files processed.
	finished @ 15:12:58.003596
	importing file: gcs://tpc-benchmark-5947/h_1GB_orders.tbl.9
running query: copy into orders from 'gcs://tpc-benchmark-5947/h_1GB_orders.tbl.9' storage_integration=gcs_h_1GB_integration file_format=(format_name=csv_file_format);
result Copy executed with 0 files processed.
	finished @ 15:13:04.455264
Starting to import table: part
	importing file: gcs://tpc-benchmark-5947/h_1GB_part.tbl.1
running query: copy into part from 'gcs://tpc-benchmark-5947/h_1GB_part.tbl.1' storage_integration=gcs_h_1GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/h_1GB_part.tbl.1
	finished @ 15:13:12.075817
	importing file: gcs://tpc-benchmark-5947/h_1GB_part.tbl.2
running query: copy into part from 'gcs://tpc-benchmark-5947/h_1GB_part.tbl.2' storage_integration=gcs_h_1GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/h_1GB_part.tbl.2
	finished @ 15:13:20.2

result Copy executed with 0 files processed.
	finished @ 15:16:07.957358
	importing file: gcs://tpc-benchmark-5947/h_1GB_supplier.tbl.8
running query: copy into supplier from 'gcs://tpc-benchmark-5947/h_1GB_supplier.tbl.8' storage_integration=gcs_h_1GB_integration file_format=(format_name=csv_file_format);
result Copy executed with 0 files processed.
	finished @ 15:16:14.510768
	importing file: gcs://tpc-benchmark-5947/h_1GB_supplier.tbl.9
running query: copy into supplier from 'gcs://tpc-benchmark-5947/h_1GB_supplier.tbl.9' storage_integration=gcs_h_1GB_integration file_format=(format_name=csv_file_format);
result Copy executed with 0 files processed.
	finished @ 15:16:20.962326


### Suspend WAREHOUSE

In [7]:
# Suspend the warehouse now that the import is finished; the helper logs the
# result of the suspend statement.
sf_helper.warehouse_suspend()

warehouse suspend: Statement executed successfully.
