## TPC Dataset Import
Import the TPC benchmark dataset from Google Cloud Storage (GCS) into a Snowflake database.

### Set Configs

In [1]:
import config, sf, datetime

# Which benchmark to run: sf.TEST_DS selects TPC-DS.
TEST = sf.TEST_DS
# Size label of the dataset used in the test.
SIZE = '1000GB'

# "Dry Run" switch: when enabled, SQL queries are generated and printed but
# not executed, so they can be run manually in the "workbench" instead.
# In this notebook the flag is only forwarded to create_integration() and
# create_schema().
DRY_RUN = False

`Note: configuration data is read from config.py`

### Start Snowflake WAREHOUSE

In [2]:
# Build the SnowflakeHelper for the chosen test type and dataset size,
# handing it the `config` module imported above.
# NOTE(review): the recorded output of this cell prints the user, password and
# account in plain text — presumably logged by the helper; confirm and remove
# that logging, and clear the output before sharing the notebook.
sf_helper = sf.SnowflakeHelper(TEST, SIZE, config)

# Resume the warehouse. Per the recorded output, this cell also selects the
# role and warehouse and creates/selects the dataset database.
sf_helper.warehouse_start()

preparing to open connection to Snowflake
using config: user:dauren, pass: <redacted>, account: wja13212
connection opened
running query: USE ROLE ACCOUNTADMIN
result: Statement executed successfully.
running query: ALTER WAREHOUSE TEST1 RESUME;
warehouse start: Statement executed successfully.
running query: USE WAREHOUSE TEST1
result: Statement executed successfully.
running query: CREATE DATABASE IF NOT EXISTS ds_1000GB
result: Database DS_1000GB successfully created.
running query: USE DATABASE ds_1000GB
result: Statement executed successfully.


### Set up STAGE: link to the GCS data source and stage files for loading

In [3]:
# Create the GCS integration only when sf_helper.is_integrated() is falsy.
# NOTE(review): `integration_id` is bound only inside this branch, yet later
# cells pass it to list_integration() and import_data(). On a fresh kernel
# where is_integrated() is already truthy those cells would hit a NameError —
# confirm how the id should be obtained in that case.
if not sf_helper.is_integrated():
    # integrate Snowflake with GCS.
    integration_id = sf_helper.create_integration(is_dry_run=DRY_RUN)
    print(f'integrated with gcs: {integration_id}')



--creating named file format: "@csv_file_format"
running query: create or replace file format csv_file_format
            type = csv
            field_delimiter = '|'
            skip_header = 1
            null_if = ('NULL', 'null')
            empty_field_as_null = true
            encoding = 'iso-8859-1' 
            compression = none;
result File format CSV_FILE_FORMAT successfully created.


--done creating named file format


--integrating "gcs_ds_1000GB_integration" ... 


--creating storage integration: "gcs_ds_1000GB_integration"
running query: CREATE STORAGE INTEGRATION gcs_ds_1000GB_integration TYPE=EXTERNAL_STAGE STORAGE_PROVIDER=GCS ENABLED=TRUE STORAGE_ALLOWED_LOCATIONS=('gcs://tpc-benchmark-5947/');
result Integration GCS_DS_1000GB_INTEGRATION successfully created.


--finished creating storage integration
running query: GRANT CREATE STAGE on schema public to ROLE ACCOUNTADMIN;
result Statement executed successfully.
running query: GRANT USAGE on INTEGRATION gcs_ds_10

### Test STAGE

In [4]:
# List what the integration exposes, grouped per table, then print each
# table's entries followed by the gap between sf_helper.gcs_file_range and
# the number of entries found for that table.
db_files = sf_helper.list_integration(integration_id)
for table_name, staged_files in db_files.items():
    print(f'{table_name}:')
    for staged_file in staged_files:
        print(f'\t{staged_file}')
    missing_count = sf_helper.gcs_file_range - len(staged_files)
    print(f'\tmissing {missing_count} files\n\n')



--listing stage: "@gcs_ds_1000GB_integration_stage"
unknown table!!!! gcs://tpc-benchmark-5947/ds_1000GB_date_dim_1_96.dat
unknown table!!!! gcs://tpc-benchmark-5947/ds_1000GB_dbgen_version_1_96.dat
unknown table!!!! gcs://tpc-benchmark-5947/ds_1000GB_warehouse_1_96.dat


--done listing stage
call_center:
	gcs://tpc-benchmark-5947/ds_1000GB_call_center_1_96.dat
	missing 95 files


catalog_page:
	gcs://tpc-benchmark-5947/ds_1000GB_catalog_page_1_96.dat
	missing 95 files


catalog_returns:
	gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_10_96.dat
	gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_11_96.dat
	gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_12_96.dat
	gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_13_96.dat
	gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_14_96.dat
	gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_15_96.dat
	gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_16_96.dat
	gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_17_96.dat
	gcs://tp

### Create tables in Snowflake if needed

In [5]:
# Push the schema only when sf_helper.is_integrated() is falsy; DRY_RUN is
# forwarded to the helper.
# NOTE(review): this is the same guard as the integration cell. The recorded
# output shows the schema being pushed after the integration was created, so
# is_integrated() evidently still returned a falsy value here — confirm what
# it actually checks before relying on it to skip table creation.
if not sf_helper.is_integrated():
    sf_helper.create_schema(is_dry_run=DRY_RUN)



--pushing schema: "/home/vagrant/bq_snowflake_benchmark/h/2.18.0_rc2/dbgen/dss.ddl"
running query: -- 
-- Legal Notice 
-- 
-- This document and associated source code (the "Work") is a part of a 
-- benchmark specification maintained by the TPC. 
-- 
-- The TPC reserves all right, title, and interest to the Work as provided 
-- under U.S. and international laws, including without limitation all patent 
-- and trademark rights therein. 
-- 
-- No Warranty 
-- 
-- 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION 
--     CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE 
--     AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER 
--     WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, 
--     INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, 
--     DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR 
--     PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF 
--     WORKMANLIKE 

result Table INCOME_BAND successfully created.
running query: 
create table item
(
    i_item_sk                 integer               not null,
    i_item_id                 char(16)              not null,
    i_rec_start_date          date                          ,
    i_rec_end_date            date                          ,
    i_item_desc               varchar(200)                  ,
    i_current_price           decimal(7,2)                  ,
    i_wholesale_cost          decimal(7,2)                  ,
    i_brand_id                integer                       ,
    i_brand                   char(50)                      ,
    i_class_id                integer                       ,
    i_class                   char(50)                      ,
    i_category_id             integer                       ,
    i_category                char(50)                      ,
    i_manufact_id             integer                       ,
    i_manufact                char(50)           

result Table WEB_SITE successfully created.
running query: 
create table store_returns
(
    sr_returned_date_sk       integer                       ,
    sr_return_time_sk         integer                       ,
    sr_item_sk                integer               not null,
    sr_customer_sk            integer                       ,
    sr_cdemo_sk               integer                       ,
    sr_hdemo_sk               integer                       ,
    sr_addr_sk                integer                       ,
    sr_store_sk               integer                       ,
    sr_reason_sk              integer                       ,
    sr_ticket_number          integer               not null,
    sr_return_quantity        integer                       ,
    sr_return_amt             decimal(7,2)                  ,
    sr_return_tax             decimal(7,2)                  ,
    sr_return_amt_inc_tax     decimal(7,2)                  ,
    sr_fee                    decimal(7,2) 

result Table WEB_RETURNS successfully created.
running query: 
create table web_sales
(
    ws_sold_date_sk           integer                       ,
    ws_sold_time_sk           integer                       ,
    ws_ship_date_sk           integer                       ,
    ws_item_sk                integer               not null,
    ws_bill_customer_sk       integer                       ,
    ws_bill_cdemo_sk          integer                       ,
    ws_bill_hdemo_sk          integer                       ,
    ws_bill_addr_sk           integer                       ,
    ws_ship_customer_sk       integer                       ,
    ws_ship_cdemo_sk          integer                       ,
    ws_ship_hdemo_sk          integer                       ,
    ws_ship_addr_sk           integer                       ,
    ws_web_page_sk            integer                       ,
    ws_web_site_sk            integer                       ,
    ws_ship_mode_sk           integer       

### Import Data from STAGE to target table

In [None]:
# Import every listed file into its table, one import_data() call per file,
# printing the wall-clock time after each call returns.
# Files are processed in sorted order; for the string paths seen in the
# recorded output this is lexicographic (e.g. ..._29_96 before ..._2_96).
# NOTE(review): DRY_RUN is not passed to import_data(), so this cell does not
# appear to honour dry-run mode — confirm whether the helper supports it.
for table_name, staged_files in db_files.items():
    print(f'Starting to import table: {table_name}')
    ordered_files = list(staged_files)
    ordered_files.sort()
    for staged_file in ordered_files:
        print(f'\timporting file: {staged_file}')
        sf_helper.import_data(table_name, staged_file, integration_id)
        finished_at = datetime.datetime.now().time()
        print(f'\tfinished @ {finished_at}')

Starting to import table: call_center
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_call_center_1_96.dat
running query: copy into call_center from 'gcs://tpc-benchmark-5947/ds_1000GB_call_center_1_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_call_center_1_96.dat
	finished @ 19:11:42.797362
Starting to import table: catalog_page
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_page_1_96.dat
running query: copy into catalog_page from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_page_1_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_page_1_96.dat
	finished @ 19:11:52.363649
Starting to import table: catalog_returns
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_10_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_ret

result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_29_96.dat
	finished @ 19:20:50.175639
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_2_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_2_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_2_96.dat
	finished @ 19:21:07.923731
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_30_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_30_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_30_96.dat
	finished @ 19:21:25.639787
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_31_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_cat

result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_4_96.dat
	finished @ 19:28:15.315664
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_50_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_50_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_50_96.dat
	finished @ 19:28:32.971242
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_51_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_51_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_51_96.dat
	finished @ 19:28:50.951406
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_52_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_c

result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_70_96.dat
	finished @ 19:35:14.872355
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_71_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_71_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_71_96.dat
	finished @ 19:35:32.327682
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_72_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_72_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_72_96.dat
	finished @ 19:35:50.747987
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_73_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_

result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_91_96.dat
	finished @ 19:42:15.307616
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_92_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_92_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_92_96.dat
	finished @ 19:42:33.159503
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_93_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_93_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_93_96.dat
	finished @ 19:42:50.767576
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_returns_94_96.dat
running query: copy into catalog_returns from 'gcs://tpc-benchmark-5947/ds_1000GB_

result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_25_96.dat
	finished @ 20:18:58.983529
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_26_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_26_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_26_96.dat
	finished @ 20:21:03.915678
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_27_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_27_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_27_96.dat
	finished @ 20:23:15.456257
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_28_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_28_96.da

result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_46_96.dat
	finished @ 21:06:32.700103
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_47_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_47_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_47_96.dat
	finished @ 21:08:55.035586
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_48_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_48_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_48_96.dat
	finished @ 21:10:55.508065
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_49_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_49_96.da

result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_67_96.dat
	finished @ 21:54:52.799519
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_68_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_68_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_68_96.dat
	finished @ 21:57:38.793156
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_69_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_69_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_69_96.dat
	finished @ 21:59:39.831698
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_6_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_6_96.dat'

result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_88_96.dat
	finished @ 22:43:26.207930
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_89_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_89_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_89_96.dat
	finished @ 22:45:27.760253
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_8_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_8_96.dat' storage_integration=gcs_ds_1000GB_integration file_format=(format_name=csv_file_format);
result gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_8_96.dat
	finished @ 22:48:05.764297
	importing file: gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_90_96.dat
running query: copy into catalog_sales from 'gcs://tpc-benchmark-5947/ds_1000GB_catalog_sales_90_96.dat' 

### Suspend WAREHOUSE

In [8]:
# Suspend the warehouse once the import is finished.
# NOTE(review): this runs only if execution reaches this cell; if the import
# cell fails or is interrupted the warehouse is presumably left running —
# consider suspending it manually in that case.
sf_helper.warehouse_suspend()

warehouse suspend: Statement executed successfully.
