#### Dataset Replication  
Use predefined SQL table copies to partition and cluster new tables.

In [None]:
from google.cloud import bigquery

import config, bq_tpc, sf_tpc

#### Set Source and Destination

In [None]:
# Dataset the tables are copied from.
source_dataset = "ds_100GB_01"
# Dataset the copied tables are written to.
destination_dataset = "ds_100GB_01C"

#### Snowflake Database Copy

In [None]:
# Display the upper-cased source name; the same value is assigned to
# sf.database in the Snowflake copy cell below.
source_dataset.upper()

In [None]:
# Warehouse name: take the entry at index 1 of config.sf_warehouse.
warehouse = config.sf_warehouse[1]
# Bare name so the notebook displays the selected value.
warehouse

In [None]:
%%time
# Build the Snowflake helper. test/scale/cid are placeholder values; the
# database is set explicitly right after construction.
sf = sf_tpc.SFTPC(test="ds",  # class attributes are placeholders
                  scale=1,
                  cid="A",
                  warehouse=warehouse,
                  desc="copy-database",
                  verbose=True,
                  verbose_query=True)
sf.database = source_dataset.upper()
sf.connect()

# Always close what sf.connect() opened, even when the copy raises, so a
# failed run does not leave the connection dangling.
try:
    query_result = sf.copy(destination_dataset.upper())
    print(query_result.sfqid)
finally:
    sf.close()

In [None]:
# Display the object returned by sf.copy() in the Snowflake copy cell above.
query_result

In [None]:
# Display the sfqid attribute of the copy result (also printed by the copy cell).
query_result.sfqid

#### BigQuery Dataset Copy

In [None]:
# The selected schema script both copies the tables AND applies
# clustering/partitioning. Source names containing "ds" use the "ds" script;
# any other name falls back to the "h" script.
schema_name = (
    "bq_ds_01_clone.sql" if "ds" in source_dataset else "bq_h_01_clone.sql"
)

In [None]:
# Build the SQL text for the copy from the selected schema script, using the
# source and destination dataset names chosen above.
# NOTE(review): the meaning of dot=True is defined in bq_tpc — confirm there.
query_text = bq_tpc.create_table_remix(
    schema_name=schema_name,
    source=source_dataset,
    destination=destination_dataset,
    dot=True,
)

In [None]:
# Preview the first ten lines of the generated SQL before running it.
# A plain loop avoids building a throwaway list of None values just for the
# side effect of printing.
for line in query_text.split("\n")[:10]:
    print(line)

In [None]:
# Call bq_tpc.create_dataset for the destination before the copy query below
# targets it (presumably creates the dataset — confirm behavior in bq_tpc).
bq_tpc.create_dataset(destination_dataset)

In [None]:
%%time
# Submit the generated SQL against the destination dataset as a real run
# (dry_run=False) with use_cache=False.
query_job = bq_tpc.query(
    query_text=query_text,
    project=config.gcp_project.lower(),
    dataset=destination_dataset,
    dry_run=False,
    use_cache=False,
)
# NOTE(review): presumably a BigQuery QueryJob, whose result() waits for the
# job to finish so the cell timing covers the whole copy — confirm in bq_tpc.
query_job.result()