In [8]:
from google.cloud import bigquery
from google.cloud.exceptions import NotFound
from google.oauth2 import service_account

In [10]:
def bq_create_dataset(client, dataset):
    dataset_ref = bigquery_client.dataset(dataset)
    
    try:
        dataset = bigquery_client.get_dataset(dataset_ref)
        print(f"Dataset {dataset} already exists.")
    except NotFound:
        dataset = bigquery.Dataset(dataset_ref)
        dataset.location = 'US'
        dataset = bigquery_client.create_dataset(dataset)
        print(f"Dataset {dataset.dataset_id} created.")
    return dataset

def bq_create_table(client, dataset, table_name):
    dataset_ref = bigquery_client.dataset(dataset)
    
    # Prepare a referece to the table
    table_ref = dataset_ref.table(table_name)
    
    try:
        table = bigquery_client.get_table(table_ref)
        print(f"table {table} already exists.")
    except NotFound:
        schema = [
            bigquery.SchemaField('S_No', 'INTEGER', mode='REQUIRED'),
            bigquery.SchemaField('Age_in_cm', 'INTEGER', mode='REQUIRED'),
            bigquery.SchemaField('Weight_in_Kg', 'INTEGER', mode='REQUIRED')
        ]
        table = bigquery.Table(table_ref, schema=schema)
        table = bigquery_client.create_table(table)
        print(f"table {table.table_id} created.")
    return table

key_path = './service_account/gentle-keyword-423715-j0-03be08ad6412.json'
credentials = service_account.Credentials.from_service_account_file(
    key_path,
    scopes=["https://www.googleapis.com/auth/bigquery"]
)
    
bigquery_client = bigquery.Client(
    credentials = credentials,
    project=credentials.project_id
)
if __name__ == '__main__':
    dataset = 'demo_dataset01'
    table_name = 'demo_table01'
    data = bq_create_dataset(bigquery_client, dataset)
    table = bq_create_table(bigquery_client, dataset, table_name)

Dataset Dataset(DatasetReference('gentle-keyword-423715-j0', 'demo_dataset01')) already exists.
table demo_table01 created.


In [15]:
dataset_ref = bigquery_client.dataset('google_analytics_sample')
table_ref = dataset_ref.table('ga_sessions_20170801')

example = bigquery_client.get_table(table_ref)
example.schema

[SchemaField('visitorId', 'INTEGER', 'NULLABLE', None, None, (), None),
 SchemaField('visitNumber', 'INTEGER', 'NULLABLE', None, None, (), None),
 SchemaField('visitId', 'INTEGER', 'NULLABLE', None, None, (), None),
 SchemaField('visitStartTime', 'INTEGER', 'NULLABLE', None, None, (), None),
 SchemaField('date', 'STRING', 'NULLABLE', None, None, (), None),
 SchemaField('totals', 'RECORD', 'NULLABLE', None, None, (SchemaField('visits', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('hits', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('pageviews', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('timeOnSite', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('bounces', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('transactions', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('transactionRevenue', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('newVisits', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField(

In [13]:
schema = [
            bigquery.SchemaField('S_No', 'INTEGER', mode='REQUIRED'),
            bigquery.SchemaField('Age_in_cm', 'INTEGER', mode='REQUIRED'),
            bigquery.SchemaField('Weight_in_Kg', 'INTEGER', mode='REQUIRED')
        ]

print(schema)

[SchemaField('S_No', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('Age_in_cm', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('Weight_in_Kg', 'INTEGER', 'REQUIRED', None, None, (), None)]


In [23]:
from datetime import datetime, timedelta

# Define the start and end dates
start_date = datetime.strptime('20160801', '%Y%m%d')
end_date = datetime.strptime('20170801', '%Y%m%d')

# Generate a list of dates in the specified range
date_list = []
current_date = start_date
while current_date <= end_date:
    date_list.append(current_date.strftime('%Y%m%d'))
    current_date += timedelta(days=1)

# Print the list of dates
for date in date_list:
    print(date)


20160801
20160802
20160803
20160804
20160805
20160806
20160807
20160808
20160809
20160810
20160811
20160812
20160813
20160814
20160815
20160816
20160817
20160818
20160819
20160820
20160821
20160822
20160823
20160824
20160825
20160826
20160827
20160828
20160829
20160830
20160831
20160901
20160902
20160903
20160904
20160905
20160906
20160907
20160908
20160909
20160910
20160911
20160912
20160913
20160914
20160915
20160916
20160917
20160918
20160919
20160920
20160921
20160922
20160923
20160924
20160925
20160926
20160927
20160928
20160929
20160930
20161001
20161002
20161003
20161004
20161005
20161006
20161007
20161008
20161009
20161010
20161011
20161012
20161013
20161014
20161015
20161016
20161017
20161018
20161019
20161020
20161021
20161022
20161023
20161024
20161025
20161026
20161027
20161028
20161029
20161030
20161031
20161101
20161102
20161103
20161104
20161105
20161106
20161107
20161108
20161109
20161110
20161111
20161112
20161113
20161114
20161115
20161116
20161117
20161118
20161119
2

In [31]:
client = bigquery.Client(
    credentials = credentials,
    project=credentials.project_id
)


for i in date_list[167:197]:
    # Define the source table reference
    source_project_id = 'bigquery-public-data'
    source_dataset_id = 'google_analytics_sample'
    source_table_id = f'ga_sessions_{i}'
    source_table_ref = f'{source_project_id}.{source_dataset_id}.{source_table_id}'

    # Get the source table schema
    source_table = client.get_table(source_table_ref)
    schema = source_table.schema

    # Define the destination table reference
    destination_project_id = credentials.project_id
    destination_dataset_id = 'google_analytics_sample'
    destination_table_id = f'ga_sessions_{i}'
    destination_table_ref = client.dataset(destination_dataset_id, project=destination_project_id).table(destination_table_id)

    # Delete the existing destination table if it exists
    try:
        client.delete_table(destination_table_ref)
        print(f"Deleted table {destination_table_ref}")
    except Exception as e:
        print(f"Table {destination_table_ref} does not exist: {e}")


    # Create the destination table with the same schema as the source table
    destination_table = bigquery.Table(destination_table_ref, schema=schema)
    destination_table = client.create_table(destination_table)  # API request

    print(f"Created table {destination_table.project}.{destination_table.dataset_id}.{destination_table.table_id}")

    # Define the copy job configuration
    job_config = bigquery.CopyJobConfig()

    # Copy the table
    copy_job = client.copy_table(
        source_table_ref,
        destination_table_ref,
        job_config=job_config
    )  # Make an API request.

    # Wait for the job to complete
    copy_job.result()

    print(f"Table {source_table_ref} copied to {destination_table_ref}.")

Table gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170115 does not exist: 404 DELETE https://bigquery.googleapis.com/bigquery/v2/projects/gentle-keyword-423715-j0/datasets/google_analytics_sample/tables/ga_sessions_20170115?prettyPrint=false: Not found: Table gentle-keyword-423715-j0:google_analytics_sample.ga_sessions_20170115
Created table gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170115
Table bigquery-public-data.google_analytics_sample.ga_sessions_20170115 copied to gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170115.
Table gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170116 does not exist: 404 DELETE https://bigquery.googleapis.com/bigquery/v2/projects/gentle-keyword-423715-j0/datasets/google_analytics_sample/tables/ga_sessions_20170116?prettyPrint=false: Not found: Table gentle-keyword-423715-j0:google_analytics_sample.ga_sessions_20170116
Created table gentle-keyword-423715-j0.google_analytics_sample.ga_s

Created table gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170129
Table bigquery-public-data.google_analytics_sample.ga_sessions_20170129 copied to gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170129.
Table gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170130 does not exist: 404 DELETE https://bigquery.googleapis.com/bigquery/v2/projects/gentle-keyword-423715-j0/datasets/google_analytics_sample/tables/ga_sessions_20170130?prettyPrint=false: Not found: Table gentle-keyword-423715-j0:google_analytics_sample.ga_sessions_20170130
Created table gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170130
Table bigquery-public-data.google_analytics_sample.ga_sessions_20170130 copied to gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170130.
Table gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170131 does not exist: 404 DELETE https://bigquery.googleapis.com/bigquery/v2/projects/gentle-keyword-423715-

Table bigquery-public-data.google_analytics_sample.ga_sessions_20170212 copied to gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170212.
Table gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170213 does not exist: 404 DELETE https://bigquery.googleapis.com/bigquery/v2/projects/gentle-keyword-423715-j0/datasets/google_analytics_sample/tables/ga_sessions_20170213?prettyPrint=false: Not found: Table gentle-keyword-423715-j0:google_analytics_sample.ga_sessions_20170213
Created table gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170213
Table bigquery-public-data.google_analytics_sample.ga_sessions_20170213 copied to gentle-keyword-423715-j0.google_analytics_sample.ga_sessions_20170213.


In [30]:
date_list[167:197]

['20170115',
 '20170116',
 '20170117',
 '20170118',
 '20170119',
 '20170120',
 '20170121',
 '20170122',
 '20170123',
 '20170124',
 '20170125',
 '20170126',
 '20170127',
 '20170128',
 '20170129',
 '20170130',
 '20170131',
 '20170201',
 '20170202',
 '20170203',
 '20170204',
 '20170205',
 '20170206',
 '20170207',
 '20170208',
 '20170209',
 '20170210',
 '20170211',
 '20170212',
 '20170213']