In [10]:
from google.cloud import bigquery
client = bigquery.Client()
dataset_id = "{}.telco".format(client.project)

# Construct a full Dataset object to send to the API.
dataset = bigquery.Dataset(dataset_id)

# TODO(developer): Specify the geographic location where the dataset should reside.
dataset.location = "europe-west4"

# Send the dataset to the API for creation, with an explicit timeout.
# Raises google.api_core.exceptions.Conflict if the Dataset already
# exists within the project.
try:
    dataset = client.create_dataset(dataset, timeout=30)  # Make an API request.
    print("Created dataset {}.{}".format(client.project, dataset.dataset_id))
except Exception as ex:
    print(ex)

409 POST https://bigquery.googleapis.com/bigquery/v2/projects/myfirstproject-226013/datasets?prettyPrint=false: Already Exists: Dataset myfirstproject-226013:telco


### Load Phone Data

In [36]:
table_id = "{}.telco.phone".format(client.project)
file_path = 'phone_dataset.csv'

job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("brand", "STRING"),
        bigquery.SchemaField("model", "STRING"),
        bigquery.SchemaField("network_technology", "STRING"),
        bigquery.SchemaField("bands_2G", "STRING"),
        bigquery.SchemaField("bands_3G", "STRING"),
        bigquery.SchemaField("bands_4G", "STRING"),
        bigquery.SchemaField("network_speed", "STRING"),
        bigquery.SchemaField("GPRS", "STRING"),
        bigquery.SchemaField("EDGE", "STRING"),
        bigquery.SchemaField("announced", "STRING"),
        bigquery.SchemaField("status", "STRING"),
        bigquery.SchemaField("dimentions", "STRING"),
        bigquery.SchemaField("weight_g", "FLOAT"),
        bigquery.SchemaField("weight_oz", "FLOAT"),
        bigquery.SchemaField("SIM", "STRING"),
        bigquery.SchemaField("display_type", "STRING"),
        bigquery.SchemaField("display_resolution", "STRING"),
        bigquery.SchemaField("display_size", "STRING"),
        bigquery.SchemaField("OS", "STRING"),
        bigquery.SchemaField("CPU", "STRING"),
        bigquery.SchemaField("Chipset", "STRING"),
        bigquery.SchemaField("GPU", "STRING"),
        bigquery.SchemaField("memory_card", "STRING"),
        bigquery.SchemaField("internal_memory", "STRING"),
        bigquery.SchemaField("RAM", "STRING"),
        bigquery.SchemaField("primary_camera", "STRING"),
        bigquery.SchemaField("secondary_camera", "STRING"),
        bigquery.SchemaField("loud_speaker", "STRING"),
        bigquery.SchemaField("audio_jack", "STRING"),
        bigquery.SchemaField("WLAN", "STRING"),
        bigquery.SchemaField("bluetooth", "STRING"),
        bigquery.SchemaField("GPS", "STRING"),
        bigquery.SchemaField("NFC", "STRING"),
        bigquery.SchemaField("radio", "STRING"),
        bigquery.SchemaField("USB", "STRING"),
        bigquery.SchemaField("sensors", "STRING"),
        bigquery.SchemaField("battery", "STRING"),
        bigquery.SchemaField("colors", "STRING"),
        bigquery.SchemaField("approx_price_EUR", "INTEGER"),
        bigquery.SchemaField("img_url", "STRING")
    ],
    
    
    autodetect=True,
    skip_leading_rows=1, 
    max_bad_records=10000,
    source_format=bigquery.SourceFormat.CSV, 
    writeDisposition="WRITE_TRUNCATE", # overwrites table
    
)

with open(file_path, "rb") as source_file:
    job = client.load_table_from_file(source_file, table_id, job_config=job_config)

job.result()  # Waits for the job to complete.

table = client.get_table(table_id)  # Make an API request.
print("Loaded {} rows and {} columns to {}".format(table.num_rows, len(table.schema), table_id))

Loaded 8590 rows and 40 columns to myfirstproject-226013.telco.phone


### Load Churn Data

In [37]:
table_id = "{}.telco.churn".format(client.project)
file_path = 'churn.csv'

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV, 
    skip_leading_rows=1, 
    autodetect=True,
    writeDisposition="WRITE_TRUNCATE", # overwrites table
)

with open(file_path, "rb") as source_file:
    job = client.load_table_from_file(source_file, table_id, job_config=job_config)

job.result()  # Waits for the job to complete.

table = client.get_table(table_id)  # Make an API request.
print("Loaded {} rows and {} columns to {}".format(table.num_rows, len(table.schema), table_id))

Loaded 7043 rows and 21 columns to myfirstproject-226013.telco.churn_v5
