In [4]:
import pandas as pd

In [5]:
# Read the source CSV into the DataFrame that every later cell works from.
df = pd.read_csv(filepath_or_buffer='finaldataset.csv')


In [47]:
# df.head()

In [46]:
# list(df.columns)

In [42]:
# Build the list of BigQuery type names, one per DataFrame column (same order
# as df.columns), for later use when constructing the table schema.
#
# Dtypes are classified with pandas' dtype predicates instead of being compared
# against the literal strings 'object' / 'float64'. A catch-all INTEGER branch
# would mislabel bool, datetime, category, float32, ... columns; anything that
# is not recognisably numeric/bool/datetime now falls back to STRING.
type_list = []

for column in df.columns:
    dtype = df[column].dtype
    if pd.api.types.is_bool_dtype(dtype):
        type_list.append("BOOLEAN")
    elif pd.api.types.is_integer_dtype(dtype):
        type_list.append("INTEGER")
    elif pd.api.types.is_float_dtype(dtype):
        type_list.append("FLOAT64")
    elif pd.api.types.is_datetime64_any_dtype(dtype):
        # Timezone-aware datetimes denote absolute instants (TIMESTAMP);
        # naive ones are civil date-times (DATETIME).
        if getattr(dtype, "tz", None) is not None:
            type_list.append("TIMESTAMP")
        else:
            type_list.append("DATETIME")
    else:
        # object / string / category and any other dtype
        type_list.append("STRING")

In [13]:
# Column names of the DataFrame, in order, as a plain Python list.
field_list = df.columns.to_list()

In [2]:
# https://www.rudderstack.com/guides/how-to-access-and-query-your-bigquery-data-using-python-and-r/
from google.cloud import bigquery
from google.oauth2 import service_account

In [3]:
# Build an authenticated BigQuery client.
# key.json is the service-account key file used to authenticate against the
# BigQuery API; it is read from the notebook's working directory.
project_id = 'zillow-houses-376821'
credentials = service_account.Credentials.from_service_account_file('key.json')
client = bigquery.Client(
    credentials=credentials,
    project=project_id,
)

In [14]:
# https://medium.com/pipeline-a-data-engineering-resource/automate-your-bigquery-schema-definitions-with-5-lines-of-python-7a1996749718
# Automate schema creation

def create_schema(field_list: list, type_list: list) -> list:
    """Build a BigQuery schema from parallel lists of column names and types.

    Args:
        field_list: Column names, in table order.
        type_list: BigQuery type names (e.g. "STRING", "INTEGER"), one per
            entry of ``field_list`` and in the same order.

    Returns:
        A list of ``bigquery.SchemaField`` objects, one per column.

    Raises:
        ValueError: If the two lists differ in length. ``zip`` would otherwise
            silently drop the surplus entries and yield a schema that is
            missing columns.
    """
    # Materialise first so any iterable (not only lists) can be length-checked.
    names = list(field_list)
    types = list(type_list)
    if len(names) != len(types):
        raise ValueError(
            "field_list and type_list must have the same length "
            "(got {} and {})".format(len(names), len(types))
        )
    return [bigquery.SchemaField(name, bq_type) for name, bq_type in zip(names, types)]

In [None]:
# Manual schema creation

# schema = [
#     bigquery.SchemaField("zpid", "INTEGER"),
#     bigquery.SchemaField("streetAddress", "STRING"),
#     bigquery.SchemaField("zipcode", "STRING"),
#     bigquery.SchemaField("city", "STRING"),
#     bigquery.SchemaField("state","STRING"),
#     bigquery.SchemaField("latitude", "NUMERIC"),
#     bigquery.SchemaField("longitude", "NUMERIC"),
#     bigquery.SchemaField("price", "INTEGER"),
#     bigquery.SchemaField("bathrooms", "FLOAT64"),
#     bigquery.SchemaField("bedrooms", "FLOAT64"),
#     bigquery.SchemaField("livingArea", "FLOAT64"),
#     bigquery.SchemaField("homeType", "STRING"),
#     bigquery.SchemaField("taxAssessedValue", "FLOAT64"),
#     bigquery.SchemaField("brokerName", "STRING")
# ]

In [45]:
# create_schema(field_list,type_list)

In [39]:
# Function to load dataframe into BigQuery

def bq_load(df, dataset_id: str, table_id: str, schema, client, location: str = 'US'):
    """Load a DataFrame into a BigQuery table, replacing the table's contents.

    Args:
        df: The pandas DataFrame to upload.
        dataset_id: Dataset name. Used only to qualify ``table_id`` when that
            is a bare table name (contains no "."); ignored otherwise.
        table_id: Destination table. Either a qualified id such as
            "project.dataset.table" / "dataset.table", or a bare table name
            that is then resolved inside ``dataset_id``.
        schema: List of ``bigquery.SchemaField`` describing the table columns.
        client: The ``bigquery.Client`` used to submit the load job.
        location: Location passed to the load job. Defaults to 'US'.

    Returns:
        The value returned by ``job.result()`` for the submitted load job.
    """
    # A bare table name cannot be resolved on its own, so qualify it with the
    # dataset; ids that are already qualified are passed through untouched.
    destination = table_id
    if isinstance(table_id, str) and dataset_id and "." not in table_id:
        destination = "{}.{}".format(dataset_id, table_id)

    job_config = bigquery.LoadJobConfig()
    # Overwrite any existing rows rather than appending to them.
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = bigquery.SourceFormat.CSV
    # The schema is supplied explicitly, so type auto-detection is disabled.
    job_config.autodetect = False
    job_config.schema = schema
    job_config.ignore_unknown_values = False

    job = client.load_table_from_dataframe(
        df,
        destination,
        location=location,
        job_config=job_config,
    )

    return job.result()

In [43]:
# Upload the DataFrame to the vancouverhouses table, using a schema generated
# from the column names and the BigQuery types gathered earlier.
df_to_bq = bq_load(
    df=df,
    dataset_id="zillow_houses",
    table_id="zillow-houses-376821.zillow_houses.vancouverhouses",
    schema=create_schema(field_list, type_list),
    client=client,
)

In [48]:
# Testing query

# Sanity-check query: list the distinct home types present in the loaded table.
initial_query = """ 

SELECT distinct homeType FROM `zillow-houses-376821.zillow_houses.vancouverhouses`

"""

# Submit the query, then pull the result set back as a DataFrame.
query_job = client.query(initial_query)
query_results = query_job.to_dataframe()

print(query_results)

        homeType
0            LOT
1      APARTMENT
2          CONDO
3      TOWNHOUSE
4   MULTI_FAMILY
5  SINGLE_FAMILY
