Create BQ dataset for storing the raw data

In [24]:
from google.cloud import bigquery

# Target project, dataset name and location of the raw landing dataset.
project_id = "steady-service-448416-k2"
dataset = "finance_economics_events_raw"
region = "us-central1"

bq_client = bigquery.Client()

# Despite its name, dataset_id holds a bigquery.Dataset object built from the
# fully-qualified "<project>.<dataset>" id, not a plain id string.
dataset_id = bigquery.Dataset(f"{project_id}.{dataset}")
dataset_id.location = region
# exists_ok=True ignores the "already exists" error, so the cell can be re-run.
resp = bq_client.create_dataset(dataset_id, exists_ok=True)
# Report the project of the returned dataset rather than the client's default
# project, which is not necessarily project_id.
print("Created dataset {}.{}".format(resp.project, resp.dataset_id))


Created dataset steady-service-448416-k2.finance_economics_events_raw


Common functions

In [25]:
from google.cloud import bigquery
import re
# Project that owns the destination dataset.
project_id = "steady-service-448416-k2"
# GCS bucket that holds the source files.
bucket = "finance_economics_events"
# Top-level folder inside the bucket under which the per-source folders live.
# NOTE(review): the value starts with a space. The loads recorded in this
# notebook succeeded with it, so the folder presumably really has that name —
# confirm before "fixing" it.
parent_folder = " initial-loads"
# Location passed to the JSON load job.
region = "us-central1"
# Destination dataset for all raw tables.
dataset = "finance_economics_events_raw"

# Client shared by the loader functions defined in this cell.
bq_client = bigquery.Client()

def create_load_table_from_csv(folder,file_name, table, schema, delimiter=",", file_pattern="*.csv",quote_character="\""):
  """Create a BigQuery table if it does not exist and append CSV files to it.

  The table is created with the full ``schema``; the load job uses the schema
  without its last two entries, because ``_data_source`` and ``_load_time`` are
  not present in the CSV files.

  Args:
    folder: Folder below ``parent_folder`` in ``bucket`` that holds the files.
    file_name: Not used by this function; kept so existing calls keep working.
      The files to load are selected by ``file_pattern``.
    table: Name of the destination table inside ``dataset``.
    schema: Sequence of ``bigquery.SchemaField`` whose last two entries are the
      ``_data_source`` and ``_load_time`` columns. It is not modified.
    delimiter: Field delimiter used in the CSV files.
    file_pattern: Object-name pattern, relative to ``folder``, of the files to load.
    quote_character: Character used to quote values in the CSV files.

  Returns:
    None. Progress is reported with ``print``.
  """
  uri = f"gs://{bucket}/{parent_folder}/{folder}/{file_pattern}"
  table_id = f"{project_id}.{dataset}.{table}"

  created_table = bigquery.Table(table_id, schema=schema)
  created_table = bq_client.create_table(created_table, exists_ok=True)
  print("Created table {}".format(created_table.table_id))

  # Leave the data_source and load_time fields out of the load schema, neither
  # one is present in the csv. A copy is sliced instead of deleting from
  # `schema`, so the caller's list is left intact and can be reused.
  file_schema = list(schema[:-2])
  print(file_schema)

  job_config = bigquery.LoadJobConfig(
        schema=file_schema,
        skip_leading_rows=1,  # the first row of each file is a header
        source_format=bigquery.SourceFormat.CSV,
        write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
        field_delimiter=delimiter,
        quote_character=quote_character,
        # Tolerant settings: up to 50 bad rows are skipped, short rows and
        # extra values are accepted instead of failing the job.
        max_bad_records=50,
        allow_jagged_rows=True,
        ignore_unknown_values=True
      )

  load_job = bq_client.load_table_from_uri(uri, table_id, job_config=job_config)
  # Block until the load job has finished.
  load_job.result()

  destination_table = bq_client.get_table(table_id)
  # num_rows is the table's total row count after the load.
  print("Loaded {} rows.".format(destination_table.num_rows))


def create_load_table_from_json(folder, file_name, table, schema):
  """Create a BigQuery table if it does not exist and load a JSON file into it.

  The table is created with the full ``schema``; the load job uses the schema
  without its last two entries, because ``_data_source`` and ``_load_time`` are
  not present in the JSON file.

  Args:
    folder: Folder below ``parent_folder`` in ``bucket`` that holds the file.
    file_name: Object name (or pattern) of the newline-delimited JSON to load.
    table: Name of the destination table inside ``dataset``.
    schema: Sequence of ``bigquery.SchemaField`` whose last two entries are the
      ``_data_source`` and ``_load_time`` columns. It is not modified.

  Returns:
    None. Progress is reported with ``print``.
  """
  table_id = f"{project_id}.{dataset}.{table}"
  uri = f"gs://{bucket}/{parent_folder}/{folder}/{file_name}"

  created_table = bigquery.Table(table_id, schema=schema)
  created_table = bq_client.create_table(created_table, exists_ok=True)
  print("Created table {}".format(created_table.table_id))

  # Leave the data_source and load_time fields out of the load schema, neither
  # one is present in the json. A copy is sliced instead of deleting from
  # `schema`, so the caller's list is left intact and can be reused.
  file_schema = list(schema[:-2])

  job_config = bigquery.LoadJobConfig(schema=file_schema,
      source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
      # WRITE_EMPTY: the job only writes when the destination table is empty.
      write_disposition = "WRITE_EMPTY"
  )

  load_job = bq_client.load_table_from_uri(
      uri,
      table_id,
      location=region,
      job_config=job_config,
  )

  # Block until the load job has finished.
  load_job.result()

  destination_table = bq_client.get_table(table_id)
  # num_rows is the table's total row count after the load.
  print("Loaded {} rows.".format(destination_table.num_rows))


Adding us_geocode


In [26]:
folder = "US Geo Code"
file_name = "*.csv"
table = "us_geocode"

# Data columns read from the CSV files; all of them are mandatory.
schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in (
        ("stateANDterritory", "STRING"),
        ("latitude", "FLOAT"),
        ("longitude", "FLOAT"),
        ("Name", "STRING"),
    )
]
# Audit columns populated through their default expressions. They have to stay
# the last two entries: the loader leaves the last two fields out of the load schema.
schema.extend([
    bigquery.SchemaField("_data_source", "STRING", mode="REQUIRED", default_value_expression="'US Geo Code'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP"),
])

create_load_table_from_csv(folder, file_name, table, schema)

Created table us_geocode
[SchemaField('stateANDterritory', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('latitude', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('longitude', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('Name', 'STRING', 'REQUIRED', None, None, (), None)]
Loaded 122 rows.


Adding mortality_and_natality

In [27]:
from google.cloud import bigquery


folder = "Health"
file_name = "*.csv"
table = "mortality_and_natality"

# Data columns read from the CSV files; all of them are mandatory.
schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in (
        ("State", "STRING"),
        ("Year", "INTEGER"),
        ("Month", "STRING"),
        ("Period", "STRING"),
        ("Indicator", "STRING"),
        ("Data Value", "INTEGER"),
    )
]
# Every table must carry these two audit columns as its last two fields; the
# loader leaves the last two fields out of the load schema.
schema.extend([
    bigquery.SchemaField("_data_source", "STRING", mode="REQUIRED", default_value_expression="'health'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP"),
])

create_load_table_from_csv(folder, file_name, table, schema)


Created table mortality_and_natality
[SchemaField('State', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('Year', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('Month', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('Period', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('Indicator', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('Data Value', 'INTEGER', 'REQUIRED', None, None, (), None)]
Loaded 3960 rows.


Adding state_exp_data_1991_2024_key

In [28]:
from google.cloud import bigquery


# NOTE(review): this folder and the "*.csv" pattern are also used for the
# state_exp_data_1991_2024 table, and the loader selects files by its
# file_pattern default rather than by file_name — confirm that only the key
# file ends up in this table.
folder = "State government budgets"
file_name = "*.csv"
table = "state_exp_data_1991_2024_key"

# Data columns read from the CSV files; both are mandatory strings.
schema = [
    bigquery.SchemaField(column_name, "STRING", mode="REQUIRED")
    for column_name in ("column_code", "column_description")
]
# Audit columns populated through their default expressions; they must remain
# the last two entries of the schema.
schema.extend([
    bigquery.SchemaField("_data_source", "STRING", mode="REQUIRED", default_value_expression="'State Exp Report - KEY'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP"),
])

create_load_table_from_csv(folder, file_name, table, schema)

Created table state_exp_data_1991_2024_key
[SchemaField('column_code', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('column_description', 'STRING', 'REQUIRED', None, None, (), None)]
Loaded 3636 rows.


Adding gdp

In [29]:
from google.cloud import bigquery


folder = "GDP"
file_name = "*.csv"
table = "gdp"

# Data columns read from the CSV files; all of them are mandatory.
schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in (
        ("year", "STRING"),
        ("state", "STRING"),
        ("category", "STRING"),
        ("value", "FLOAT"),
    )
]
# Audit columns populated through their default expressions; they must remain
# the last two entries of the schema.
schema.extend([
    bigquery.SchemaField("_data_source", "STRING", mode="REQUIRED", default_value_expression="'GDP'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP"),
])

create_load_table_from_csv(folder, file_name, table, schema)


Created table gdp
[SchemaField('year', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('state', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('category', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('value', 'FLOAT', 'REQUIRED', None, None, (), None)]
Loaded 1700 rows.


Adding state_exp_data_1991_2024

In [30]:
from google.cloud import bigquery

folder = "State government budgets"
file_name = "*.csv"
table = "state_exp_data_1991_2024"

# Two mandatory string columns come first.
# NOTE(review): "STATW" looks like a typo for "STATE" — confirm before renaming,
# since the column name ends up in the table schema.
schema = [
    bigquery.SchemaField(key_column, "STRING", mode="REQUIRED")
    for key_column in ("YEAR", "STATW")
]

# 15 column groups, each with the same five suffixes, all nullable floats.
# The outer loop runs over the group prefix so the columns keep their order:
# ELSED_GF, ELSED_FF, ..., ELSED_TOT, HGRED_GF, ...
schema.extend(
    bigquery.SchemaField(f"{prefix}_{suffix}", "FLOAT", mode="NULLABLE")
    for prefix in (
        "ELSED", "HGRED", "TANF", "OTCA", "MCAID",
        "CORR", "TRANS", "OTHER", "ELCAP", "HEDCP",
        "CORCP", "TRCAP", "HSCAP", "ENVCP", "OTHCP",
    )
    for suffix in ("GF", "FF", "OF", "BF", "TOT")
)

# Two blocks of total columns: first the five *_CAPI columns, then the five
# *_CAP columns, again nullable floats.
schema.extend(
    bigquery.SchemaField(f"{prefix}_{suffix}", "FLOAT", mode="NULLABLE")
    for suffix in ("CAPI", "CAP")
    for prefix in ("GFTOT", "FFTOT", "OFTOT", "BFTOT", "TOTAL")
)

# Audit columns populated through their default expressions; they must remain
# the last two entries of the schema.
schema.extend([
    bigquery.SchemaField("_data_source", "STRING", mode="REQUIRED", default_value_expression="'State Exp Report'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP"),
])

create_load_table_from_csv(folder, file_name, table, schema)


Created table state_exp_data_1991_2024
[SchemaField('YEAR', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('STATW', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('ELSED_GF', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('ELSED_FF', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('ELSED_OF', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('ELSED_BF', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('ELSED_TOT', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('HGRED_GF', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('HGRED_FF', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('HGRED_OF', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('HGRED_BF', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('HGRED_TOT', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('TANF_GF', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('TANF_FF', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('

Adding crime_data

In [31]:
from google.cloud import bigquery

folder = "Crime data"
file_name = "*.csv"
table = "crime_data"

# Data columns read from the CSV files; all of them are mandatory.
schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in (
        ("year", "INTEGER"),
        ("state", "STRING"),
        ("population", "FLOAT"),
        ("crime", "FLOAT"),
        ("crime_against_persons", "FLOAT"),
        ("crime_against_property", "FLOAT"),
        ("crime_against_society", "FLOAT"),
    )
]
# Audit columns populated through their default expressions; they must remain
# the last two entries of the schema.
schema.extend([
    bigquery.SchemaField("_data_source", "STRING", mode="REQUIRED", default_value_expression="'CrimeData'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP"),
])

# The loader's default delimiter (",") is used; no delimiter is passed here.
create_load_table_from_csv(folder, file_name, table, schema)

Created table crime_data
[SchemaField('year', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('state', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('population', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('crime', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('crime_against_persons', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('crime_against_property', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('crime_against_society', 'FLOAT', 'REQUIRED', None, None, (), None)]
Loaded 402 rows.


Adding demographics

In [33]:
from google.cloud import bigquery

folder = "Demographics"
file_name = "*.csv"
table = "demographics"

# Data columns read from the CSV files as (name, type, mode).
schema = [
    bigquery.SchemaField(column_name, column_type, mode=column_mode)
    for column_name, column_type, column_mode in (
        ("STATE", "STRING", "REQUIRED"),
        ("AGE", "FLOAT", "NULLABLE"),
        ("RACE", "STRING", "REQUIRED"),
        ("TOTAL HOUSING UNITS", "FLOAT", "NULLABLE"),
    )
]
# Audit columns populated through their default expressions; they must remain
# the last two entries of the schema.
schema.extend([
    bigquery.SchemaField("_data_source", "STRING", mode="REQUIRED", default_value_expression="'Demographics'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP"),
])

create_load_table_from_csv(folder, file_name, table, schema)


Created table demographics
[SchemaField('STATE', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('AGE', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('RACE', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('TOTAL HOUSING UNITS', 'FLOAT', 'NULLABLE', None, None, (), None)]
Loaded 188 rows.


Load the CSVs (Unstructured Data) into Google BigQuery
Adding state_energy_consumption

In [34]:
folder = "The unstructured data/llm_text"
file_name = "*.csv"
table = "state_energy_consumption"
delimiter =","

# Data columns read from the CSV files; all of them are mandatory.
schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in (
        ("State", "STRING"),
        ("Year", "INTEGER"),
        ("Value", "FLOAT"),
        ("Sector", "STRING"),
    )
]
# Audit columns populated through their default expressions; they must remain
# the last two entries of the schema.
schema.extend([
    bigquery.SchemaField("_data_source", "STRING", mode="REQUIRED", default_value_expression="'Unstructured Data'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP"),
])

# The delimiter is handed over explicitly as the fifth positional argument.
create_load_table_from_csv(folder, file_name, table, schema, delimiter)

Created table state_energy_consumption
[SchemaField('State', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('Year', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('Value', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('Sector', 'STRING', 'REQUIRED', None, None, (), None)]
Loaded 12800 rows.


Adding state_level_unemployment_data

In [37]:
from google.cloud import bigquery

folder = "Labor market data"
file_name = "*.csv"
table = "state_level_unemployment_data"

# Data columns read from the CSV files as (name, type, mode).
schema = [
    bigquery.SchemaField(column_name, column_type, mode=column_mode)
    for column_name, column_type, column_mode in (
        ("FIPS Code", "STRING", "REQUIRED"),
        ("State and area", "STRING", "NULLABLE"),
        ("Period", "INTEGER", "REQUIRED"),
        ("Employment", "FLOAT", "REQUIRED"),
        ("Unemployment", "FLOAT", "REQUIRED"),
    )
]
# Audit columns populated through their default expressions; they must remain
# the last two entries of the schema.
schema.extend([
    bigquery.SchemaField("_data_source", "STRING", mode="REQUIRED", default_value_expression="'Labor Market Data'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP"),
])

create_load_table_from_csv(folder, file_name, table, schema)

Created table state_level_unemployment_data
[SchemaField('FIPS Code', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('State and area', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('Period', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('Employment', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('Unemployment', 'FLOAT', 'REQUIRED', None, None, (), None)]
Loaded 31111 rows.


Make every column name lowercase and use underscores to connect words

In [40]:
from google.cloud import bigquery

def normalize_table_schema(project_id, dataset, table):
    """Rebuild a table so its column names are lower case with underscores.

    A ``<table>_temp`` table is created with the renamed columns, the rows are
    copied into it, the original table is deleted and the temp table is
    renamed to the original name.

    Args:
        project_id: Project that owns the dataset.
        dataset: Dataset that contains the table.
        table: Name of the table to normalize.

    Returns:
        None. Progress is reported with ``print``.

    Note:
        The original table is deleted before the rename, so if the rename
        fails the data is left in ``<table>_temp``.
    """
    client = bigquery.Client()
    table_id = f"{project_id}.{dataset}.{table}"
    temp_table_id = f"{project_id}.{dataset}.{table}_temp"

    original_schema = client.get_table(table_id).schema

    # Only the top-level name changes. Mode, description, default expression
    # and nested fields are carried over so REQUIRED constraints and column
    # defaults survive the rebuild. Names of nested fields are not rewritten.
    new_schema = [
        bigquery.SchemaField(
            field.name.lower().replace(" ", "_"),
            field.field_type,
            mode=field.mode,
            description=field.description,
            default_value_expression=field.default_value_expression,
            fields=field.fields,
        )
        for field in original_schema
    ]

    # Remove a temp table left over from an earlier, interrupted run. Only the
    # "not found" case is ignored; any other error (permissions, network, ...)
    # is raised instead of being silently swallowed.
    client.delete_table(temp_table_id, not_found_ok=True)

    temp_table = bigquery.Table(temp_table_id, schema=new_schema)
    client.create_table(temp_table)
    print(f"Created new table {temp_table_id}")

    # SELECT * maps columns by position, which matches because new_schema
    # keeps the original column order.
    copy_query = f"""
    INSERT INTO `{temp_table_id}`
    SELECT * FROM `{table_id}`
    """
    client.query(copy_query).result()
    print(f"Copied data to {temp_table_id}")

    client.delete_table(table_id)
    print(f"Deleted old table {table_id}")

    rename_query = f"""
    ALTER TABLE `{temp_table_id}` RENAME TO `{table}`
    """
    client.query(rename_query).result()
    print(f"Renamed {temp_table_id} to {table}")

    print(f"Table {table} schema normalized successfully.")

# Assign the identifiers before they are interpolated below, so this cell does
# not rely on values left behind by an earlier cell.
project_id = "steady-service-448416-k2"
dataset = "finance_economics_events_raw"

client = bigquery.Client()
# Take a snapshot of the table list first: the loop creates, deletes and
# renames tables in this same dataset, which should not feed back into a
# lazily evaluated listing.
tables = list(client.list_tables(f"{project_id}.{dataset}"))

for table in tables:
    table_name = table.table_id
    print(f"Processing table: {table_name}")
    normalize_table_schema(project_id, dataset, table_name)


Processing table: crime_data
Created new table steady-service-448416-k2.finance_economics_events_raw.crime_data_temp
Copied data to steady-service-448416-k2.finance_economics_events_raw.crime_data_temp
Deleted old table steady-service-448416-k2.finance_economics_events_raw.crime_data
Renamed steady-service-448416-k2.finance_economics_events_raw.crime_data_temp to crime_data
Table crime_data schema normalized successfully.
Processing table: demographics
Created new table steady-service-448416-k2.finance_economics_events_raw.demographics_temp
Copied data to steady-service-448416-k2.finance_economics_events_raw.demographics_temp
Deleted old table steady-service-448416-k2.finance_economics_events_raw.demographics
Renamed steady-service-448416-k2.finance_economics_events_raw.demographics_temp to demographics
Table demographics schema normalized successfully.
Processing table: gdp
Created new table steady-service-448416-k2.finance_economics_events_raw.gdp_temp
Copied data to steady-service-4