In [None]:
from api_library import StorageConnect
from api_library import BQConnect
from google.cloud import bigquery
from io import StringIO
import pandas as pd
import logging

# Configure the root logger at INFO level so the logging.info() progress
# messages emitted by the code below are actually shown.
logging.basicConfig(level=logging.INFO)

def bq_laod():
    """Load the normalized survey-responses CSV from Cloud Storage into BigQuery.

    Downloads the blob exposed as
    ``StorageConnect().sm_normalized_responses_blob``, parses it into a
    DataFrame, coerces the column types, and loads it into
    ``<BQConnect().dataset_pipeline>.raw_sm_responses`` with
    ``WRITE_TRUNCATE`` as the write disposition.

    Raises:
        RuntimeError: If the downloaded blob content is empty.
        Exception: Any error raised while downloading the blob is logged and
            re-raised unchanged. Errors from CSV parsing, type coercion and
            the BigQuery load job propagate as raised.
    """
    # Initiate the StorageConnect object
    storage_conn = StorageConnect()

    # Bind the blob once, outside the try, so the error handler below cannot
    # itself fail while building its log message.
    blob = storage_conn.sm_normalized_responses_blob

    # Get normalized.csv
    try:
        normalized_string_data = blob.download_as_text()
        if not normalized_string_data:
            raise RuntimeError('Downloaded file is empty')
    except Exception as e:
        logging.error(f"Failed to retrieve normalized responses CSV from blob: {blob.name}, project: {storage_conn.project_id}. Error: {e}", exc_info=True)
        # Re-raise: previously the failure was only logged and execution fell
        # through to pd.read_csv, which then died with an unrelated
        # UnboundLocalError / EmptyDataError that hid the real cause.
        raise

    logging.info(f'Retrieved normalized responses csv from blob: {blob.name}, project: {storage_conn.project_id}')

    # create dataframe
    normalized_df = pd.read_csv(StringIO(normalized_string_data))

    # Initiate the BigQueryConnect object
    bq_conn = BQConnect()

    # Fixing datatypes
    # errors='coerce' turns unparseable date values into NaT instead of raising.
    normalized_df['date'] = pd.to_datetime(normalized_df['date'], errors='coerce')

    str_columns = [
        "choice_id", "row_id", "choice_metadata.weight", "question_id",
        "respondent_id", "collector_id", "survey_id", "const_id",
        "email", "performance_code", "production_name", "tag_data",
        "text", "other_id",
    ]

    # NOTE(review): astype(str) renders missing values as the literal string
    # 'nan' (and any column pandas inferred as float as e.g. '123.0').
    # Kept as-is to preserve what is loaded today -- confirm this is what
    # downstream consumers of raw_sm_responses expect.
    normalized_df[str_columns] = normalized_df[str_columns].astype(str)

    # define project_id, dataset_id and table_id
    dataset_id = bq_conn.dataset_pipeline
    table_id = f'{dataset_id}.raw_sm_responses'

    # renaming columns
    # The dotted CSV header is renamed to match the underscore name used in
    # the load schema below.
    normalized_df = normalized_df.rename(
        columns={'choice_metadata.weight': 'choice_metadata_weight'}
    )

    # configure the specific table we're sending the df to in bigquery
    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("choice_id", "STRING"),
            bigquery.SchemaField("row_id", "STRING"),
            bigquery.SchemaField("choice_metadata_weight", "STRING"),
            bigquery.SchemaField("question_id", "STRING"),
            bigquery.SchemaField("respondent_id", "STRING"),
            bigquery.SchemaField("collector_id", "STRING"),
            bigquery.SchemaField("survey_id", "STRING"),
            bigquery.SchemaField("date", "TIMESTAMP"),
            bigquery.SchemaField("const_id", "STRING"),
            bigquery.SchemaField("email", "STRING"),
            bigquery.SchemaField("performance_code", "STRING"),
            bigquery.SchemaField("production_name", "STRING"),
            bigquery.SchemaField("tag_data", "STRING"),
            bigquery.SchemaField("text", "STRING"),
            bigquery.SchemaField("other_id", "STRING"),
        ],
        write_disposition="WRITE_TRUNCATE",
    )

    # load the table to bigquery
    load_job = bq_conn.client.load_table_from_dataframe(
        normalized_df, table_id, job_config=job_config
    )

    logging.info(f'Loading normalized responses to BigQuery, table: {table_id}')

    # wait for the result
    # NOTE(review): per the google-cloud-bigquery docs, result() raises if the
    # job failed, so the error branch below is presumably only a safety net.
    load_job.result()

    # Check job status and log
    if load_job.state == 'DONE':
        if load_job.error_result:
            logging.error(f"Job failed with error: {load_job.error_result}")
        else:
            logging.info(f"Job completed successfully with {load_job.output_rows} rows loaded.")
    else:
        logging.warning(f"Job state: {load_job.state}")
        
        
        
# Run the load immediately when this cell/script is executed.
bq_laod()



INFO:root:GCP credentials found in environment variable: C:\Users\edohner\OneDrive - Lyric Opera of Chicago\Desktop\Python Learning\airflow_test_project\gcp_service_account.json
INFO:root:GCP credentials JSON loaded successfully.
INFO:root:Using project ID from JSON credentials: dbt-test-449821
INFO:root:Retrieved normalized responses csv from blob: normalized_data.csv, project: dbt-test-449821
INFO:root:Loading normalized responses to BigQuery, table: dbt-test-449821.pipeline.raw_sm_responses
INFO:root:Job completed successfully with 56 rows loaded.
