In [None]:
######## SENTIMENT TABLE AS DATAFRAME ################
from google.cloud import bigquery

# Source table, written as project.dataset.table
table_path = "sentiment-lewagon.sentiment_db.SENTIMENT"

# BigQuery client built with default settings
client = bigquery.Client()

# Read every row of the table into a DataFrame and drop exact duplicate rows
df = client.list_rows(table_path).to_dataframe().drop_duplicates()

# Display the de-duplicated frame as the cell output
df


In [None]:
import pandas as pd

# Assumes `df` is the DataFrame loaded above and that it contains
# these three count columns:
# 'count_positive_chunks', 'count_negative_chunks', 'count_neutral_chunks'

def determine_sentiment(row):
    """
    Return the sentiment label whose chunk count is the largest in `row`.

    `row` is anything indexable by the keys 'count_positive_chunks',
    'count_negative_chunks' and 'count_neutral_chunks' (for example a
    DataFrame row passed by `df.apply(..., axis=1)`).

    Ties are resolved in a fixed order: 'positive' wins over 'negative',
    which wins over 'neutral'. As a consequence a row whose three counts
    are all equal (including all zeros) is labelled 'positive'.
    """
    positive = row['count_positive_chunks']
    negative = row['count_negative_chunks']
    neutral = row['count_neutral_chunks']

    largest = max(positive, negative, neutral)

    # Guard clauses checked in priority order; the first match wins.
    if positive == largest:
        return 'positive'
    if negative == largest:
        return 'negative'
    return 'neutral'

# Label every row: axis=1 makes apply pass each row to determine_sentiment,
# and the returned labels are stored in the 'overall_sentiment' column of df
df['overall_sentiment'] = df.apply(determine_sentiment, axis=1)

In [None]:
########### ADD NEW COLUMNS ###################
# numpy is needed for np.nan below and is not imported by any earlier cell.
import numpy as np

# Per-row total of the three chunk counts.
total = df[['count_positive_chunks', 'count_negative_chunks', 'count_neutral_chunks']].sum(axis=1)
# A zero total would make the ratios below divide by zero; turning it into NaN
# makes every derived column NaN for such rows instead.
total = total.replace(0, np.nan)

# Share of each sentiment class among all chunks of the row.
df['count_pos_over_total_count'] = df['count_positive_chunks'] / total
df['count_neg_over_total_count'] = df['count_negative_chunks'] / total
df['count_neut_over_total_count'] = df['count_neutral_chunks'] / total

# Net sentiment: (positive - negative) / total. The original line had an
# unbalanced opening parenthesis, which made the whole cell a SyntaxError.
df['net_sentiment'] = (df['count_positive_chunks'] - df['count_negative_chunks']) / total

In [None]:
from google.cloud import bigquery

def upload_sentiment_clean_to_bq_(df):
    """
    Append a DataFrame to the BigQuery table
    ``sentiment-lewagon.sentiment_db.sentiment_clean_v2``.

    Column names are lowercased on a renamed copy before loading, so the
    caller's DataFrame is left unchanged.

    Args:
        df: pandas DataFrame to upload. Its column labels must be strings.

    Returns:
        None. Success or failure is reported with ``print``.

    Notes:
        * The load job uses ``WRITE_APPEND``: every successful call adds the
          rows to the table again, so re-running the upload duplicates data.
        * Any exception raised while creating the client or running the load
          job is caught and printed, not re-raised.
    """
    # Lowercase the column names on a new frame. Assigning to df.columns
    # would also rename the columns of the frame the caller still holds.
    upload_df = df.rename(columns=str.lower)

    try:
        BQ_PROJECT_ID = 'sentiment-lewagon'
        BQ_DATASET_ID = 'sentiment_db'
        BQ_TABLE_ID = 'sentiment_clean_v2'
        table_ref = f"{BQ_PROJECT_ID}.{BQ_DATASET_ID}.{BQ_TABLE_ID}"

        client = bigquery.Client()

        job_config = bigquery.LoadJobConfig(
            write_disposition="WRITE_APPEND"
        )

        job = client.load_table_from_dataframe(upload_df, table_ref, job_config=job_config)
        job.result()  # wait for the load job to complete

        print(f"✅ Uploaded {job.output_rows} rows to {table_ref}")

    except Exception as e:
        print(f"❌ Failed to upload DataFrame to BigQuery: {e}")

In [None]:
# Upload df to BigQuery. The load uses WRITE_APPEND, so every run of this
# cell adds the rows to the target table again.
upload_sentiment_clean_to_bq_(df)