# Import Libraries

In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_gbq
import yfinance as yf
from google.cloud import bigquery
from google.cloud.exceptions import NotFound
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# Load from CSV

In [2]:
# Directory holding the project's CSV exports. Set the STOCK_DATA_DIR
# environment variable to point somewhere else; the fallback is the location
# this notebook was originally run from.
DATA_DIR = Path(
    os.environ.get(
        'STOCK_DATA_DIR',
        '/Users/ani/Projects/6_stock_portfolio_recommendation/data',
    )
)

# Building the path with Path avoids the doubled separator that the original
# string concatenation produced.
df_enriched_stock_data = pd.read_csv(DATA_DIR / 'enriched_stock_data.csv')
df_enriched_stock_data

Unnamed: 0,Ticker,Closing_Price,All_Time_High,Percent_From_All_Time_High,Percent_Difference_200_Day_Moving_Average,24_Hour_Percent_Change,7_Day_Percent_Change,30_Day_Percent_Change,Annualized_Return,YTD_Return,...,Industry,Country,Business_Summary,Dividend_Yield,Trailing_PE,Forward_PE,Average_Volume,Average_Volume_10days,52_Week_Change,Update_Date
0,NVDA,157.75,157.75,0.00,22.11,1.76,9.46,16.57,60.67,14.07,...,Semiconductors,United States,"NVIDIA Corporation, a computing infrastructure...",0.03,50.887100,38.288837,249322685,187835800,0.254816,2025-06-27
1,MSFT,495.94,497.45,-0.30,17.61,-0.30,3.74,9.69,17.33,18.94,...,Software - Infrastructure,United States,Microsoft Corporation develops and supports so...,0.67,38.355762,33.173244,23145869,20812190,0.112988,2025-06-27
2,AAPL,201.08,258.40,-22.18,-9.79,0.04,2.78,-5.30,14.78,-17.34,...,Consumer Electronics,United States,"Apple Inc. designs, manufactures, and markets ...",0.52,31.369736,24.197351,61975983,51950890,-0.045675,2025-06-27
3,AMZN,223.30,242.06,-7.75,8.96,2.85,3.95,6.21,8.88,1.40,...,Internet Retail,United States,"Amazon.com, Inc. engages in the retail sale of...",0.00,36.368080,36.308945,49333604,39989330,0.123519,2025-06-27
4,GOOGL,178.53,205.89,-13.29,3.89,2.88,1.47,8.09,17.04,-5.53,...,Internet Content & Information,United States,Alphabet Inc. offers various products and plat...,0.48,19.947487,19.925222,41357416,37073840,-0.047269,2025-06-27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,IVZ,15.70,24.58,-36.12,-2.56,0.77,8.13,1.42,11.01,-8.82,...,Asset Management,United States,Invesco Ltd. is a publicly owned investment ma...,5.39,12.559999,8.306878,5714119,5047610,0.041444,2025-06-27
496,APA,18.56,46.75,-60.30,-10.65,0.60,-10.68,2.15,7.67,-18.45,...,Oil & Gas E&P,United States,"APA Corporation, an independent energy company...",5.42,6.652329,6.749091,9028964,10513290,-0.373302,2025-06-27
497,MHK,104.90,229.74,-54.34,-15.38,1.50,6.57,-2.20,0.83,-9.51,...,"Furnishings, Fixtures & Appliances",United States,"Mohawk Industries, Inc. designs, manufactures,...",0.00,13.712419,9.408072,803162,920930,-0.090149,2025-06-27
498,CZR,28.86,119.49,-75.85,-14.37,1.23,9.11,-5.96,-5.13,-11.45,...,Resorts & Casinos,United States,"Caesars Entertainment, Inc. operates as a gami...",0.00,0.000000,21.537313,5763859,5734720,-0.282587,2025-06-27


# Save to BigQuery

In [3]:
def create_schema(df):
    """Creates a BigQuery schema based on the DataFrame's columns and their data types.

    The mapping is decided per column from its pandas dtype:

    * boolean dtypes (``bool`` / nullable ``boolean``) -> ``BOOLEAN``
    * any integer dtype (``int8`` ... ``int64``, unsigned, nullable) -> ``INTEGER``
    * any float dtype (``float32``, ``float64``, nullable) -> ``FLOAT``
    * everything else (object/strings, datetimes, categoricals, ...) -> ``STRING``

    Args:
        df (pd.DataFrame): The DataFrame for which to create the schema.
    Returns:
        list: A list of bigquery.SchemaField objects representing the schema,
            one per column and in column order. Empty if ``df`` has no columns.
    """

    schema = []
    # Iterate over df.dtypes rather than df[col] so that duplicated column
    # labels (where df[col] would be a DataFrame with no .dtype) still work.
    for col, dtype in df.dtypes.items():
        # The bool check comes first so booleans are never classified as numbers.
        if pd.api.types.is_bool_dtype(dtype):
            bq_type = 'BOOLEAN'
        elif pd.api.types.is_integer_dtype(dtype):
            bq_type = 'INTEGER'
        elif pd.api.types.is_float_dtype(dtype):
            bq_type = 'FLOAT'
        else:
            # Unchanged fallback: anything not recognised above is stored as text.
            bq_type = 'STRING'
        schema.append(bigquery.SchemaField(col, bq_type))

    print(f"Created schema with {len(schema)} fields.")
    return schema
    
def save_table_to_bigquery(df, dataset_id, table_id):
    """Replace a BigQuery table with the contents of a DataFrame.

    Any existing table with the same name is deleted first. The table is then
    always (re)created with the schema derived by ``create_schema(df)`` before
    the rows are loaded, so the resulting column types do not depend on
    whether the table existed beforehand.

    Args:
        df (pd.DataFrame): Data to upload.
        dataset_id (str): Dataset name, resolved in the client's default project.
        table_id (str): Name of the table to (re)create inside that dataset.
    Returns:
        None. Blocks until the load job has finished.
    """
    client = bigquery.Client()
    # Equivalent to the deprecated client.dataset(dataset_id).table(table_id).
    table_ref = bigquery.DatasetReference(client.project, dataset_id).table(table_id)

    # Drop a pre-existing table so the upload fully replaces it.
    try:
        client.get_table(table_ref)
    except NotFound:
        pass  # Nothing to delete; fall through to creation.
    else:
        print(f"Table {table_id} already exists in dataset {dataset_id}.")
        # delete the existing table
        client.delete_table(table_ref)
        print(f"Table {table_id} deleted from dataset {dataset_id}.")

    # Create the table explicitly in both cases. Previously this only happened
    # when the table was missing, so a replaced table got whatever schema the
    # load job inferred while the "created" message was printed regardless.
    schema = create_schema(df)
    client.create_table(bigquery.Table(table_ref, schema=schema))
    print(f"Table {table_id} created in dataset {dataset_id}.")

    # Insert the DataFrame into the BigQuery table
    job = client.load_table_from_dataframe(df, table_ref)
    job.result()  # Wait for the job to complete

def load_table_from_bigquery(dataset_id, table_id, project_id):
    """Fetch every row and column of a BigQuery table as a DataFrame.

    Args:
        dataset_id (str): Dataset that contains the table.
        table_id (str): Name of the table to read.
        project_id (str): Project passed to ``pandas_gbq.read_gbq``.
    Returns:
        The result of ``pandas_gbq.read_gbq`` for a ``SELECT *`` on the table.
    """
    qualified_name = f"{dataset_id}.{table_id}"
    return pandas_gbq.read_gbq(
        f"SELECT * FROM `{qualified_name}`",
        project_id=project_id,
    )

In [4]:
# Upload the CSV-loaded frame to BigQuery. Note that the helper deletes any
# existing table with this name before loading.
save_table_to_bigquery(
    df=df_enriched_stock_data,
    dataset_id='stock_data',
    table_id='stock_data_test',
)

Table stock_data_test already exists in dataset stock_data.
Table stock_data_test deleted from dataset stock_data.
Table stock_data_test created in dataset stock_data.


# Load from BigQuery

In [5]:
# Read the uploaded table back from BigQuery. This rebinds the name that
# previously held the CSV-loaded frame.
df_enriched_stock_data = load_table_from_bigquery(
    dataset_id='stock_data',
    table_id='stock_data_test',
    project_id="capable-arbor-293714",
)
df_enriched_stock_data

Downloading: 100%|██████████|


Unnamed: 0,Ticker,Closing_Price,All_Time_High,Percent_From_All_Time_High,Percent_Difference_200_Day_Moving_Average,24_Hour_Percent_Change,7_Day_Percent_Change,30_Day_Percent_Change,Annualized_Return,YTD_Return,...,Industry,Country,Business_Summary,Dividend_Yield,Trailing_PE,Forward_PE,Average_Volume,Average_Volume_10days,52_Week_Change,Update_Date
0,CTVA,74.39,74.44,-0.07,20.44,0.12,0.24,10.33,20.21,32.71,...,Agricultural Inputs,United States,"Corteva, Inc. operates in the agriculture busi...",0.92,44.813255,23.174454,3976774,5551530,0.377456,2025-06-27
1,CF,90.99,111.25,-18.21,8.17,0.33,-11.21,6.46,24.41,7.29,...,Agricultural Inputs,United States,"CF Industries Holdings, Inc., together with it...",2.21,12.035714,14.723301,2901520,4779180,0.223556,2025-06-27
2,MOS,35.32,72.25,-51.11,27.83,0.40,-2.32,5.50,20.54,47.08,...,Agricultural Inputs,United States,"The Mosaic Company, through its subsidiaries, ...",2.50,30.448277,14.357723,5711301,5851640,0.217301,2025-06-27
3,VMC,262.18,291.14,-9.95,1.54,2.03,1.21,-1.38,15.36,3.00,...,Building Materials,United States,Vulcan Materials Company produces and supplies...,0.76,36.823032,28.874449,1139514,987340,0.033296,2025-06-27
4,MLM,550.05,617.09,-10.86,3.11,1.74,1.60,1.82,18.40,8.07,...,Building Materials,United States,"Martin Marietta Materials, Inc., a natural res...",0.58,31.503437,26.393953,461604,439500,-0.002104,2025-06-27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,PNW,88.55,94.52,-6.32,0.04,-0.47,-0.33,0.28,7.16,7.14,...,Utilities - Regulated Electric,United States,"Pinnacle West Capital Corporation, through its...",4.02,17.534653,18.920942,1260314,1381100,0.164834,2025-06-27
496,ATO,152.49,161.76,-5.73,4.78,-0.28,0.12,0.71,9.88,11.53,...,Utilities - Regulated Gas,United States,"Atmos Energy Corporation, together with its su...",2.28,21.297487,21.357143,1131991,755090,0.310930,2025-06-27
497,NI,39.97,40.94,-2.37,7.54,0.91,1.76,5.52,13.13,11.55,...,Utilities - Regulated Gas,United States,"NiSource Inc., an energy holding company, oper...",2.83,21.605406,21.489248,4681225,4471860,0.374870,2025-06-27
498,AWK,137.92,175.80,-21.55,1.28,-0.36,-2.13,3.06,3.19,12.72,...,Utilities - Regulated Water,United States,"American Water Works Company, Inc., through it...",2.39,25.076363,24.196491,1414240,1102550,0.071694,2025-06-27
