# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import pandas_gbq

from sklearn.preprocessing import StandardScaler

from google.cloud import bigquery
from google.cloud.exceptions import NotFound

from tqdm import tqdm
import yfinance as yf

# Load from CSV

In [2]:
# Machine-specific folder holding the exported data files; edit DATA_DIR when
# running this notebook on another machine.
DATA_DIR = '/Users/ani/Projects/6_stock_portfolio_recommendation/data'

# Read the enriched stock data export (the original literal had a doubled
# slash before the file name; the path is now built from DATA_DIR instead).
df_enriched_stock_data = pd.read_csv(f'{DATA_DIR}/enriched_stock_data.csv')
df_enriched_stock_data

Unnamed: 0,Ticker,Closing_Price,All_Time_High,Percent_From_All_Time_High,Percent_Difference_200_Day_Moving_Average,24_Hour_Percent_Change,7_Day_Percent_Change,30_Day_Percent_Change,Annualized_Return,YTD_Return,...,Sector,Industry,Country,Business_Summary,Dividend_Yield,Trailing_PE,Forward_PE,Average_Volume,Average_Volume_10days,52_Week_Change
0,MSFT,487.27,487.27,0.00,16.05,2.07,3.10,11.41,16.65,16.86,...,Technology,Software - Infrastructure,United States,Microsoft Corporation develops and supports so...,0.70,37.656105,32.593310,22828664.0,18533110.0,0.066411
1,NVDA,144.28,149.41,-3.43,12.52,0.30,1.02,22.94,57.59,4.33,...,Technology,Semiconductors,United States,"NVIDIA Corporation, a computing infrastructure...",0.03,46.392320,35.019444,251213522.0,173200140.0,0.217932
2,AAPL,201.95,258.40,-21.85,-9.56,0.47,1.59,2.39,14.62,-16.98,...,Technology,Consumer Electronics,United States,"Apple Inc. designs, manufactures, and markets ...",0.52,31.495320,24.294222,61159585.0,55367100.0,-0.034304
3,AMZN,209.62,242.06,-13.40,2.70,-0.03,-1.68,9.13,7.19,-4.81,...,Consumer Cyclical,Internet Retail,United States,"Amazon.com, Inc. engages in the retail sale of...",0.00,34.122150,34.066666,48931716.0,39057090.0,0.129978
4,GOOGL,163.90,205.89,-20.39,-4.40,-1.64,-7.58,6.37,14.49,-13.27,...,Communication Services,Internet Content & Information,United States,Alphabet Inc. offers various products and plat...,0.50,18.278538,18.298939,40432375.0,36300310.0,-0.070193
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,ALB,56.83,313.32,-81.86,-29.27,0.30,-12.11,-0.42,-3.84,-32.51,...,Basic Materials,Specialty Chemicals,United States,Albemarle Corporation provides energy storage ...,2.86,0.000000,30.885870,3520533.0,3122920.0,-0.412241
497,IVZ,14.88,24.58,-39.45,-7.66,0.48,-1.26,2.26,8.32,-13.58,...,Financial Services,Asset Management,United States,Invesco Ltd. is a publicly owned investment ma...,5.67,11.904560,7.873386,5743362.0,5694980.0,-0.032658
498,MHK,102.00,229.74,-55.60,-18.31,2.48,-2.59,-2.96,-0.30,-12.01,...,Consumer Cyclical,"Furnishings, Fixtures & Appliances",United States,"Mohawk Industries, Inc. designs, manufactures,...",0.00,13.333333,9.147983,800620.0,908320.0,-0.117407
499,CZR,28.26,119.49,-76.35,-16.54,0.39,1.29,1.55,-7.11,-13.29,...,Consumer Cyclical,Resorts & Casinos,United States,"Caesars Entertainment, Inc. operates as a gami...",0.00,0.000000,21.089552,5669772.0,5448770.0,-0.278020


# Save to BigQuery

In [3]:
def create_schema(df):
    """Creates a BigQuery schema based on the DataFrame's columns and their data types.

    Boolean dtypes map to BOOLEAN, integer dtypes of any width (including
    pandas nullable integers) map to INTEGER, float dtypes of any width map
    to FLOAT, and every other dtype falls back to STRING. A one-line summary
    with the number of generated fields is printed.

    Args:
        df (pd.DataFrame): The DataFrame for which to create the schema.
    Returns:
        list: A list of bigquery.SchemaField objects, one per column, in
            column order.
    """
    dtype_checks = pd.api.types

    schema = []
    # Walk df.dtypes instead of df[col]: with duplicated column labels df[col]
    # is a DataFrame (no .dtype attribute), while df.dtypes still yields one
    # (label, dtype) pair per column.
    for col, dtype in df.dtypes.items():
        if dtype_checks.is_bool_dtype(dtype):
            field_type = 'BOOLEAN'
        elif dtype_checks.is_integer_dtype(dtype):
            field_type = 'INTEGER'
        elif dtype_checks.is_float_dtype(dtype):
            field_type = 'FLOAT'
        else:
            field_type = 'STRING'
        schema.append(bigquery.SchemaField(col, field_type))

    print(f"Created schema with {len(schema)} fields.")
    return schema
    
def save_table_to_bigquery(df, dataset_id, table_id):
    """Replace a BigQuery table with the contents of a DataFrame.

    If the table already exists it is deleted first. A fresh table is then
    always created with the schema returned by ``create_schema(df)`` and the
    DataFrame is loaded into it, so the table ends up with the same explicit
    schema whether or not it existed before the call.

    Args:
        df (pd.DataFrame): The data to upload.
        dataset_id (str): Id of the dataset that holds the table. The dataset
            is looked up in the project of a default ``bigquery.Client()``.
        table_id (str): Id of the table to (re)create.
    Returns:
        None. The function blocks until the load job has finished.
    """
    client = bigquery.Client()
    # Client.dataset() is deprecated; build the reference explicitly.
    table_ref = bigquery.DatasetReference(client.project, dataset_id).table(table_id)

    # Drop a previous copy of the table, if there is one.
    try:
        client.get_table(table_ref)
    except NotFound:
        pass  # Nothing to replace.
    else:
        print(f"Table {table_id} already exists in dataset {dataset_id}.")
        # delete the existing table
        client.delete_table(table_ref)
        print(f"Table {table_id} deleted from dataset {dataset_id}.")

    # Always create the table with the explicit schema. Previously this only
    # happened when the table was missing, so a re-upload left table creation
    # to the load job and reported "created" before any table existed.
    table = bigquery.Table(table_ref, schema=create_schema(df))
    client.create_table(table)
    print(f"Table {table_id} created in dataset {dataset_id}.")

    # Insert the DataFrame into the BigQuery table
    job = client.load_table_from_dataframe(df, table_ref)
    job.result()  # Wait for the job to complete

def load_table_from_bigquery(dataset_id, table_id, project_id):
    """Fetch every row of a BigQuery table as a DataFrame.

    Args:
        dataset_id (str): Id of the dataset that holds the table.
        table_id (str): Id of the table to read.
        project_id (str): Project id handed to ``pandas_gbq.read_gbq``.
    Returns:
        The result of ``pandas_gbq.read_gbq`` for a ``SELECT *`` over the table.
    """
    # The table is addressed as `dataset.table`; the project is passed to
    # pandas_gbq separately.
    select_all = f"SELECT * FROM `{dataset_id}.{table_id}`"
    return pandas_gbq.read_gbq(select_all, project_id=project_id)

In [4]:
# Upload the enriched frame to BigQuery table stock_data.stock_data_test;
# a previous copy of that table is dropped first.
save_table_to_bigquery(
    df=df_enriched_stock_data,
    dataset_id='stock_data',
    table_id='stock_data_test',
)

Table stock_data_test already exists in dataset stock_data.
Table stock_data_test deleted from dataset stock_data.
Table stock_data_test created in dataset stock_data.


# Load from BigQuery

In [5]:
# Read the uploaded table back; this rebinds df_enriched_stock_data to the
# BigQuery result, replacing the frame that was read from the CSV.
df_enriched_stock_data = load_table_from_bigquery(
    dataset_id='stock_data',
    table_id='stock_data_test',
    project_id="capable-arbor-293714",
)
df_enriched_stock_data

Downloading: 100%|[32m██████████[0m|


Unnamed: 0,Ticker,Closing_Price,All_Time_High,Percent_From_All_Time_High,Percent_Difference_200_Day_Moving_Average,24_Hour_Percent_Change,7_Day_Percent_Change,30_Day_Percent_Change,Annualized_Return,YTD_Return,...,Sector,Industry,Country,Business_Summary,Dividend_Yield,Trailing_PE,Forward_PE,Average_Volume,Average_Volume_10days,52_Week_Change
0,CTVA,74.02,74.21,-0.26,20.58,0.41,2.71,10.98,19.16,32.05,...,Basic Materials,Agricultural Inputs,United States,"Corteva, Inc. operates in the agriculture busi...",0.92,44.590360,23.059189,3815208.0,5275200.0,0.393309
1,CF,96.82,111.25,-12.97,15.50,-3.52,1.66,19.76,24.07,14.17,...,Basic Materials,Agricultural Inputs,United States,"CF Industries Holdings, Inc., together with it...",1.99,12.807539,15.667476,2789909.0,4192810.0,0.327513
2,MOS,35.92,72.25,-50.28,30.95,-1.16,3.67,12.05,20.45,49.58,...,Basic Materials,Agricultural Inputs,United States,"The Mosaic Company, through its subsidiaries, ...",2.42,30.965517,14.601625,5762537.0,7347690.0,0.248368
3,VMC,259.05,291.14,-11.02,0.56,1.22,-0.79,-3.31,14.54,1.77,...,Basic Materials,Building Materials,United States,Vulcan Materials Company produces and supplies...,0.77,36.384130,28.530285,1124596.0,849750.0,0.033767
4,MLM,547.22,617.09,-11.32,2.74,1.67,0.25,1.19,17.78,7.51,...,Basic Materials,Building Materials,United States,"Martin Marietta Materials, Inc., a natural res...",0.59,31.377007,26.257920,453570.0,377270.0,-0.005451
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,PNW,89.54,94.52,-5.27,1.23,1.10,0.37,-2.68,7.53,8.34,...,Utilities,Utilities - Regulated Electric,United States,"Pinnacle West Capital Corporation, through its...",4.04,17.730692,19.132479,1258969.0,1416440.0,0.169550
497,ATO,155.88,161.76,-3.64,7.47,1.51,2.24,-2.79,10.20,14.01,...,Utilities,Utilities - Regulated Gas,United States,"Atmos Energy Corporation, together with its su...",2.27,21.766762,21.827732,1132853.0,794570.0,0.301135
498,NI,40.31,40.94,-1.54,8.89,2.00,2.15,1.77,13.16,12.50,...,Utilities,Utilities - Regulated Gas,United States,"NiSource Inc., an energy holding company, oper...",2.83,21.789190,21.672043,4622620.0,4381450.0,0.368421
499,AWK,143.31,175.80,-18.48,5.20,1.46,1.92,-0.85,3.87,17.12,...,Utilities,Utilities - Regulated Water,United States,"American Water Works Company, Inc., through it...",2.34,26.074680,25.114035,1440667.0,1053040.0,0.068619
