# Import Libraries

In [None]:
# import os

# os.chdir('..')
# print(os.getcwd())

In [3]:
from scripts.functions.data_ingestion import get_base_data

ModuleNotFoundError: No module named 'functions'

In [5]:
from .scripts import *

ImportError: attempted relative import with no known parent package

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import pandas_gbq

import tensorflow as tf
from sklearn.preprocessing import StandardScaler

from scripts.functions.data_ingestion import *
from src.parameters import *

from google.cloud import bigquery
from google.cloud.exceptions import NotFound

from tqdm import tqdm
import yfinance as yf
import lxml

# Get List of Tickers

In [None]:
# Fetch the base S&P 500 data for the Wikipedia constituents URL and preview
# the first five rows.
df_sp500 = get_base_data(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
)
df_sp500.head(5)

# Create Dataset with Features

In [None]:
def calculate_annualized_returns(ticker_symbol, df_input, period="5y",
                                 risk_free_rate=0.01, benchmark_ticker='^GSPC'):
    """Add return, risk and size features for one ticker to ``df_input``.

    Price history for ``ticker_symbol`` and for ``benchmark_ticker`` is fetched
    through yfinance. The derived values are written into the rows of
    ``df_input`` whose ``'Ticker'`` equals ``ticker_symbol``; the frame is
    modified in place and also returned.

    Columns written (in this order): ``Annualized_Return``, ``YTD_Pct_Return``,
    one ``<year>_Pct_Return`` per earlier calendar year in the history,
    ``Market_Cap``, ``24_Hour_Change``, ``7_Day_Change``, ``30_Day_Change``,
    ``Pct_Diff_200_MA``, ``Annualized_Volatility``, ``Sharpe_Ratio``, ``Beta``
    and ``Years_Since_Founded``. Percent-style columns are rounded to two
    decimals.

    Any exception raised while processing is caught and printed; ``df_input``
    is then returned with whatever columns had been filled before the failure.

    Args:
        ticker_symbol (str): Symbol passed to ``yf.Ticker`` and matched against
            ``df_input['Ticker']``.
        df_input (pd.DataFrame): Frame with ``'Ticker'`` and ``'Founded'``
            columns. The first four characters of ``'Founded'`` are parsed as
            the founding year.
        period (str): History window passed to ``Ticker.history``
            (default ``"5y"``).
        risk_free_rate (float): Annual risk-free rate as a fraction
            (default ``0.01``, i.e. 1%), used for the Sharpe ratio.
        benchmark_ticker (str): Symbol of the index used as the beta benchmark
            (default ``'^GSPC'``).

    Returns:
        pd.DataFrame: ``df_input`` (the same object that was passed in).
    """
    try:
        ticker = yf.Ticker(ticker_symbol)
        row_mask = df_input['Ticker'] == ticker_symbol

        # Get historical market data
        hist = ticker.history(period=period).reset_index()
        if hist.empty:
            # Fail early with a readable message instead of an opaque
            # "single positional indexer is out-of-bounds" further down.
            raise ValueError("no price history returned")
        hist['Date'] = pd.to_datetime(hist['Date'])
        hist['Year'] = hist['Date'].dt.year
        current_year = hist['Year'].max()
        close = hist['Close']

        # Average annualized return (in percent). The exponent is based on the
        # time actually covered by the history: a calendar-year difference is
        # zero whenever the window sits inside a single year and misstates
        # partial-year windows.
        elapsed_years = (hist['Date'].iloc[-1] - hist['Date'].iloc[0]).days / 365.25
        if elapsed_years > 0:
            total_growth = close.iloc[-1] / close.iloc[0]
            annualized_return = (total_growth ** (1 / elapsed_years) - 1) * 100
        else:
            annualized_return = np.nan
        df_input.loc[row_mask, 'Annualized_Return'] = np.round(annualized_return, 2)

        # Percent return per calendar year (first close to last close within
        # the year). Newest year first so the resulting column order is stable.
        for year in hist['Year'].sort_values(ascending=False).unique():
            year_close = close[hist['Year'] == year]
            pct_return = (year_close.iloc[-1] - year_close.iloc[0]) / year_close.iloc[0] * 100
            column = 'YTD_Pct_Return' if year == current_year else f'{year}_Pct_Return'
            df_input.loc[row_mask, column] = np.round(pct_return, 2)

        # Get market cap
        df_input.loc[row_mask, 'Market_Cap'] = ticker.info.get('marketCap', np.nan)

        # Most recent change over 1, 7 and 30 rows of the price history.
        # NOTE(review): these are rows of the history (trading days for daily
        # data), not calendar days -- confirm this is the intended definition.
        for column, periods in (('24_Hour_Change', 1),
                                ('7_Day_Change', 7),
                                ('30_Day_Change', 30)):
            change = close.pct_change(periods=periods) * 100
            df_input.loc[row_mask, column] = np.round(change.iloc[-1], 2)

        # Percent distance of the latest close from its 200-row moving average
        moving_avg_200 = close.rolling(window=200).mean()
        pct_diff_200_ma = (close - moving_avg_200) / moving_avg_200 * 100
        df_input.loc[row_mask, 'Pct_Diff_200_MA'] = np.round(pct_diff_200_ma.iloc[-1], 2)

        # Annualized volatility: population standard deviation of daily
        # returns scaled by sqrt(252 trading days).
        hist['Daily_Return'] = close.pct_change()
        volatility = hist['Daily_Return'].std(ddof=0) * np.sqrt(252)
        df_input.loc[row_mask, 'Annualized_Volatility'] = np.round(volatility, 2)

        # Sharpe ratio. annualized_return is in percent while risk_free_rate
        # and volatility are fractions, so convert before combining them.
        excess_return = annualized_return / 100 - risk_free_rate
        sharpe_ratio = excess_return / volatility if volatility != 0 else np.nan
        df_input.loc[row_mask, 'Sharpe_Ratio'] = np.round(sharpe_ratio, 2)

        # Beta against the benchmark: cov(stock, benchmark) / var(benchmark),
        # computed on the dates both histories share.
        # NOTE(review): the benchmark history is downloaded on every call;
        # callers looping over many tickers may want to cache it.
        benchmark_hist = yf.Ticker(benchmark_ticker).history(period=period).reset_index()
        benchmark_hist['Date'] = pd.to_datetime(benchmark_hist['Date'])
        benchmark_hist['Daily_Return'] = benchmark_hist['Close'].pct_change()
        merged = hist.merge(
            benchmark_hist[['Date', 'Daily_Return']],
            on='Date',
            suffixes=('', '_Benchmark'),
        )
        covariance = merged['Daily_Return'].cov(merged['Daily_Return_Benchmark'])
        benchmark_variance = merged['Daily_Return_Benchmark'].var()
        beta = covariance / benchmark_variance if benchmark_variance != 0 else np.nan
        df_input.loc[row_mask, 'Beta'] = np.round(beta, 2)

        # Years since founded (founding year = first four characters of 'Founded')
        founded_year = int(df_input.loc[row_mask, 'Founded'].max()[:4])
        df_input.loc[row_mask, 'Years_Since_Founded'] = current_year - founded_year

    except Exception as e:
        # Best-effort: one failing ticker must not abort a whole batch run.
        print(f"Error processing {ticker_symbol}: {e}")

    return df_input



In [None]:
# Compute the feature columns for every distinct ticker, one symbol at a time.
for ticker_symbol in tqdm(df_sp500['Ticker'].unique().tolist()):
    df_sp500 = calculate_annualized_returns(
        ticker_symbol=ticker_symbol,
        df_input=df_sp500,
        period="5y",
    )

# Order the rows from largest to smallest market cap and renumber the index.
df_sp500 = df_sp500.sort_values(by='Market_Cap', ascending=False)
df_sp500 = df_sp500.reset_index(drop=True)
df_sp500

# Save to BigQuery

In [None]:
# Write the enriched frame to the BigQuery table identified by
# dataset_id / table_id.
save_table_to_bigquery(
    df=df_sp500,
    dataset_id=dataset_id,
    table_id=table_id,
)

# Load from BigQuery

In [None]:
# Read the table back from BigQuery into df_sp500 and display it.
df_sp500 = load_table_from_bigquery(
    dataset_id=dataset_id,
    table_id=table_id,
    project_id=PROJECT_ID,
)
df_sp500