In [1]:
from processor.processor import Processor as processor
from database.adatabase import ADatabase
from xgboost import XGBRegressor
from statistics import mean
import math
import pandas as pd
import numpy as np
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from tqdm import tqdm
from dotenv import load_dotenv
from asset.stock import Stock
from asset.bond import Bond
from asset.option import Option
import warnings
import pytz
import copy
warnings.simplefilter(action="ignore")
import pickle

In [2]:
# Create one handle per database. The cells that use a handle call
# connect()/cloud_connect() on it themselves before querying.
# (The "market" handle was previously constructed twice; once is enough.)
market = ADatabase("market")
sec = ADatabase("sec")
fred = ADatabase("fred")
db = ADatabase("sapling")

In [3]:
# Read the table with id="constituents" from the Wikipedia S&P 500 page and
# expose its "Symbol" column under the name "ticker".
constituent_tables = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies",
    attrs={"id": "constituents"},
)
sp500 = constituent_tables[0].rename(columns={"Symbol": "ticker"})

In [4]:
# Load the "sp500" series from the fred database and shape it into a frame
# with a numeric "spy" column.
fred.connect()
try:
    spy = fred.retrieve("sp500")
finally:
    # Release the connection even when the retrieval raises.
    fred.disconnect()

spy = spy.rename(columns={"value": "spy"})
# Rows whose value is the literal "." are treated as missing and dropped.
spy["spy"] = spy["spy"].replace(".", np.nan)
spy = spy.dropna()
spy["spy"] = spy["spy"].astype(float)
# Each value is re-labelled with the date found five rows further down; the
# last five rows are left without a date.
# NOTE(review): this depends on the row order returned by retrieve() --
# confirm it is chronological and that a five-row offset is intended.
spy["date"] = spy["date"].shift(-5)
spy = processor.column_date_processing(spy)

In [10]:
# Build one enriched price frame per S&P 500 ticker and collect them in `prices`.
prices = []

# Loop-invariant column subsets, sliced once instead of on every iteration.
spy_levels = spy[["date", "spy"]]
sectors = sp500[["ticker", "GICS Sector"]]

market.connect()
try:
    for ticker in tqdm(sp500["ticker"].unique()):
        try:
            price = processor.column_date_processing(market.query("prices", {"ticker": ticker}))
            price = price.sort_values("date")
            price = price.merge(spy_levels, on="date", how="left")
            price = price.merge(sectors, on="ticker", how="left")
            # cov()/var() return scalars over the whole merged history, so
            # "covar", "var" and "beta" are constant columns for a ticker and
            # "excess_return" reduces to beta - 1.
            # NOTE(review): these statistics are taken on price levels, not on
            # returns -- confirm that is the intended definition of beta.
            price["covar"] = price["spy"].cov(price["adjclose"])
            price["var"] = price["spy"].var()
            price["beta"] = price["covar"] / price["var"]
            price["prediction"] = price["beta"] * price["adjclose"]
            price["excess_return"] = (price["prediction"] - price["adjclose"]) / price["adjclose"]
            prices.append(price)
        except Exception as e:
            # Best effort: report the failing ticker and move on to the next one.
            print(ticker, str(e))
finally:
    # Always release the connection, including when the loop is interrupted.
    market.disconnect()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 503/503 [00:04<00:00, 112.64it/s]


In [11]:
# Stack the per-ticker frames, keep four columns, and discard every row that
# has a missing value in any of them.
sim_columns = ["date", "ticker", "GICS Sector", "prediction"]
sim = pd.concat(prices).loc[:, sim_columns].dropna()


In [12]:
# Show the rows of sim that fall on the most recent date it contains.
sim.loc[sim["date"].eq(sim["date"].max())]

Unnamed: 0,date,ticker,GICS Sector,prediction
250,2024-07-15,MMM,Industrials,0.167063
250,2024-07-15,AOS,Industrials,1.332662
250,2024-07-15,ABT,Health Care,0.865061
250,2024-07-15,ABBV,Health Care,4.438537
250,2024-07-15,ACN,Information Technology,1.091689
...,...,...,...,...
250,2024-07-15,XYL,Industrials,5.727000
250,2024-07-15,YUM,Consumer Discretionary,1.419437
250,2024-07-15,ZBRA,Information Technology,23.761538
250,2024-07-15,ZBH,Health Care,0.261558


In [13]:
# Replace the stored "sim" table with the rows for the most recent date in sim.
latest_sim = sim[sim["date"] == sim["date"].max()]
if latest_sim.empty:
    # Nothing to publish: fail before the existing table is dropped.
    raise ValueError("no rows for the latest date in sim; refusing to overwrite stored 'sim'")

db.cloud_connect()
try:
    db.drop("sim")
    db.store("sim", latest_sim)
finally:
    # Release the connection even when drop/store raises.
    db.disconnect()