In [1]:
from processor.processor import Processor as processor
from database.adatabase import ADatabase
from xgboost import XGBRegressor
from statistics import mean
import math
import pandas as pd
import numpy as np
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from tqdm import tqdm
from dotenv import load_dotenv
from asset.stock import Stock
from asset.bond import Bond
from asset.option import Option
import warnings
import pytz
import copy
warnings.simplefilter(action="ignore")
import pickle

In [2]:
# Database handles used by this notebook. The cells below call
# connect()/cloud_connect() and disconnect() on the handles they use.
# The original cell constructed the "market" handle twice; one is enough.
market = ADatabase("market")
sec = ADatabase("sec")  # not referenced in the cells visible here
fred = ADatabase("fred")
db = ADatabase("sapling")

In [3]:
# S&P 500 constituents: the HTML table with id="constituents" on the Wikipedia
# page, with its "Symbol" column renamed to "ticker" so later cells can merge
# and filter on that name.
sp500 = (
    pd.read_html(
        "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies",
        attrs={"id":"constituents"},
    )[0]
    .rename(columns={"Symbol":"ticker"})
)

In [14]:
def _load_fred_series(table, column, scale=1.0, lag_rows=5):
    """Fetch one table through the open `fred` handle and clean it up.

    Steps, in order:
      1. rename the raw "value" column to `column`;
      2. turn "." placeholders into NaN and drop every row containing a NaN;
      3. cast `column` to float and divide by `scale`;
      4. shift the "date" column up by `lag_rows` rows, so each value is paired
         with the date found `lag_rows` rows later (the last `lag_rows` rows are
         left without a date);
      5. run the project's `processor.column_date_processing` on the result.

    NOTE(review): step 4 looks like a deliberate lag so a value is only used
    `lag_rows` observations after it was recorded -- confirm that is the intent.

    Parameters
    ----------
    table : str
        Name passed to `fred.retrieve`.
    column : str
        New name for the "value" column.
    scale : float, default 1.0
        Divisor applied after the float cast (100 turns percent into a fraction).
    lag_rows : int, default 5
        Number of rows the "date" column is shifted by.

    Returns
    -------
    Whatever `processor.column_date_processing` returns for the cleaned frame.
    """
    series = fred.retrieve(table).rename(columns={"value": column})
    series[column] = series[column].replace(".", np.nan)
    series = series.dropna()
    series[column] = series[column].astype(float) / scale
    series["date"] = series["date"].shift(-lag_rows)
    return processor.column_date_processing(series)


fred.connect()
try:
    # "rf": the market_yield series, converted from percent to a fraction.
    market_yield = _load_fred_series("market_yield", "rf", scale=100)
    # "spy": the sp500 series, kept in its original units.
    spy = _load_fred_series("sp500", "spy")
finally:
    # Release the connection even when a retrieve/cleaning step raises.
    fred.disconnect()

In [15]:
# Window lengths (in rows) used for the per-ticker features below.
RETURN_LOOKBACK = 65
BETA_WINDOW = 100

prices = []

# Loop-invariant merge inputs: slice them once instead of once per ticker.
spy_by_date = spy[["date","spy"]]
rf_by_date = market_yield[["date","rf"]]
sector_by_ticker = sp500[["ticker","GICS Sector"]]

market.connect()
try:
    for ticker in tqdm(sp500["ticker"].unique()):
        try:
            price = processor.column_date_processing(market.query("prices",{"ticker":ticker}))
            price = price.sort_values("date")

            # Attach the index level, the rf rate and the GICS sector, then
            # keep only rows where every column is populated.
            price = price.merge(spy_by_date,on="date",how="left")
            price = price.merge(rf_by_date,on="date",how="left")
            price = price.merge(sector_by_ticker,on="ticker",how="left")
            price = price.dropna()

            # "sigma" is the RETURN_LOOKBACK-row percentage change of adjclose.
            price["sigma"] = price["adjclose"].pct_change(RETURN_LOOKBACK)

            # Rolling covariance of spy with adjclose over rolling variance of spy.
            # NOTE(review): both inputs are price levels, not returns -- confirm
            # that a level-based beta is what the downstream consumer expects.
            price["covar"] = price["spy"].rolling(BETA_WINDOW).cov(price["adjclose"])
            price["var"] = price["spy"].rolling(BETA_WINDOW).var()
            price["beta"] = price["covar"] / price["var"]

            # prediction = beta * adjclose, so excess_return reduces to beta - 1.
            price["prediction"] = price["beta"] * (price["adjclose"])
            price["excess_return"] = (price["prediction"] - price["adjclose"]) / price["adjclose"]
            prices.append(price)
        except Exception as e:
            # Best effort: report the ticker that failed and move on to the next.
            print(ticker,str(e))
finally:
    # Always release the connection, including on KeyboardInterrupt.
    market.disconnect()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 503/503 [00:05<00:00, 95.71it/s]


In [16]:
# Stack every per-ticker frame, keep only the columns that get published, and
# discard rows where any of them is missing (for example, rows whose
# "prediction" is NaN).
sim_columns = ["date","ticker","GICS Sector","prediction"]
sim = pd.concat(prices)[sim_columns].dropna()


Unnamed: 0,date,ticker,GICS Sector,prediction
248,2024-07-11,MMM,Industrials,1.795164
248,2024-07-11,AOS,Industrials,-0.334330
248,2024-07-11,ABT,Health Care,-2.501436
248,2024-07-11,ABBV,Health Care,-1.938827
248,2024-07-11,ACN,Information Technology,-36.689206
...,...,...,...,...
248,2024-07-11,XYL,Industrials,2.684741
248,2024-07-11,YUM,Consumer Discretionary,-1.752682
248,2024-07-11,ZBRA,Information Technology,18.555919
248,2024-07-11,ZBH,Health Care,-4.300176


In [18]:
# Publish only the rows for the most recent date in `sim`.
# Build and validate the slice BEFORE touching the database: the stored "sim"
# table is dropped first, so an empty or failed slice would otherwise wipe the
# existing table and leave nothing in its place.
latest_sim = sim[sim["date"]==sim["date"].max()]
if latest_sim.empty:
    raise ValueError("sim has no rows for its latest date; refusing to replace the stored 'sim' table")

db.cloud_connect()
try:
    db.drop("sim")
    db.store("sim",latest_sim)
finally:
    # Release the connection even if drop/store raises.
    db.disconnect()