In [276]:
import pandas as pd
from database.adatabase import ADatabase
from xgboost import XGBRegressor
import matplotlib.pyplot as plt
from tqdm import tqdm
from processor.processor import Processor as p
from asset.exposure import Exposure
from equations.capm import CAPM
from sklearn.preprocessing import normalize

In [277]:
sec = ADatabase("sec")
market = ADatabase("market")
fred = ADatabase("fred")

In [278]:
market.connect()
index = market.retrieve("sp500")
market.disconnect()
factors = ["assets","liabilities","reference_price"]
fred.connect()
sp500 = p.column_date_processing(fred.retrieve("sp500_projections").rename(columns={"prediction":"sp500_prediction"}))
interest_rates = p.column_date_processing(fred.retrieve("market_yield").rename(columns={"value":"rf"}))
interest_rates["rf"] = interest_rates["rf"].replace(".",0)
interest_rates["rf"] = [float(x) / 100 for x in interest_rates["rf"]]
fred.disconnect
macro = sp500.merge(interest_rates,on="date",how="left")

In [279]:
market.connect()
sec.connect()
factors_df = []
for row in tqdm(index.iterrows()):
    try:
        ticker = row[1]["ticker"]
        cik = int(row[1]["CIK"])
        price = p.column_date_processing(market.query("prices",{"ticker":ticker}))
        price.sort_values("date")
        price["year"] = [x.year for x in price["date"]]
        price["quarter"] = [x.quarter for x in price["date"]]
        filings = p.column_date_processing(sec.query("filings",{"cik":cik}))
        filings["year"] = [x.year + 1 for x in filings["date"]]
        filings["quarter"] = [x.quarter for x in filings["date"]]
        # filings["year"] = [x.year if x.quarter < 4 else x.year + 1 for x in filings["date"]]
        # filings["quarter"] = [x.quarter + 1 if x.quarter < 4 else 1 for x in filings["date"]]
        ticker_factors = filings.drop(["date","cik"],axis=1).groupby(["year","quarter"]).mean().reset_index()
        price = price.drop(["date","ticker"],axis=1).groupby(["year","quarter"]).mean().reset_index()
        ticker_factors = ticker_factors.merge(price,on=["year","quarter"],how="left")
        ticker_factors["ticker"] = ticker
        ticker_factors["reference_price"] = ticker_factors["adjclose"].shift(1)
        ticker_factors["y"] = ticker_factors["adjclose"].shift(-1)
        factors_df.append(ticker_factors[["year","quarter","ticker","adjclose","y"]+factors])
    except Exception as e:
        print(ticker,str(e))
        continue
sec.disconnect()
market.disconnect()
factors_df = pd.concat(factors_df).sort_values(["year","quarter"])

27it [00:01, 20.12it/s]

AMTM 'date'


64it [00:03, 18.69it/s]

BRK.B 'date'


66it [00:04, 18.27it/s]

BLK 'date'


79it [00:04, 18.67it/s]

BF.B 'date'


502it [00:31, 15.99it/s]

ZTS 'date'





In [280]:
factors_df.sort_values(["ticker","year","quarter"])

Unnamed: 0,year,quarter,ticker,adjclose,y,assets,liabilities,reference_price
0,2014,1,A,37.984034,37.120989,1.059450e+10,5.326500e+09,
1,2014,2,A,37.120989,37.846924,1.056150e+10,5.315000e+09,37.984034
2,2014,3,A,37.846924,37.250024,1.040700e+10,5.419500e+09,37.120989
3,2014,4,A,37.250024,37.340297,1.061100e+10,5.374000e+09,37.846924
4,2015,1,A,37.340297,38.407948,1.066200e+10,5.294000e+09,37.250024
...,...,...,...,...,...,...,...,...
42,2024,3,ZBRA,335.879531,387.112069,7.492500e+09,4.630000e+09,304.088016
43,2024,4,ZBRA,387.112069,,7.430000e+09,4.557000e+09,335.879531
44,2025,1,ZBRA,,,7.417500e+09,4.533000e+09,387.112069
45,2025,2,ZBRA,,,7.282500e+09,4.180000e+09,


In [281]:
model = XGBRegressor()
factors_df = factors_df.dropna(subset=factors)
training_data = factors_df[(factors_df["year"]>=2013) & (factors_df["year"]<2020)].dropna()
sim = factors_df[(factors_df["year"]>=2019)].dropna(subset=factors)
model.fit(training_data[factors],training_data["y"])
sim["prediction"] = model.predict(sim[factors])
sim = sim[["year","quarter","ticker","prediction"]]

In [282]:
market.connect()
prices = []
for ticker in tqdm(index["ticker"]):
    try:
        price = p.column_date_processing(market.query("prices",{"ticker":ticker}))
        price.sort_values("date",inplace=True)
        price["year"] = [x.year for x in price["date"]]
        price["quarter"] = [x.quarter for x in price["date"]]
        price.sort_values("date",inplace=True)
        price = price.merge(sim[["year","quarter","ticker","prediction"]],on=["year","quarter","ticker"],how="left").merge(macro,on="date",how="left")
        price["factor"] = price["prediction"] / price["adjclose"] - 1
        # price = CAPM.apply(price)
        prices.append(price)
    except Exception as e:
        print(str(e))
        continue
market.disconnect()

 13%|███████████████████▏                                                                                                                                     | 63/502 [00:03<00:32, 13.68it/s]

'date'


 16%|███████████████████████▊                                                                                                                                 | 78/502 [00:04<00:23, 18.28it/s]

'date'


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 502/502 [00:29<00:00, 17.22it/s]

'date'





In [283]:
simulation = pd.concat(prices).merge(index,on="ticker",how="left")

In [284]:
tickers = list(simulation[simulation["year"]==2020]["ticker"].unique())
simulation = simulation[simulation["ticker"].isin(tickers)]

In [285]:
simulation["rank"] = simulation.groupby(["date","GICS Sector"])["factor"].rank(method="dense",ascending=False).astype(int)

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

In [None]:
simulation["exposure"] = [1 if x <= simulation["rank"].max() or x > simulation["rank"].max() * 0.90 else 1.0/6.0 for x in simulation["rank"]]

In [None]:
simulation["projected"] = simulation["factor"] * simulation["exposure"]

In [None]:
simulation["sell_price"] = simulation["adjclose"]
simulation.sort_values("date",inplace=True)
trades = simulation[["year","quarter","GICS Sector","ticker","projected","exposure","adjclose","sell_price"]].groupby(["year","quarter","ticker"]).agg({"GICS Sector":"first","projected":"first","exposure":"first","adjclose":"first","sell_price":"last"}).reset_index()
trades = trades.sort_values("projected",ascending=False).groupby(["year","quarter","GICS Sector"]).first().reset_index()
trades["return"] = (trades["sell_price"] - trades["adjclose"])/trades["adjclose"]
trades["return"] = trades["return"] * trades["exposure"]
analysis = trades[["year","quarter","exposure","return"]].groupby(["year","quarter","exposure"]).mean().pivot_table(index=["year","quarter"],columns="exposure",values="return").reset_index()

In [None]:
trades.sort_values("return",ascending=False).head(40)

In [None]:
trades.iloc[-505:].to_csv("current.csv")

In [None]:
analysis['Year-Quarter'] = analysis["year"].astype(str) + "q" + analysis["quarter"].astype(str)

# Reset index for plotting
analysis.drop(["year","quarter"],axis=1,inplace=True)
analysis.reset_index(drop=True, inplace=True)

# Plot a bar chart
analysis.set_index("Year-Quarter").plot(
    kind="bar",
    figsize=(12, 6),
    title="Mean Returns by Exposure per Year and Quarter"
)

# Customize the plot
plt.xlabel("Year, Quarter")
plt.ylabel("Mean Return")
plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels for better readability
plt.legend(title="Exposure")
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
macro["year"] = [x.year for x in macro["date"]]
macro["quarter"] = [x.quarter for x in macro["date"]]
benchmark = macro.drop(["date","realtime_start","realtime_end"],axis=1).groupby(["year","quarter"]).mean().reset_index()
benchmark["sp500"] = benchmark["sp500"] / benchmark["sp500"].iloc[0] - 1
benchmark["rf"] = benchmark["rf"] / benchmark["rf"].iloc[0] - 1
benchmark["sp500_prediction"] = benchmark["sp500_prediction"] / benchmark["sp500_prediction"].iloc[0] - 1

In [None]:
benchmark

In [None]:
portfolio = trades[["year","quarter","return"]].groupby(["year","quarter"]).mean().reset_index().sort_values(["year","quarter"])
portfolio["return"] = portfolio["return"] + 1
portfolio["return"] = portfolio["return"].cumprod() - 1
portfolio['Year-Quarter'] = portfolio["year"].astype(str) + "q" + portfolio["quarter"].astype(str)
portfolio = portfolio.merge(benchmark,on=["year","quarter"],how="left")

In [None]:
portfolio.drop(["year","quarter"],axis=1,inplace=True)
portfolio.set_index("Year-Quarter").plot(
    kind="line",
    figsize=(12, 6),
    title="Mean Returns by Exposure per Year and Quarter"
)

In [None]:
trades.groupby(["year","quarter","GICS Sector"]).agg({"return":"mean"}).tail(20)

In [None]:
sim