In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import quantstats as qs
import statsmodels.api as sm
from linearmodels.asset_pricing import LinearFactorModel
from statsmodels.regression.rolling import RollingOLS

In [None]:
def cache(func):
    """Decorator that caches a table-returning function as a parquet file.

    The cache file is ``./data/<suffix>.parquet`` where ``<suffix>`` is the part
    of ``func.__name__`` after its last underscore (``get_crsp`` -> ``crsp``).
    If that file can be read, its content is returned and ``func`` is not
    called. Otherwise ``func`` is called, its result is written to the file
    and returned.

    Note: the cache key is the function name only. Call arguments are ignored,
    so calls with different arguments return the same cached table.
    """
    import functools

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapped_func(*args, **kwargs):
        table_name = func.__name__.split("_")[-1]
        parquet_path = f"./data/{table_name}.parquet"
        try:
            table = pd.read_parquet(parquet_path)
        except FileNotFoundError:
            table = func(*args, **kwargs)
            # Make sure the target folder exists so the freshly computed
            # table is not lost to a failed write.
            Path(parquet_path).parent.mkdir(parents=True, exist_ok=True)
            table.to_parquet(parquet_path)
        return table

    return wrapped_func

def query(sql_stmt, params, wrds_username):
    """Run a SQL statement through a WRDS connection and return the result.

    Parameters
    ----------
    sql_stmt : str
        SQL text passed to ``raw_sql``.
    params : dict
        Bind parameters passed to ``raw_sql``.
    wrds_username : str
        Username handed to ``wrds.Connection``.

    Returns
    -------
    Whatever ``raw_sql`` returns; the ``date`` column is requested as a date
    column via ``date_cols``.
    """
    # Imported lazily so the module is only needed when a query is run.
    import wrds

    connection = wrds.Connection(wrds_username=wrds_username)
    # The context manager is exited before the result reaches the caller.
    with connection as db:
        return db.raw_sql(sql_stmt, date_cols=["date"], params=params)

def get_crsp(permnos, wrds_username):
    """
    Get daily stock data from ``crsp.dsf`` for the given permnos.

    Rows are restricted to 1997-01-01 .. 2019-12-31 and ordered by date, permno.

    Parameters
    ----------
    permnos : tuple
        Permnos bound to the ``IN %(permnos)s`` clause.
    wrds_username : str
        Username forwarded to ``query``.

    Returns
    -------
    DataFrame with columns date, permno, open, high, low, close, volume, ret,
    shrout, plus two derived columns:
    ``cap``   - close * shrout, lagged one row within each permno;
    ``w_cap`` - ``cap`` divided by the sum of ``cap`` over the same date.
    """
    sql_crsp = """
    SELECT
        date,
        permno,
        openprc AS open,
        askhi AS high,
        bidlo AS low,
        prc AS close,
        vol AS volume,
        ret,
        shrout
    FROM
        crsp.dsf
    WHERE
        permno IN %(permnos)s
        AND date >= '1997-01-01'
        AND date <= '2019-12-31'
    ORDER BY
        date, permno;
    """
    params = {"permnos": permnos}
    crsp = (
        query(sql_crsp, params, wrds_username)
        .astype({"permno": "category"})
        .convert_dtypes()
    )
    # NOTE(review): CRSP is understood to report `prc` as a negative number when
    # it is a bid/ask average - confirm whether close/cap should use abs().

    # Fill missing close prices of permno 80539 with the next available close.
    # `.bfill()` replaces the deprecated `fillna(method="bfill")`.
    is_80539 = crsp.permno == 80539
    crsp.loc[is_80539, "close"] = crsp.loc[is_80539, "close"].bfill()
    # Fill other missing values
    crsp = crsp.fillna({"open": crsp.close,
        "high": crsp.close,
        "low": crsp.close,
        "volume": 0,
        "ret": 0})
    # Calculate market capitalization
    crsp["cap"] = crsp.close * crsp.shrout
    # Shift market capitalization to avoid look ahead bias
    crsp["cap"] = crsp.groupby("permno", observed=True).cap.shift(1)
    # Calculate market capitalization weight. `transform` returns a result on
    # the frame's own index, so the assignment aligns on every pandas version
    # (groupby.apply prepends the group key to the index in pandas >= 2.0).
    crsp["w_cap"] = crsp.cap / crsp.groupby("date").cap.transform("sum")
    # Convert certain data types to float64
    crsp = crsp.astype({"ret": "float"})
    return crsp

def get_fama_french():
    """
    Download the daily 3-factor and momentum tables from the Ken French data
    library and return them side by side as one frame.

    Values are divided by 100 (percentage -> decimal). Column names are
    lower-cased, stripped of surrounding whitespace and of "-" characters, and
    the index name is lower-cased.
    """
    import pandas_datareader as web

    def _tidy(label):
        # e.g. "Mkt-RF" -> "mktrf"
        return label.lower().strip().replace("-", "")

    window = {"start": "1997-01-01", "end": "2019-12-31"}
    datasets = ("F-F_Research_Data_Factors_daily", "F-F_Momentum_Factor_daily")

    # Entry 0 of each reader result is the table used; transform from
    # percentage to nominal value.
    tables = [
        web.DataReader(name, "famafrench", **window)[0] / 100 for name in datasets
    ]
    # Merge into 4 factor model
    merged = pd.concat(tables, axis=1)
    return merged.rename(columns=_tidy).rename_axis(index=str.lower)

## Fama-French 4 Factors

In [None]:
# Folder holding the notebook's local input files.
path = Path("./data")
permno_path = path / "permno_selection.csv"

# Read the permno selection as strings and turn it into a tuple, the form
# that get_crsp binds to its SQL `IN` clause.
permnos = (
    pd.read_csv(permno_path, dtype={"permno": "str"})
    .squeeze()
    .pipe(tuple)
)

# NOTE(review): the WRDS username is hard-coded; consider reading it from
# configuration or the environment instead.
crsp = get_crsp(permnos, "iewaij")
fama_french = get_fama_french()

### Linear Factor Model

In [None]:
# Wide table of daily returns from 2000 onward: one row per date, one column
# per permno.
portfolio = crsp.loc[lambda rows: rows.date >= "2000-01-01"].pivot(
    index="date", columns="permno", values="ret"
)
# Subtract the `rf` column of fama_french from every stock, date by date.
portfolio = portfolio.sub(fama_french.loc[portfolio.index, "rf"], axis=0)
# Factor returns on the same dates as the portfolio table.
factor = fama_french.loc[portfolio.index, ["mktrf", "smb", "hml", "mom"]]

mod = LinearFactorModel(portfolios=portfolio, factors=factor)
res = mod.fit()
print(res.summary)

                      LinearFactorModel Estimation Summary                      
No. Test Portfolios:                 50   R-squared:                      0.3259
No. Factors:                          4   J-statistic:                    17.683
No. Observations:                  5031   P-value                         0.9999
Date:                  Mon, Mar 28 2022   Distribution:                 chi2(46)
Time:                          14:14:52                                         
Cov. Estimator:                  robust                                         
                                                                                
                            Risk Premia Estimates                             
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
mktrf          0.0006     0.0002     3.1725     0.0015      0.0002      0.0009
smb         9.693e-05     0.0002    

### Estimate Rolling Factor Exposure

#### Daily Estimate

In [None]:
# NOTE(review): this repeats the get_crsp call already made in the
# "Fama-French 4 Factors" cell with the same arguments, so a full run queries
# WRDS twice for the same data. Consider dropping this cell or reusing `crsp`.
crsp = get_crsp(permnos, "iewaij")

Loading library list...
Done


In [None]:
# Inspect the column dtypes of the table returned by get_crsp.
crsp.dtypes

date      datetime64[ns]
permno          category
open             Float64
high             Float64
low              Float64
close            Float64
volume             Int64
ret              float64
shrout             Int64
cap              Float64
w_cap            Float64
dtype: object

In [None]:
# Wide table of daily returns from 1999 onward: one row per date, one column
# per permno.
portfolio = crsp.loc[lambda rows: rows.date >= "1999-01-01"].pivot(
    index="date", columns="permno", values="ret"
)
# Subtract the `rf` column of fama_french from every stock, date by date.
portfolio = portfolio.sub(fama_french.loc[portfolio.index, "rf"], axis=0)
# Factor returns on the portfolio's dates plus a constant column (intercept).
factor = fama_french.loc[portfolio.index, ["mktrf", "smb", "hml", "mom"]].assign(
    const=1
)

# Rolling regression for a single stock (permno 10874) over 252-row windows.
mod = RollingOLS(endog=portfolio[10874], exog=factor, window=252)

In [None]:
# Wide table of daily returns from 1999 onward (rows: date, columns: permno),
# minus the `rf` column of fama_french.
portfolio = crsp.loc[crsp.date >= "1999-01-01", ["date", "permno", "ret"]].pivot(
    index="date", columns="permno", values="ret"
)
portfolio = portfolio.sub(fama_french.loc[portfolio.index, "rf"], axis=0)
# Factor returns on the portfolio's dates plus a constant column (intercept).
factor = fama_french.loc[portfolio.index, ["mktrf", "smb", "hml", "mom"]].assign(
    const=1
)

# One rolling regression (252-row window) per stock. Each entry of `betas`
# holds that stock's factor coefficients, tagged with its permno.
betas = []
for permno in portfolio:
    # Regress the column of the current permno. Using a fixed column here
    # would give every permno the coefficients of one and the same stock.
    rolling_params = (
        RollingOLS(endog=portfolio[permno], exog=factor, window=252)
        .fit(params_only=True)
        .params
    )
    betas.append(rolling_params.drop("const", axis=1).assign(permno=permno))

In [None]:
# Show the rolling factor coefficients stored for the first permno.
betas[0]

Unnamed: 0_level_0,mktrf,smb,hml,mom,permno
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1999-01-04,,,,,10874.0
1999-01-05,,,,,10874.0
1999-01-06,,,,,10874.0
1999-01-07,,,,,10874.0
1999-01-08,,,,,10874.0
...,...,...,...,...,...
2019-12-24,1.284331,0.534823,0.332856,-0.378967,10874.0
2019-12-26,1.301805,0.542204,0.331372,-0.373530,10874.0
2019-12-27,1.292380,0.558356,0.330043,-0.373617,10874.0
2019-12-30,1.293027,0.547177,0.318343,-0.382091,10874.0


In [None]:
# Work on a date-indexed view of crsp so that the join with fama_french (which
# is indexed by date) lines up. get_crsp returns `date` as a column; a frame
# that is already date-indexed is accepted too.
crsp_by_date = crsp.set_index("date") if "date" in crsp.columns else crsp

# Long panel indexed by (date, permno): factor columns, `re` (ret minus rf)
# and a constant column for the intercept.
portfolio = (
    crsp_by_date[["permno", "ret"]]
    .join(fama_french)  # many-to-one join on date
    .assign(re=lambda d: d.ret - d.rf, const=1)
    .drop(columns=["ret", "rf"])
    .set_index("permno", append=True)
)

# Rolling regression (252-row window) of `re` on the factors, per permno.
# group_keys=False keeps the (date, permno) index of the panel; otherwise
# pandas >= 2.0 prepends the group key as an extra index level.
beta = portfolio.groupby("permno", group_keys=False, observed=True).apply(
    lambda x: RollingOLS(
        endog=x.re,
        exog=x.drop(columns=["re"]),
        window=252
    )
    .fit(params_only=True)
    .params.drop("const", axis=1)
)

### Prediction

In [None]:
# Work on a date-indexed view of crsp so the join with fama_french and the
# date slice below line up. get_crsp returns `date` as a column; a frame
# that is already date-indexed is accepted too.
crsp_by_date = crsp.set_index("date") if "date" in crsp.columns else crsp

# Panel indexed by (date, permno) for 2000-2012 with the log of (ret - rf + 1),
# a constant column and the rolling betas; rows with any missing value dropped.
data = (
    crsp_by_date[["permno", "ret"]]
    .join(fama_french[["rf"]])  # many-to-one join on date
    .assign(logret=lambda d: np.log(d.ret - d.rf + 1), const=1)
    .drop(columns=["ret", "rf"])
    .loc["2000-01-01":"2012-12-31"]
    .set_index("permno", append=True)
    .join(beta)
    .dropna()
)

by = data.groupby(level="permno", observed=True)
# NOTE(review): ret_1d is the same-row return, while the longer horizons
# below are forward-looking - confirm this is intended.
data["ret_1d"] = np.exp(data.logret) - 1
# Forward h-row returns: row t gets the compounded return over rows
# t+1 .. t+h of the same permno. The rolling sum and the shift both run inside
# each group (so no value crosses from one permno into another), and
# `transform` returns the result on data's own index so the assignment aligns.
for horizon in (5, 10, 21):
    forward_logret = by.logret.transform(
        lambda s, h=horizon: s.rolling(h).sum().shift(-h)
    )
    data[f"ret_{horizon}d"] = np.exp(forward_logret) - 1

## Industry Portfolio

## Technical Indicators

## Option Metrics