In [None]:
import torch
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import yfinance as yf
from datetime import datetime
import numpy as np
import seaborn as sns
import wrds
import statsmodels.api as sm

# Plot styling: start from the seaborn notebook style sheet, then override
# font sizes and figure geometry in a single rcParams update.
plt.style.use('seaborn-v0_8-notebook')
mpl.rcParams.update({
    'axes.titlesize': 16,
    'axes.labelsize': 16,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 12,
    'figure.figsize': [10, 6],
    'figure.dpi': 100,
    'savefig.dpi': 100,
    'figure.autolayout': True,
})

# Small numerical tolerance; not referenced by the cells visible in this notebook.
eps = 1e-8

# WRDS session shared by every query below.
# NOTE(review): presumably prompts for / reads stored WRDS credentials - confirm.
conn = wrds.Connection()

# Sample window; interpolated into the SQL `BETWEEN` filters and the month-end
# date range in later cells.
start = '1965-01-01'
end = '2025-01-01'

Loading library list...
Done


In [2]:
# Month-end timestamps covering the sample window.
month_end_dates = pd.date_range(start, end, freq='ME')

# Daily mktrf / smb / hml / rf rows from ff.factors_daily for the sample
# window, indexed by date.
factors = conn.raw_sql(
    f"""
    SELECT date, mktrf, smb, hml, rf
    FROM ff.factors_daily
    WHERE date BETWEEN '{start}' AND '{end}'
    """,
    date_cols=['date'],
)
factors = factors.set_index('date')

# Sanitise step by step: infinities become NaN, every NaN becomes 0, and all
# columns are cast to float.
factors = factors.replace([np.inf, -np.inf], np.nan)
factors = factors.fillna(0)
factors = factors.astype(float)

factors

Unnamed: 0_level_0,mktrf,smb,hml,rf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1965-01-04,-0.0045,0.0070,-0.0008,0.00014
1965-01-05,0.0049,0.0038,-0.0009,0.00014
1965-01-06,0.0034,0.0018,0.0042,0.00014
1965-01-07,0.0040,0.0008,0.0019,0.00014
1965-01-08,0.0017,0.0017,-0.0020,0.00014
...,...,...,...,...
2024-12-24,0.0111,-0.0009,-0.0005,0.00017
2024-12-26,0.0002,0.0104,-0.0019,0.00017
2024-12-27,-0.0117,-0.0066,0.0056,0.00017
2024-12-30,-0.0109,0.0012,0.0074,0.00017


In [6]:
# Load the IVOL panel from disk. The first CSV column becomes the row index;
# the remaining column labels are later used as PERMNOs in the CRSP query.
ivol_df = pd.read_csv('./ivol_part_1.csv', index_col=0)

# The index is read back as text, so convert it to datetimes to allow date
# comparisons against the daily return index.
ivol_df = ivol_df.set_axis(pd.to_datetime(ivol_df.index), axis='index')

ivol_df

Unnamed: 0,83264,63618,10896,69906,79030,11233,44127,86580,91786,88837,...,83587,65752,24471,21085,59141,11494,91215,23780,17676,83557
1965-01-31,,,,,,,,,,,...,,,,,,,,,,
1965-02-28,,,,,,,,,,,...,,,,,,,,,,
1965-03-31,,,,,,,,,,,...,,,,,,,,,,
1965-04-30,,,,,,,,,,,...,,,,,,,,,,
1965-05-31,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-31,0.011945,,,,,,,0.025849,0.009253,0.013778,...,,,,,,,,,,
2024-09-30,0.010587,,,,,,,0.011175,0.011429,0.010877,...,,,,,,,,,,
2024-10-31,0.007728,,,,,,,0.016819,0.010310,0.047612,...,,,,,,,,,,
2024-11-30,0.008987,,,,,,,0.017378,0.012528,0.048934,...,,,,,,,,,,


In [None]:
# Fill `ivol_df` with monthly idiosyncratic volatility (IVOL): for each PERMNO
# column and each month-end row, regress the stock's daily excess returns on
# the three daily factors (mktrf, smb, hml) plus an intercept and store the
# standard deviation of the residuals.
#
# NOTE(review): each window now covers only the calendar month of the row
# label. The previous `[idx - 1 month, idx]` window was inclusive on both ends
# and therefore also picked up the last trading days of the preceding month
# (e.g. Jan 28-31 for the Feb 28 row). Columns filled by earlier runs used the
# old window; blank them out if the whole panel must be consistent.
factor_cols = ['mktrf', 'smb', 'hml']
n_params = len(factor_cols) + 1  # factor loadings plus the intercept
n_columns = len(ivol_df.columns)

for i, col in enumerate(ivol_df.columns):
    # Resume support: a column holding any estimate was filled by a prior run.
    if ivol_df[col].notna().any():
        print(f"Skipping column {i}")
        continue
    print(f"Processing column {i} out of {n_columns}")

    # Column labels come from a CSV header; coercing to int guarantees that
    # only a numeric identifier is interpolated into the SQL text.
    permno = int(col)
    ret_df = (
        conn.raw_sql(
        f"""
        SELECT date, ret
        FROM crsp.dsf
        WHERE date BETWEEN '{start}' AND '{end}'
        AND permno = {permno}
        """,
        date_cols = ['date'])
        .set_index('date')
        .merge(
            factors,
            left_index=True,
            right_index=True,
            how='left'
        )
    )
    ret_df['ex_ret'] = ret_df['ret'] - ret_df['rf']

    # Drop days with a missing return or no matching factor row once, up
    # front. Passing NaNs to OLS makes the fit fail or return NaN residuals,
    # which would discard the whole month because of a single missing day.
    reg_df = ret_df[['ex_ret'] + factor_cols].dropna().astype(float)
    if reg_df.empty:
        continue

    for idx in ivol_df.index:
        # Calendar-month window: first day of the month through the row label.
        month_start = idx.replace(day=1)
        in_month = (reg_df.index >= month_start) & (reg_df.index <= idx)
        window = reg_df[in_month]

        # With no more observations than parameters the regression fits
        # exactly and the residuals are ~0, which is not a volatility estimate.
        if len(window) <= n_params:
            continue

        try:
            # has_constant='add' always appends the intercept; the default
            # 'skip' silently omits it if any column is constant in the window.
            X = sm.add_constant(
                window[factor_cols].to_numpy(),
                has_constant='add'
            )
            y = window['ex_ret'].to_numpy()
            model = sm.OLS(y, X).fit()
            ivol_df.loc[idx, col] = np.std(model.resid)
        except (np.linalg.LinAlgError, ValueError) as e:
            # Only this month is skipped; later months are still attempted.
            print(f"Skipping {idx:%Y-%m} for column {i} due to {e}")

Skipping column 0
Skipping column 1
Skipping column 2
Skipping column 3
Skipping column 4
Skipping column 5
Skipping column 6
Skipping column 7
Skipping column 8
Skipping column 9
Skipping column 10
Skipping column 11
Skipping column 12
Skipping column 13
Skipping column 14
Skipping column 15
Skipping column 16
Skipping column 17
Processing column 18 out of 8425
Skipping column 19
Skipping column 20
Skipping column 21
Skipping column 22
Skipping column 23
Skipping column 24
Skipping column 25
Skipping column 26
Skipping column 27
Skipping column 28
Skipping column 29
Skipping column 30
Skipping column 31
Skipping column 32
Skipping column 33
Skipping column 34
Skipping column 35
Skipping column 36
Skipping column 37
Skipping column 38
Skipping column 39
Skipping column 40
Skipping column 41
Skipping column 42
Skipping column 43
Skipping column 44
Skipping column 45
Skipping column 46
Skipping column 47
Skipping column 48
Skipping column 49
Skipping column 50
Skipping column 51
Skipping

In [8]:
# Persist the updated panel back to the file it was loaded from, so that a
# later run can skip the columns that are already filled.
ivol_df.to_csv(path_or_buf='./ivol_part_1.csv')