In [None]:
#============================================= Libraries ==============================================================
import pandas as pd
import numpy as np
import math

In [None]:
#============================================= Data Path ==============================================================
# Fill in these locations before running the notebook. They are left as empty
# strings so the cell is valid Python and a forgotten path fails loudly at the
# read/write call instead of as a SyntaxError in this cell.
path_prices = ''  # TODO: CSV read into `prices` (needs date_num, Close, stock columns)
path_feat = ''    # TODO: not referenced by the cells visible in this notebook
path_spx = ''     # TODO: CSV read into `spx` (needs Date, Adj Close columns)
path_out = ''     # TODO: destination CSV for the final monthly table
path_out2 = ''    # TODO: not referenced by the cells visible in this notebook
# Base rolling window, in rows, for the volatility/beta columns; the helpers
# also compute a second window of twice this length.
num_days = 125

In [None]:
#============================================= Functions ==============================================================

def compute_returns(x):
    """Add simple and log period-over-period returns to a price frame.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain a sortable ``date`` column and a numeric ``PX_LAST``
        column.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ``date`` with two extra columns:
        ``returns``    = PX_LAST[t] / PX_LAST[t-1] - 1
        ``logreturns`` = log(PX_LAST[t]) - log(PX_LAST[t-1])
        The first row of both columns is NaN. The input frame is left
        untouched.
    """
    # sort_values without inplace returns a new object, so the caller's frame
    # (for example a group handed over by groupby.apply) is never mutated.
    out = x.sort_values('date')
    px = out['PX_LAST']
    out['returns'] = px / px.shift(1) - 1
    # Vectorised log. Non-positive prices are masked to NaN first so a bad
    # quote yields a missing log return instead of a math domain error.
    log_px = np.log(px.where(px > 0))
    out['logreturns'] = log_px - log_px.shift(1)
    return out

def compute_vol(x, nd):
    """Add rolling standard deviations of log returns over two windows.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain a sortable ``date`` column and a ``logreturns`` column.
    nd : int
        Base window length in rows. A second window of ``2 * nd`` rows is
        computed as well.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ``date`` with columns ``vol_{nd}d`` and
        ``vol_{2*nd}d`` added. Rows before a full window is available are
        NaN. The input frame is left untouched.
    """
    # Work on the sorted copy so the caller's frame is not mutated.
    out = x.sort_values('date')
    for window in (nd, 2 * nd):
        out['vol_{}d'.format(window)] = out['logreturns'].rolling(window).std()
    return out

def compute_beta(x, y, nd):
    """Join a stock frame with a benchmark frame and add beta-style columns.

    Parameters
    ----------
    x : pandas.DataFrame
        Stock rows with ``date``, ``returns``, ``vol_{nd}d`` and
        ``vol_{2*nd}d`` columns.
    y : pandas.DataFrame
        Benchmark rows with ``date``, ``SPX_returns``, ``SPX_vol_{nd}d`` and
        ``SPX_vol_{2*nd}d`` columns.
    nd : int
        Base rolling window in rows; a ``2 * nd`` window is computed too.

    Returns
    -------
    pandas.DataFrame
        Inner join of ``x`` and ``y`` on ``date`` plus, in this order:
        ``return_beta_adj`` (returns minus SPX_returns), ``beta_{nd}d``,
        ``beta_{2*nd}d`` (rolling correlation of the two return series scaled
        by the stock/benchmark volatility ratio), and
        ``vol_beta_ret_20d`` / ``_62d`` / ``_250d`` (rolling std of
        ``return_beta_adj``).
    """
    merged = x.merge(y, how='inner', on='date')
    merged['return_beta_adj'] = merged['returns'] - merged['SPX_returns']

    for window in (nd, 2 * nd):
        rolling_corr = merged['returns'].rolling(window).corr(merged['SPX_returns'])
        stock_vol = merged['vol_{}d'.format(window)]
        bench_vol = merged['SPX_vol_{}d'.format(window)]
        # Multiply first, then divide, matching left-to-right evaluation.
        merged['beta_{}d'.format(window)] = rolling_corr * stock_vol / bench_vol

    # Fixed row windows for the volatility of the benchmark-adjusted return.
    for window in (20, 62, 250):
        merged['vol_beta_ret_{}d'.format(window)] = merged['return_beta_adj'].rolling(window).std()

    return merged

def make_monthly_variables(x):
    """Collapse a daily frame to month-end rows and add return features.

    Parameters
    ----------
    x : pandas.DataFrame
        Daily rows with ``date`` (datetime-like, required by ``resample``),
        ``stock``, ``PX_LAST``, ``SPX``, the columns ``vol_beta_ret_20d``,
        ``vol_beta_ret_62d``, ``vol_beta_ret_250d``, and any other columns
        whose names start with ``vol`` or ``beta``. Column labels must be
        strings. Rows are assumed to already be in date order
        (NOTE(review): not enforced here - confirm against the caller).

    Returns
    -------
    pandas.DataFrame
        Indexed by month-end ``date``; each column holds the last non-null
        daily value of the month. Added columns, for H in {1, 3, 12} months:
        ``R{H}M`` (price return), ``SPX{H}M`` (benchmark return),
        ``IREV{H}M`` (their difference) and ``IREVVOL{H}M`` (the difference
        divided by ``vol_beta_ret_20d`` / ``_62d`` / ``_250d`` respectively),
        plus ``R1M_shifted`` and the helper column ``PX_SHIFTED``.
    """
    keep = [col for col in x if col.startswith(('vol', 'beta'))]
    daily = x[keep + ['date', 'stock', 'PX_LAST', 'SPX']].set_index('date', drop=True)
    # Price two rows ahead; used below as the base of the "shifted" return.
    daily = daily.assign(PX_SHIFTED=daily['PX_LAST'].shift(-2))

    # An explicit MonthEnd offset instead of the lowercase 'm' alias, which
    # newer pandas releases deprecate, so month-end binning stays
    # version-independent.
    y = daily.resample(pd.offsets.MonthEnd()).last()

    # Next month-end price relative to the price two rows after this month-end.
    y['R1M_shifted'] = y['PX_LAST'].shift(-1) / y['PX_SHIFTED'] - 1

    # (horizon in months, window of the vol_beta_ret_* column used to scale it)
    for months, vol_days in ((1, 20), (3, 62), (12, 250)):
        tag = '{}M'.format(months)
        y['R' + tag] = y['PX_LAST'] / y['PX_LAST'].shift(months) - 1
        y['SPX' + tag] = y['SPX'] / y['SPX'].shift(months) - 1
        y['IREV' + tag] = y['R' + tag] - y['SPX' + tag]
        y['IREVVOL' + tag] = y['IREV' + tag] / y['vol_beta_ret_{}d'.format(vol_days)]
    return y

def make_3monthly(x):
    """Downsample a frame to one row per three-month bin.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain a datetime-like ``date`` column (required by
        ``resample``).

    Returns
    -------
    pandas.DataFrame
        One row per three-month bin labelled by a month-end ``date``, each
        column holding the last non-null value in the bin. ``date`` is
        returned as a regular column. The input frame is left untouched.
    """
    # set_index without inplace keeps the caller's frame unchanged.
    indexed = x.set_index('date', drop=True)
    # An explicit offset object avoids the lowercase 'm' frequency alias,
    # which newer pandas releases deprecate.
    quarterly = indexed.resample(pd.offsets.MonthEnd(3)).last()
    return quarterly.reset_index()

In [None]:
#============================================= Data Import ============================================================
# Read the two input CSVs: the per-stock price file and the SPX file.
prices, spx = (pd.read_csv(csv_path) for csv_path in (path_prices, path_spx))

In [None]:
#============================================= First Cleaning =========================================================
# Keep only the columns used downstream and give both frames a common
# 'PX_LAST' price column and a datetime 'date' column.
prices = prices[['date_num', 'Close', 'stock']].rename(columns={'Close': 'PX_LAST'})
spx = spx[['Date', 'Adj Close']].rename(columns={'Date': 'date',
                                                 'Adj Close': 'PX_LAST'})
# date_num is parsed as a YYYYMMDD value; the SPX dates as YYYY-MM-DD strings.
prices['date'] = pd.to_datetime(prices['date_num'].astype('str'), format='%Y%m%d')
spx['date'] = pd.to_datetime(spx['date'], format='%Y-%m-%d')
prices = prices.drop(columns='date_num')
# Treat zero prices as missing. Assigning the whole masked column avoids
# chained assignment (prices.PX_LAST[...] = ...), which warns and does not
# write through under pandas Copy-on-Write.
prices['PX_LAST'] = prices['PX_LAST'].mask(prices['PX_LAST'] == 0)

In [None]:
#============================================= Make Returns and Volatilities ==========================================
# Per stock: add returns first, then the rolling volatilities built on them.
aggprices = (
    prices.groupby('stock', as_index=False).apply(compute_returns)
          .groupby('stock', as_index=False).apply(compute_vol, num_days)
)

# Same two steps for the benchmark, then prefix its columns so they do not
# collide with the stock columns when the frames are merged on 'date'.
spx_renames = {'PX_LAST': 'SPX', 'returns': 'SPX_returns'}
for window in (num_days, 2 * num_days):
    spx_renames['vol_{}d'.format(window)] = 'SPX_vol_{}d'.format(window)
spx = compute_vol(compute_returns(spx), num_days).rename(columns=spx_renames)
spx = spx.drop(columns='logreturns')

In [None]:
#============================================= Make Beta Variable =====================================================
# Join every stock with the benchmark and compute the beta columns per stock.
test = aggprices.groupby('stock', as_index=False).apply(compute_beta, spx, num_days)
# The benchmark volatility columns are only needed inside compute_beta.
drop_col = ['SPX_vol_{}d'.format(window) for window in (num_days, 2 * num_days)]
test = test.drop(columns=drop_col)

In [None]:
#============================================= Make Monthly ===========================================================
# Build the month-end feature rows separately for each stock.
stock_groups = test.groupby('stock', as_index=False)
final = stock_groups.apply(make_monthly_variables)

In [None]:
#============================================= Clean and Save =========================================================
# Turn the index levels into columns, then drop the 'level_0' column that
# produces and the PX_SHIFTED helper column before writing the CSV.
final = final.reset_index().drop(columns=['level_0', 'PX_SHIFTED'])
final.to_csv(path_out)