## Importing Libraries

In [40]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import plotly.express as px

import quandl
import yfinance as yf

from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.performance import factor_information_coefficient, mean_return_by_quantile, \
    factor_returns, factor_rank_autocorrelation, mean_information_coefficient

In [4]:
t_bill_yield = quandl.get(
    "USTREASURY/YIELD",
    authtoken="QNPX4RGB15s3fi5em9_Q",
    start_date='2010-01-01',
    end_date='2021-12-31'
)

# need to divide by 100 to get absolute values
t_bill_yield = t_bill_yield/100
t_bill_yield = t_bill_yield['1 MO']
t_bill_yield.name = '1M'

In [5]:
t_bill_yield

Date
2010-01-04    0.0005
2010-01-05    0.0003
2010-01-06    0.0003
2010-01-07    0.0002
2010-01-08    0.0002
               ...  
2021-12-27    0.0004
2021-12-28    0.0003
2021-12-29    0.0001
2021-12-30    0.0006
2021-12-31    0.0006
Name: 1M, Length: 3004, dtype: float64

In [6]:
# loading the mid-cap screen
screen = pd.read_csv('../data/mid_cap_screen.csv')
screen = screen[['ticker', 'market_cap', '5D', 'Price', 'Sector']]
screen

Unnamed: 0,ticker,market_cap,5D,Price,Sector
0,WPP,19534250000.0,-0.02388,79.71,Consumer Cyclical
1,ULTA,18910670000.0,-0.066919,349.42,Consumer Cyclical
2,FWONA,18423480000.0,-0.03813,55.75,Consumer Cyclical
3,AMCR,18321370000.0,-0.033953,11.95,Unknown Sector
4,WDC,18014920000.0,-0.132503,57.81,Technology
5,BIP,17855790000.0,0.014605,60.44,Utilities
6,PKX,17611260000.0,-0.073132,57.92,Basic Materials
7,CVE,17265620000.0,-0.053872,14.05,Energy
8,GNRC,17107480000.0,-0.114666,271.16,Industrials
9,VICI,17094750000.0,-0.047652,27.18,Real Estate


In [7]:
tickers = screen.ticker.tolist()
start_date = '2018-01-01'
end_date = '2022-01-01'

In [8]:
data_screen = screen[['ticker', 'Sector']]
data_screen.groupby('Sector').describe()

Unnamed: 0_level_0,ticker,ticker,ticker,ticker
Unnamed: 0_level_1,count,unique,top,freq
Sector,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Basic Materials,3,3,PKX,1
Communication Services,3,3,SJR,1
Consumer Cyclical,10,10,WPP,1
Consumer Defensive,2,2,NWL,1
Energy,9,9,CVE,1
Healthcare,3,3,MASI,1
Industrials,9,9,GNRC,1
Real Estate,2,2,VICI,1
Technology,4,4,WDC,1
Unknown,3,3,EMRAF,1


In [9]:
factor_index = pd.date_range(start=start_date, end=end_date, freq='B')
prices = pd.DataFrame(index=factor_index)
for ticker in tickers:
    try:
        ticker_data = yf.Ticker(ticker)
        price = ticker_data.history(start=start_date, end=end_date)
        price = price['Close']
        price.name = ticker
        prices = pd.merge(prices, price, how='left', left_index=True, right_index=True)
    except Exception:
        print(f'ticker - {ticker} not found')

- C-P-N: No data found, symbol may be delisted


In [10]:
prices = prices.fillna(method='ffill')

In [11]:
prices.index.freq = 'B'

In [12]:
factor = pd.DataFrame()
for ticker in tickers:
    try:
        short = quandl.get(f"FINRA/FNSQ_{ticker}", start_date=start_date, end_date=end_date)
        short = short['ShortVolume']/(short['TotalVolume'] - short['ShortExemptVolume'])
        short = short/short.shift(10)
        short.name = 'data'
        short_data = pd.DataFrame(index=factor_index)
        short_data.index.name = 'Date'
        short_data = pd.merge(short_data, pd.DataFrame(short), left_index=True, right_index=True, how='left')
        short_data['ticker'] = ticker
        short_data = short_data.reset_index().set_index(['Date', 'ticker'])
        factor = factor.append(short_data)
    except Exception:
        print(f'no data found for {ticker}')

no data found for EMRAF
no data found for AUOTY


In [13]:
factor = factor.sort_index()
factor.index.levels[0].freq = 'B'

In [14]:
factor_groups = screen[['ticker', 'Sector']]
factor_groups = factor_groups.set_index('ticker')
factor_groups = factor_groups['Sector'].to_dict()

In [53]:
factor_data = get_clean_factor_and_forward_returns(
    factor,
    prices,
    groupby=factor_groups,
    quantiles=5,
    periods=(1, 5),
    filter_zscore=None
)

Dropped 11.5% entries from factor data: 11.5% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!


## Clean Factor Data for Backtest

In [57]:
def compute_z_score(data: pd.Series):
    value = (data - data.mean())/data.std()
    value[abs(value) > 3] = np.sign(value) * value
    return value

grouper = factor_data.index.get_level_values('date')
factor_data['factor'] = factor_data['factor'].groupby(grouper).apply(compute_z_score)

In [58]:
factor_data

Unnamed: 0_level_0,Unnamed: 1_level_0,1D,5D,factor,group,factor_quantile
date,asset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-17,AQN,0.014679,0.012844,0.851256,Utilities,5
2018-01-17,BEP,-0.004764,-0.000595,-1.758220,Utilities,1
2018-01-17,BIP,-0.007636,-0.001851,1.077172,Utilities,5
2018-01-17,CLF,-0.009423,-0.016490,-0.103060,Basic Materials,3
2018-01-17,CLH,-0.030529,-0.002130,0.144829,Industrials,4
...,...,...,...,...,...,...
2021-12-23,ULTA,0.000000,0.031654,0.280600,Consumer Cyclical,4
2021-12-23,VICI,0.000000,0.035986,-0.046353,Real Estate,4
2021-12-23,WDC,0.000000,0.068137,0.740364,Technology,5
2021-12-23,WES,0.000000,0.024496,-0.342870,Energy,3


## Running Backtest


In [59]:
ic = factor_information_coefficient(factor_data=factor_data)
mean_ic = mean_information_coefficient(factor_data=factor_data)

In [60]:
pd.options.plotting.backend = "plotly"
fig = ic.boxplot()
fig.show()

In [61]:
mean_ic

1D   -0.015039
5D   -0.004468
dtype: float64

In [62]:
ic_fig = px.bar(mean_ic)
ic_fig.show()

In [63]:
quantile_returns, std_err_ret = mean_return_by_quantile(factor_data=factor_data)

In [64]:
quantile_returns

Unnamed: 0_level_0,1D,5D
factor_quantile,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.000552,-6.6e-05
2,0.000314,-6e-06
3,-8.7e-05,0.001346
4,-0.000226,-0.000119
5,-0.000541,-0.001052


In [65]:
t_stats_quantile_returns = quantile_returns/std_err_ret
t_stats_quantile_returns

Unnamed: 0_level_0,1D,5D
factor_quantile,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2.319657,-0.111613
2,1.270876,-0.010662
3,-0.304983,1.32099
4,-1.017785,-0.233381
5,-2.499651,-1.871542


In [73]:
quantile_plot_df = quantile_returns.reset_index().melt(id_vars=['factor_quantile'])
quantile_plot_df['factor_quantile'] = quantile_plot_df['factor_quantile'].astype(str)
quantile_plot_df = quantile_plot_df.set_index('factor_quantile')
fig = px.histogram(quantile_plot_df, x=quantile_plot_df.index, y="value",
                   color='variable', barmode='group',
                   height=400)
fig.show()

In [67]:
factor_return_corr = factor_rank_autocorrelation(factor_data=factor_data)
factor_return_corr

date
2018-01-17         NaN
2018-01-18    0.579525
2018-01-19    0.260191
2018-01-22    0.285106
2018-01-23    0.065310
                ...   
2021-12-17    0.324255
2021-12-20    0.132246
2021-12-21    0.418082
2021-12-22    0.344404
2021-12-23    0.428071
Name: 1, Length: 991, dtype: float64

In [70]:
factor_return_corr.mean()

0.28559525498212673

In [71]:
cumulative_returns = (1 + factor_returns(factor_data=factor_data)).cumprod()
cumulative_returns

Unnamed: 0_level_0,1D,5D
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-17,0.999695,1.002436
2018-01-18,0.998198,1.004928
2018-01-19,1.003990,1.020767
2018-01-22,1.002362,1.038436
2018-01-23,1.006235,1.049310
...,...,...
2021-12-17,0.550231,0.553298
2021-12-20,0.549351,0.550508
2021-12-21,0.548864,0.551573
2021-12-22,0.549678,0.551583


In [75]:
cumulative_returns.plot.line(title='Growth of Dollar - Factor Portfolio')

In [38]:
# add drawdown of factor portfolio
# returns : inception to date, monthly and average return