# Common Alpha Factors

### Loading Libraries

In [26]:
# Warnings
import warnings

# Path
from pathlib import Path

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd
import pandas_datareader.data as web

# Technical Analysis
import talib

# Rolling Factor Regressions
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

In [27]:
# Shorthand for slicing MultiIndex rows with .loc, e.g. data.loc[idx[ticker, :], :]
idx = pd.IndexSlice

sns.set_style('whitegrid')

# Silences every warning for the rest of the session (deprecation notices included)
warnings.filterwarnings('ignore')

# Decile cut points 0.1, 0.2, ..., 0.9
deciles = np.arange(.1, 1, .1).round(1)

### Loading Data

In [33]:
# Price panel; later cells group it by its 'ticker' index level
data = pd.read_hdf('data.h5', 'data/top500')

# Price history used for the single-series indicator illustrations
# (plot titles further down label it 'AAPL')
price_sample = pd.read_hdf('data.h5', 'data/sample')

### TA-Lib: Function Groups

In [36]:
function_groups = ['Overlap Studies',
                   'Momentum Indicators',
                   'Volume Indicators',
                   'Volatility Indicators',
                   'Price Transform',
                   'Cycle Indicators',
                   'Pattern Recognition',
                   'Statistic Functions',
                   'Math Transform',
                   'Math Operators']

In [38]:
talib_grps = talib.get_function_groups()

### Moving Averages

#### Simple Moving Average (SMA)

In [43]:
df = price_sample.loc['2012': '2013', ['close']]

In [45]:
for t in [5, 21, 63]:
    df[f'SMA_{t}'] = talib.SMA(df.close,
                               timeperiod=t)

In [47]:
ax = df.plot(figsize=(14, 5), rot=0)
sns.despine()
ax.set_xlabel('');
plt.show()

#### Exponential Moving Average (EMA)

In [50]:
df = price_sample.loc['2012', ['close']]

In [52]:
for t in [5, 21, 63]:
    df[f'EMA_{t}'] = talib.EMA(df.close,
                               timeperiod=t)

In [54]:
ax = df.plot(figsize=(14, 5), rot=0)
sns.despine()
ax.set_xlabel('');
plt.show()

#### Weighted Moving Average (WMA)

In [None]:
df = price_sample.loc['2012', ['close']]

In [59]:
for t in [5, 21, 63]:
    df[f'WMA_{t}'] = talib.WMA(df.close,
                               timeperiod=t)

In [61]:
ax = df.plot(figsize=(14, 5), rot=0)
sns.despine()
ax.set_xlabel('');
plt.show()

#### Double Exponential Moving Average (DEMA)

In [64]:
df = price_sample.loc['2012', ['close']]

In [66]:
for t in [5, 21, 63]:
    df[f'DEMA_{t}'] = talib.DEMA(df.close,
                                timeperiod=t)

In [90]:
ax = df.plot(figsize=(14, 5), rot=0)
sns.despine()
ax.set_xlabel('');
plt.show()

#### Triple Exponential Moving Average (TEMA)

In [93]:
df = price_sample.loc['2012', ['close']]

In [95]:
for t in [5, 21, 63]:
    df[f'TEMA_{t}'] = talib.TEMA(df.close,
                                timeperiod=t)

In [97]:
ax = df.plot(figsize=(14, 5), rot=0)
sns.despine()
ax.set_xlabel('');
plt.show()

#### Triangular Moving Average (TRIMA)

In [102]:
df = price_sample.loc['2012', ['close']]

In [104]:
for t in [5, 21, 63]:
    df[f'TRIMA_{t}'] = talib.TRIMA(df.close,
                                timeperiod=t)

In [106]:
ax = df.plot(figsize=(14, 5), rot=0)
sns.despine()
ax.set_xlabel('');
plt.show()

#### Kaufman Adaptive Moving Average (KAMA)

In [111]:
df = price_sample.loc['2012', ['close']]

In [113]:
for t in [5, 21, 63]:
    df[f'KAMA_{t}'] = talib.KAMA(df.close,
                                timeperiod=t)

In [115]:
ax = df.plot(figsize=(14, 5), rot=0)
sns.despine()
ax.set_xlabel('');
plt.show()

#### MESA Adaptive Moving Average (MAMA)

In [118]:
df = price_sample.loc['2012', ['close']]

In [120]:
len(talib.MAMA(df.close,
                         fastlimit=.5,
                         slowlimit=.05))

In [122]:
mama, fama = talib.MAMA(df.close,
                        fastlimit=.5,
                        slowlimit=.05)
df['mama'] = mama
df['fama'] = fama

In [124]:
ax = df.plot(figsize=(14, 5), rot=0)
sns.despine()
ax.set_xlabel('');
plt.show()

### Comparison

In [127]:
df = price_sample.loc['2012', ['close']]
t = 21

In [129]:
df['SMA'] = talib.SMA(df.close, timeperiod=t)
df['WMA'] = talib.WMA(df.close, timeperiod=t)
df['TRIMA'] = talib.TRIMA(df.close, timeperiod=t)

ax = df[['close', 'SMA', 'WMA', 'TRIMA']].plot(figsize=(16, 8), rot=0)

sns.despine()
ax.set_xlabel('')
plt.tight_layout();
plt.show()

In [136]:
df['EMA'] = talib.EMA(df.close, timeperiod=t)
df['DEMA'] = talib.DEMA(df.close, timeperiod=t)
df['TEMA'] = talib.TEMA(df.close, timeperiod=t)

ax = df[['close', 'EMA', 'DEMA', 'TEMA']].plot(figsize=(16, 8), rot=0)

ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

In [138]:
df['KAMA'] = talib.KAMA(df.close, timeperiod=t)
mama, fama = talib.MAMA(df.close,
                        fastlimit=.5,
                        slowlimit=.05)
df['MAMA'] = mama
df['FAMA'] = fama
ax = df[['close', 'KAMA', 'MAMA', 'FAMA']].plot(figsize=(16, 8), rot=0)

ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

In [140]:
fig, axes = plt.subplots(nrows=3, figsize=(14, 10), sharex=True, sharey=True)

df[['close', 'SMA', 'WMA', 'TRIMA']].plot(rot=0,
                                          ax=axes[0],
                                          title='Simple, Weighted and Triangular Moving Averages',
                                          lw=1, style=['-', '--', '-.', ':'], c='k')
df[['close', 'EMA', 'DEMA', 'TEMA']].plot(rot=0, ax=axes[1],
                                          title='Simple, Double, and Triple Exponential Moving Averages',
                                          lw=1, style=['-', '--', '-.', ':'], c='k')

df[['close', 'KAMA', 'MAMA', 'FAMA']].plot(rot=0, ax=axes[2],
                                          title='Mesa and Kaufman Adaptive Moving Averages',
                                          lw=1, style=['-', '--', '-.', ':'], c='k')
axes[2].set_xlabel('')
sns.despine()
plt.tight_layout();

### Overlap Studies

#### Bollinger Bands

In [143]:
# `df` is still the 2012 comparison frame built in the moving-average section
s = talib.BBANDS(df.close,
                 timeperiod=20,  # Number of periods (2 to 100000)
                 nbdevup=2,      # Deviation multiplier for upper band
                 nbdevdn=2,      # Deviation multiplier for lower band
                 matype=1        # TA-Lib MA_Type 1 = EMA (the default, 0, is SMA)
                 )

In [150]:
bb_bands = ['upper', 'middle', 'lower']

In [152]:
df = price_sample.loc['2012', ['close']]
df = df.assign(**dict(zip(bb_bands, s)))
ax = df.loc[:, ['close'] + bb_bands].plot(figsize=(16, 5), lw=1)

ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

#### Normalized Squeeze & Mean Reversion Indicators

In [157]:
fig, ax = plt.subplots(figsize=(16,5))
df.upper.div(df.close).plot(ax=ax, label='bb_up')
df.lower.div(df.close).plot(ax=ax, label='bb_low')
df.upper.div(df.lower).plot(ax=ax, label='bb_squeeze', rot=0)

plt.legend()
ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

In [159]:
def compute_bb_indicators(close, timeperiod=20, matype=0):
    """Compute price-normalized Bollinger Band indicators for one close series.

    Parameters
    ----------
    close : pd.Series
        Close prices of a single instrument.
    timeperiod : int, default 20
        Look-back window forwarded to ``talib.BBANDS``.
    matype : int, default 0
        Moving-average type forwarded to ``talib.BBANDS``.

    Returns
    -------
    pd.DataFrame
        Indexed like ``close`` with three columns:
        ``BB_UP`` (upper band / close - 1), ``BB_LOW`` (lower band / close - 1)
        and ``BB_SQUEEZE`` ((upper band - lower band) / close).
    """
    # Forward the caller's window; a literal 20 here would silently ignore
    # any non-default `timeperiod`.
    high, _mid, low = talib.BBANDS(close,
                                   timeperiod=timeperiod,
                                   matype=matype)
    bb_up = high / close - 1
    bb_low = low / close - 1
    squeeze = (high - low) / close
    return pd.DataFrame({'BB_UP': bb_up,
                         'BB_LOW': bb_low,
                         'BB_SQUEEZE': squeeze},
                        index=close.index)

In [195]:
# Compute the Bollinger Band indicators ticker by ticker and attach them as columns.
# group_keys=False (as in the other per-ticker cells) keeps the result on the
# original row index; otherwise newer pandas versions prepend the group key as an
# extra index level and the join no longer lines up with `data`.
data = (data.join(data
                  .groupby(level='ticker', group_keys=False)
                  .close
                  .apply(compute_bb_indicators)))

#### Visualizing Distribution

In [198]:
bb_indicators = ['BB_UP', 'BB_LOW', 'BB_SQUEEZE']

In [200]:
q = .01
with sns.axes_style('white'):
    fig, axes = plt.subplots(ncols=3, figsize=(14, 4), sharey=True, sharex=True)
    df_ = data[bb_indicators]
    df_ = df_.clip(df_.quantile(q), 
                   df_.quantile(1-q), axis=1)
    for i, indicator in enumerate(bb_indicators):
        sns.distplot(df_[indicator], ax=axes[i])
    fig.suptitle('Distribution of normalized Bollinger Band indicators', fontsize=12)

    sns.despine()
    fig.tight_layout()
    fig.subplots_adjust(top=.93);

#### Plotting Outlier Price Series

In [203]:
ncols = len(bb_indicators)
fig, axes = plt.subplots(ncols=ncols, figsize=(5*ncols, 4), sharey=True)

# One panel per indicator: overlay the rebased close series of the tickers that hold
# the indicator's largest and smallest value, and mark the date of each extreme.
for ax, indicator in zip(axes, bb_indicators):
    values = data[indicator]
    for extreme in (values.nlargest(1), values.nsmallest(1)):
        ticker, date = extreme.index[0]
        p = data.loc[idx[ticker, :], :].close.reset_index('ticker', drop=True)
        p = p.div(p.dropna().iloc[0])  # rebase to the first available close
        p.plot(ax=ax, label=ticker, rot=0)
        c = ax.get_lines()[-1].get_color()  # colour of the line just drawn
        ax.axvline(date, ls='--', c=c, lw=1)
    ax.set_title(indicator.upper())
    ax.legend()
    ax.set_xlabel('')

sns.despine()
fig.tight_layout();
plt.show()

#### Hilbert Transform - Instantaneous Trendline

In [206]:
df = price_sample.loc['2012', ['close']]

df['HT_TRENDLINE'] = talib.HT_TRENDLINE(df.close)

In [208]:
ax = df.plot(figsize=(16, 4), style=['-', '--'], rot=0)

ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

#### Computing Hilbert-Based Normalized Indicator

In [211]:
# Per-ticker Hilbert Transform trendline expressed relative to the close:
# HT_TRENDLINE / close - 1
data['HT'] = (data
              .groupby(level='ticker', group_keys=False)
              .close
              .apply(talib.HT_TRENDLINE)
              .div(data.close).sub(1))

In [213]:
q=0.005

with sns.axes_style('white'):
    sns.distplot(data.HT.clip(data.HT.quantile(q), data.HT.quantile(1-q)))
    sns.despine();

#### Parabolic SAR

In [218]:
df = price_sample.loc['2012', ['close', 'high', 'low']]

df['SAR'] = talib.SAR(df.high, df.low, 
                      acceleration=0.02, # common value
                      maximum=0.2)     

In [220]:
ax = df[['close', 'SAR']].plot(figsize=(16, 4), style=['-', '--'], title='Parabolic SAR')
ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

#### Normalized SAR Indicator

In [223]:
def compute_sar_indicator(x, acceleration=.02, maximum=0.2):
    """Return the Parabolic SAR of ``x`` relative to its close (``SAR / close - 1``).

    ``x`` must expose ``high``, ``low`` and ``close``; ``acceleration`` and
    ``maximum`` are forwarded unchanged to ``talib.SAR``.
    """
    parabolic_sar = talib.SAR(x.high,
                              x.low,
                              acceleration=acceleration,
                              maximum=maximum)
    relative_sar = parabolic_sar / x.close
    return relative_sar - 1

In [225]:
data['SAR'] = (data.groupby(level='ticker', group_keys=False)
                  .apply(compute_sar_indicator))

In [227]:
q=0.005

with sns.axes_style('white'):
    sns.distplot(data.SAR.clip(data.SAR.quantile(q), data.SAR.quantile(1-q)))
    sns.despine()

### Momentum Indicators

#### Average Directional Movement Index (ADX)

In [244]:
df = price_sample.loc['2012': '2013', ['high', 'low', 'close']]

In [246]:
df['PLUS_DM'] = talib.PLUS_DM(df.high, df.low, timeperiod=10)

df['MINUS_DM'] = talib.MINUS_DM(df.high, df.low, timeperiod=10)

In [248]:
ax = df[['close', 'PLUS_DM', 'MINUS_DM']].plot(figsize=(14, 4),
                                               secondary_y=[
                                                   'PLUS_DM', 'MINUS_DM'],
                                               style=['-', '--', '_'],
                                              rot=0)
ax.set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

#### Plus/Minus Directional Index (PLUS_DI/MINUS_DI)

In [251]:
df = price_sample.loc['2012': '2013', ['high', 'low', 'close']]

In [253]:
df['PLUS_DI'] = talib.PLUS_DI(df.high, df.low, df.close, timeperiod=14)

df['MINUS_DI'] = talib.MINUS_DI(df.high, df.low, df.close, timeperiod=14)

In [255]:
ax = df[['close', 'PLUS_DI', 'MINUS_DI']].plot(figsize=(14, 5), style=['-', '--', '_'], rot=0)

ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

#### Average directional movement index (ADX)

In [258]:
df = price_sample.loc[:, ['high', 'low', 'close']]

In [260]:
df['ADX'] = talib.ADX(df.high, 
                      df.low, 
                      df.close, 
                      timeperiod=14)

In [262]:
ax = df[['close', 'ADX']].plot(figsize=(14, 4), secondary_y='ADX', style=['-', '--'], rot=0)
ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

In [264]:
def compute_adx(x, timeperiod=14):
    """Return ``talib.ADX`` for the ``high``/``low``/``close`` columns of ``x``.

    ``timeperiod`` is forwarded unchanged to TA-Lib.
    """
    high, low, close = x.high, x.low, x.close
    return talib.ADX(high, low, close, timeperiod=timeperiod)

In [266]:
data['ADX'] = (data.groupby(level='ticker', group_keys=False)
                  .apply(compute_adx))

#### Visualize Distribution

In [287]:
with sns.axes_style("white"):
    sns.distplot(data.ADX)
    sns.despine();

#### Average Directional Movement Index Rating

In [290]:
df = price_sample.loc[:, ['high', 'low', 'close']]

In [292]:
df['ADXR'] = talib.ADXR(df.high,
                        df.low,
                        df.close,
                        timeperiod=14)

In [294]:
# Put ADXR on its own right-hand axis; secondary_y has to name the plotted column
# ('ADX' is not part of this frame, so it would leave both series on one scale).
ax = df[['close', 'ADXR']].plot(figsize=(14, 5),
                                secondary_y='ADXR',
                                style=['-', '--'], rot=0)
ax.set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

In [296]:
def compute_adxr(x, timeperiod=14):
    """Return ``talib.ADXR`` for the ``high``/``low``/``close`` columns of ``x``.

    ``timeperiod`` is forwarded unchanged to TA-Lib.
    """
    high, low, close = x.high, x.low, x.close
    return talib.ADXR(high, low, close, timeperiod=timeperiod)

In [298]:
data['ADXR'] = (data.groupby(level='ticker', group_keys=False)
                .apply(compute_adxr))

In [None]:
with sns.axes_style('white'):
    sns.distplot(data.ADXR)
    sns.despine();

#### Absolute Price Oscillator (APO)

In [301]:
df = price_sample.loc[:, ['close']]

In [303]:
df['APO'] = talib.APO(df.close,
                      fastperiod=12,
                      slowperiod=26,
                      matype=0)

In [307]:
ax = df.plot(figsize=(14,4), secondary_y='APO', rot=0, style=['-', '--'])
ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

### Percentage Price Oscillator (PPO)

In [310]:
df = price_sample.loc[:, ['close']]

In [312]:
df['PPO'] = talib.PPO(df.close,
                      fastperiod=12,
                      slowperiod=26,
                      matype=0)

In [314]:
# This frame holds only 'close' and 'PPO', so PPO is the sole right-hand-axis series
ax = df.plot(figsize=(14,4), secondary_y=['PPO'], rot=0,  style=['-', '--'])

ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

In [335]:
# PPO per ticker. group_keys=False keeps the result on the original row index so the
# assignment aligns with `data` (newer pandas versions otherwise prepend the ticker
# key as an extra index level).
# NOTE(review): matype=1 here vs. matype=0 in the single-series example above -
# confirm the difference is intended.
data['PPO'] = (data.groupby(level='ticker', group_keys=False)
               .close
               .apply(talib.PPO,
                      fastperiod=12,
                      slowperiod=26,
                      matype=1))

In [337]:
q = 0.001

with sns.axes_style("white"):
    sns.distplot(data.PPO.clip(lower=data.PPO.quantile(q),
                               upper=data.PPO.quantile(1-q)))
    sns.despine()

### Aroon Oscillator

#### Aroon Up/Down Indicator

In [340]:
df = price_sample.loc['2013', ['high', 'low', 'close']]

In [342]:
aroonup, aroondwn = talib.AROON(high=df.high,
                                low=df.low,
                                timeperiod=14)
df['AROON_UP'] = aroonup
df['AROON_DWN'] = aroondwn

In [344]:
fig, axes = plt.subplots(nrows=2, figsize=(14, 7), sharex=True)
df.close.plot(ax=axes[0], rot=0)
df[['AROON_UP', 'AROON_DWN']].plot(ax=axes[1], rot=0)

axes[1].set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

#### Aroon Oscillator

In [347]:
df = price_sample.loc['2013', ['high', 'low', 'close']]

In [349]:
df['AROONOSC'] = talib.AROONOSC(high=df.high,
                                low=df.low,
                                timeperiod=14)

In [351]:
ax = df[['close', 'AROONOSC']].plot(figsize=(14,4), rot=0, style=['-', '--'], secondary_y='AROONOSC')
ax.set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

In [353]:
# Aroon Oscillator per ticker (14-period), computed from each group's high/low.
# NOTE(review): the column is stored as 'AARONOSC' while the TA-Lib function is
# AROONOSC; the distribution plot in the next cell reads data.AARONOSC, so any
# rename has to change both places together.
data['AARONOSC'] = (data.groupby('ticker',
                                 group_keys=False)
                    .apply(lambda x: talib.AROONOSC(high=x.high,
                                                    low=x.low,
                                                    timeperiod=14)))

In [355]:
with sns.axes_style("white"):
    sns.distplot(data.AARONOSC)
    sns.despine()

#### Balance of Power (BOP)

In [358]:
df = price_sample.loc['2013', ['open', 'high', 'low', 'close']]

In [360]:
df['BOP'] = talib.BOP(open=df.open,
                      high=df.high,
                      low=df.low,
                      close=df.close)

In [362]:
axes = df[['close', 'BOP']].plot(figsize=(14, 7), rot=0, subplots=True, title=['AAPL', 'BOP'], legend=False)
axes[1].set_xlabel('')
sns.despine()
plt.tight_layout();
plt.show()

In [364]:
# Reusable per-ticker grouping for the indicator cells that follow.
# NOTE(review): the GroupBy is built from the frame `data` refers to at this point;
# cells that later rebind the name (data = data.join(...)) presumably leave
# `by_ticker` grouping this earlier frame - confirm before relying on columns
# that only exist after such a rebind.
by_ticker = data.groupby('ticker', group_keys=False)

In [366]:
data['BOP'] = (by_ticker
               .apply(lambda x: talib.BOP(x.open,
                                          x.high,
                                          x.low,
                                          x.close)))

In [368]:
q = 0.0005

with sns.axes_style("white"):
    sns.distplot(data.BOP.clip(lower=data.BOP.quantile(q),
                               upper=data.BOP.quantile(1-q)))
    sns.despine()

#### Commodity Channel Index (CCI)

In [387]:
df = price_sample.loc['2013', ['high', 'low', 'close']]

In [389]:
df['CCI'] = talib.CCI(high=df.high,
                      low=df.low,
                      close=df.close,
                      timeperiod=14)

In [391]:
axes = df[['close', 'CCI']].plot(figsize=(14, 7), 
                                 rot=0, 
                                 subplots=True, 
                                 title=['AAPL', 'CCI'], 
                                 legend=False)
axes[1].set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

In [393]:
data['CCI'] = (by_ticker
               .apply(lambda x: talib.CCI(x.high,
                                          x.low,
                                          x.close,
                                          timeperiod=14)))

In [395]:
with sns.axes_style('white'):
    sns.distplot(data.CCI)
    sns.despine();

#### Moving Average Convergence/Divergence (MACD)

In [398]:
df = price_sample.loc['2013', ['close']]

In [400]:
macd, macdsignal, macdhist = talib.MACD(df.close,
                                        fastperiod=12,
                                        slowperiod=26,
                                        signalperiod=9)
df['MACD'] = macd
df['MACDSIG'] = macdsignal
df['MACDHIST'] = macdhist

In [402]:
axes = df.plot(figsize=(14, 8),
               rot=0,
               subplots=True,
               title=['AAPL', 'MACD', 'MACDSIG', 'MACDHIST'],
               legend=False)

axes[-1].set_xlabel('')
sns.despine()
plt.tight_layout()

In [404]:
def compute_macd(close, fastperiod=12, slowperiod=26, signalperiod=9):
    """Run ``talib.MACD`` on ``close`` and return its three outputs as a frame.

    The result is indexed like ``close`` with the columns ``MACD``,
    ``MACD_SIGNAL`` and ``MACD_HIST``; the period arguments are forwarded
    unchanged to TA-Lib.
    """
    line, signal, histogram = talib.MACD(close,
                                         fastperiod=fastperiod,
                                         slowperiod=slowperiod,
                                         signalperiod=signalperiod)
    columns = {'MACD': line,
               'MACD_SIGNAL': signal,
               'MACD_HIST': histogram}
    return pd.DataFrame(columns, index=close.index)

In [406]:
# Compute MACD, signal and histogram ticker by ticker and attach them as columns.
# group_keys=False (as in the other per-ticker cells) keeps the result on the
# original row index; otherwise newer pandas versions prepend the group key as an
# extra index level and the join no longer lines up with `data`.
data = (data.join(data
                  .groupby(level='ticker', group_keys=False)
                  .close
                  .apply(compute_macd)))

In [408]:
macd_indicators = ['MACD', 'MACD_SIGNAL', 'MACD_HIST']

In [410]:
data[macd_indicators].corr()

In [412]:
q = .005

with sns.axes_style('white'):
    fig, axes = plt.subplots(ncols=3, figsize=(14, 4))
    df_ = data[macd_indicators]
    df_ = df_.clip(df_.quantile(q), 
                   df_.quantile(1-q), axis=1)
    for i, indicator in enumerate(macd_indicators):
        sns.distplot(df_[indicator], ax=axes[i])
    sns.despine()
    fig.tight_layout();

#### Chande Momentum Oscillator (CMO)

In [427]:
df = price_sample.loc['2013', ['close']]

In [429]:
df['CMO'] = talib.CMO(df.close, timeperiod=14)

In [431]:
ax = df.plot(figsize=(14, 4), rot=0, secondary_y=['CMO'], style=['-', '--'])

ax.set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

In [433]:
data['CMO'] = (by_ticker
               .apply(lambda x: talib.CMO(x.close,
                                          timeperiod=14)))

In [435]:
sns.distplot(data.CMO);

### Money Flow Index

In [438]:
df = price_sample.loc['2013', ['high', 'low', 'close', 'volume']]

In [440]:
df['MFI'] = talib.MFI(df.high, 
                      df.low, 
                      df.close, 
                      df.volume, 
                      timeperiod=14)

In [442]:
axes = df[['close', 'volume', 'MFI']].plot(figsize=(14, 8),
                                           rot=0,
                                           subplots=True,
                                           title=['Close', 'Volume', 'MFI'],
                                           legend=False)
axes[-1].set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

In [444]:
data['MFI'] = (by_ticker
               .apply(lambda x: talib.MFI(x.high,
                                          x.low,
                                          x.close,
                                          x.volume,
                                          timeperiod=14)))

In [450]:
with sns.axes_style('white'):
    sns.distplot(data.MFI)
    sns.despine()

#### Relative Strength Index (RSI)

In [459]:
df = price_sample.loc['2013', ['close']]

In [466]:
df['RSI'] = talib.RSI(df.close, timeperiod=14)

In [468]:
ax = df.plot(figsize=(14, 4), rot=0, secondary_y=['RSI'], style=['-', '--'])
ax.set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

In [471]:
data['RSI'] = (by_ticker
               .apply(lambda x: talib.RSI(x.close,
                                          timeperiod=14)))

In [473]:
with sns.axes_style('white'):
    sns.distplot(data.RSI)
    sns.despine();

#### Stochastic RSI (STOCHRSI)

In [479]:
df = price_sample.loc['2013', ['close']]

In [481]:
fastk, fastd = talib.STOCHRSI(df.close,
                              timeperiod=14, 
                              fastk_period=14, 
                              fastd_period=3, 
                              fastd_matype=0)
df['fastk'] = fastk
df['fastd'] = fastd

In [483]:
ax = df.plot(figsize=(14, 4),
             rot=0,
             secondary_y=['fastk', 'fastd'], style=['-', '--'])
ax.set_xlabel('')
sns.despine()
plt.tight_layout()

In [485]:
# Stochastic RSI per ticker. talib.STOCHRSI returns a (fastk, fastd) pair, as unpacked
# in the single-series example above; [0] keeps only fastk.
data['STOCHRSI'] = (by_ticker.apply(lambda x: talib.STOCHRSI(x.close,
                                                             timeperiod=14,
                                                             fastk_period=14,
                                                             fastd_period=3,
                                                             fastd_matype=0)[0]))

#### Stochastic (STOCH)

In [494]:
df = price_sample.loc['2013', ['high', 'low', 'close']]

In [496]:
slowk, slowd = talib.STOCH(df.high,
                           df.low,
                           df.close,
                           fastk_period=14,
                           slowk_period=3,
                           slowk_matype=0,
                           slowd_period=3,
                           slowd_matype=0)
df['STOCH'] = slowd / slowk

In [498]:
ax = df[['close', 'STOCH']].plot(figsize=(14, 4),
                                 rot=0,
                                 secondary_y='STOCH', style=['-', '--'])
ax.set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

In [500]:
def compute_stoch(x, fastk_period=14, slowk_period=3,
                  slowk_matype=0, slowd_period=3, slowd_matype=0):
    """Return ``slowd / slowk - 1`` from ``talib.STOCH`` for one instrument.

    ``x`` must expose ``high``, ``low`` and ``close``; every period / matype
    argument is forwarded unchanged to TA-Lib.
    """
    stoch_settings = dict(fastk_period=fastk_period,
                          slowk_period=slowk_period,
                          slowk_matype=slowk_matype,
                          slowd_period=slowd_period,
                          slowd_matype=slowd_matype)
    slowk, slowd = talib.STOCH(x.high, x.low, x.close, **stoch_settings)
    ratio = slowd / slowk
    return ratio - 1

In [None]:
data['STOCH'] = by_ticker.apply(compute_stoch)
# Blank out ratios whose magnitude exceeds 1e5 (presumably produced when the slowk
# denominator in compute_stoch is close to zero)
data.loc[data.STOCH.abs() > 1e5, 'STOCH'] = np.nan

In [None]:
q = 0.005

with sns.axes_style('white'):
    sns.distplot(data.STOCH.clip(lower=data.STOCH.quantile(q),
                             upper=data.STOCH.quantile(1-q)));

    sns.despine();

#### Ultimate Oscillator (ULTOSC)

In [517]:
df = price_sample.loc['2013', ['high', 'low', 'close']]

In [519]:
df['ULTOSC'] = talib.ULTOSC(df.high,
                            df.low,
                            df.close,
                            timeperiod1=7,
                            timeperiod2=14,
                            timeperiod3=28)

In [521]:
ax = df[['close', 'ULTOSC']].plot(figsize=(14, 4),
                                  rot=0,
                                  secondary_y='ULTOSC', style=['-', '--'])
ax.set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

In [523]:
def compute_ultosc(x, timeperiod1=7, timeperiod2=14, timeperiod3=28):
    """Return ``talib.ULTOSC`` for the ``high``/``low``/``close`` columns of ``x``.

    The three look-back windows are forwarded unchanged to TA-Lib.
    """
    lookbacks = dict(timeperiod1=timeperiod1,
                     timeperiod2=timeperiod2,
                     timeperiod3=timeperiod3)
    return talib.ULTOSC(x.high, x.low, x.close, **lookbacks)

In [525]:
data['ULTOSC'] = by_ticker.apply(compute_ultosc)

In [527]:
with sns.axes_style('white'):
    sns.distplot(data.ULTOSC)
    sns.despine();

#### Williams' %R (WILLR)

In [530]:
df = price_sample.loc['2013', ['high', 'low', 'close']]

In [532]:
df['WILLR'] = talib.WILLR(df.high,
                          df.low,
                          df.close,
                          timeperiod=14)

In [534]:
ax = df[['close', 'WILLR']].plot(figsize=(14, 4),
                                 rot=0,
                                 secondary_y='WILLR', style=['-', '--'])
ax.set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

In [536]:
data['WILLR'] = by_ticker.apply(lambda x: talib.WILLR(x.high, x.low, x.close, timeperiod=14))

In [538]:
with sns.axes_style('white'):
    sns.distplot(data.WILLR)
    sns.despine();

### Volume Indicators

#### Chaikin A/D Line

In [547]:
df = price_sample.loc['2013', ['high', 'low', 'close', 'volume']]

In [551]:
df['AD'] = talib.AD(df.high,
                    df.low,
                    df.close,
                    df.volume)

In [553]:
ax = df[['close', 'AD']].plot(figsize=(14, 4),
                              rot=0,
                              secondary_y='AD', style=['-', '--'])

ax.set_xlabel('')
sns.despine()
plt.tight_layout()

In [555]:
data['AD'] = by_ticker.apply(lambda x: talib.AD(x.high, x.low, x.close, x.volume)/x.volume.mean())

In [557]:
data.AD.replace((np.inf, -np.inf), np.nan).dropna().describe()

In [559]:
q = 0.005

AD = data.AD.replace((np.inf, -np.inf), np.nan).dropna()

with sns.axes_style('white'):
    sns.distplot(AD.clip(lower=AD.quantile(q),
                     upper=AD.quantile(1-q)));

    sns.despine();

#### Chaikin A/D Oscillator (ADOSC)

In [576]:
df = price_sample.loc['2013', ['high', 'low', 'close', 'volume']]

In [578]:
df['ADOSC'] = talib.ADOSC(df.high,
                          df.low,
                          df.close,
                          df.volume,
                          fastperiod=3,
                          slowperiod=10)

In [580]:
ax = df[['close', 'ADOSC']].plot(figsize=(14, 4),
                                 rot=0,
                                 secondary_y='ADOSC', style=['-', '--'])

ax.set_xlabel('')
sns.despine()
plt.tight_layout()

In [582]:
# Chaikin A/D Oscillator per ticker, scaled by the ticker's 14-row rolling mean volume
# (the first 13 rows of each ticker therefore have no complete window to divide by)
data['ADOSC'] = by_ticker.apply(lambda x: talib.ADOSC(x.high,
                                                      x.low,
                                                      x.close,
                                                      x.volume,
                                                      fastperiod=3,
                                                      slowperiod=10)/x.rolling(14).volume.mean())

In [584]:
q = 0.0001

with sns.axes_style('white'):
    sns.distplot(data.ADOSC.clip(lower=data.ADOSC.quantile(q),
                             upper=data.ADOSC.quantile(1-q)))
    sns.despine();

#### On Balance Volume (OBV)

In [587]:
df = price_sample.loc['2013', ['close', 'volume']]

In [589]:
df['OBV'] = talib.OBV(df.close,
                      df.volume)

In [591]:
ax = df[['close', 'OBV']].plot(figsize=(14, 4),
                               rot=0,
                               secondary_y='OBV', style=['-', '--'])
ax.set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

In [593]:
data['OBV'] = by_ticker.apply(lambda x: talib.OBV(x.close,
                                                  x.volume)/x.expanding().volume.mean())

In [595]:
q = 0.0025

with sns.axes_style('white'):
    sns.distplot(data.OBV.clip(lower=data.OBV.quantile(q),
                               upper=data.OBV.quantile(1-q)))
    sns.despine()

### Volatility Indicators

#### ATR

In [None]:
df = price_sample.loc['2013', ['high', 'low', 'close']]

In [None]:
df['ATR'] = talib.ATR(df.high,
                      df.low,
                      df.close,
                      timeperiod=14)

In [None]:
ax = df[['close', 'ATR']].plot(figsize=(14, 4),
                          rot=0,
                          secondary_y='ATR', style=['-', '--'])

ax.set_xlabel('')
sns.despine()
plt.tight_layout()
plt.show()

#### Computing Normalized Version of ATR using Rolling Mean of Price

In [616]:
data['ATR'] = by_ticker.apply(lambda x: talib.ATR(x.high,
                                                  x.low,
                                                  x.close,
                                                  timeperiod=14)/x.rolling(14).close.mean())

In [618]:
q = 0.001

with sns.axes_style('white'):
    sns.distplot(data.ATR.clip(upper=data.ATR.quantile(1-q)))
    sns.despine()

#### NATR

In [621]:
df = price_sample.loc['2013', ['high', 'low', 'close']]

In [623]:
df['NATR'] = talib.NATR(df.high,
                        df.low,
                        df.close,
                        timeperiod=14)

In [625]:
ax = df[['close', 'NATR']].plot(figsize=(14, 4),
                           rot=0,
                           secondary_y='NATR', style=['-', '--'])

ax.set_xlabel('')
sns.despine()
plt.tight_layout()

In [627]:
# NATR per ticker. The call and the apply() are now both closed (the cell previously
# ended with unbalanced parentheses); timeperiod=14 is spelled out to match the
# single-series example above.
data['NATR'] = by_ticker.apply(lambda x: talib.NATR(x.high,
                                                    x.low,
                                                    x.close,
                                                    timeperiod=14))

In [629]:
q = 0.001
sns.distplot(data.NATR.clip(upper=data.NATR.quantile(1-q)));

### Rolling Factor Betas

In [632]:
# Daily Fama-French 5-factor data from 2005 onwards; element [0] of the reader's result
# is used, and 'Mkt-RF' is renamed to 'MARKET'.
# NOTE(review): confirm the factor values are in the same units (percent vs. decimal)
# as the return column they are combined with in the rolling-regression cell below.
factor_data = (web.DataReader('F-F_Research_Data_5_Factors_2x3_daily', 'famafrench', 
                              start=2005)[0].rename(columns={'Mkt-RF': 'MARKET'}))

# Name the index 'date' so it can be merged with the price panel on that key
factor_data.index.names = ['date']

In [634]:
# Regressors: every factor column except the last one
# (presumably the risk-free rate `RF`, which the regression cell subtracts instead - verify)
factors = factor_data.columns[:-1]

factors

In [636]:
t = 1

# Name of the return column regressed on the factors: 'ret_01'
ret = f'ret_{t:02}'

# Rolling-regression window lengths (in rows)
windows = [21, 63, 252]

# For every window, fit a per-ticker rolling OLS of the excess return on the factors
# and join the coefficients to `data` with a '_<window>' column suffix.
for window in windows:
    print(window)
    betas = []
    # NOTE: the loop variable `df` overwrites the notebook-level `df` used by earlier cells
    for ticker, df in data.groupby('ticker', group_keys=False):
        model_data = df[[ret]].merge(factor_data, on='date').dropna()
        # Excess return over the risk-free rate.
        # NOTE(review): assumes `ret` and `RF` share the same units - TODO confirm
        model_data[ret] -= model_data.RF

        rolling_ols = RollingOLS(endog=model_data[ret], 
                                 exog=sm.add_constant(model_data[factors]), window=window)
        # Keep only the coefficient paths; the intercept column is renamed to 'ALPHA'
        factor_model = rolling_ols.fit(params_only=True).params.rename(columns={'const':'ALPHA'})
        # Add the ticker as an index level and swap it in front of the existing level
        result = factor_model.assign(ticker=ticker).set_index('ticker', append=True).swaplevel()
        betas.append(result)
    betas = pd.concat(betas).rename(columns=lambda x: f'{x}_{window:02}')
    # Rebinds `data` to a new frame on every pass
    data = data.join(betas)

### Size Proxy

In [649]:
# Price relative per ticker: back-filled close divided by its first value.
# - Series.bfill() replaces the deprecated fillna(method='bfill').
# - The divisor is taken from the back-filled series, so a ticker whose first close
#   is missing is scaled by its first available price instead of becoming all-NaN.
data['size_factor'] = by_ticker.close.apply(
    lambda x: x.bfill().pipe(lambda filled: filled.div(filled.iloc[0])))

In [651]:
data['size_proxy'] = data.marketcap.mul(data.size_factor).div(1e6)

### Persist Results

In [654]:
data = (data
        .drop(['open', 'high', 'low', 'close', 'volume', 'marketcap'], axis=1)
        .replace((np.inf, -np.inf), np.nan))

In [656]:
# Column summary with non-null counts; `show_counts` is the current name of the
# option formerly spelled `null_counts` (no longer accepted by recent pandas).
data.dropna(how='all').info(show_counts=True)

In [658]:
# Write the factor panel into the same HDF5 file the inputs were read from,
# under the key 'factors/common'
with pd.HDFStore('data.h5') as store:
    store.put('factors/common', data)