# 101 Formulaic Alphas

### Loading Libraries

In [3]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Warnings
import warnings

# Scikit-Learn
from sklearn.feature_selection import mutual_info_regression

# SciPy
from scipy.stats import spearmanr

# Technical Analysis
from talib import WMA

In [4]:
# Shorthand for building MultiIndex label slices.
idx= pd.IndexSlice

# Use seaborn's 'whitegrid' theme for all plots.
sns.set_style('whitegrid')

# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

### Functions

#### Cross-section

In [8]:
def rank(df):
    """Compute cross-sectional percentile ranks.

    Every row is ranked independently across its columns.

    Args:
        df: tickers in columns, sorted dates in rows.

    Returns:
        pd.DataFrame: percentile ranks with the same shape as ``df``.
    """
    ranked = df.rank(pct=True, axis='columns')
    return ranked

In [10]:
def scale(df, k=1):
    """
    Rescale every row so that its absolute values sum to ``k``.

    :param df: a pandas DataFrame (tickers in columns, dates in rows).
    :param k: scaling factor; the default of 1 reproduces the previous behavior.
    :return: a pandas DataFrame rescaled such that sum(abs(row)) = k for each row.
        Division by a zero row sum is not guarded against.
    """
    row_abs_sum = df.abs().sum(axis=1)
    # axis=0 aligns the per-row totals with the row index before dividing.
    return df.div(row_abs_sum, axis=0).mul(k)

#### Operators

In [13]:
def log(df):
    """Return ``log(1 + x)`` element-wise (``np.log1p``), not the plain natural log."""
    shifted_log = np.log1p(df)
    return shifted_log

In [15]:
def sign(df):
    """Return the element-wise sign (-1, 0 or 1) of ``df``."""
    signs = np.sign(df)
    return signs

In [17]:
def power(df, exp):
    """Raise every element of ``df`` to the power ``exp``."""
    raised = df.pow(exp)
    return raised

### Time Series

#### Pandas Implementation

In [20]:
def ts_lag(df: pd.DataFrame, t: int = 1) -> pd.DataFrame:
    """Shift every column down by ``t`` rows.

    Args:
        df: tickers in columns, sorted dates in rows.
        t: number of periods to lag.

    Returns:
        pd.DataFrame: the values observed ``t`` periods earlier.
    """
    lagged = df.shift(periods=t)
    return lagged

In [22]:
def ts_delta(df, period=1):
    """
    Difference each column against its own value ``period`` rows earlier.

    :param df: a pandas DataFrame.
    :param period: number of rows to look back.
    :return: a pandas DataFrame holding today's value minus the value 'period' days ago.
    """
    differenced = df.diff(periods=period)
    return differenced

In [24]:
def ts_sum(df: pd.DataFrame, window: int = 10) -> pd.DataFrame:
    """Rolling sum over a fixed-size window.

    Args:
        df (pd.DataFrame): tickers in columns, dates in rows.
        window      (int): number of rows per window.

    Returns:
        pd.DataFrame: the sum over the last 'window' rows.
    """
    windows = df.rolling(window)
    return windows.sum()

In [26]:
def ts_mean(df, window=10):
    """Rolling mean over a fixed-size window.

    Args:
        df (pd.DataFrame): tickers in columns, dates in rows.
        window      (int): number of rows per window.

    Returns:
        pd.DataFrame: the mean over the last 'window' rows.
    """
    windows = df.rolling(window)
    return windows.mean()

In [28]:
def ts_weighted_mean(df, period=10):
    """
    Apply TA-Lib's weighted moving average (WMA) to each column.

    :param df: a pandas DataFrame.
    :param period: the ``timeperiod`` passed to ``WMA``.
    :return: a pandas DataFrame with the column-wise WMA.
    """
    def _wma(column):
        return WMA(column, timeperiod=period)

    return df.apply(_wma)

In [30]:
def ts_std(df, window=10):
    """
    Rolling standard deviation.

    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the standard deviation over the past 'window' days.
    """
    windows = df.rolling(window)
    return windows.std()

In [32]:
def ts_rank(df, window=10):
    """
    Rolling rank of the most recent observation within its window.

    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the (non-normalized) rank of the latest value
        among the past 'window' values.
    """
    def _rank_of_latest(values):
        # Rank the whole window, then keep only the newest observation's rank.
        return values.rank().iloc[-1]

    return df.rolling(window).apply(_rank_of_latest)

In [34]:
def ts_product(df, window=10):
    """
    Rolling product.

    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the product of the past 'window' values.
    """
    windows = df.rolling(window)
    return windows.apply(np.prod)

In [36]:
def ts_min(df, window=10):
    """
    Rolling minimum.

    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the minimum over the past 'window' days.
    """
    windows = df.rolling(window)
    return windows.min()

In [38]:
def ts_max(df, window=10):
    """
    Rolling maximum.

    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the maximum over the past 'window' days.
    """
    windows = df.rolling(window)
    return windows.max()

In [40]:
def ts_argmax(df, window=10):
    """
    Locate the rolling maximum inside its window.

    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the 1-based position (``np.argmax`` + 1) of the
        maximum within each window.
    """
    zero_based = df.rolling(window).apply(np.argmax)
    return zero_based.add(1)

In [42]:
def ts_argmin(df, window=10):
    """
    Locate the rolling minimum inside its window.

    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the 1-based position (``np.argmin`` + 1) of the
        minimum within each window.
    """
    zero_based = df.rolling(window).apply(np.argmin)
    return zero_based.add(1)

In [44]:
def ts_corr(x, y, window=10):
    """
    Rolling correlation between two frames.

    :param x, y: pandas DataFrames.
    :param window: the rolling window.
    :return: a pandas DataFrame with the correlation of x and y over the past 'window' days.
    """
    windows = x.rolling(window)
    return windows.corr(y)

In [46]:
def ts_cov(x, y, window=10):
    """
    Rolling covariance between two frames.

    :param x, y: pandas DataFrames.
    :param window: the rolling window.
    :return: a pandas DataFrame with the covariance of x and y over the past 'window' days.
    """
    windows = x.rolling(window)
    return windows.cov(y)

### Loading Data

#### 500 Most-Traded Stocks

In [122]:
# Price/volume columns used throughout the notebook.
ohlcv = ['open', 'high', 'low', 'close', 'volume']

# Read the 'data/top500' table from data.h5, keep OHLCV plus the return and sector
# columns, rename 'ret_01' to 'returns', and sort by index.
data = (pd.read_hdf('data.h5', 'data/top500')
        .loc[:, ohlcv + ['ret_01', 'sector', 'ret_fwd']]
        .rename(columns={'ret_01': 'returns'})
        .sort_index())

In [124]:
# 20-period rolling mean of volume computed per ticker; dropping index level 0
# (the group key prepended by groupby) restores the original index for alignment.
adv20 = data.groupby('ticker').rolling(20).volume.mean().reset_index(0, drop=True)

In [126]:
# Attach the average volume as a new 'adv20' column.
data = data.assign(adv20=adv20)

In [128]:
# Cross-sectional percentile rank of every OHLCV column within each date, joined
# back with a '_rank' suffix. The default (axis=0) ranks tickers against each
# other inside a date group; axis=1 would instead rank the five OHLCV fields of
# a single row against one another.
data = data.join(data.groupby('date')[ohlcv].rank(pct=True), rsuffix='_rank')

In [130]:
# `null_counts` was deprecated in pandas 1.2 and removed in 2.0; `show_counts`
# is its replacement and prints the same non-null counts.
data.info(show_counts=True)

In [132]:
data.to_hdf('factors.h5', 'data')

### Input Data

In [135]:
# Pivot each field to wide format: one column per ticker.
o = data.open.unstack('ticker')
h = data.high.unstack('ticker')
l = data.low.unstack('ticker')
c = data.close.unstack('ticker')
v = data.volume.unstack('ticker')

# vwap is approximated here as the simple average of open, high, low and close.
vwap = o.add(h).add(l).add(c).div(4)
# 20-period average volume in wide format (rebinds the `adv20` name).
adv20 = v.rolling(20).mean()

# Returns in wide format.
r = data.returns.unstack('ticker')

### Evaluate Alphas

In [142]:
# Frame that collects each alpha as a column next to realized and forward returns.
alphas = data[['returns', 'ret_fwd']].copy()
# Per-alpha score dictionaries; `mi` receives the mutual-information scores below.
# NOTE(review): `ic` is not populated in the visible part of the notebook — presumably
# meant for information coefficients; confirm.
mi,ic = {}, {}

In [144]:
def get_mutual_info_score(returns, alpha, n=100000):
    """Estimate the mutual information between an alpha and returns.

    Args:
        returns: target values (used as ``y``).
        alpha: factor values (used as the single feature).
        n: maximum number of complete observations to sample.

    Returns:
        The mutual-information estimate from ``mutual_info_regression``.
        The sample is drawn without a fixed seed, so results vary between calls.
    """
    df = pd.DataFrame({'y': returns, 'alpha': alpha}).dropna()
    # DataFrame.sample raises ValueError when asked for more rows than exist
    # (without replacement), so cap the sample at the number of complete rows.
    df = df.sample(n=min(n, len(df)))
    return mutual_info_regression(y=df.y, X=df[['alpha']])[0]

### Alpha 001

In [147]:
def alpha001(c, r):
    """(rank(ts_argmax(power(((returns < 0)
        ? ts_std(returns, 20)
        : close), 2.), 5)) - 0.5)

    :param c: close prices, tickers in columns.
    :param r: returns, tickers in columns.
    :return: the alpha as a stacked Series with swapped index levels.
    """
    # DataFrame.mask returns a new frame; the previous in-place assignment
    # `c[r < 0] = ...` overwrote the caller's close prices, corrupting the input
    # of every alpha computed afterwards.
    base = c.mask(r < 0, ts_std(r, 20))
    # The formula subtracts 0.5 from the rank (it does not multiply by -0.5).
    return (rank(ts_argmax(power(base, 2), 5))
            .sub(0.5)
            .stack()
            .swaplevel())

In [149]:
alpha = 1

In [153]:
%%time
alphas[f'{alpha:03}'] = alpha001(c, r)

In [155]:
alphas.info()

In [157]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [159]:
sns.distplot(alphas[f'{alpha:03}']);
plt.show()

In [161]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas)

In [163]:
# Key the score by the current alpha number, as every other alpha section does.
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])
mi[alpha]

### Alpha 002

In [166]:
def alpha002(o, c, v):
    """(-1 * ts_corr(rank(ts_delta(log(volume), 2)), rank(((close - open) / open)), 6))

    Negative 6-period correlation between the ranked 2-period change in log
    volume and the ranked open-to-close return; infinities become NaN.
    """
    volume_change_rank = rank(ts_delta(log(v), 2))
    intraday_return_rank = rank(c.div(o).sub(1))
    neg_corr = -ts_corr(volume_change_rank, intraday_return_rank, 6)
    long_format = neg_corr.stack('ticker').swaplevel()
    return long_format.replace([-np.inf, np.inf], np.nan)

In [168]:
alpha = 2

In [170]:
%%time
alphas[f'{alpha:03}'] = alpha002(o, c, v)

In [172]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [174]:
sns.distplot(alphas[f'{alpha:03}']);

In [176]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas)

In [178]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])
# Display the score just stored, using the same key as the assignment above.
mi[alpha]

### Alpha 003

In [181]:
def alpha003(o, v):
    """(-1 * ts_corr(rank(open), rank(volume), 10))

    Negative 10-period correlation of the cross-sectional ranks of open and
    volume; infinities become NaN.
    """
    rank_corr = ts_corr(rank(o), rank(v), 10)
    long_format = (rank_corr
                   .stack('ticker')
                   .swaplevel()
                   .replace([-np.inf, np.inf], np.nan))
    return -long_format

In [183]:
alpha = 3

In [185]:
%%time
alphas[f'{alpha:03}'] = alpha003(o, v)

In [187]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [189]:
sns.distplot(alphas[f'{alpha:03}'].clip(lower=-1));

In [191]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [193]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])
mi[alpha]

### Alpha 004

In [196]:
def alpha004(l):
    """(-1 * Ts_Rank(rank(low), 9))

    Negative 9-period time-series rank of the cross-sectional rank of low.
    """
    rolling_rank = ts_rank(rank(l), 9)
    long_format = rolling_rank.stack('ticker').swaplevel()
    return -long_format

In [198]:
alpha = 4

In [253]:
%%time
alphas[f'{alpha:03}'] = alpha004(l)

In [255]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [257]:
sns.distplot(alphas[f'{alpha:03}']);

In [259]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [261]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])
mi[alpha]

### Alpha 005

In [263]:
def alpha005(o, vwap, c):
    """(rank((open - ts_mean(vwap, 10))) * (-1 * abs(rank((close - vwap)))))

    :param o: open prices, tickers in columns.
    :param vwap: volume-weighted average price, tickers in columns.
    :param c: close prices, tickers in columns.
    :return: the alpha as a stacked Series with swapped index levels.
    """
    open_vs_avg_vwap = rank(o.sub(ts_mean(vwap, 10)))
    # Take the absolute value first and negate afterwards; negating before
    # abs() (as done previously) silently dropped the minus sign.
    neg_abs_close_rank = rank(c.sub(vwap)).abs().mul(-1)
    return (open_vs_avg_vwap
            .mul(neg_abs_close_rank)
            .stack('ticker')
            .swaplevel())

In [265]:
alpha = 5

In [267]:
%%time
alphas[f'{alpha:03}'] = alpha005(o, vwap, c)

In [269]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [271]:
sns.distplot(alphas[f'{alpha:03}']);

In [273]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [275]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])
mi[alpha]

### Alpha 006

In [279]:
def alpha006(o, v):
    """(-ts_corr(open, volume, 10))

    Negative 10-period rolling correlation between open and volume.
    """
    corr = ts_corr(o, v, 10)
    long_format = corr.stack('ticker').swaplevel()
    return -long_format

In [281]:
alpha = 6

In [283]:
%%time
alphas[f'{alpha:03}'] = alpha006(o, v)

In [285]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [287]:
sns.distplot(alphas[f'{alpha:03}']);

In [289]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [291]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [293]:
mi[alpha]

### Alpha 007

In [296]:
def alpha007(c, v, adv20):
    """(adv20 < volume) 
        ? ((-ts_rank(abs(ts_delta(close, 7)), 60)) * sign(ts_delta(close, 7))) 
        : -1

    :param c: close prices, tickers in columns.
    :param v: volume, tickers in columns.
    :param adv20: 20-period average volume, tickers in columns.
    :return: the alpha as a stacked Series with swapped index levels.
    """
    delta7 = ts_delta(c, 7)
    # Negate the signal *before* applying the fallback. A leading unary minus
    # binds to the whole method chain, which turned the -1 fallback into +1.
    signal = ts_rank(abs(delta7), 60).mul(sign(delta7)).mul(-1)
    return (signal
            .where(adv20 < v, -1)
            .stack('ticker')
            .swaplevel())

In [298]:
alpha = 7

In [300]:
%%time
alphas[f'{alpha:03}'] = alpha007(c, v, adv20)

In [302]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [304]:
ax = sns.distplot(alphas[f'{alpha:03}'], kde=False)
ax.set_yscale('log')
ax.set_ylabel('Frequency (log scale)')
plt.tight_layout();
plt.show()

In [306]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [308]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [310]:
mi[alpha]

### Alpha 008

In [None]:
def alpha008(o, r):
    """-rank(((ts_sum(open, 5) * ts_sum(returns, 5)) - 
        ts_lag((ts_sum(open, 5) * ts_sum(returns, 5)),10)))

    Negative rank of the 10-period change in the product of the 5-period sums
    of open and returns.
    """
    sum_product = ts_sum(o, 5) * ts_sum(r, 5)
    change = sum_product - ts_lag(sum_product, 10)
    long_format = rank(change).stack('ticker').swaplevel()
    return -long_format

In [None]:
alpha = 8

In [313]:
%%time
alphas[f'{alpha:03}'] = alpha008(o, r)

In [315]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [317]:
sns.distplot(alphas[f'{alpha:03}']);

In [319]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [321]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [323]:
mi[alpha]

### Alpha 009

In [326]:
def alpha009(c):
    """(0 < ts_min(ts_delta(close, 1), 5)) ? ts_delta(close, 1) 
    : ((ts_max(ts_delta(close, 1), 5) < 0) 
    ? ts_delta(close, 1) : (-1 * ts_delta(close, 1)))

    Keeps the one-day price change when the last five changes were all positive
    or all negative, and flips its sign otherwise.
    """
    daily_change = ts_delta(c, 1)
    all_up = ts_min(daily_change, 5) > 0
    all_down = ts_max(daily_change, 5) < 0
    # Both branches of the nested conditional return the raw change, so the
    # two conditions can be merged with a logical OR.
    signal = daily_change.where(all_up | all_down, -daily_change)
    return signal.stack('ticker').swaplevel()

In [328]:
alpha = 9

In [330]:
%%time
alphas[f'{alpha:03}'] = alpha009(c)

In [332]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [334]:
q = 0.01

sns.distplot(alphas[f'{alpha:03}'].clip(lower=alphas[f'{alpha:03}'].quantile(q),
                                        upper=alphas[f'{alpha:03}'].quantile(1-q)));

In [336]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [338]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [340]:
mi[alpha]

In [342]:
pd.Series(mi)

### Alpha 010

In [345]:
def alpha010(c):
    """rank(((0 < ts_min(ts_delta(close, 1), 4)) 
        ? ts_delta(close, 1) 
        : ((ts_max(ts_delta(close, 1), 4) < 0)
            ? ts_delta(close, 1) 
            : (-1 * ts_delta(close, 1)))))

    :param c: close prices, tickers in columns.
    :return: the alpha as a stacked Series with swapped index levels.
    """
    close_diff = ts_delta(c, 1)
    # The inner branch must test the rolling *max* being negative (four straight
    # declines); it previously repeated the outer ts_min test, so it never fired.
    alpha = close_diff.where(ts_min(close_diff, 4) > 0,
                             close_diff.where(ts_max(close_diff, 4) < 0,
                                              -close_diff))

    return (rank(alpha)
            .stack('ticker')
            .swaplevel())

In [347]:
alpha = 10

In [349]:
%%time
alphas[f'{alpha:03}'] = alpha010(c)

In [351]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [353]:
sns.distplot(alphas[f'{alpha:03}']);

In [355]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [357]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [362]:
mi[alpha]

In [364]:
pd.Series(mi).to_csv('mi.csv')

### Alpha 011

In [410]:
def alpha011(c, vwap, v):
    """(rank(ts_max((vwap - close), 3)) +
        rank(ts_min((vwap - close), 3))) *
        rank(ts_delta(volume, 3))

    Sum of the ranked 3-period max and min of (vwap - close), multiplied by the
    ranked 3-period change in volume.
    """
    spread = vwap.sub(c)
    spread_extremes = rank(ts_max(spread, 3)).add(rank(ts_min(spread, 3)))
    volume_change_rank = rank(ts_delta(v, 3))
    return spread_extremes.mul(volume_change_rank).stack('ticker').swaplevel()

In [412]:
alpha = 11

In [414]:
%%time
alphas[f'{alpha:03}'] = alpha011(c, vwap, v)

In [416]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [418]:
sns.distplot(alphas[f'{alpha:03}']);

In [420]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [422]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [424]:
mi[alpha]

### Alpha 012

In [427]:
def alpha012(v, c):
    """(sign(ts_delta(volume, 1)) * 
            (-1 * ts_delta(close, 1)))

    Direction of the one-day volume change times the negated one-day price change.
    """
    volume_direction = sign(ts_delta(v, 1))
    neg_price_change = -ts_delta(c, 1)
    signal = volume_direction.mul(neg_price_change)
    return signal.stack('ticker').swaplevel()

In [429]:
alpha = 12

In [431]:
%%time
alphas[f'{alpha:03}'] = alpha012(v, c)

In [433]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [435]:
q = 0.01

sns.distplot(alphas[f'{alpha:03}'].clip(lower=alphas[f'{alpha:03}'].quantile(q),
                                        upper=alphas[f'{alpha:03}'].quantile(1-q)));

In [437]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [439]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [441]:
mi[alpha]

### Alpha 013

In [444]:
def alpha013(c, v):
    """-rank(ts_cov(rank(close), rank(volume), 5))

    Negative rank of the 5-period covariance between ranked close and ranked volume.
    """
    rank_cov = ts_cov(rank(c), rank(v), 5)
    long_format = rank(rank_cov).stack('ticker').swaplevel()
    return -long_format

In [446]:
alpha = 13

In [448]:
%%time
alphas[f'{alpha:03}'] = alpha013(c, v)

In [450]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [452]:
sns.distplot(alphas[f'{alpha:03}']);

In [454]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [456]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [458]:
mi[alpha]

In [460]:
pd.Series(mi).to_csv('mi.csv')

### Alpha 014

In [467]:
def alpha014(o, v, r):
    """
    (-rank(ts_delta(returns, 3))) * ts_corr(open, volume, 10))

    Negative rank of the 3-period change in returns, multiplied by the 10-period
    open/volume correlation (infinities treated as NaN).
    """
    open_volume_corr = ts_corr(o, v, 10).replace([-np.inf, np.inf], np.nan)
    return_change_rank = rank(ts_delta(r, 3))
    signal = -return_change_rank.mul(open_volume_corr)
    return signal.stack('ticker').swaplevel()

In [469]:
alpha = 14

In [473]:
%%time
alphas[f'{alpha:03}'] = alpha014(o, v, r)

In [475]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [477]:
sns.distplot(alphas[f'{alpha:03}']);

In [479]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [481]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [483]:
mi[alpha]

### Alpha 015

In [486]:
def alpha015(h, v):
    """(-1 * ts_sum(rank(ts_corr(rank(high), rank(volume), 3)), 3))

    Negative 3-period sum of the ranked 3-period correlation between ranked
    high and ranked volume (infinities treated as NaN).
    """
    rank_corr = ts_corr(rank(h), rank(v), 3).replace([-np.inf, np.inf], np.nan)
    signal = -ts_sum(rank(rank_corr), 3)
    return signal.stack('ticker').swaplevel()

In [488]:
alpha = 15

In [490]:
%%time
alphas[f'{alpha:03}'] = alpha015(h, v)

In [492]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [494]:
sns.distplot(alphas[f'{alpha:03}']);

In [496]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [508]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [510]:
mi[alpha]

### Alpha 016

In [515]:
def alpha016(h, v):
    """(-1 * rank(ts_cov(rank(high), rank(volume), 5)))

    Negative rank of the 5-period covariance between ranked high and ranked volume.
    """
    rank_cov = ts_cov(rank(h), rank(v), 5)
    long_format = rank(rank_cov).stack('ticker').swaplevel()
    return -long_format

In [517]:
alpha = 16

In [519]:
%%time
alphas[f'{alpha:03}'] = alpha016(h, v)

In [521]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [523]:
sns.distplot(alphas[f'{alpha:03}']);

In [525]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [527]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [529]:
mi[alpha]

In [587]:
pd.Series(mi).to_csv('mi.csv')

### Alpha 017

In [8]:
def alpha017(c, v):
    """(((-1 * rank(ts_rank(close, 10))) * rank(ts_delta(ts_delta(close, 1), 1))) *rank(ts_rank((volume / adv20), 5)))

    adv20 is derived inside the function as the 20-period mean of volume.
    """
    adv20 = ts_mean(v, 20)
    price_rank = rank(ts_rank(c, 10))
    acceleration_rank = rank(ts_delta(ts_delta(c, 1), 1))
    relative_volume_rank = rank(ts_rank(v.div(adv20), 5))
    product = price_rank.mul(acceleration_rank).mul(relative_volume_rank)
    return -(product.stack('ticker').swaplevel())

In [10]:
alpha = 17

In [12]:
%%time
alphas[f'{alpha:03}'] = alpha017(c, v)

In [14]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [16]:
sns.distplot(alphas[f'{alpha:03}']);

In [18]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [20]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [22]:
mi[alpha]

### Alpha 018

In [25]:
def alpha018(o, c):
    """-rank((ts_std(abs((close - open)), 5) + (close - open)) +
            ts_corr(close, open,10))

    Negative rank of: 5-period std of |close - open|, plus (close - open), plus
    the 10-period close/open correlation (infinities treated as NaN).
    """
    body = c.sub(o)
    close_open_corr = ts_corr(c, o, 10).replace([-np.inf, np.inf], np.nan)
    combined = ts_std(body.abs(), 5).add(body).add(close_open_corr)
    return -(rank(combined).stack('ticker').swaplevel())

In [27]:
alpha = 18

In [31]:
%%time
alphas[f'{alpha:03}'] = alpha018(o, c)

In [33]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [35]:
sns.distplot(alphas[f'{alpha:03}']);

In [37]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [39]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [41]:
mi[alpha]

### Alpha 019

In [44]:
def alpha019(c, r):
    """((-1 * sign(((close - ts_lag(close, 7)) + ts_delta(close, 7)))) * 
    (1 + rank((1 + ts_sum(returns,250)))))

    ``close - ts_lag(close, 7)`` equals ``ts_delta(close, 7)``, so the 7-period
    change is simply added to itself.
    """
    change7 = ts_delta(c, 7)
    direction = sign(change7 + change7)
    long_run_weight = 1 + rank(1 + ts_sum(r, 250))
    return -(direction.mul(long_run_weight).stack('ticker').swaplevel())

In [48]:
alpha = 19

In [50]:
%%time
alphas[f'{alpha:03}'] = alpha019(c, r)

In [52]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [54]:
sns.distplot(alphas[f'{alpha:03}']);

In [56]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [58]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [60]:
mi[alpha]

In [62]:
pd.Series(mi).to_csv('mi.csv')

### Alpha 020

In [65]:
def alpha020(o, h, l, c):
    """-rank(open - ts_lag(high, 1)) * 
        rank(open - ts_lag(close, 1)) * 
        rank(open -ts_lag(low, 1))

    Negated product of the ranks of today's open relative to yesterday's high,
    close and low.
    """
    gap_vs_high = rank(o - ts_lag(h, 1))
    gap_vs_close = rank(o - ts_lag(c, 1))
    gap_vs_low = rank(o - ts_lag(l, 1))
    signal = gap_vs_high.mul(gap_vs_close).mul(gap_vs_low).mul(-1)
    return signal.stack('ticker').swaplevel()

In [67]:
alpha = 20

In [69]:
%%time
alphas[f'{alpha:03}'] = alpha020(o, h, l, c)

In [71]:
alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [73]:
sns.distplot(alphas[f'{alpha:03}']);

In [75]:
g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [79]:
mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [77]:
mi[alpha]

### Alpha 021

In [47]:
def alpha021(c, v):
    """ts_mean(close, 8) + ts_std(close, 8) < ts_mean(close, 2)
        ? -1
        : (ts_mean(close,2) < ts_mean(close, 8) - ts_std(close, 8)
            ? 1
            : (volume / adv20 < 1
                ? -1
                : 1))

    :param c: close prices, tickers in columns.
    :param v: volume, tickers in columns (adv20 is derived as its 20-period mean).
    :return: the alpha (values -1 or 1) as a stacked Series with swapped index levels.
    """
    sma2 = ts_mean(c, 2)
    sma8 = ts_mean(c, 8)
    std8 = ts_std(c, 8)

    cond_1 = sma8.add(std8) < sma2
    # Lower band: mean minus one std. The previous `sma8 + std8 > sma2` was the
    # complement of cond_1, so the volume condition was practically unreachable.
    cond_2 = sma2 < sma8.sub(std8)
    cond_3 = v.div(ts_mean(v, 20)) < 1

    # np.select evaluates the conditions in order; the first match wins.
    alpha = pd.DataFrame(np.select(condlist=[cond_1, cond_2, cond_3],
                                   choicelist=[-1, 1, -1], default=1),
                         index=c.index,
                         columns=c.columns)

    return (alpha
            .stack('ticker')
            .swaplevel())

In [49]:
alpha = 21

In [51]:
# %%time
# alphas[f'{alpha:03}'] = alpha021(c, v)

In [53]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [55]:
# alphas[f'{alpha:03}'].value_counts()

In [57]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [None]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [None]:
# mi[alpha]

### Alpha 022

In [60]:
def alpha022(h, c, v):
    """-(ts_delta(ts_corr(high, volume, 5), 5) * 
        rank(ts_std(close, 20)))

    Negated 5-period change in the 5-period high/volume correlation (infinities
    treated as NaN), weighted by the rank of 20-period close volatility.
    """
    high_volume_corr = ts_corr(h, v, 5).replace([-np.inf, np.inf], np.nan)
    corr_change = ts_delta(high_volume_corr, 5)
    volatility_rank = rank(ts_std(c, 20))
    signal = corr_change.mul(volatility_rank).mul(-1)
    return signal.stack('ticker').swaplevel()

In [62]:
alpha = 22

In [64]:
# %%time
# alphas[f'{alpha:03}'] = alpha022(h, c, v)

In [66]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [68]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [70]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [72]:
# mi[alpha]

In [74]:
# pd.Series(mi).to_csv('mi.csv')

### Alpha 023

In [82]:
def alpha023(h, c):
    """((ts_mean(high, 20) < high)
            ? (-1 * ts_delta(high, 2))
            : 0

    ``c`` is accepted but not used by the computation.
    """
    above_average = ts_mean(h, 20) < h
    neg_change = ts_delta(h, 2).mul(-1)
    signal = neg_change.where(above_average, 0)
    return signal.stack('ticker').swaplevel()

In [85]:
alpha = 23

In [87]:
# %%time
# alphas[f'{alpha:03}'] = alpha023(h, c)

In [None]:
# Disabled like the cell that computes this column: with the assignment commented
# out, the '023' column does not exist and this line would raise KeyError.
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [102]:
# q = 0.025

# sns.distplot(alphas[f'{alpha:03}'].clip(lower=alphas[f'{alpha:03}'].quantile(q),
#                                         upper=alphas[f'{alpha:03}'].quantile(1-q)));

In [104]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [106]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [108]:
# mi[alpha]

### Alpha 024

In [111]:
def alpha024(c):
    """((((ts_delta((ts_mean(close, 100)), 100) / ts_lag(close, 100)) <= 0.05)  
        ? (-1 * (close - ts_min(close, 100))) 
        : (-1 * ts_delta(close, 3)))

    Uses the negated distance from the 100-period low when the relative change
    in the 100-period mean is at most 5%, otherwise the negated 3-period change.
    """
    trend_change = ts_delta(ts_mean(c, 100), 100) / ts_lag(c, 100)
    slow_trend = trend_change <= 0.05
    neg_distance_from_low = c.sub(ts_min(c, 100)).mul(-1)
    signal = neg_distance_from_low.where(slow_trend, -ts_delta(c, 3))
    return signal.stack('ticker').swaplevel()

In [113]:
alpha = 24

In [115]:
# %%time
# alphas[f'{alpha:03}'] = alpha024(c)

In [117]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [119]:
# q = 0.01

# sns.distplot(alphas[f'{alpha:03}'].clip(lower=alphas[f'{alpha:03}'].quantile(q),
#                                         upper=alphas[f'{alpha:03}'].quantile(1-q)));

In [121]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [123]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [139]:
# mi[alpha]

### Alpha 025

In [142]:
def alpha025(h, c, r, vwap, adv20):
    """rank((-1 * returns) * adv20 * vwap * (high - close))

    Rank of the negated product of returns, average volume, vwap and the
    high-minus-close spread.
    """
    product = r.mul(adv20).mul(vwap).mul(h.sub(c))
    return rank(-product).stack('ticker').swaplevel()

In [144]:
alpha = 25

In [146]:
# %%time
# alphas[f'{alpha:03}'] = alpha025(h, c, r, vwap, adv20)

In [148]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [150]:
# sns.distplot(alphas[f'{alpha:03}']);

In [152]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [154]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [156]:
# mi[alpha]

In [160]:
# pd.Series(mi).to_csv('mi.csv')

### Alpha 026

In [185]:
def alpha026(h, v):
    """(-1 * ts_max(ts_corr(ts_rank(volume, 5), ts_rank(high, 5), 5), 3))

    Negated 3-period max of the 5-period correlation between the time-series
    ranks of volume and high (infinities treated as NaN).
    """
    rank_corr = ts_corr(ts_rank(v, 5), ts_rank(h, 5), 5)
    cleaned = rank_corr.replace([-np.inf, np.inf], np.nan)
    signal = ts_max(cleaned, 3).mul(-1)
    return signal.stack('ticker').swaplevel()

In [187]:
alpha = 26

In [189]:
# %%time
# alphas[f'{alpha:03}'] = alpha026(h, v)

In [191]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [193]:
# sns.distplot(alphas[f'{alpha:03}']);

In [195]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [197]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [199]:
# mi[alpha]

### Alpha 027

In [202]:
def alpha027(v, vwap):
    """((0.5 < rank(ts_mean(ts_corr(rank(volume), rank(vwap), 6), 2))) 
            ? -1
            : 1)

    Returns 1 where the ranked correlation is at most 0.5 and -1 where it is
    larger; positions with a missing rank come out as (negative) zero.
    """
    corr_rank = rank(ts_mean(ts_corr(rank(v), rank(vwap), 6), 2))
    # 1.0 where a rank exists, 0.0 where it is missing.
    available = corr_rank.notnull().astype(float)
    signal = available.where(corr_rank <= 0.5, -available)
    return signal.stack('ticker').swaplevel()

In [204]:
alpha = 27

In [210]:
# %%time
# alphas[f'{alpha:03}'] = alpha027(v, vwap)

In [212]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [214]:
# sns.distplot(alphas[f'{alpha:03}']);

In [216]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [218]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [220]:
# mi[alpha]

### Alpha 028

In [223]:
def alpha028(h, l, c, v, adv20):
    """scale(((ts_corr(adv20, low, 5) + (high + low) / 2) - close))

    Infinite correlations are replaced by 0 before the sum is scaled.
    ``v`` is accepted but not used by the computation.
    """
    volume_low_corr = ts_corr(adv20, l, 5).replace([-np.inf, np.inf], 0)
    midpoint_minus_close = h.add(l).div(2).sub(c)
    scaled = scale(volume_low_corr.add(midpoint_minus_close))
    return scaled.stack('ticker').swaplevel()

In [225]:
alpha = 28

In [227]:
# %%time
# alphas[f'{alpha:03}'] = alpha028(h, l, c, v, adv20)

In [229]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [240]:
# sns.distplot(alphas[f'{alpha:03}']);

In [242]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [244]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [246]:
# mi[alpha]

In [250]:
# pd.Series(mi).to_csv('mi.csv')

### Alpha 029

In [253]:
def alpha029(c, r):
    """(ts_min(ts_product(rank(rank(scale(log(ts_sum(ts_min(rank(rank((-1 * 
            rank(ts_delta((close - 1),5))))), 2), 1))))), 1), 5)
        + ts_rank(ts_lag((-1 * returns), 6), 5))

    :param c: close prices, tickers in columns.
    :param r: returns, tickers in columns.
    :return: the alpha as a stacked Series with swapped index levels.
    """
    ranked_change = rank(rank(-rank(ts_delta((c - 1), 5))))
    # The formula takes a 2-period rolling *minimum* here. ts_sum(..., 1) and
    # ts_product(..., 1) are identities over a one-period window and are
    # omitted; the previous code used a 2-period sum instead of the minimum.
    smoothed = ts_min(ranked_change, 2)
    return (ts_min(rank(rank(scale(log(smoothed)))), 5)
            .add(ts_rank(ts_lag((-1 * r), 6), 5))
            .stack('ticker')
            .swaplevel())

In [273]:
alpha = 29

In [275]:
# %%time
# alphas[f'{alpha:03}'] = alpha029(c, r)

In [277]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [279]:
# sns.distplot(alphas[f'{alpha:03}']);

In [281]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [283]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [285]:
# mi[alpha]

### Alpha 030

In [288]:
def alpha030(c, v):
    """(((1.0 - rank(((sign((close - ts_lag(close, 1))) +
            sign((ts_lag(close, 1) - ts_lag(close, 2)))) +
            sign((ts_lag(close, 2) - ts_lag(close, 3)))))) *
            ts_sum(volume, 5)) / ts_sum(volume, 20))

    One minus the rank of the summed signs of the last three daily changes,
    weighted by the ratio of 5-period to 20-period volume.
    """
    daily_change = ts_delta(c, 1)
    sign_sum = (sign(daily_change)
                .add(sign(ts_lag(daily_change, 1)))
                .add(sign(ts_lag(daily_change, 2))))
    reversal_score = rank(sign_sum).mul(-1).add(1)
    signal = reversal_score.mul(ts_sum(v, 5)).div(ts_sum(v, 20))
    return signal.stack('ticker').swaplevel()

In [292]:
alpha = 30

In [294]:
# %%time
# alphas[f'{alpha:03}'] = alpha030(c, v)

In [296]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [298]:
# sns.distplot(alphas[f'{alpha:03}']);

In [300]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [302]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [304]:
# mi[alpha]

### Alpha 031

In [82]:
def alpha031(l, c, adv20):
    """((rank(rank(rank(ts_weighted_mean((-1 * rank(rank(ts_delta(close, 10)))), 10)))) +
        rank((-1 * ts_delta(close, 3)))) + sign(scale(ts_corr(adv20, low, 12))))

    Sum of three terms: a triple-ranked weighted mean of the negated ranked
    10-period change, the rank of the negated 3-period change, and the sign of
    the scaled 12-period adv20/low correlation (infinities treated as NaN).
    """
    neg_momentum_rank = rank(rank(ts_delta(c, 10))).mul(-1)
    decayed_term = rank(rank(rank(ts_weighted_mean(neg_momentum_rank, 10))))
    reversal_term = rank(ts_delta(c, 3).mul(-1))
    volume_low_corr = ts_corr(adv20, l, 12).replace([-np.inf, np.inf], np.nan)
    corr_term = sign(scale(volume_low_corr))
    signal = decayed_term.add(reversal_term).add(corr_term)
    return signal.stack('ticker').swaplevel()

In [84]:
alpha = 31

In [None]:
%%time
alphas[f'{alpha:03}'] = alpha031(l, c, adv20)

In [90]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [92]:
# sns.distplot(alphas[f'{alpha:03}']);

In [94]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

### Alpha 032

In [97]:
def alpha032(c, vwap):
    """scale(ts_mean(close, 7) - close) + 
        (20 * scale(ts_corr(vwap, ts_lag(close, 5),230)))

    Scaled distance of close from its 7-period mean plus 20 times the scaled
    230-period correlation between vwap and close lagged by 5 periods.
    """
    mean_reversion = scale(ts_mean(c, 7).sub(c))
    long_corr = scale(ts_corr(vwap, ts_lag(c, 5), 230))
    signal = mean_reversion.add(20 * long_corr)
    return signal.stack('ticker').swaplevel()

In [99]:
alpha = 32

In [101]:
# %%time
# alphas[f'{alpha:03}'] = alpha032(c, vwap)

In [103]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [105]:
# sns.distplot(alphas[f'{alpha:03}']);

In [107]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [109]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, 
#                                   alphas[f'{alpha:03}'])

In [111]:
# mi[alpha]

### Alpha 033

In [114]:
def alpha033(o, c):
    """rank(-(1 - (open / close)))

    Rank of the negated quantity ``1 - open / close``.
    """
    one_minus_ratio = o.div(c).mul(-1).add(1)
    ranked = rank(one_minus_ratio.mul(-1))
    return ranked.stack('ticker').swaplevel()

In [116]:
alpha = 33

In [118]:
# %%time
# alphas[f'{alpha:03}'] = alpha033(o, c)

In [120]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [122]:
# sns.distplot(alphas[f'{alpha:03}']);

In [124]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [126]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [128]:
# mi[alpha]

### Alpha 034

In [131]:
def alpha034(c, r):
    """rank(((1 - rank((ts_std(returns, 2) / ts_std(returns, 5)))) + (1 - rank(ts_delta(close, 1)))))

    The two ``1 - rank`` terms are combined as ``2 - rank_a - rank_b``;
    infinite volatility ratios are treated as NaN.
    """
    vol_ratio = ts_std(r, 2).div(ts_std(r, 5)).replace([-np.inf, np.inf], np.nan)
    combined = (rank(vol_ratio)
                .mul(-1)
                .sub(rank(ts_delta(c, 1)))
                .add(2))
    return rank(combined).stack('ticker').swaplevel()

In [133]:
# Keep the identifier active (as in every other alpha section) so that re-enabling
# the cells below cannot overwrite the '033' column with alpha 034 values.
alpha = 34

In [135]:
# %%time
# alphas[f'{alpha:03}'] = alpha034(c, r)

In [137]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [139]:
# sns.distplot(alphas[f'{alpha:03}']);

In [141]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [143]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [145]:
# mi[alpha]

In [149]:
# pd.Series(mi).to_csv('mi.csv')

### Alpha 035

In [167]:
def alpha035(h, l, c, v, r):
    """((ts_Rank(volume, 32) *
        (1 - ts_Rank(((close + high) - low), 16))) *
        (1 -ts_Rank(returns, 32)))

    Product of the 32-period volume rank, one minus the 16-period rank of
    (close + high - low), and one minus the 32-period rank of returns.
    """
    volume_rank = ts_rank(v, 32)
    price_term = 1 - ts_rank(c.add(h).sub(l), 16)
    return_term = 1 - ts_rank(r, 32)
    signal = volume_rank.mul(price_term).mul(return_term)
    return signal.stack('ticker').swaplevel()

In [169]:
alpha = 35

In [171]:
# %%time
# alphas[f'{alpha:03}'] = alpha035(h, l, c, v, r)

In [173]:
# alphas[f'{alpha:03}'].to_hdf('alphas.h5', f'alphas/{alpha:03}')

In [175]:
# sns.distplot(alphas[f'{alpha:03}']);

In [177]:
# g = sns.jointplot(x=f'{alpha:03}', y='ret_fwd', data=alphas);

In [179]:
# mi[alpha] = get_mutual_info_score(alphas.ret_fwd, alphas[f'{alpha:03}'])

In [181]:
# mi[alpha]

### Alpha 036