# Separating Signal & Noise – How to Use `Alphalens`

### Loading Libraries

In [2]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Alphalens
from alphalens.tears import *
from alphalens.plotting import *
from alphalens.performance import *
from alphalens.utils import get_clean_factor_and_forward_returns

# Warnings
import warnings

# Regular Expressions
import re

In [3]:
# Use seaborn's 'whitegrid' style for every figure drawn below
sns.set_style('whitegrid')

# Blanket-suppress warnings of every category for the rest of the session
warnings.simplefilter('ignore')

## Creating Forward Returns & Factor Quantiles

In [5]:
# NOTE: unpickling can execute arbitrary code, so only load a pickle file
# that comes from a trusted source.
factor_results_path = 'single_factor.pickle'
performance = pd.read_pickle(factor_results_path)

In [6]:
# Print a concise summary of the object loaded from 'single_factor.pickle'
performance.info()

In [7]:
# Every non-null entry of `performance.prices` is turned into a one-column frame
# labelled with its key; the frames are joined side by side and then transposed
# so that the keys end up on the row index.
price_frames = [snapshot.to_frame(stamp)
                for stamp, snapshot in performance.prices.dropna().items()]
prices = pd.concat(price_frames, axis=1).T

# Column labels are reduced to the text found between square brackets
# in their string representation.
bracket_pattern = re.compile(r"\[(.+)\]")
prices.columns = [bracket_pattern.findall(str(label))[0] for label in prices.columns]

# Drop the time-of-day component of the index timestamps
prices.index = prices.index.normalize()
prices.info()

In [8]:
# Same reshaping as for the prices: one single-column frame per non-null entry
# of `performance.factor_data`, concatenated column-wise and transposed.
factor_frames = [snapshot.to_frame(stamp)
                 for stamp, snapshot in performance.factor_data.dropna().items()]
factor_wide = pd.concat(factor_frames, axis=1).T

# Keep only the bracketed part of each column label
bracket_pattern = re.compile(r"\[(.+)\]")
factor_wide.columns = [bracket_pattern.findall(str(label))[0] for label in factor_wide.columns]
factor_wide.index = factor_wide.index.normalize()

# Long format: a two-level index named ('date', 'asset')
factor_data = factor_wide.stack()
factor_data.index.names = ['date', 'asset']
factor_data.head()

In [9]:
# Read the `close` column of the 'sp500/stooq' table from the HDF5 store
with pd.HDFStore('../data/assets.h5') as store:
    sp500_close = store['sp500/stooq'].close

# Upsample to calendar days (forward-filling gaps) and mark the index as UTC
sp500_daily = sp500_close.resample('D').ffill().tz_localize('utc')

# Keep only the index labels that also appear in the `prices` index
sp500 = sp500_daily.filter(prices.index.get_level_values(0))
sp500.head()

In [10]:
# Forward-return horizons handed to Alphalens as `periods`
HOLDING_PERIODS = (5, 10, 21, 42)

# Value handed to Alphalens as `quantiles`
QUANTILES = 5

# Combine the stacked factor values with the price table into the
# input used by all Alphalens calls below.
alphalens_data = get_clean_factor_and_forward_returns(
    factor=factor_data,
    prices=prices,
    quantiles=QUANTILES,
    periods=HOLDING_PERIODS,
)

In [11]:
# Preview the first rows returned by get_clean_factor_and_forward_returns
alphalens_data.head()

In [12]:
# Move the index levels into regular columns and keep only the leading rows,
# then write that small sample (not the full data set) to disk without the row index.
factor_sample = alphalens_data.reset_index().head()
factor_sample.to_csv('factor_data.csv', index=False)

## Summary Tear Sheet

In [13]:
# Render the Alphalens summary tear sheet for the prepared factor data
create_summary_tear_sheet(alphalens_data)

### Predictive Performance by Factor Quantiles - Returns Analysis

In [14]:
def _per_day_equivalent(period_returns):
    """Convert a column of N-period returns into the equivalent one-period rate.

    The holding length N is parsed from the column name by dropping its last
    character (e.g. a column named '5D' gives N = 5).
    """
    holding_length = int(period_returns.name[:-1])
    return period_returns.add(1).pow(1 / holding_length).sub(1)


mean_return_by_q, std_err = mean_return_by_quantile(alphalens_data)

# Apply the conversion column by column so horizons are comparable
mean_return_by_q_norm = mean_return_by_q.apply(_per_day_equivalent)

#### Mean Return by Holding Period & Quintile

In [15]:
# Bar chart of `mean_return_by_q` (the un-normalized mean returns).
# NOTE(review): `mean_return_by_q_norm` is computed in the preceding cell but is
# not what gets plotted here — confirm which of the two is intended.
plot_quantile_returns_bar(mean_return_by_q)

# Tidy the layout and strip the top/right spines; the trailing semicolon
# suppresses the cell's output repr.
plt.tight_layout()
sns.despine();

In [16]:
# Same call as for `mean_return_by_q`, this time with by_date=True.
# Note: this rebinds `std_err`, replacing the value returned by the earlier call.
mean_return_by_q_daily, std_err = mean_return_by_quantile(alphalens_data, by_date=True)

#### Cumulative 5D Return

In [17]:
# Plot cumulative returns by quantile using only the '5D' column of the by-date means
plot_cumulative_returns_by_quantile(mean_return_by_q_daily['5D'], period='5D', freq=None)
plt.tight_layout()
sns.despine();

#### Return Distribution by Holding Period & Quintile

In [20]:
# Violin plot of the by-date quantile returns (all columns of `mean_return_by_q_daily`)
plot_quantile_returns_violin(mean_return_by_q_daily)
plt.tight_layout()
sns.despine();

### Information Coefficient

#### 5D Information Coefficient (Rolling Average)

In [21]:
# Information coefficient for the prepared factor data
ic = factor_information_coefficient(alphalens_data)

# Double brackets keep the '5D' selection as a one-column frame
ic_5d = ic[['5D']]
plot_ic_ts(ic_5d)

plt.tight_layout()
sns.despine();

#### Information Coefficient by Holding Period

In [22]:
# Recompute the information coefficient so this cell does not rely on the
# previous plotting cell having been run.
ic = factor_information_coefficient(alphalens_data)

# Average the IC per calendar year. Grouping on the year of the datetime index
# replaces `resample('A')`, whose 'A' offset alias is deprecated in recent pandas,
# and yields integer year labels directly, so the index needs no rewriting.
# Unlike resample, calendar years without any observation are simply absent
# instead of appearing as NaN rows.
ic_by_year = ic.groupby(ic.index.year).mean()

# One group of bars per year; trailing semicolon suppresses the output repr
ic_by_year.plot.bar(figsize=(14, 6))
plt.tight_layout();

### Turnover Tear Sheet

In [23]:
# Render the Alphalens turnover tear sheet; the trailing semicolon suppresses the output repr
create_turnover_tear_sheet(alphalens_data);