# Testing The Signal Quality with Alphalens

### Loading Libraries

In [1]:
# Warnings
import warnings

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Technical Analysis
import talib

# SciPy
from scipy.stats import spearmanr

# Path
from pathlib import Path
from itertools import product

# OS & Time
import sys, os
from time import time
from io import StringIO

# Progress Bars
from tqdm import tqdm

# Statsmodels
import statsmodels.api as sm

# Scikit-Learn
from sklearn.linear_model import LinearRegression

# LightGBM (Gradient Boosting)
import lightgbm as lgb

# AlphaLens
from alphalens.tears import (create_summary_tear_sheet,
                             create_full_tear_sheet)

from alphalens.utils import get_clean_factor_and_forward_returns

In [2]:
# Seed NumPy's global random number generator so reruns draw the same numbers.
np.random.seed(42)

# Use seaborn's plain 'white' style for the plots in this notebook.
sns.set_style('white')

# Suppress every warning from here on to keep the cell output readable.
warnings.filterwarnings('ignore')

In [3]:
# Short alias for building MultiIndex slices inside .loc[...] expressions.
idx = pd.IndexSlice

In [4]:
# Directory (relative to the working directory) that holds the prediction
# artifacts read by the cells below.
results_path = Path('results', 'return_predictions')

# Create the directory together with any missing parents. exist_ok=True makes
# the call a no-op when the directory is already there, without a separate
# exists() check that could race with another process; if the path exists but
# is a regular file, mkdir raises instead of silently continuing.
results_path.mkdir(parents=True, exist_ok=True)

### Evaluating The Cross-Validation Results

In [6]:
# Lookahead used to pick the prediction table; it is formatted as a two-digit
# suffix of the HDF key ('test/01') when the predictions are loaded.
lookahead = 1

In [7]:
# Location of the 'parameter_tuning.h5' store inside the results directory.
# NOTE(review): the name is not used again in this section.
cv_store = results_path / 'parameter_tuning.h5'

### Getting AlphaLens Input

In [8]:
# Data directory one level above the working directory; get_trade_prices
# reads 'assets.h5' from here.
DATA_DIR = Path('..', 'data')

In [9]:
def get_trade_prices(tickers):
    """Return the following row's open price per ticker in wide format.

    Reads the 'stooq/jp/tse/stocks/prices' table from ``DATA_DIR / 'assets.h5'``,
    keeps the 'open' column for ``tickers`` between '2014' and '2019', and
    pivots the 'ticker' index level into columns.

    Parameters
    ----------
    tickers
        Labels selected on the first index level of the price table.

    Returns
    -------
    pandas.DataFrame
        One column per ticker, rows sorted by the remaining index level
        (presumably the date -- confirm against the store) and localized to
        UTC. Each row holds the open of the *next* row, and any row with a
        missing value in at least one column is dropped, which includes the
        last row produced by the shift.
    """
    all_prices = pd.read_hdf(DATA_DIR / 'assets.h5', 'stooq/jp/tse/stocks/prices')

    # Select the requested tickers and the fixed 2014-2019 window.
    opens = all_prices.loc[idx[tickers, '2014': '2019'], 'open']

    # Pivot tickers into columns and order the rows.
    wide_opens = opens.unstack('ticker').sort_index()

    # shift(-1) moves every value up one row, so a row carries the next open.
    next_opens = wide_opens.shift(-1)

    return next_opens.dropna().tz_localize('UTC')

In [11]:
# Load the predictions stored under the 'test/<lookahead>' key and print a
# structural summary of the resulting frame.
best_predictions = pd.read_hdf(
    results_path / 'predictions.h5',
    key=f'test/{lookahead:02}',
)
best_predictions.info()

In [12]:
# Distinct tickers that appear in the prediction index.
test_tickers = best_predictions.index.unique('ticker')

In [13]:
# Fetch the trade prices for exactly the tickers that have predictions and
# print a summary of the resulting wide frame.
trade_prices = get_trade_prices(tickers=test_tickers)
trade_prices.info()

In [14]:
# Build the factor as the row-wise mean of the first three prediction columns.
#
# The 'test/..' table also stores the realised target in a 'y_test' column
# (it is dropped explicitly when the same key is read again further down).
# Remove it before the positional slice so the target cannot be averaged into
# the signal; errors='ignore' keeps this a no-op if the column is absent.
factor = (best_predictions
          .drop(columns='y_test', errors='ignore')
          .iloc[:, :3]
          .mean(axis=1)
          # make the 'date' level timezone-aware (UTC)
          .tz_localize('UTC', level='date')
          # swap the two index levels
          .swaplevel()
          .dropna()
          # remove fully duplicated (date, ticker, value) rows
          .reset_index()
          .drop_duplicates()
          .set_index(['date', 'ticker']))

In [15]:
# Combine the factor with the trade prices: five factor quantiles and forward
# returns over 1, 5, 10 and 21 periods. Then summarize the sorted result.
factor_data = get_clean_factor_and_forward_returns(
    factor=factor,
    prices=trade_prices,
    quantiles=5,
    periods=(1, 5, 10, 21),
)
factor_data.sort_index().info()

### Summary Tearsheet

In [16]:
# Render the Alphalens summary tear sheet for the factor data built above.
create_summary_tear_sheet(factor_data)

### Evaluating The Out-of-Sample Predictions

#### Preparing Factor Data

In [17]:
# Lookahead whose predictions are evaluated (two-digit suffix of the HDF key).
t = 1

# Load the stored predictions and discard the 'y_test' column so that only
# the model prediction columns remain.
predictions = (
    pd.read_hdf(results_path / 'predictions.h5', key=f'test/{t:02}')
    .drop(columns='y_test')
)

In [18]:
# Print index, column and dtype information for the loaded predictions.
predictions.info()

In [19]:
# Row-wise mean of the first ten prediction columns, sorted, with the 'date'
# level localized to UTC, index levels swapped and missing values removed.
factor = (
    predictions
    .iloc[:, :10]
    .mean(axis=1)
    .sort_index()
    .tz_localize('UTC', level='date')
    .swaplevel()
    .dropna()
)

factor.head()

### Selecting the Next Available Trade Prices

In [20]:
# Distinct tickers covered by the factor, and their trade prices.
tickers = factor.index.unique('ticker')
trade_prices = get_trade_prices(tickers=tickers)

trade_prices.info()

### Getting AlphaLens Inputs

In [21]:
# Combine the factor with the trade prices: five factor quantiles and forward
# returns over 1, 5, 10 and 21 periods.
factor_data = get_clean_factor_and_forward_returns(
    factor=factor,
    prices=trade_prices,
    quantiles=5,
    periods=(1, 5, 10, 21),
)

factor_data.sort_index().info()

### Summary Tearsheet

In [22]:
# Render the Alphalens summary tear sheet for the factor data built above.
create_summary_tear_sheet(factor_data)