# Short-term reversal alpha factor

## Introduction

The concept of market efficiency is frequently debated by academics and by practitioners in the finance industry. On the one hand, many believe in the Efficient Market Hypothesis (EMH). The EMH suggests that stock prices fully reflect all available information about a firm’s value, making it impossible for investors to earn excess profits. This view is supported by research conducted by Eugene F. Fama in the 1960s (1). Under this hypothesis, short-term deviations in prices are expected to be random and unpredictable (2). 

On the other hand, a large body of academic work has provided evidence that anomalies exist in financial markets which partly explain deviations in stock prices (3). “Stylized facts” is a term used to refer to “empirical findings that are so consistent across markets that they are accepted as truth” (4). ‘Dependence’ is a category of stylized facts that concerns autocorrelation (both positive and negative) in stock returns. Short-term reversal is one of the most documented anomalies in the asset pricing literature; strategies built on it capitalize on negative autocorrelation in stock returns. It is the theory that last week’s ‘winning’ shares will see abnormal negative returns over the following week, while last week’s ‘losing’ shares will see abnormal positive returns. 

This Research Notebook aims to provide an understanding of the short-term reversal phenomenon and will analyse whether it exists in international equity markets. 


#### Importing packages

In [None]:
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.research import run_pipeline

from quantopian.pipeline.data import USEquityPricing, EquityPricing, factset
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.classifiers.fundamentals import Sector

from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.tears import create_full_tear_sheet, create_information_tear_sheet, create_returns_tear_sheet
from alphalens.performance import mean_information_coefficient

import pandas as pd
import numpy as np

#### Creating a factor

In [None]:
class FiveDayReversalUS(CustomFactor):
    """Negated simple return across a 5-row window of US closing prices.

    The return is measured from the first row of the window to the last
    row and then sign-flipped, so assets that fell score highest and
    assets that rose score lowest.

    NOTE(review): a 5-row window contains only 4 row-to-row price changes,
    so this may be a 4-day rather than a 5-day move -- confirm the intent.
    """

    inputs = [USEquityPricing.close]
    window_length = 5

    def compute(self, today, assets, out, close):
        # First and last rows of the lookback window.
        first_close = close[0]
        last_close = close[self.window_length - 1]
        out[:] = -(last_close - first_close) / first_close


reversal_factor5 = FiveDayReversalUS()

class TenDayReversalUS(CustomFactor):
    """Negated simple return across a 10-row window of US closing prices.

    Computed as the change from the first row of the window to the last
    row, divided by the first row, with the sign flipped.
    """

    inputs = [USEquityPricing.close]
    window_length = 10

    def compute(self, today, assets, out, close):
        window_start = close[0]
        window_end = close[self.window_length - 1]
        out[:] = -(window_end - window_start) / window_start


reversal_factor10 = TenDayReversalUS()

class MonthlyReversalUS(CustomFactor):
    """Negated simple return across a 21-row window of US closing prices.

    Same construction as the shorter reversal factors: first-to-last
    change over the window, relative to the first row, sign-flipped.
    """

    inputs = [USEquityPricing.close]
    window_length = 21

    def compute(self, today, assets, out, close):
        anchor = close[0]
        newest = close[self.window_length - 1]
        out[:] = -(newest - anchor) / anchor


reversal_factor21 = MonthlyReversalUS()

# Unweighted sum of the three reversal horizons.
combined_factor = reversal_factor5 + reversal_factor10 + reversal_factor21

In [None]:
def make_pipeline():
    """Build a pipeline exposing every reversal horizon plus their sum.

    The screen keeps QTradableStocksUS members for which each of the four
    output columns is non-null.
    """
    reversal_columns = {
        'weekly_reversal': reversal_factor5,
        'fortnitely_reversal': reversal_factor10,
        'monthly_reversal': reversal_factor21,
        'combined_factor': combined_factor,
    }

    # AND the universe with a not-null filter for each column, in the same
    # left-to-right order as the columns are listed.
    universe = QTradableStocksUS()
    for term in (reversal_factor5,
                 reversal_factor10,
                 reversal_factor21,
                 combined_factor):
        universe = universe & term.notnull()

    return Pipeline(columns=reversal_columns, screen=universe)


factor_data = run_pipeline(
    pipeline=make_pipeline(),
    start_date='2010-01-01',
    end_date='2016-01-01',
)

factor_data.head()

In [None]:
# Open prices for every asset in the second index level of the pipeline
# output. The end date runs two months past the factor window, presumably
# so forward returns exist for the last factor dates -- confirm.
pricing_data = get_pricing(
    symbols=factor_data.index.levels[1],
    start_date='2010-01-01',
    end_date='2016-03-01',
    fields='open_price',
)

In [None]:
# Forward-return horizons used for the mean IC figures: every
# `range_step`-th period from 1 up to (not including)
# `longest_look_forward_period`.
longest_look_forward_period = 40 # Common time periods: week = 5, month = 21, quarter = 63, year = 252
range_step = 5 # A larger number here makes things run faster, but makes the resulting chart less precise

# One column of mean IC values per factor is added to this frame below.
factor_ic_decay = pd.DataFrame()

for factor_name in factor_data.columns:

    # Skip a 'sector' column if the pipeline output has one.
    if factor_name == 'sector':
        continue

    # Pair this factor's values with forward returns at each horizon.
    asset_factor_data = get_clean_factor_and_forward_returns(
        factor=factor_data[factor_name],
        prices=pricing_data,
        periods=range(1, longest_look_forward_period, range_step),
    )

    # Store the mean information coefficient per horizon for this factor.
    factor_ic_decay[factor_name] = mean_information_coefficient(asset_factor_data)

# Plot mean IC against horizon, one line per factor.
factor_ic_decay.plot()

In [None]:
def make_pipeline():
    """Build a pipeline containing only the 5-row reversal factor.

    The screen keeps QTradableStocksUS members with a non-null factor value.
    """
    universe = QTradableStocksUS() & reversal_factor5.notnull()
    return Pipeline(
        columns={'weekly_reversal': reversal_factor5},
        screen=universe,
    )


factor1_data = run_pipeline(
    pipeline=make_pipeline(),
    start_date='2010-01-01',
    end_date='2016-01-01',
)

factor1_data.head()

In [None]:
# Alphalens documents `factor` as a (date, asset)-indexed Series, so pass
# the single pipeline column rather than the whole one-column DataFrame.
# Factor values are bucketed into deciles and paired with 5-period forward
# returns computed from `pricing_data`.
merged_data = get_clean_factor_and_forward_returns(
    factor=factor1_data['weekly_reversal'],
    prices=pricing_data,
    quantiles=10,
    periods=[5],
)

merged_data.head()

In [None]:
# Render the full Alphalens tear sheet for the factor / forward-returns
# frame currently held in `merged_data`.
create_full_tear_sheet(merged_data)

In [None]:
 class Vol_3M(CustomFactor):

        inputs = [USEquityPricing.close]
        window_length = 63

        def compute(self, today, assets, out, close):

            vols = []
            for col in close.T:
                # compute returns
                log_col_returns = np.log(col / np.roll(col, 1))[1:]
                vols.append(np.nanstd(log_col_returns))
            out[:] = vols
            
volatility_min = Vol_3M()

volatility_filter = (volatility_min.percentile_between(50,100,))

In [None]:
class MomentumPeriodReturnsUS(CustomFactor):
    """Simple return from the first to the last row of a 250-row close window."""

    inputs = [USEquityPricing.close]
    window_length = 250

    def compute(self, today, assets, out, close):
        base_close = close[0]
        final_close = close[self.window_length - 1]
        out[:] = (final_close - base_close) / base_close


momentum_max = MomentumPeriodReturnsUS()

# Keep assets ranked between the 15th and 90th percentile of that return.
momentum_filter = momentum_max.percentile_between(15, 90)

In [None]:
class MaxVolumeUS(CustomFactor):
    """Smallest volume seen in a 5-row window of US volume data.

    Declares QTradableStocksUS as its mask.

    NOTE(review): despite the "Max" in the name, `compute` takes the
    minimum over the window -- confirm which was intended.
    """

    inputs = [USEquityPricing.volume]
    window_length = 5
    mask = QTradableStocksUS()

    def compute(self, today, asset_ids, out, values):
        # Reduce over the window (axis 0), leaving one value per column.
        lowest_volume = np.min(values, axis=0)
        out[:] = lowest_volume


# Keep the bottom 50% of the windowed-minimum volume factor, ranking only
# assets whose value is strictly positive.
volume_max = MaxVolumeUS()
positive_volume = volume_max > 0
volume_filter = volume_max.percentile_between(0, 50, mask=positive_volume)

In [None]:
def make_pipeline():
    """Build the 5-row reversal pipeline restricted by momentum and volume.

    The screen keeps QTradableStocksUS members with a non-null factor
    value that also pass `momentum_filter` and `volume_filter`.
    """
    universe = QTradableStocksUS() & reversal_factor5.notnull()
    universe = universe & momentum_filter
    universe = universe & volume_filter

    return Pipeline(
        columns={'weekly_reversal': reversal_factor5},
        screen=universe,
    )


factor2_data = run_pipeline(
    pipeline=make_pipeline(),
    start_date='2010-01-01',
    end_date='2016-01-01',
)

factor2_data.head()

In [None]:
# Alphalens documents `factor` as a (date, asset)-indexed Series, so pass
# the single pipeline column rather than the whole one-column DataFrame.
# Factor values are bucketed into deciles and paired with 5-period forward
# returns computed from `pricing_data`.
merged_data = get_clean_factor_and_forward_returns(
    factor=factor2_data['weekly_reversal'],
    prices=pricing_data,
    quantiles=10,
    periods=[5],
)

merged_data.head()

In [None]:
# Render the full Alphalens tear sheet for the factor / forward-returns
# frame currently held in `merged_data`.
create_full_tear_sheet(merged_data)

In [None]:
from quantopian.pipeline.domain import (
    AT_EQUITIES, # Austria
    AU_EQUITIES, # Australia
    BE_EQUITIES, # Belgium
    BR_EQUITIES, # Brazil
    CA_EQUITIES, # Canada
    CH_EQUITIES, # Switzerland
    CN_EQUITIES, # China
    DE_EQUITIES, # Germany
    DK_EQUITIES, # Denmark
    ES_EQUITIES, # Spain
    FI_EQUITIES, # Finland
    FR_EQUITIES, # France
    GB_EQUITIES, # Great Britain
    HK_EQUITIES, # Hong Kong
    IE_EQUITIES, # Ireland
    IN_EQUITIES, # India
    IT_EQUITIES, # Italy
    JP_EQUITIES, # Japan
    KR_EQUITIES, # South Korea
    NL_EQUITIES, # Netherlands
    NO_EQUITIES, # Norway
    NZ_EQUITIES, # New Zealand
    PT_EQUITIES, # Portugal
    SE_EQUITIES, # Sweden
    SG_EQUITIES, # Singapore
    US_EQUITIES, # United States
)

import time

In [None]:
def evaluate_factor(factor, 
                    domain, 
                    start_date, 
                    end_date,
                    factor_screen=None,
                    quantiles=5,
                    returns_lengths=(1, 5, 10)):
    """Analyze a Pipeline Factor using Alphalens.
    
    Parameters
    ----------
    factor : quantopian.pipeline.factors.Factor
        Factor producing scores to be evaluated.
    domain : quantopian.pipeline.domain.Domain
        Domain on which the factor should be evaluated.
    start_date : str or pd.Timestamp
        Start date for evaluation period.
    end_date : str or pd.Timestamp
        End date for evaluation period.
    factor_screen : quantopian.pipeline.filters.Filter, optional
        Filter defining which assets ``factor`` should be evaluated on.
        Default is ``factor.notnull()``.
    quantiles : int, optional
        Number of buckets to use for quantile groups. Default is 5
    returns_lengths : sequence[int]
        Forward-returns horizons to use when evaluating ``factor``. 
        Default is 1-day, 5-day, and 10-day returns.
        
    Returns
    -------
    factor_data : pd.DataFrame
        A (date, asset)-indexed DataFrame with the following columns:
            'factor'
                Values produced by ``factor``.
            'factor_quantile'
                Quantile label produced by ``factor.quantiles(quantiles)``
                restricted to ``factor_screen``.
            '<N>D'
                One column per entry ``N`` of ``returns_lengths`` holding
                the returns series shifted back by ``N`` dates.
        Rows containing a NaN in any column are dropped.
    """
    # ``Returns`` is not imported by the notebook's import cells; bring it
    # into scope here so building the returns pipeline cannot raise NameError.
    from quantopian.pipeline.factors import Returns

    # Materialize once: the horizons are iterated twice and passed to max().
    returns_lengths = list(returns_lengths)

    calendar = domain.calendar
    # Roll input dates to the next trading session.
    start_date = calendar.minute_to_session_label(pd.Timestamp(start_date, tz='UTC'))
    end_date = calendar.minute_to_session_label(pd.Timestamp(end_date, tz='UTC'))
    
    if factor_screen is None:
        factor_screen = factor.notnull()
        
    # Run pipeline to get factor values and quantiles.
    factor_pipe = Pipeline(
        {'factor': factor, 
         'factor_quantile': factor.quantiles(quantiles, mask=factor_screen)},
        screen=factor_screen,
        domain=domain,
    )
    factor_results = run_pipeline(factor_pipe, start_date, end_date, chunksize=250)
    
    # One returns column per horizon, named e.g. '5D'.
    column_order = ['{}D'.format(length) for length in returns_lengths]
    returns_cols = {
        # Add 1 because "1-day" returns needs 2 price observations.
        colname: Returns(window_length=length + 1)
        for colname, length in zip(column_order, returns_lengths)
    }
    returns_pipe = Pipeline(returns_cols, domain=domain)
    
    # Compute returns for the period after the factor pipeline, then 
    # shift the results back to align with our factor values.
    returns_start_date = start_date
    returns_end_date = end_date + calendar.day * max(returns_lengths)
    raw_returns = run_pipeline(returns_pipe, returns_start_date, returns_end_date, chunksize=500)
    
    # Shift 1-day returns back by a day, 5-day returns back by 5 days, etc.
    shifted_returns = {
        name: backshift_returns_series(raw_returns[name], length)
        for name, length in zip(column_order, returns_lengths)
    }
        
    # Merge backshifted returns into a single frame indexed like our desired output.
    merged_returns = pd.DataFrame(
        data=shifted_returns, 
        index=factor_results.index, 
        columns=column_order,
    )
    
    # Concat factor results and forward returns column-wise.
    merged = pd.concat([factor_results, merged_returns], axis=1)
    merged.index.set_names(['date', 'asset'], inplace=True)
    
    # Drop NaNs
    merged = merged.dropna(how='any')
    
    # Add a Business Day Offset to the DateTimeIndex
    merged.index.levels[0].freq = pd.tseries.offsets.BDay()
    
    return merged

def backshift_returns_series(series, N):
    """Shift a multi-indexed series backwards by N observations in the first level.
    
    This can be used to convert backward-looking returns into a forward-returns series.

    Parameters
    ----------
    series : pd.Series
        Series with a two-level MultiIndex whose rows are ordered by the
        first level (the cutoff is located with a sorted search on the
        first level's integer codes).
    N : int
        Number of first-level positions to shift back by. Must be >= 0.

    Returns
    -------
    pd.Series
        The values from the (N+1)-th first-level entry onwards, relabelled
        with the first-level entry N positions earlier. ``N == 0`` returns
        the data unshifted; if N is at least the number of first-level
        entries the result is empty.

    Raises
    ------
    ValueError
        If ``N`` is negative.
    """
    if N < 0:
        raise ValueError("N must be non-negative, got {!r}".format(N))
    if N == 0:
        # ``dates[:-0]`` would be empty, so handle the no-op shift explicitly.
        return pd.Series(data=series.values, index=series.index)

    ix = series.index
    dates, sids = ix.levels

    # Newer pandas exposes the integer positions as ``codes``; older
    # releases only have ``labels``. Use whichever this version provides,
    # for both reading and constructing the index.
    try:
        raw_codes = ix.codes
        codes_kwarg = 'codes'
    except AttributeError:
        raw_codes = ix.labels
        codes_kwarg = 'labels'
    date_codes, sid_codes = (np.array(level_codes) for level_codes in raw_codes)

    # Output date level will contain all but the last N dates.
    new_dates = dates[:-N]

    # Output data will remove the first M rows, where M is the index of the
    # last record with one of the first N dates.
    cutoff = date_codes.searchsorted(N)
    new_date_codes = date_codes[cutoff:] - N
    new_sid_codes = sid_codes[cutoff:]
    new_values = series.values[cutoff:]

    # The first surviving row must map onto the first date; an empty
    # result has nothing to check.
    if len(new_date_codes):
        assert new_date_codes[0] == 0

    index_kwargs = {codes_kwarg: [new_date_codes, new_sid_codes]}
    new_index = pd.MultiIndex(
        levels=[new_dates, sids],
        sortorder=1,
        names=ix.names,
        **index_kwargs
    )

    return pd.Series(data=new_values, index=new_index)

In [None]:
class Daily5Reversal(CustomFactor):
    """Negated simple return across a 5-row window of closing prices.

    Uses ``EquityPricing.close`` rather than the US-specific dataset.
    The return runs from the first row of the window to the last row and
    is then sign-flipped.
    """

    inputs = [EquityPricing.close]
    window_length = 5

    def compute(self, today, assets, out, close):
        first_close = close[0]
        last_close = close[self.window_length - 1]
        out[:] = -(last_close - first_close) / first_close


int_reversal_factor5 = Daily5Reversal()

# Z-scored version of the same factor.
int_reversal_z = int_reversal_factor5.zscore()


In [None]:
class MomentumPeriodReturns(CustomFactor):
    """Simple return from the first to the last row of a 250-row close window.

    Uses ``EquityPricing.close`` rather than the US-specific dataset.
    """

    inputs = [EquityPricing.close]
    window_length = 250

    def compute(self, today, assets, out, close):
        base_close = close[0]
        final_close = close[self.window_length - 1]
        out[:] = (final_close - base_close) / base_close


int_momentum_max = MomentumPeriodReturns()

# Keep assets ranked between the 15th and 90th percentile of that return.
int_momentum_filter = int_momentum_max.percentile_between(15, 90)

In [None]:
class MaxVolume(CustomFactor):
    """Smallest volume seen in a 5-row window of ``EquityPricing.volume``.

    NOTE(review): despite the "Max" in the name, `compute` takes the
    minimum over the window -- confirm which was intended.
    """

    inputs = [EquityPricing.volume]
    window_length = 5

    def compute(self, today, asset_ids, out, values):
        # Reduce over the window (axis 0), leaving one value per column.
        lowest_volume = np.min(values, axis=0)
        out[:] = lowest_volume


# Keep assets between the 50th and 75th percentile of the windowed-minimum
# volume factor.
int_volume_max = MaxVolume()
int_volume_filter = int_volume_max.percentile_between(50, 75)

In [None]:
# Evaluate the 5-row reversal factor on the CH_EQUITIES domain using
# deciles and 5-period forward returns. The domain constant is spelled
# exactly as imported (`CH_EQUITIES`); the previous `Ch_EQUITIES` was an
# undefined name.
al_data = evaluate_factor(
    int_reversal_factor5,
    CH_EQUITIES,
    '2010-01-01',
    '2016-01-01',
    quantiles=10,
    returns_lengths=[5],
    factor_screen=(
        int_volume_filter
        & int_momentum_filter
        & int_reversal_factor5.notnull()
    ),
)

In [None]:
# Render the full Alphalens tear sheet for the frame returned by
# `evaluate_factor`.
create_full_tear_sheet(al_data)