In [1]:
import pandas as pd
import numpy as np
import scipy as sp

In [2]:
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.research import run_pipeline
from quantopian.pipeline.filters.morningstar import Q500US, Q1500US
from quantopian.pipeline.factors import SimpleMovingAverage
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.classifiers.morningstar import Sector


# Display labels for the sector codes exposed as constants on the `Sector`
# classifier, keyed by code.
SECTOR_CODE_NAMES = dict([
    (Sector.BASIC_MATERIALS, 'Basic Materials'),
    (Sector.CONSUMER_CYCLICAL, 'Consumer Cyclical'),
    (Sector.FINANCIAL_SERVICES, 'Financial Services'),
    (Sector.REAL_ESTATE, 'Real Estate'),
    (Sector.CONSUMER_DEFENSIVE, 'Consumer Defensive'),
    (Sector.HEALTHCARE, 'Healthcare'),
    (Sector.UTILITIES, 'Utilities'),
    (Sector.COMMUNICATION_SERVICES, 'Communication Services'),
    (Sector.ENERGY, 'Energy'),
    (Sector.INDUSTRIALS, 'Industrials'),
    (Sector.TECHNOLOGY, 'Technology'),
])


class OneDayPDelta(CustomFactor):
    """
    Price change between the first two rows of the window.

    **Default Inputs**: [USEquityPricing.close]

    **Default Window Length**: 2
    """
    inputs = [USEquityPricing.close]
    window_length = 2

    def compute(self, today, assets, out, close):
        # Only rows 0 and 1 are read, so with a window longer than 2 the
        # remaining rows are ignored.
        earlier_close, later_close = close[0], close[1]
        out[:] = later_close - earlier_close

        
class LSimpleMovingAverage(SimpleMovingAverage):
    """
    Lagged simple moving average of an arbitrary column.

    Averages the first ``window_length - 1`` rows of the window and leaves
    out the final row, so an N-row average needs ``window_length = N + 1``.

    **Default Inputs**: None

    **Default Window Length**: None
    """

    def compute(self, today, assets, out, data):
        # np.nanmean skips NaNs; a column that is NaN on every row yields NaN.
        # NOTE(review): nothing in this class silences the warning numpy emits
        # in that case -- presumably the parent class takes care of it; confirm.
        lagged_rows = data[:self.window_length - 1]
        out[:] = np.nanmean(lagged_rows, axis=0)

        
class LastClose(CustomFactor):
    """
    Value of the input column on the first row of the window.

    With the default two-row window this is the row just before the final
    one, i.e. the value preceding the latest observation.

    **Default Inputs**: [USEquityPricing.close]

    **Default Window Length**: 2
    """
    # Default input added for consistency with OneDayPDelta; callers may
    # still pass `inputs=` explicitly.
    inputs = [USEquityPricing.close]
    # Previously misspelled as `windown_length`, which left the factor with
    # no default window length at all.
    window_length = 2

    def compute(self, today, assets, out, data):
        # Row 0 is the first row of the window; with a window longer than 2
        # this reaches further back than one row.
        out[:] = data[0]

        
def get_data(start, end):
    """
    Run the close-price research pipeline, screened by the Q1500US universe.

    The pipeline exposes the latest close, the previous close, lagged moving
    averages over 10/15/20/25/30 rows, a plain 30-row moving average, the
    one-day price change and the sector code.

    Args:
        start: start date of the research range.
        end: end date of the research range.

    Returns:
        The pipeline output produced by `run_pipeline` for that range.
    """
    close_price = USEquityPricing.close

    columns = {
        'close': close_price.latest,
        'last_close': LastClose(inputs=[close_price], window_length=2),
        'sma30': SimpleMovingAverage(inputs=[close_price], window_length=30),
        'pdelta1': OneDayPDelta(inputs=[close_price], window_length=2),
        'sector': Sector(),
    }

    # The lagged average drops the final row of its window, so each one is
    # given one row more than the span it averages over.
    for span in (10, 15, 20, 25, 30):
        columns['lsma%d' % span] = LSimpleMovingAverage(
            inputs=[close_price], window_length=span + 1)

    pipe = Pipeline(columns=columns, screen=Q1500US())
    return run_pipeline(pipe, start, end)

# Smoke test: run the pipeline for a single start/end date and keep only the
# first rows for display.
data = get_data(start='2008-01-01', end='2008-01-01').head()
data

Unnamed: 0,Unnamed: 1,close,last_close,lsma10,lsma15,lsma20,lsma25,lsma30,pdelta1,sector,sma30
2008-01-02 00:00:00+00:00,Equity(2 [ARNC]),36.54,36.81,35.7972,36.135467,36.0291,35.93928,35.891067,-0.27,101,35.897067
2008-01-02 00:00:00+00:00,Equity(24 [AAPL]),198.08,199.78,191.7642,191.827467,189.6886,187.0834,183.624867,-1.7,311,184.752667
2008-01-02 00:00:00+00:00,Equity(62 [ABT]),56.02,57.16,57.555,57.823333,57.7835,57.3884,56.980333,-1.14,206,57.032
2008-01-02 00:00:00+00:00,Equity(67 [ADSK]),49.75,50.19,50.113,49.864667,49.352,48.692,48.298667,-0.44,311,48.37
2008-01-02 00:00:00+00:00,Equity(76 [TAP]),51.64,52.12,51.8885,52.458333,52.97625,52.866496,52.644375,-0.48,205,52.62605


In [3]:
# Research sample used by the rest of the notebook: 2014-01-01 to 2017-01-01.
data1 = get_data('2014-01-01', '2017-01-01')

data1.head()

Unnamed: 0,Unnamed: 1,close,last_close,lsma10,lsma15,lsma20,lsma25,lsma30,pdelta1,sector,sma30
2014-01-02 00:00:00+00:00,Equity(2 [ARNC]),10.63,10.53,10.1175,9.894333,9.7717,9.72756,9.6153,0.1,101,9.668967
2014-01-02 00:00:00+00:00,Equity(24 [AAPL]),561.16,554.5,557.266,558.746667,559.5885,554.8308,548.996333,6.66,311,550.203667
2014-01-02 00:00:00+00:00,Equity(62 [ABT]),38.34,38.4,37.884,37.496,37.5005,37.6396,37.7315,-0.06,206,37.740833
2014-01-02 00:00:00+00:00,Equity(67 [ADSK]),50.32,49.58,48.784,48.028,47.5135,47.1,46.436,0.74,311,46.643
2014-01-02 00:00:00+00:00,Equity(76 [TAP]),56.15,55.76,54.974,54.691333,54.50925,54.221354,54.074253,0.39,205,54.15272


In [4]:

# Gap between the previous close and each lagged moving average; one
# `pm_delta<N>` column per `lsma<N>` column.
for lag_rows in (30, 25, 20, 15, 10):
    data1['pm_delta%d' % lag_rows] = data1['last_close'] - data1['lsma%d' % lag_rows]

data1.head()

Unnamed: 0,Unnamed: 1,close,last_close,lsma10,lsma15,lsma20,lsma25,lsma30,pdelta1,sector,sma30,pm_delta30,pm_delta25,pm_delta20,pm_delta15,pm_delta10
2014-01-02 00:00:00+00:00,Equity(2 [ARNC]),10.63,10.53,10.1175,9.894333,9.7717,9.72756,9.6153,0.1,101,9.668967,0.9147,0.80244,0.7583,0.635667,0.4125
2014-01-02 00:00:00+00:00,Equity(24 [AAPL]),561.16,554.5,557.266,558.746667,559.5885,554.8308,548.996333,6.66,311,550.203667,5.503667,-0.3308,-5.0885,-4.246667,-2.766
2014-01-02 00:00:00+00:00,Equity(62 [ABT]),38.34,38.4,37.884,37.496,37.5005,37.6396,37.7315,-0.06,206,37.740833,0.6685,0.7604,0.8995,0.904,0.516
2014-01-02 00:00:00+00:00,Equity(67 [ADSK]),50.32,49.58,48.784,48.028,47.5135,47.1,46.436,0.74,311,46.643,3.144,2.48,2.0665,1.552,0.796
2014-01-02 00:00:00+00:00,Equity(76 [TAP]),56.15,55.76,54.974,54.691333,54.50925,54.221354,54.074253,0.39,205,54.15272,1.685747,1.538646,1.25075,1.068667,0.786


In [5]:

# Turn both index levels into columns, name them, and index by date only so
# that `asset` stays available as an ordinary column.
data1 = (
    data1
    .reset_index()
    .rename(columns={'level_0': 'date', 'level_1': 'asset'})
    .set_index('date')
)
data1.head()

Unnamed: 0_level_0,asset,close,last_close,lsma10,lsma15,lsma20,lsma25,lsma30,pdelta1,sector,sma30,pm_delta30,pm_delta25,pm_delta20,pm_delta15,pm_delta10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2014-01-02 00:00:00+00:00,Equity(2 [ARNC]),10.63,10.53,10.1175,9.894333,9.7717,9.72756,9.6153,0.1,101,9.668967,0.9147,0.80244,0.7583,0.635667,0.4125
2014-01-02 00:00:00+00:00,Equity(24 [AAPL]),561.16,554.5,557.266,558.746667,559.5885,554.8308,548.996333,6.66,311,550.203667,5.503667,-0.3308,-5.0885,-4.246667,-2.766
2014-01-02 00:00:00+00:00,Equity(62 [ABT]),38.34,38.4,37.884,37.496,37.5005,37.6396,37.7315,-0.06,206,37.740833,0.6685,0.7604,0.8995,0.904,0.516
2014-01-02 00:00:00+00:00,Equity(67 [ADSK]),50.32,49.58,48.784,48.028,47.5135,47.1,46.436,0.74,311,46.643,3.144,2.48,2.0665,1.552,0.796
2014-01-02 00:00:00+00:00,Equity(76 [TAP]),56.15,55.76,54.974,54.691333,54.50925,54.221354,54.074253,0.39,205,54.15272,1.685747,1.538646,1.25075,1.068667,0.786


In [29]:
# Number of non-null close observations per asset.
data1.groupby('asset')['close'].count()

asset
Equity(2 [ARNC])           714
Equity(24 [AAPL])          757
Equity(41 [ARCB])          316
Equity(52 [ABM])             1
Equity(53 [ABMD])          466
Equity(62 [ABT])           757
Equity(67 [ADSK])          757
Equity(76 [TAP])           757
Equity(88 [ACI])           185
Equity(110 [ACXM])         313
Equity(114 [ADBE])         757
Equity(122 [ADI])          757
Equity(128 [ADM])          757
Equity(161 [AEP])          757
Equity(166 [AES])          757
Equity(168 [AET])          757
Equity(185 [AFL])          757
Equity(197 [AGCO])         757
Equity(205 [AGN])          302
Equity(216 [HES])          757
Equity(239 [AIG])          757
Equity(266 [AJG])          757
Equity(270 [AKRX])         757
Equity(289 [MATX])          42
Equity(300 [ALK])          757
Equity(301 [ALKS])         757
Equity(328 [ALTR])         501
Equity(337 [AMAT])         757
Equity(338 [BEAM])          81
Equity(351 [AMD])          757
                          ... 
Equity(49051 [APLE])       275
Eq

In [91]:
# Flag, per asset, the rows on which pm_delta30 has the opposite sign from the
# asset's previous row -- i.e. the previous close moved from above its lagged
# 30-row average to below it, or the other way round.  This marks when the
# price crossed its rolling mean (a mean-reversion event).
#
# NaN handling: the first row of each asset has no previous value, so the
# product is NaN, and `NaN < 0` evaluates to False; that row is never flagged.
# A delta of exactly zero is not counted as a sign change either.


# Select the column before shifting so only pm_delta30 is shifted, not every
# column of the frame.
data1['last_pm_delta30'] = data1.groupby('asset')['pm_delta30'].shift(1)
data1['c_l_pm_delta_sign_changed'] = data1['pm_delta30'] * data1['last_pm_delta30'] < 0

# The running total of sign changes is constant within a stretch of unchanged
# sign, so (asset, cumsum) identifies each stretch; cumcount then numbers the
# rows inside it, restarting at 0 on every sign change.
data1['cumsum'] = data1.groupby('asset')['c_l_pm_delta_sign_changed'].cumsum()
data1['PM_delta_sign_unchange_days'] = data1.groupby(['asset', 'cumsum']).cumcount()
# 'cumsum' is only a helper key; remove it again without mutating in place.
data1 = data1.drop('cumsum', axis=1)


# Sample: the rows for the Apple equity.
data1[data1.asset == symbols('AAPL')]



Unnamed: 0_level_0,asset,close,last_close,lsma10,lsma15,lsma20,lsma25,lsma30,pdelta1,sector,sma30,pm_delta30,pm_delta25,pm_delta20,pm_delta15,pm_delta10,last_pm_delta30,c_l_pm_delta_sign_changed,PM_delta_sign_unchange_days
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2014-01-02 00:00:00+00:00,Equity(24 [AAPL]),561.160,554.500000,557.266000,558.746667,559.588500,554.830800,548.996333,6.660000,311,550.203667,5.503667,-0.330800,-5.088500,-4.246667,-2.766000,,False,0
2014-01-03 00:00:00+00:00,Equity(24 [AAPL]),553.230,561.160000,557.635000,558.384000,560.081500,556.489600,550.203667,-7.930000,311,551.356333,10.956333,4.670400,1.078500,2.776000,3.525000,5.503667,False,1
2014-01-06 00:00:00+00:00,Equity(24 [AAPL]),541.040,553.230000,557.460000,557.556667,559.424000,557.668800,551.356333,-12.190000,311,552.074333,1.873667,-4.438800,-6.194000,-4.326667,-4.230000,10.956333,False,2
2014-01-07 00:00:00+00:00,Equity(24 [AAPL]),543.949,541.040000,556.489000,556.216667,558.233000,557.966400,552.074333,2.909000,311,553.038967,-11.034333,-16.926400,-17.193000,-15.176667,-15.449000,1.873667,True,0
2014-01-08 00:00:00+00:00,Equity(24 [AAPL]),540.030,543.949000,556.438900,555.101933,557.025450,557.889160,553.038967,-3.919000,311,553.672633,-9.089967,-13.940160,-13.076450,-11.152933,-12.489900,-11.034333,False,1
2014-01-09 00:00:00+00:00,Equity(24 [AAPL]),543.490,540.030000,555.569900,554.137933,556.030450,557.247160,553.672633,3.460000,311,554.465967,-13.642633,-17.217160,-16.000450,-14.107933,-15.539900,-9.089967,False,2
2014-01-10 00:00:00+00:00,Equity(24 [AAPL]),536.500,543.490000,552.903900,553.205933,554.874950,556.934760,554.465967,-6.990000,311,554.890967,-10.975967,-13.444760,-11.384950,-9.715933,-9.413900,-13.642633,False,3
2014-01-13 00:00:00+00:00,Equity(24 [AAPL]),532.920,536.500000,549.781900,551.973933,553.417950,555.739560,554.890967,-3.580000,311,554.868300,-18.390967,-19.239560,-16.917950,-15.473933,-13.281900,-10.975967,False,4
2014-01-14 00:00:00+00:00,Equity(24 [AAPL]),535.610,532.920000,546.690900,550.785267,552.006950,554.461960,554.868300,2.690000,311,554.525967,-21.948300,-21.541960,-19.086950,-17.865267,-13.770900,-18.390967,False,5
2014-01-15 00:00:00+00:00,Equity(24 [AAPL]),546.390,535.610000,544.242900,550.195933,550.753950,553.162360,554.525967,10.780000,311,554.202967,-18.915967,-17.552360,-15.143950,-14.585933,-8.632900,-21.948300,False,6


In [92]:
# Summary statistics of the run lengths between sign changes.
data1.PM_delta_sign_unchange_days.describe()

count    1.130016e+06
mean     1.405525e+01
std      1.486183e+01
min      0.000000e+00
25%      3.000000e+00
50%      9.000000e+00
75%      2.000000e+01
max      1.580000e+02
Name: PM_delta_sign_unchange_days, dtype: float64

In [8]:
import matplotlib.pyplot as plt
