In [105]:
import datetime
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
from odo import odo
from statsmodels import regression
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.data import morningstar
from statsmodels.stats.stattools import jarque_bera
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import SimpleMovingAverage, AverageDollarVolume

In [106]:
SP500_symbol = ['AAPL', 'ABT', 'ABBV', 'ACN', 'ACE', 'ADBE', 'ADT', 'AAP', 'AES', 'AET', 'AFL',
'AMG', 'A', 'GAS', 'ARE', 'APD', 'AKAM', 'AA', 'AGN', 'ALXN', 'ALLE', 'ADS', 'ALL', 'ALTR', 'MO',
'AMZN', 'AEE', 'AAL', 'AEP', 'AXP', 'AIG', 'AMT', 'AMP', 'ABC', 'AME', 'AMGN', 'APH', 'APC', 'ADI',
'AON', 'APA', 'AIV', 'AMAT', 'ADM', 'AIZ', 'T', 'ADSK', 'ADP', 'AN', 'AZO', 'AVGO', 'AVB', 'AVY', 'BHI',
'BLL', 'BAC', 'BK', 'BCR', 'BXLT', 'BAX', 'BBT', 'BDX', 'BBBY', 'BRK.B', 'BBY', 'BLX', 'HRB', 'BA', 'BWA',
'BXP', 'BSX', 'BMY', 'BRCM', 'BF.B', 'CHRW', 'CA', 'CVC', 'COG', 'CAM', 'CPB', 'COF', 'CAH', 'HSIC', 'KMX',
'CCL', 'CAT', 'CBG', 'CBS', 'CELG', 'CNP', 'CTL', 'CERN', 'CF', 'SCHW', 'CHK', 'CVX', 'CMG', 'CB', 'CI', 
'XEC', 'CINF', 'CTAS', 'CSCO', 'C', 'CTXS', 'CLX', 'CME', 'CMS', 'COH', 'KO', 'CCE', 'CTSH', 'CL', 'CMCSA',
'CMA', 'CSC', 'CAG', 'COP', 'CNX', 'ED', 'STZ', 'GLW', 'COST', 'CCI', 'CSX', 'CMI', 'CVS', 'DHI', 'DHR', 
'DRI', 'DVA', 'DE', 'DLPH', 'DAL', 'XRAY', 'DVN', 'DO', 'DTV', 'DFS', 'DISCA', 'DISCK', 'DG', 'DLTR', 'D',
'DOV', 'DOW', 'DPS', 'DTE', 'DD', 'DUK', 'DNB', 'ETFC', 'EMN', 'ETN', 'EBAY', 'ECL', 'EIX', 'EW', 'EA', 
'EMC', 'EMR', 'ENDP', 'ESV', 'ETR', 'EOG', 'EQT', 'EFX', 'EQIX', 'EQR', 'ESS', 'EL', 'ES', 'EXC', 'EXPE',
'EXPD', 'ESRX', 'XOM', 'FFIV', 'FB', 'FAST', 'FDX', 'FIS', 'FITB', 'FSLR', 'FE', 'FISV', 'FLIR', 'FLS', 
'FLR', 'FMC', 'FTI', 'F', 'FOSL', 'BEN', 'FCX', 'FTR', 'GME', 'GPS', 'GRMN', 'GD', 'GE', 'GGP', 'GIS', 
'GM', 'GPC', 'GNW', 'GILD', 'GS', 'GT', 'GOOG', 'GWW', 'HAL', 'HBI', 'HOG', 'HAR', 'HRS', 'HIG', 
'HAS', 'HCA', 'HCP', 'HCN', 'HP', 'HES', 'HPQ', 'HD', 'HON', 'HRL', 'HSP', 'HST', 'HCBK', 'HUM', 'HBAN', 
'ITW', 'IR', 'INTC', 'ICE', 'IBM', 'IP', 'IPG', 'IFF', 'INTU', 'ISRG', 'IVZ', 'IRM', 'JEC', 'JBHT', 'JNJ', 
'JCI', 'JOY', 'JPM', 'JNPR', 'KSU', 'K', 'KEY', 'GMCR', 'KMB', 'KIM', 'KMI', 'KLAC', 'KSS', 'KRFT', 'KR', 
'LB', 'LLL', 'LH', 'LRCX', 'LM', 'LEG', 'LEN', 'LVLT', 'LUK', 'LLY', 'LNC', 'LLTC', 'LMT', 'L', 'LOW', 
'LYB', 'MTB', 'MAC', 'M', 'MNK', 'MRO', 'MPC', 'MAR', 'MMC', 'MLM', 'MAS', 'MA', 'MAT', 'MKC', 'MCD', 
'MCK', 'MJN', 'MMV', 'MDT', 'MRK', 'MET', 'KORS', 'MCHP', 'MU', 'MSFT', 'MHK', 'TAP', 'MDLZ', 'MON', 
'MNST', 'MCO', 'MS', 'MOS', 'MSI', 'MUR', 'MYL', 'NDAQ', 'NOV', 'NAVI', 'NTAP', 'NFLX', 'NWL', 'NFX', 
'NEM', 'NWSA', 'NEE', 'NLSN', 'NKE', 'NI', 'NE', 'NBL', 'JWN', 'NSC', 'NTRS', 'NOC', 'NRG', 'NUE', 'NVDA', 
'ORLY', 'OXY', 'OMC', 'OKE', 'ORCL', 'OI', 'PCAR', 'PLL', 'PH', 'PDCO', 'PAYX', 'PNR', 'PBCT', 'POM', 
'PEP', 'PKI', 'PRGO', 'PFE', 'PCG', 'PM', 'PSX', 'PNW', 'PXD', 'PBI', 'PCL', 'PNC', 'RL', 'PPG', 'PPL', 
'PX', 'PCP', 'PCLN', 'PFG', 'PG', 'PGR', 'PLD', 'PRU', 'PEG', 'PSA', 'PHM', 'PVH', 'QRVO', 'PWR', 'QCOM', 
'DGX', 'RRC', 'RTN', 'O', 'RHT', 'REGN', 'RF', 'RSG', 'RAI', 'RHI', 'ROK', 'COL', 'ROP', 'ROST', 'RLD', 
'R', 'CRM', 'SNDK', 'SCG', 'SLB', 'SNI', 'STX', 'SEE', 'SRE', 'SHW', 'SPG', 'SWKS', 'SLG', 'SJM', 'SNA', 
'SO', 'LUV', 'SWN', 'SE', 'STJ', 'SWK', 'SPLS', 'SBUX', 'HOT', 'STT', 'SRCL', 'SYK', 'STI', 'SYMC', 'SYY', 
'TROW', 'TGT', 'TEL', 'TE', 'TGNA', 'THC', 'TDC', 'TSO', 'TXN', 'TXT', 'HSY', 'TRV', 'TMO', 'TIF', 'TWX', 
'TWC', 'TJX', 'TMK', 'TSS', 'TSCO', 'RIG', 'TRIP', 'FOXA', 'TSN', 'TYC', 'UA', 'UNP', 'UNH', 'UPS', 'URI', 
'UTX', 'UHS', 'UNM', 'URBN', 'VFC', 'VLO', 'VAR', 'VTR', 'VRSN', 'VZ', 'VRTX', 'VIAB', 'V', 'VNO', 'VMC', 
'WMT', 'WBA', 'DIS', 'WM', 'WAT', 'ANTM', 'WFC', 'WDC', 'WU', 'WY', 'WHR', 'WFM', 'WMB', 'WEC', 'WYN', 
'WYNN', 'XEL', 'XRX', 'XLNX', 'XL', 'XYL', 'YHOO', 'YUM', 'ZBH', 'ZION', 'ZTS']

In [107]:
beginning='2002-01-01'
ending='2017-01-01'
prices = get_pricing(SP500_symbol, start_date=beginning, end_date=ending, frequency='daily', fields='close_price')

In [108]:
# The stocks in the S&P 500 which we want to run our tests on
prices.head()

Unnamed: 0,Equity(24 [AAPL]),Equity(62 [ABT]),Equity(43694 [ABBV]),Equity(25555 [ACN]),Equity(8580 [CB]),Equity(114 [ADBE]),Equity(43399 [ADT]),Equity(23175 [AAP]),Equity(166 [AES]),Equity(168 [AET]),...,Equity(21964 [XEL]),Equity(8354 [XRX]),Equity(8344 [XLNX]),Equity(8340 [XL]),Equity(42023 [XYL]),Equity(14848 [AABA]),Equity(17787 [YUM]),Equity(23047 [ZBH]),Equity(8399 [ZION]),Equity(44060 [ZTS])
2002-01-02 00:00:00+00:00,1.513,17.295,,20.822,29.429,15.866,,15.189,15.114,7.265,...,17.994,8.664,31.524,62.834,,9.32,6.945,28.782,43.323,
2002-01-03 00:00:00+00:00,1.532,17.317,,20.171,29.071,16.494,,14.44,14.898,7.096,...,18.077,8.681,34.658,63.244,,9.545,7.09,28.638,43.935,
2002-01-04 00:00:00+00:00,1.541,17.295,,22.077,28.684,17.895,,13.548,15.773,7.689,...,17.931,8.38,34.689,62.653,,9.45,7.352,28.218,44.295,
2002-01-07 00:00:00+00:00,1.491,17.131,,21.044,28.386,18.029,,13.261,16.061,7.676,...,18.033,8.18,33.867,61.715,,9.875,7.497,28.199,44.278,
2002-01-08 00:00:00+00:00,1.473,17.056,,21.672,28.147,18.264,,13.101,16.016,7.65,...,17.861,8.305,34.097,61.437,,9.765,7.641,29.068,44.219,


We have gotten 495 stocks from the S&P 500 in a data Panda Series. Now I will create 2 custom factors for Relative Value and Price Momentum

In [109]:
def linreg(X,Y):
    # Running the linear regression
    X = sm.add_constant(X)
    model = regression.linear_model.OLS(Y, X).fit()
    a = model.params[0]
    b = model.params[1]
    X = X[:, 1]

    # Return summary of the regression and plot results
    X2 = np.linspace(X.min(), X.max(), 100)
    Y_hat = X2 * b + a
    return [a,b]

In [115]:
class YOY_Slope(CustomFactor):
# Get the YOY slope of prices for the Price Momentum Factor
    inputs = [USEquityPricing.close]
    window_length = 365
    
    def compute(self, today, assets, out, prices):
        time = [i for i in range(window_length-20)]
        out[:] = linreg(prices[:-20], time)[1]

class Above_Mid_Percentage(CustomFactor):
# Calculate the Percetange Above the 260-Day Low the current Price is
    inputs = [USEquityPricing.close]
    window_length = 260

    def compute(self, today, assets, out, prices):
# (Current price - Minimum Price) * 100 / Minimum Price
        out[:] = (prices[-1] - min(prices)) * 100 / min(prices)
        
class Weeks_39_Return(CustomFactor):
# Calculate Return in the previous 39 weeks
    inputs = [USEquityPricing.close]
    window_length = 273
# 273 Days in 39 weeks

    def compute(self, today, assets, out, prices):
# Return the percentage change in the past 39 weeks
        monthly_prices = pricing.resample('M').mean()
        returns = (monthly_prices[-1] - monthly_prices[-10] * 100) / monthly_prices[-10] 
        out[:] = returns

### ^ Not sure how to put a 20-day lag into the Price Momentum Factors. Getting Degrees of Freedom <= 0 errors and chunksize errors and window_safe error for USEquityPricing

In [116]:
# Create a test pipeline
pipe_test = Pipeline()

# Add Price Momentum Factor to the pipeline
pipe_test.add(YOY_Slope(), 'YOY_Slope')
pipe_test.add(Above_Mid_Percentage(), 'Above_Mid_Percentage')
pipe_test.add(Weeks_39_Return(), 'Weeks_39_Return')

window_length = 365
# Run the test Pipeline for the Price Momentum Factors
run_pipeline(pipe_test, start_date='2016-01-01', end_date='2017-01-01').head()

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
class Price_to_TTM_Sales(CustomFactor):
    inputs = [morningstar.valuation_ratios.ps_ratio]
    window_length = 1
    
    def compute(self, today, assets, out, ps):
        out[:] = -ps[-1]
        
class Price_to_TTM_Cashflows(CustomFactor):
    inputs = [morningstar.valuation_ratios.pcf_ratio]
    window_length = 1
    
    def compute(self, today, assets, out, pcf):
        out[:] = -pcf[-1] 
        
class Price_to_Earnings(CustomFactor):
    inputs = [morningstar.earnings_report.diluted_eps, morningstar.earnings_report.diluted_average_shares]
    window_length = 1
    
    def compute(self, today, assets, out, deps, das):
        out[:] = deps[-1] * das

In [117]:
# Create a test pipeline
pipe_test = Pipeline()

# Add sub Custom Factors to the pipeline
pipe_test.add(Price_to_TTM_Sales(), 'Trailing_Sales')
pipe_test.add(Price_to_Earnings(), 'Trailing_Earnings')
pipe_test.add(Price_to_TTM_Cashflows(), 'Trailing_Cashflows')

# Run the test Pipeline for the Relative Value Factors
run_pipeline(pipe_test, start_date='2016-01-01', end_date='2017-01-01').head()

Unnamed: 0,Unnamed: 1,Trailing_Cashflows,Trailing_Earnings,Trailing_Sales
2016-01-04 00:00:00+00:00,Equity(2 [ARNC]),-5.7024,25880000.0,-0.5241
2016-01-04 00:00:00+00:00,Equity(21 [AAME]),-20.6389,205890.0,-0.6005
2016-01-04 00:00:00+00:00,Equity(24 [AAPL]),-7.6503,11124000000.0,-2.6601
2016-01-04 00:00:00+00:00,Equity(25 [ARNC_PR]),,,
2016-01-04 00:00:00+00:00,Equity(31 [ABAX]),-45.1988,7773420.0,-6.1006


In [98]:
# limit effect of outliers
def filter_fn(x):
    if x <= -10:
        x = -10.0
    elif x >= 10:
        x = 10.0
    return x   

# standardize using mean and sd of S&P500
def standard_frame_compute(df):
    
    # basic clean of dataset to remove infinite values
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna()
    
    # need standardization params from synthetic S&P500
    df_SPY = df.sort(columns='SPY Proxy', ascending=False)

    # create separate dataframe for SPY
    # to store standardization values
    df_SPY = df_SPY.head(500)
    
    # get dataframes into numpy array
    df_SPY = df_SPY.as_matrix()
    
    # store index values
    index = df.index.values
    df = df.as_matrix()
    
    df_standard = np.empty(df.shape[0])
    
    
    for col_SPY, col_full in zip(df_SPY.T, df.T):
        
        # summary stats for S&P500
        mu = np.mean(col_SPY)
        sigma = np.std(col_SPY)
        col_standard = np.array(((col_full - mu) / sigma)) 

        # create vectorized function (lambda equivalent)
        fltr = np.vectorize(filter_fn)
        col_standard = (fltr(col_standard))
        
        # make range between -10 and 10
        col_standard = (col_standard / df.shape[1])
        
        # attach calculated values as new row in df_standard
        df_standard = np.vstack((df_standard, col_standard))
     
    # get rid of first entry (empty scores)
    df_standard = np.delete(df_standard,0,0)
    
    return (df_standard, index)

# Sum up and sort data
def composite_score(df, index):

    # sum up transformed data
    df_composite = df.sum(axis=0)
    
    # put into a pandas dataframe and connect numbers
    # to equities via reindexing
    df_composite = pd.Series(data=df_composite,index=index)
    
    # sort descending
    df_composite.sort(ascending=False)

    return df_composite

# compute the standardized values
results_standard, index = standard_frame_compute(results)

# aggregate the scores
ranked_scores = composite_score(results_standard, index)

NameError: name 'results' is not defined