## Preparing a Factor Ranking Model Using Zipline Pipelines

In [1]:
import os
import warnings

In [2]:
import numpy as np
import pandas as pd
from IPython.display import Markdown, display
from zipline.data import bundles
from zipline.data.bundles.core import load
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.engine import SimplePipelineEngine
from zipline.pipeline.factors import (
    VWAP,
    AnnualizedVolatility,
    AverageDollarVolume,
    BollingerBands,
    CustomFactor,
    DailyReturns,
    ExponentialWeightedMovingAverage,
    MaxDrawdown,
    PercentChange,
    Returns,
    SimpleMovingAverage,
    WeightedAverageValue,
)
from zipline.pipeline.loaders import USEquityPricingLoader

In [3]:
warnings.filterwarnings("ignore")

### Option 1: Use the built-in bundle with free data

This option uses the built-in data bundle provided by Zipline. It then acquires free US equities data that extend through 2018.

In [4]:
# The API key must come from the environment, never from the notebook itself:
# a key pasted into a cell is committed and shared with every copy of the file.
# Export QUANDL_API_KEY in your shell (or load it from a local, untracked .env
# file) before starting Jupyter.
if not os.environ.get("QUANDL_API_KEY"):
    raise RuntimeError(
        "QUANDL_API_KEY is not set. Export it in your environment before "
        "ingesting the quandl bundle."
    )

# Name of the built-in bundle; later cells read `bundle` to load the data.
bundle = "quandl"

# Download and store the bundle data locally (slow; needs network access).
bundles.ingest(bundle)

Couldn't compute ratio for dividend sid=67, ex_date=2017-11-09, amount=0.620
Couldn't compute ratio for dividend sid=93, ex_date=2017-11-09, amount=0.240
Couldn't compute ratio for dividend sid=161, ex_date=2017-11-09, amount=0.110
Couldn't compute ratio for dividend sid=283, ex_date=2017-11-09, amount=0.415
Couldn't compute ratio for dividend sid=298, ex_date=2017-11-09, amount=1.420
Couldn't compute ratio for dividend sid=318, ex_date=2017-11-09, amount=0.330
Couldn't compute ratio for dividend sid=434, ex_date=2017-11-09, amount=0.110
Couldn't compute ratio for dividend sid=516, ex_date=1996-05-30, amount=0.310
Couldn't compute ratio for dividend sid=524, ex_date=2017-11-09, amount=0.050
Couldn't compute ratio for dividend sid=556, ex_date=2017-11-09, amount=0.075
Couldn't compute ratio for dividend sid=578, ex_date=2017-11-09, amount=0.160
Couldn't compute ratio for dividend sid=605, ex_date=2017-11-09, amount=0.040
Couldn't compute ratio for dividend sid=666, ex_date=1990-03-26, a

### Option 2: Use the custom bundle with premium data

This option uses the custom bundle with premium data. Follow the steps here: https://pyquantnews.com/ingest-premium-market-data-with-zipline-reloaded/ before using.

In [None]:
# `load_extensions` was previously called without being imported (the import
# line was commented out), so this cell failed with NameError.
from zipline.utils.run_algo import load_extensions

# The API key must come from the environment, never from the notebook itself:
# a key pasted into a cell is committed and shared with every copy of the file.
if not os.environ.get("DATALINK_API_KEY"):
    raise RuntimeError(
        "DATALINK_API_KEY is not set. Export it in your environment before "
        "ingesting the quotemedia bundle."
    )

# Name of the custom bundle; later cells read `bundle` to load the data.
bundle = "quotemedia"

# Register the custom bundle before ingesting.
# NOTE(review): presumably the bundle is registered from the user's Zipline
# extension file as described in the linked setup guide — confirm.
load_extensions(
    default=True,  # Load default extensions
    extensions=[],  # List of additional extensions
    strict=True,  # Raise errors if extensions fail to load
    environ=os.environ,  # Environment variables
)

# Download and store the bundle data locally (slow; needs network access).
bundles.ingest(bundle)

Load the previously ingested data from your selected bundle.

In [5]:
# Open the bundle chosen above, using the current process environment and no
# explicit timestamp.
bundle_data = load(name=bundle, environ=os.environ, timestamp=None)

Create a USEquityPricingLoader

In [6]:
# Pricing loader backed by the bundle's daily bars and its adjustment data.
pipeline_loader = USEquityPricingLoader(
    bundle_data.equity_daily_bar_reader,  # daily bar reader from the bundle
    bundle_data.adjustment_reader,  # adjustment reader from the bundle
    fx_reader=None,  # no FX reader is supplied
)

Initialize a SimplePipelineEngine

In [7]:
def _single_loader(col):
    """Return the one pricing loader regardless of which column is requested."""
    return pipeline_loader


# Engine that resolves assets through the bundle's asset finder and serves
# every column from the same loader.
engine = SimplePipelineEngine(
    get_loader=_single_loader,
    asset_finder=bundle_data.asset_finder,
)

Define a custom momentum factor

In [8]:
class MomentumFactor(CustomFactor):
    """Volatility-scaled momentum score.

    The score is the return from the first row of the price window to the row
    21 positions from the end, minus the return over the final 21 rows, divided
    by the standard deviation of the ``returns`` input along axis 0.

    NOTE(review): assumes Zipline passes each input as an array with one row
    per trading day (oldest first) and one column per asset — confirm against
    the CustomFactor documentation.
    """

    # Closing prices plus a 126-period Returns term, each supplied over the window.
    inputs = [USEquityPricing.close, Returns(window_length=126)]
    # Number of rows handed to compute(); prices[-252] is therefore the first row.
    window_length = 252

    def compute(self, today, assets, out, prices, returns):
        """Write the momentum score for each asset into ``out`` in place."""
        window_start = prices[-252]
        month_back = prices[-21]
        latest = prices[-1]

        long_leg = (month_back - window_start) / window_start
        recent_leg = (latest - month_back) / month_back

        # nanstd skips NaN entries when measuring dispersion of the returns input.
        out[:] = (long_leg - recent_leg) / np.nanstd(returns, axis=0)

Define a function to create a pipeline

In [9]:
def make_pipeline(
    n_longs: int = 50,
    n_shorts: int = 50,
    universe_size: int = 100,
    dollar_volume_window: int = 30,
):
    """Build the momentum ranking pipeline.

    All parameters default to the values that were previously hard-coded, so
    calling ``make_pipeline()`` with no arguments is unchanged.

    Args:
        n_longs: Number of highest-momentum assets flagged in ``longs``.
        n_shorts: Number of lowest-momentum assets flagged in ``shorts``.
        universe_size: Number of assets, by average dollar volume, kept by the
            screen.
        dollar_volume_window: Window length passed to ``AverageDollarVolume``.

    Returns:
        A ``Pipeline`` with columns ``factor``, ``longs``, ``shorts`` and
        ``rank``, screened to the top ``universe_size`` assets by average
        dollar volume.
    """
    momentum = MomentumFactor()
    dollar_volume = AverageDollarVolume(window_length=dollar_volume_window)

    # NOTE(review): top/bottom/rank are called without a mask, so they appear
    # to be computed before the screen is applied — confirm this is intended.
    return Pipeline(
        columns={
            "factor": momentum,
            "longs": momentum.top(n_longs),
            "shorts": momentum.bottom(n_shorts),
            "rank": momentum.rank(),
        },
        screen=dollar_volume.top(universe_size),
    )

Run the pipeline

In [None]:
# Evaluate the momentum pipeline between the two dates below.
start_date = pd.to_datetime("2012-01-04")
end_date = pd.to_datetime("2012-03-01")
results = engine.run_pipeline(make_pipeline(), start_date, end_date)

Clean and display the results

In [14]:
# Drop rows with no factor value, name the two index levels, then order each
# date's rows by ascending factor.
results = (
    results.dropna(subset="factor")
    .rename_axis(index=["date", "symbol"])
    .sort_values(by=["date", "factor"])
)

In [15]:
# Render the cleaned, sorted results table in the notebook output.
display(results)

Unnamed: 0_level_0,Unnamed: 1_level_0,factor,longs,shorts,rank
date,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-01-04,Equity(300 [BAC]),-2.522045,False,False,165.0
2012-01-04,Equity(1264 [GS]),-2.215784,False,False,220.0
2012-01-04,Equity(1888 [MS]),-2.204802,False,False,225.0
2012-01-04,Equity(1894 [MSFT]),-1.949654,False,False,295.0
2012-01-04,Equity(457 [C]),-1.830819,False,False,345.0
...,...,...,...,...,...
2012-03-01,Equity(3105 [WMT]),3.409414,False,False,2607.0
2012-03-01,Equity(1690 [LLY]),3.809608,False,False,2642.0
2012-03-01,Equity(399 [BMY]),4.689588,True,False,2685.0
2012-03-01,Equity(1770 [MCD]),4.816880,True,False,2691.0


Define a function to create a pipeline with a VWAP factor

In [17]:
def make_pipeline_vwap(
    vwap_window: int = 5,
    n_longs: int = 50,
    n_shorts: int = 50,
    universe_size: int = 100,
    dollar_volume_window: int = 30,
):
    """Build a ranking pipeline whose factor is VWAP.

    All parameters default to the values that were previously hard-coded, so
    calling ``make_pipeline_vwap()`` with no arguments is unchanged.

    Args:
        vwap_window: Window length passed to ``VWAP``.
        n_longs: Number of highest-VWAP assets flagged in ``longs``.
        n_shorts: Number of lowest-VWAP assets flagged in ``shorts``.
        universe_size: Number of assets, by average dollar volume, kept by the
            screen.
        dollar_volume_window: Window length passed to ``AverageDollarVolume``.

    Returns:
        A ``Pipeline`` with columns ``factor``, ``longs``, ``shorts`` and
        ``rank``, screened to the top ``universe_size`` assets by average
        dollar volume.
    """
    vwap = VWAP(window_length=vwap_window)
    dollar_volume = AverageDollarVolume(window_length=dollar_volume_window)

    # NOTE(review): top/bottom/rank are called without a mask, so they appear
    # to be computed before the screen is applied — confirm this is intended.
    return Pipeline(
        columns={
            "factor": vwap,
            "longs": vwap.top(n_longs),
            "shorts": vwap.bottom(n_shorts),
            "rank": vwap.rank(),
        },
        screen=dollar_volume.top(universe_size),
    )

Run the pipeline

In [None]:
# Evaluate the VWAP pipeline between the two dates below.
start_date = pd.to_datetime("2012-01-04")
end_date = pd.to_datetime("2012-03-01")
results = engine.run_pipeline(make_pipeline_vwap(), start_date, end_date)

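Clean and display the results. VWAP is a price level, so the `factor` column here should be positive; if the saved table below shows negative values, it is stale output left over from the momentum run — re-run the cells above to refresh it.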

In [11]:
# Drop rows with no factor value, name the two index levels, then order each
# date's rows by ascending factor.
results = (
    results.dropna(subset="factor")
    .rename_axis(index=["date", "symbol"])
    .sort_values(by=["date", "factor"])
)

In [12]:
# Render the cleaned, sorted results table in the notebook output.
display(results)

Unnamed: 0_level_0,Unnamed: 1_level_0,factor,longs,shorts,rank
date,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-01-04,Equity(300 [BAC]),-2.522045,False,False,165.0
2012-01-04,Equity(1264 [GS]),-2.215784,False,False,220.0
2012-01-04,Equity(1888 [MS]),-2.204802,False,False,225.0
2012-01-04,Equity(1894 [MSFT]),-1.949654,False,False,295.0
2012-01-04,Equity(457 [C]),-1.830819,False,False,345.0
...,...,...,...,...,...
2012-03-01,Equity(3105 [WMT]),3.409414,False,False,2607.0
2012-03-01,Equity(1690 [LLY]),3.809608,False,False,2642.0
2012-03-01,Equity(399 [BMY]),4.689588,True,False,2685.0
2012-03-01,Equity(1770 [MCD]),4.816880,True,False,2691.0


**Jason Strimpel** is the founder of <a href='https://pyquantnews.com/'>PyQuant News</a> and co-founder of <a href='https://www.tradeblotter.io/'>Trade Blotter</a>. His career in algorithmic trading spans 20+ years. He previously traded for a Chicago-based hedge fund, was a risk manager at JPMorgan, and managed production risk technology for an energy derivatives trading firm in London. In Singapore, he served as APAC CIO for an agricultural trading firm and built the data science team for a global metals trading firm. Jason holds degrees in Finance and Economics and a Master's in Quantitative Finance from the Illinois Institute of Technology. His career spans America, Europe, and Asia. He shares his expertise through the <a href='https://pyquantnews.com/subscribe-to-the-pyquant-newsletter/'>PyQuant Newsletter</a> and social media, and has taught over 1,000 students algorithmic trading with Python in his popular course **<a href='https://gettingstartedwithpythonforquantfinance.com/'>Getting Started With Python for Quant Finance</a>**. All code is for educational purposes only. Nothing provided here is financial advice. Use at your own risk.