In [1]:
import os
import numpy as np
import pandas as pd
from zipline.data.bundles.core import load
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.engine import SimplePipelineEngine
from zipline.pipeline.factors import AverageDollarVolume, CustomFactor, Returns
from zipline.pipeline.loaders import USEquityPricingLoader

  _warn(("h5py is running against HDF5 {0} when it was built against {1}, "


In [3]:
# Load the previously ingested "quandl" bundle.
#
# The API key is intentionally NOT set here. The original cell hardcoded a
# real key and stored it under the misspelled name "QUANDL_APL_KEY", whereas
# zipline's Quandl bundle reads "QUANDL_API_KEY" - so the value was never
# picked up under the intended name. Export QUANDL_API_KEY in the shell that
# launches Jupyter (it is needed for `zipline ingest -b quandl`) instead of
# committing the secret to the notebook.
# NOTE(review): the key that was committed here should be treated as leaked
# and rotated.
bundle_data = load("quandl", os.environ, None)

In [7]:
# US equity pricing loader, fed by the bundle's daily-bar and adjustment
# readers; no FX reader is supplied.
pipeline_loader = USEquityPricingLoader(
    bundle_data.equity_daily_bar_reader, bundle_data.adjustment_reader, fx_reader=None
)

In [9]:
# Build the pipeline engine. Whatever column is requested, the same pricing
# loader is handed back; assets are resolved through the bundle's asset finder.
engine = SimplePipelineEngine(
    asset_finder=bundle_data.asset_finder,
    get_loader=lambda column: pipeline_loader,
)

In [11]:
# Custom factor producing a volatility-scaled measure of price momentum.
class MomentumFactor(CustomFactor):
    """Price momentum scaled by the dispersion of returns.

    Inputs are the close price and a 126-period ``Returns`` factor, both
    delivered over a 252-row window.
    """

    inputs = [USEquityPricing.close, Returns(window_length=126)]
    window_length = 252

    def compute(self, today, assets, out, prices, returns):
        """Fill ``out`` with the momentum score for every asset column.

        The score is the relative price change from 252 rows back to 21 rows
        back, minus the relative change from 21 rows back to the latest row,
        divided by the NaN-ignoring standard deviation of ``returns`` taken
        along axis 0.
        """
        start_price = prices[-252]
        month_ago_price = prices[-21]
        latest_price = prices[-1]

        long_leg = (month_ago_price - start_price) / start_price
        recent_leg = (latest_price - month_ago_price) / month_ago_price

        out[:] = (long_leg - recent_leg) / np.nanstd(returns, axis=0)

In [13]:
# Factory for the pipeline: instantiates the custom momentum factor and
# screens on average dollar volume over the last 30 days.
def make_pipeline():
    """Return a Pipeline exposing the momentum factor and derived columns.

    Columns:
        factor: the raw ``MomentumFactor`` value.
        longs:  filter for the 50 highest factor values.
        shorts: filter for the 50 lowest factor values.
        rank:   rank of the factor value.

    The screen keeps the 100 assets with the highest 30-period average
    dollar volume. No mask is passed to ``top``/``bottom``/``rank``, so those
    terms are not restricted to the screened assets.
    """
    factor = MomentumFactor()
    columns = {
        "factor": factor,
        "longs": factor.top(50),
        "shorts": factor.bottom(50),
        "rank": factor.rank(),
    }
    liquidity_screen = AverageDollarVolume(window_length=30).top(100)
    return Pipeline(columns=columns, screen=liquidity_screen)

In [15]:
# Execute the pipeline from 2012-01-04 to 2012-03-01.
results = engine.run_pipeline(
    make_pipeline(), pd.Timestamp("2012-01-04"), pd.Timestamp("2012-03-01")
)

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


In [17]:
# Clean up the pipeline output in one chain (no in-place mutation, so the
# cell simply rebinds `results`):
#   1. drop records with no factor data (`subset` is given as a list, which
#      every pandas version accepts; a bare string only works on newer ones),
#   2. name the MultiIndex levels "date" and "symbol",
#   3. sort first by date and then by factor value.
results = (
    results
    .dropna(subset=["factor"])
    .rename_axis(["date", "symbol"])
    .sort_values(by=["date", "factor"])
)

In [19]:
# Render the `results` DataFrame using the notebook's rich display.
display(results)

Unnamed: 0_level_0,Unnamed: 1_level_0,factor,longs,shorts,rank
date,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-01-04,Equity(300 [BAC]),-2.522045,False,False,165.0
2012-01-04,Equity(1264 [GS]),-2.215784,False,False,220.0
2012-01-04,Equity(1888 [MS]),-2.204802,False,False,225.0
2012-01-04,Equity(1894 [MSFT]),-1.949654,False,False,295.0
2012-01-04,Equity(457 [C]),-1.830819,False,False,345.0
...,...,...,...,...,...
2012-03-01,Equity(3105 [WMT]),3.409414,False,False,2607.0
2012-03-01,Equity(1690 [LLY]),3.809608,False,False,2642.0
2012-03-01,Equity(399 [BMY]),4.689588,True,False,2685.0
2012-03-01,Equity(1770 [MCD]),4.816880,True,False,2691.0
