# Zipline-Broker Pipeline and Fundamentals Examples Notebook
### Examples of using zipline-broker pipeline with fundamentals, as an asset screener and a factor constructor

#### Date: 2021-05-20

In [1]:
# Modified from :
# https://zipline-trader.readthedocs.io/en/1.5.0/notebooks/SimplePipeline.html
import os
import pandas as pd

# Date range for this notebook. The upper-case constants are the values to
# edit; the lower-case aliases are the names passed to zipline calls.
START_DATE = pd.Timestamp('2019-01-02', tz='utc')
END_DATE = pd.Timestamp('2021-05-19', tz='utc')
start_date = START_DATE
end_date = END_DATE

# Uncomment to point ZIPLINE_ROOT at a `.zipline` folder in the current working directory.
#os.environ['ZIPLINE_ROOT'] = os.path.join(os.getcwd(), '.zipline')
# Sanity check: raises KeyError if ZIPLINE_ROOT is unset. The listing itself is
# discarded because this is not the last expression of the cell.
os.listdir(os.environ['ZIPLINE_ROOT'])
import zipline
from zipline.data import bundles

# Load the price bundle by name; the code below reads its asset finder,
# daily bar reader and adjustment reader.
bundle_name = 'sharadar-prices'
bundle_data = bundles.load(bundle_name)
from zipline.pipeline.loaders import USEquityPricingLoader
from zipline.utils.calendars import get_calendar
from zipline.pipeline.data import USEquityPricing
from zipline.data.data_portal import DataPortal

# Set the data loader: built from the bundle's daily bar reader and adjustment
# reader, and returned by choose_loader below.
pricing_loader = USEquityPricingLoader(bundle_data.equity_daily_bar_reader, bundle_data.adjustment_reader)
# Alternative constructor kept for reference (not executed):
#new pricing_loader = USEquityPricingLoader.without_fx(bundle_data.equity_daily_bar_reader, bundle_data.adjustment_reader)


# Define the function for the get_loader parameter
def choose_loader(column):
    """Return the loader for a ``USEquityPricing`` column.

    The previous check, ``column not in (USEquityPricing.columns or
    Fundamentals.columns)``, only ever tested ``USEquityPricing.columns``:
    ``or`` returns its first truthy operand, so the second collection was
    never consulted (and ``Fundamentals`` is not imported in this cell).
    The test is now written as what it actually does, which also matches the
    fact that ``pricing_loader`` is the only loader this function can return.

    Parameters
    ----------
    column
        Pipeline column the engine needs a loader for.

    Returns
    -------
    The module-level ``pricing_loader``.

    Raises
    ------
    Exception
        If ``column`` is not one of ``USEquityPricing.columns``.
    """
    if column not in USEquityPricing.columns:
        raise Exception('Column not in USEquityPricing')
    return pricing_loader

# NYSE trading calendar, used by the data portal below.
trading_calendar = get_calendar('NYSE')

# Data portal over the bundle's daily bars and adjustments, with the
# notebook's start date as its first trading day.
data_portal = DataPortal(
    bundle_data.asset_finder,
    trading_calendar=trading_calendar,
    first_trading_day=start_date,
    equity_daily_reader=bundle_data.equity_daily_bar_reader,
    adjustment_reader=bundle_data.adjustment_reader,
)

  from ._conv import register_converters as _register_converters
You can access NaTType as type(pandas.NaT)
  @convert.register((pd.Timestamp, pd.Timedelta), (pd.tslib.NaTType, type(None)))


In [2]:
from zipline.pipeline.loaders.blaze import BlazeLoader, from_blaze
from zipline.utils.run_algo import load_extensions
import alphatools.fundamentals as fundies
from alphatools.fundamentals import Fundamentals

# Instantiate the fundamentals dataset; later cells build pipeline terms from
# its fields (e.g. fd.marketcap.latest).
fd=Fundamentals()


['2021-04-10T03;06;58', '2021-04-16T01;50;05', '2021-04-20T02;20;19', '2021-04-21T01;36;08', '2021-04-22T02;51;18', '2021-04-30T10;21;43', '2021-05-04T02;22;23', '2021-05-06T03;07;58', '2021-05-10T03;38;13', '2021-05-11T10;37;27', '2021-05-17T03;52;22']
cur_folder= /home/hca-ws2004/hca/alphatools/alphatools/fundamentals
enac_fundamentals_pkl= /home/hca-ws2004/zipline-broker/data/fundem-sharadar-sf1/2021-05-17T03;52;22/quandal_sharadar_sf1.pkl
Non-sid Fund Count=False    102588
True       3703
Name: sid, dtype: int64
Non-sid Fund TotalCount=3703

SF1 Table needs to extend sessions from:max datekey:2021-05-14 tp  current date:2021-05-21 20:05:53.257703 ExtendRange:DatetimeIndex(['2021-05-14', '2021-05-17', '2021-05-18', '2021-05-19',
               '2021-05-20', '2021-05-21'],
              dtype='datetime64[ns, UTC]', freq='C')

Adding fundamental:revenue
Adding fundamental:opex
Adding fundamental:netinc
Adding fundamental:equity
Adding fundamental:debt
Adding fundamental:ebitda
Adding 

In [3]:
#fundies.fundamentals.df_loaders

In [3]:

# Load extensions.py so that custom bundles registered there are accessible.
load_extensions(default=True, extensions=[], strict=True, environ=os.environ)

# Set up pricing data access.
# NOTE: trading_calendar and bundle_data are also assigned in the first cell;
# this cell rebinds them using the same calendar name and bundle name.
trading_calendar = get_calendar('NYSE')
bundle = 'sharadar-prices' #'quandl'
bundle_data = bundles.load(bundle)

# Loader lookup exposed by alphatools.fundamentals; my_dispatcher indexes it by column.
loaders = fundies.fundamentals.df_loaders

# Create an empty BlazeLoader; choose_loader returns it as the last-resort fallback.
blaze_loader = BlazeLoader()

def my_dispatcher(column):
    """Return ``loaders[column]``; a failed lookup propagates to the caller."""
    loader = loaders[column]
    return loader

# Loader that choose_loader returns for USEquityPricing columns.
pipeline_loader = USEquityPricingLoader(bundle_data.equity_daily_bar_reader, bundle_data.adjustment_reader)

def choose_loader(column):
    """Pick the pipeline loader responsible for ``column``.

    This definition replaces the ``choose_loader`` from the first cell and is
    the one handed to ``SimplePipelineEngine``.

    Resolution order:
      1. ``USEquityPricing`` columns -> ``pipeline_loader``.
      2. Columns known to ``my_dispatcher`` -> the loader it returns.
      3. Anything else -> ``blaze_loader``.

    Only the "column not registered" case (``KeyError``) triggers the
    fallback. The previous bare ``except: pass`` also hid unrelated failures
    (including ``KeyboardInterrupt``) behind the fallback loader.

    Parameters
    ----------
    column
        Pipeline column the engine needs a loader for.

    Returns
    -------
    The loader object selected for ``column``.
    """
    if column in USEquityPricing.columns:
        return pipeline_loader
    try:
        return my_dispatcher(column)
    except KeyError:
        # No loader registered for this column: fall back to the Blaze loader.
        return blaze_loader


extension: hca_root_path = /home/hca-ws2004/hca
extension:TODAY_STR = 2021-05-21
extension:TWO_YR_AGO_STR = 2019-05-21
extension:  start_date=2019-05-21 end_date = 2021-05-21


In [4]:
from zipline.utils.calendars import get_calendar
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.engine import SimplePipelineEngine

# Create a Pipeline engine that resolves loaders through choose_loader and
# uses every session of the trading calendar.
engine = SimplePipelineEngine(
    get_loader=choose_loader,
    asset_finder=bundle_data.asset_finder,
    calendar=trading_calendar.all_sessions,
)

In [5]:
#from zipline.pipeline.domain import US_EQUITIES
from zipline.pipeline.factors import AverageDollarVolume, SimpleMovingAverage
from zipline.pipeline import Pipeline

# Screen: assets that are both in the top 100 by 30-day average dollar volume
# and in the top 100 by latest market cap (despite the 5000/3000 in the
# variable names, both filters keep 100 assets).
adv5000 = AverageDollarVolume(window_length=30).top(100)
mcap3000 = fd.marketcap.latest.top(100)
universe = adv5000 & mcap3000

# Pipeline restricted to that screen, then populated with output columns.
pipeline = Pipeline(screen=universe)
#new pipeline = Pipeline(screen = universe, domain=US_EQUITIES)
pipeline.add(AverageDollarVolume(window_length=5), "DV")
pipeline.add(fd.marketcap.latest, "MC")

# Each remaining fundamentals field is added under its own name.
for fundamental in (
    "liabilities",
    "liabilitiesnc",
    "assets",
    "equity",
    "ev",
    "ebt",
    "ebit",
    "cashneq",
):
    pipeline.add(getattr(fd, fundamental).latest, fundamental)

In [7]:
#def make_pipeline(): 
#    mean_close_10 = SimpleMovingAverage(inputs=[USEquityPricing.close],window_length=10) 
#    mean_close_30 = SimpleMovingAverage(inputs=[USEquityPricing.close],window_length=30) 
#    
#    latest_close = USEquityPricing.close.latest  
#    
#    perc_diff = (mean_close_10 - mean_close_30) / mean_close_30  
#    
#    return Pipeline(columns={ 'Percent Difference':perc_diff, '30 Day Mean Close':mean_close_30, 'Latest Close':latest_close })

In [6]:
# Set the start and end dates
start_date = START_DATE
end_date = END_DATE

# Run our pipeline for the given start and end dates
pipeline_output = engine.run_pipeline(pipeline, start_date, end_date)

# Preview only: the full result has one row per (date, asset) across the whole
# date range, so showing it in full bloats the saved notebook. The complete
# frame stays available as `pipeline_output`.
pipeline_output.head(20)


FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.liabilitiesnc::float64] CountNonNan:1569451
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.assets::float64] CountNonNan:1952498
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.equity::float64] CountNonNan:1952498
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.ev::float64] CountNonNan:1950340
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.ebt::float64] CountNonNan:1938049
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.marketcap::float64] CountNonNan:1950340
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.cashneq::float64] CountNonNan:1952498
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.ebit::float64] CountNonNan:1938049
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.liabilities::float64] CountNonNan:1952498


Unnamed: 0,Unnamed: 1,DV,MC,assets,cashneq,ebit,ebt,equity,ev,liabilities,liabilitiesnc
2019-01-02 00:00:00+00:00,Equity(29 [AAPL]),6.832243e+09,9.566248e+11,3.657250e+11,2.591300e+10,1.642100e+10,1.642100e+10,1.071470e+11,1.045195e+12,2.585780e+11,1.417120e+11
2019-01-02 00:00:00+00:00,Equity(44 [ABBV]),4.758079e+08,1.308067e+11,6.616400e+10,8.015000e+09,3.063000e+09,2.761000e+09,-2.921000e+09,1.632997e+11,6.908500e+10,5.369800e+10
2019-01-02 00:00:00+00:00,Equity(80 [ABT]),4.128392e+08,1.210816e+11,7.163700e+10,7.369000e+09,9.320000e+08,7.290000e+08,3.089800e+10,1.372706e+11,4.073900e+10,2.796300e+10
2019-01-02 00:00:00+00:00,Equity(153 [ACN]),3.032549e+08,9.559853e+10,2.670641e+10,4.363790e+09,1.598385e+09,1.593880e+09,1.267886e+10,9.125937e+10,1.365084e+10,3.474509e+09
2019-01-02 00:00:00+00:00,Equity(1298 [BA]),1.211436e+09,2.611558e+11,2.338833e+12,4.409600e+11,8.994000e+09,8.994000e+09,2.621580e+11,2.549308e+11,2.076675e+12,
2019-01-02 00:00:00+00:00,Equity(11682 [UNH]),8.454515e+08,9.199735e+10,4.668900e+10,4.097000e+09,2.044000e+09,1.889000e+09,3.098000e+09,1.112013e+11,4.356300e+10,3.134100e+10
2019-01-03 00:00:00+00:00,Equity(29 [AAPL]),6.908547e+09,9.566248e+11,3.657250e+11,2.591300e+10,1.642100e+10,1.642100e+10,1.071470e+11,1.045195e+12,2.585780e+11,1.417120e+11
2019-01-03 00:00:00+00:00,Equity(44 [ABBV]),5.086388e+08,1.308067e+11,6.616400e+10,8.015000e+09,3.063000e+09,2.761000e+09,-2.921000e+09,1.632997e+11,6.908500e+10,5.369800e+10
2019-01-03 00:00:00+00:00,Equity(80 [ABT]),4.737423e+08,1.210816e+11,7.163700e+10,7.369000e+09,9.320000e+08,7.290000e+08,3.089800e+10,1.372706e+11,4.073900e+10,2.796300e+10
2019-01-03 00:00:00+00:00,Equity(153 [ACN]),2.996555e+08,9.559853e+10,2.670641e+10,4.363790e+09,1.598385e+09,1.593880e+09,1.267886e+10,9.125937e+10,1.365084e+10,3.474509e+09


In [9]:
from alphatools.fundamentals import Fundamentals
from zipline.pipeline.data import USEquityPricing as USEP
from zipline.pipeline.factors import AverageDollarVolume, SimpleMovingAverage, CustomFactor
from zipline.pipeline import Pipeline

import numpy as np

# Number of assets kept by the final `ltd_to_eq_rank.top(...)` screen in make_pipeline.
NUM_TOP_INDEBTED = 20

# Average Dollar Volume without nanmean, so that recent IPOs are truly removed
class ADV_adj(CustomFactor):
    """Mean of ``close * volume`` over the window, counting NaN closes as 0.

    A plain mean is used instead of a NaN-skipping one, so days without a
    close price pull the average down rather than being ignored.
    """
    inputs = [USEP.close, USEP.volume]
    window_length = 252

    def compute(self, today, assets, out, close, volume):
        """Write the per-asset mean dollar volume for the window into ``out``.

        ``close`` and ``volume`` are the windowed input arrays; the mean is
        taken along axis 0.
        """
        # Build a NaN-free copy instead of overwriting entries of the
        # caller-supplied `close` array in place.
        filled_close = np.where(np.isnan(close), 0.0, close)
        out[:] = np.mean(filled_close * volume, axis=0)


def universe_filters(min_dollar_volume=750000, min_adj_dollar_volume=25000):
    """Build the base liquidity filter used as the starting universe.

    Parameters
    ----------
    min_dollar_volume : number, optional
        Exclusive lower bound on the 252-day ``AverageDollarVolume``.
        Defaults to 750000, the value previously hard-coded here.
    min_adj_dollar_volume : number, optional
        Exclusive lower bound on ``ADV_adj`` (which counts NaN closes as zero
        rather than skipping them). Defaults to 25000, the value previously
        hard-coded here.

    Returns
    -------
    The conjunction of the two threshold filters.
    """
    # Note: this is a *dollar*-volume threshold (AverageDollarVolume), not a
    # share-volume one.
    high_volume = AverageDollarVolume(window_length=252) > min_dollar_volume

    # Highly liquid assets only. ADV_adj gives days without a close a value of
    # 0 instead of skipping them, which penalises assets with missing history
    # (e.g. IPOs within the window).
    liq_f = ADV_adj() > min_adj_dollar_volume

    # The Morningstar-based checks from the screen this was adapted from are
    # NOT applied here: primary share, market-cap floor, depositary receipts,
    # common stock, OTC exchange ids, '.WI' (when-issued) symbols, LP names /
    # limited_partnership balance-sheet flag, and domicile.
    universe_filter = high_volume & liq_f

    return universe_filter

def make_pipeline():
    """Build the pipeline that selects the most indebted liquid large caps.

    Screen construction, in order:
      1. Start from ``universe_filters()``.
      2. Keep assets in the 80th-100th percentile of 30-day average dollar
         volume and of latest market cap.
      3. Apply floors on free cash flow, market cap, latest close and latest
         volume, and cap the ``debtnc / equityusd`` ratio.
      4. Keep the ``NUM_TOP_INDEBTED`` assets with the highest ratio.

    Returns
    -------
    Pipeline
        Columns: ``close``, ``volm``, ``ltd_to_eq_rank`` (the ratio itself,
        not a rank), ``de``, ``dnc``, ``eusd``, ``fcf``, plus ``adv`` and
        ``mcap``, which hold the two boolean percentile filters.
    """
    universe = universe_filters()

    # Terms used both in the screen and as output columns.
    price_close = USEP.close.latest
    fd = Fundamentals()
    price_volm = USEP.volume.latest
    mc = fd.marketcap
    de = fd.de
    dnc = fd.debtnc
    eusd = fd.equityusd
    fcf = fd.fcf

    # Ratio of fd.debtnc to fd.equityusd. Pipeline factors support `/`
    # directly, so no numpy ufunc is needed to combine them.
    ltd_to_eq_rank = dnc.latest / eusd.latest

    # Top-quintile filters (the 5000/3000 in the names are historical).
    adv5000 = AverageDollarVolume(window_length=30).percentile_between(80, 100)
    mcap3000 = mc.latest.percentile_between(80, 100)

    # Previously this conjunction was applied twice; once is equivalent.
    universe = universe & adv5000 & mcap3000

    # Absolute floors / cap. Original note on tuning these thresholds:
    # "100000 is too big, 10000 is too small. Cannot get subscription for ILTB".
    universe = (
        universe
        & (fcf.latest > 1.5e8)
        & (mc.latest > 25e6)
        & (price_close > 10.0)
        & (price_volm > 1500000)
        & (ltd_to_eq_rank < 32.0)
    )

    # Final screen: highest-ratio assets within the filtered universe.
    indebted = ltd_to_eq_rank.top(NUM_TOP_INDEBTED, mask=universe)

    pipe = Pipeline(
        columns={
            'close': price_close,
            'volm': price_volm,
            'ltd_to_eq_rank': ltd_to_eq_rank,
            'de': de.latest,
            'dnc': dnc.latest,
            'eusd': eusd.latest,
            'fcf': fcf.latest,
            'adv': adv5000,
            'mcap': mcap3000,
        },
        screen=indebted,
    )
    return pipe


In [10]:
# Re-bind the run window to the notebook-wide date range.
start_date, end_date = START_DATE, END_DATE

# Run the make_pipeline() pipeline over that window and preview the first rows.
pipe2_output = engine.run_pipeline(make_pipeline(), start_date, end_date)

pipe2_output.head(20)


FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.equityusd::float64] CountNonNan:1952498
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.de::float64] CountNonNan:1952420
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.fcf::float64] CountNonNan:1911314
FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.marketcap::float64] CountNonNan:1950340


  overwrite_input, interpolation)
  return (lower_bounds <= data) & (data <= upper_bounds)


FrameLoad: Dates:01-02-2019 --> 05-19-2021 columns:[Fundamentals.debtnc::float64] CountNonNan:1569451


Unnamed: 0,Unnamed: 1,adv,close,de,dnc,eusd,fcf,ltd_to_eq_rank,mcap,volm
2019-01-02 00:00:00+00:00,Equity(0 [A]),True,66.268,0.869,1799000000.0,4567000000.0,337000000.0,0.393913,True,1572136.0
2019-01-02 00:00:00+00:00,Equity(1 [AA]),True,26.58,1.706,1820000000.0,5216000000.0,206000000.0,0.348926,True,2402623.0
2019-01-02 00:00:00+00:00,Equity(29 [AAPL]),True,38.394,2.413,93735000000.0,107147000000.0,16482000000.0,0.874826,True,140013864.0
2019-01-02 00:00:00+00:00,Equity(42 [ABB]),True,16.996,2.127,6619000000.0,14103000000.0,334000000.0,0.469333,True,2109981.0
2019-01-02 00:00:00+00:00,Equity(44 [ABBV]),True,80.785,-23.651,36487000000.0,-2921000000.0,4242000000.0,-12.49127,True,5722155.0
2019-01-02 00:00:00+00:00,Equity(80 [ABT]),True,69.514,1.318,19284000000.0,30898000000.0,1829000000.0,0.624118,True,6094307.0
2019-01-02 00:00:00+00:00,Equity(153 [ACN]),True,136.155,1.077,19896000.0,12678860000.0,954616000.0,0.001569,True,1826426.0
2019-01-02 00:00:00+00:00,Equity(5320 [HAIN]),True,15.86,1.457,1279601000.0,2576459000.0,203424000.0,0.496651,True,2204150.0
2019-01-02 00:00:00+00:00,Equity(6540 [KNX]),True,24.641,3.652,25523000000.0,18264000000.0,2600000000.0,1.397449,True,1517226.0
2019-01-02 00:00:00+00:00,Equity(7975 [NKE]),True,72.525,3.092,8304000000.0,4017000000.0,266000000.0,2.067214,True,5519230.0


In [11]:
#print(pipe2_output.adv.sum())

In [12]:
#print(pipe2_output.info())

In [13]:
#pipe2_output.index.values[0:-1]

In [14]:
#pipe2_output.count(axis='columns')