<a id='pipeline'></a>

#Pipeline Overview

### Accessing the data in your algorithms & research
The only method for accessing partner data within algorithms running on Quantopian is via the pipeline API. Different data sets work differently but in the case of this data, you can add this data to your pipeline as follows:

Import the data set here
> `from quantopian.pipeline.data.zacks import (`
> `EarningsSurprises`
> `)`

Then in intialize() you could do something simple like adding the raw value of one of the fields to your pipeline:
> `pipe.add(zacks.eps_mean_est.latest, 'EPS mean estimate')`

In [1]:
# Import necessary Pipeline modules
import  quantopian.pipeline.data.morningstar as mstar
from quantopian.pipeline import Pipeline  
from quantopian.pipeline import CustomFactor  
from quantopian.research import run_pipeline  
from quantopian.pipeline.data import morningstar  
from quantopian.pipeline.data.builtin import USEquityPricing  
from  quantopian.pipeline.filters import StaticAssets
import numpy as np
import talib
from statsmodels import regression
import statsmodels.api as sm
import math as m
import  quantopian.pipeline.filters
from quantopian.pipeline.filters.morningstar import Q1500US

In [2]:
class Beta(CustomFactor):
    
    # Set the default list of inputs as well as the default window_length.
    # Default values are used if the optional parameters are not specified.
    inputs = [USEquityPricing.close] 
    window_length = 100

    # Any calculation can be performed here and is applied to all stocks
    # in the universe.
    def compute(self, today, assets, out, close):
        
        benchmark_index = np.where((assets == 8554) == True)[0][0]
        
        benchmark_p = close[:, benchmark_index]
        log_benchmark_p = np.log(benchmark_p)
        log_benchmark_r = np.diff(log_benchmark_p)[1:]
        benchmark_r = 1 - np.exp(log_benchmark_r)
        X = benchmark_r
        X = sm.add_constant(X)
        
        for i in range(len(assets)):
            p = close[:, i]
            log_p = np.log(p)
            log_r = np.diff(log_p)[1:]
            r = 1 - np.exp(log_r)
            Y = r
            
            model = regression.linear_model.OLS(Y, X).fit()
            
            alpha, beta = model.params
            
            out[i] = beta

In [3]:
class MarketCap(CustomFactor):   
    
    # Pre-declare inputs and window_length
    inputs = [USEquityPricing.close, mstar.valuation.shares_outstanding] 
    window_length = 1
    
    # Compute market cap value
    def compute(self, today, assets, out, close, shares):       
        out[:] = close[-1] * shares[-1]

In [4]:
class ExtremeAccruals(CustomFactor):   
    
    # Pre-declare inputs and window_length
    inputs = [mstar.cash_flow_statement.net_income, mstar.cash_flow_statement.operating_cash_flow, mstar.balance_sheet.total_assets] 
    window_length = 1
    
    # Compute market cap value
    def compute(self, today, assets, out, income, cash_flow, total_assets):       
        out[:] = (income[-1] - cash_flow[-1])/total_assets[-1]

In [5]:
class OperatingAssets(CustomFactor):   
    
    # Pre-declare inputs and window_length
    #
    inputs = [mstar.balance_sheet.cash_and_cash_equivalents, mstar.balance_sheet.accounts_receivable, mstar.balance_sheet.inventory, mstar.balance_sheet.prepaid_assets, mstar.cash_flow_statement.deferred_tax, mstar.balance_sheet.gross_ppe, mstar.balance_sheet.goodwill_and_other_intangible_assets] 
    window_length = 1
    #Operating Assets = Cash + Total Receivables + Inventories + Prepaid Expenses + Deferred Taxes + Net PP&E 
    # + Goodwill and Intangibles
    
    
    def compute(self, today, assets, out, cash, receivables, inventories, prepaid, deferred, ppe, goodwill):
        args = np.concatenate([cash , receivables , inventories , prepaid , deferred, ppe , goodwill])
        for i in range(len(assets)):
            p = np.nan_to_num(args[:, i])
            
            
            
            
            out[i] = np.sum(p)
        
        
        

In [6]:
args = [1, 2, 3, 4, 5, 6, 7, 8 ,np.nan, 10]
cleanArgs = np.nan_to_num(args)
print (np.sum(cleanArgs))

46.0


In [7]:
class TotalAssets(CustomFactor):   
    #Must wrap this up in a custom factor to be able to combine with others
    # Pre-declare inputs and window_length
    inputs = [mstar.balance_sheet.total_assets] 
    window_length = 1
    
    # Compute market cap value
    def compute(self, today, assets, out, ta):       
        out[:] = ta[-1]

In [8]:
class OperatingLiabilities(CustomFactor):   
    
    # Pre-declare inputs and window_length
    #Want to do this from first principles to avoid glossing over any malfeasance.
    inputs = [mstar.balance_sheet.accounts_payable, mstar.balance_sheet.current_accrued_expenses, mstar.balance_sheet.income_tax_payable] 
    window_length = 1
    #accounts payable, accrued expenses, and income tax payable
    def compute(self, today, assets, out, ap, expenses, tax):       
        args = np.concatenate([ap , expenses , tax])
        for i in range(len(assets)):
            p = np.nan_to_num(args[:, i])
            
            
            
            
            out[i] = np.sum(p)

In [9]:
class LastYearRatio(CustomFactor):   
    
    # Pre-declare inputs and window_length
    #
    
    window_length = 252
    
    def compute(self, today, assets, out, ratio):       
        out[:] = ratio[-1] / ratio[0]
            
            
            
            
            

In [10]:
class LastYearToThisYearRatio(CustomFactor):   
    
    # Pre-declare inputs and window_length
    #
    
    window_length = 252
    
    def compute(self, today, assets, out, ratio):       
        out[:] = ratio[0] / ratio[-1]

In [11]:
class DEPI(CustomFactor):   
    
    # Pre-declare inputs and window_length
    #
    inputs = [mstar.balance_sheet.net_ppe, mstar.income_statement.depreciation_amortization_depletion]  
    window_length = 456
    
    def compute(self, today, assets, out, ppe, depreciation):       
        out[:] = (depreciation[0]/(depreciation[0] + ppe[0]))/(depreciation[-1]/(depreciation[-1] + ppe[-1]))

In [12]:
class AQI(CustomFactor):   
    #Asset quality index. Ratio of asset quality from this year to last year.
    # Pre-declare inputs and window_length
    #current assets, net ppe, 
    inputs = [mstar.balance_sheet.current_assets, mstar.balance_sheet.net_ppe, mstar.balance_sheet.total_assets]  
    window_length = 252
    
    def compute(self, today, assets, out, current_assets, net_ppe, total_assets):       
        aqi = (1 - (current_assets[-1] + net_ppe[-1]) / total_assets[-1])  
        aqi = aqi / (1 - (current_assets[0] + net_ppe[0]) / total_assets[0])  
        out[:] = aqi

Now that we've imported the data, let's take a look at which fields are available for each dataset.

You'll find the dataset, the available fields, and the datatypes for each of those fields.

In [13]:
print "Here are the list of available fields per dataset:"
print "---------------------------------------------------\n"

def _print_fields(dataset):
    print "Dataset: %s\n" % dataset.__name__
    print "Fields:"
    for field in list(dataset.columns):
        print "%s - %s" % (field.name, field.dtype)
    print "\n"

#for data in (EarningsSurprises):
 #   _print_fields(data)


print "---------------------------------------------------\n"


Here are the list of available fields per dataset:
---------------------------------------------------

---------------------------------------------------



In [14]:
class Previous(CustomFactor):   
    
    # Pre-declare inputs and window_length
    #
    
    window_length = 252
    
    def compute(self, today, assets, out, arr):       
        out[:] = arr[0]
            

In [15]:
class Ratio(CustomFactor):   
    
    # Pre-declare inputs and window_length
    #total assets, working capital,  , sales, retained earnings, EBIT, shares outstanding, close price, liabilities 
    
    window_length = 1
    
    def compute(self, today, assets, out, top, bottom):       
        out[:] = top[-1]/bottom[-1]

In [16]:
class MarketCapToTotalLiabilities(CustomFactor):   
    
    # Pre-declare inputs and window_length
    #total assets, working capital,  , sales, retained earnings, EBIT, shares outstanding, close price, liabilities 
    inputs = [mstar.valuation.shares_outstanding, USEquityPricing.close, mstar.balance_sheet.total_liabilities]
    window_length = 1
    
    def compute(self, today, assets, out,  shares, price, liabilities):       
        out[:] =(shares[-1] * price[-1])/ liabilities[-1]

In [17]:
class Piotroski(CustomFactor):
    inputs = [
        mstar.operation_ratios.roa,
        mstar.cash_flow_statement.operating_cash_flow,
        mstar.cash_flow_statement.cash_flow_from_continuing_operating_activities,
        
        mstar.operation_ratios.long_term_debt_equity_ratio,
        mstar.operation_ratios.current_ratio,
        mstar.valuation.shares_outstanding,
        
        mstar.operation_ratios.gross_margin,
        mstar.operation_ratios.assets_turnover,
    ]
    window_length = 252
    
    def compute(self, today, assets, out,
                roa, cash_flow, cash_flow_from_ops,
                long_term_debt_ratio, current_ratio, shares_outstanding,
                gross_margin, assets_turnover):
        profit = (
            (roa[-1] > 0).astype(int) +
            (cash_flow[-1] > 0).astype(int) +
            (roa[-1] > roa[0]).astype(int) +
            (cash_flow_from_ops[-1] > roa[-1]).astype(int)
        )
        
        leverage = (
            (long_term_debt_ratio[-1] < long_term_debt_ratio[0]).astype(int) +
            (current_ratio[-1] > current_ratio[0]).astype(int) + 
            (shares_outstanding[-1] <= shares_outstanding[0]).astype(int)
        )
        
        operating = (
            (gross_margin[-1] > gross_margin[0]).astype(int) +
            (assets_turnover[-1] > assets_turnover[0]).astype(int)
        )
        
        out[:] = profit + leverage + operating

Now that we know what fields we have access to, let's see what this data looks like when we run it through Pipeline.


This is constructed the same way as you would in the backtester. For more information on using Pipeline in Research view this thread:
https://www.quantopian.com/posts/pipeline-in-research-build-test-and-visualize-your-factors-and-filters

In [18]:
def make_pipeline():
    """
    Create and return our pipeline.
    
    We break this piece of logic out into its own function to make it easier to
    test and modify in isolation.
    """
    #sue= StandardUnexpectedEarnings()
    
    #initial_screen = filter_universe()
    #ncav = NetCurrentAssetValue()
    #custFactor, custFactorAvg = OBV() 
    
    #Too high is bad
    #dsri, aqi, sgi, depi, sgi
    
    dsri = LastYearRatio(inputs = [mstar.operation_ratios.receivable_turnover])
    gmi = LastYearToThisYearRatio(inputs = [mstar.operation_ratios.gross_margin])
    depi = DEPI()
    
    aqi = AQI()
    sgi = LastYearRatio(inputs = [mstar.operation_ratios.revenue_growth])
    
    depLastYear = Previous(inputs = [mstar.income_statement.depreciation_amortization_depletion], window_length=456)
    netPPELastYear = Previous(inputs = [morningstar.balance_sheet.net_ppe], window_length = 456)
    
    ol = OperatingLiabilities()
    
    mScore  = -6.065 + 0.823*dsri + 0.906*gmi + 0.593*aqi + 0.717*sgi + 0.107*depi  
    remove_NaN =  mScore.eq(mScore)
    
    static_assets = StaticAssets(symbols(['TXRH', 'AAPL', 'SHLD', 'WMT']))
    ea = ExtremeAccruals()
    acceptableAccruals = ea.percentile_between(0, 95, mask=Q1500US())
    #Balance sheet bloat = cumulative difference between accounting value added and cash value added.
    #Too high is bad.
    balanceSheetBloat = (OperatingAssets() - OperatingLiabilities()) /  TotalAssets()
    acceptableBalanceSheet = balanceSheetBloat.percentile_between(0,95, mask=acceptableAccruals)
    acceptableMScore = (remove_NaN & mScore.percentile_between(0,95, mask=acceptableBalanceSheet))
    
    #z-score
    
    A = Ratio(inputs = [mstar.balance_sheet.working_capital, mstar.balance_sheet.total_assets])
    B = Ratio(inputs = [mstar.balance_sheet.retained_earnings, mstar.balance_sheet.total_assets])
    C = Ratio(inputs = [mstar.income_statement.ebit, mstar.balance_sheet.total_assets])
    D = MarketCapToTotalLiabilities()
    E = Ratio(inputs = [mstar.income_statement.total_revenue, mstar.balance_sheet.total_assets])
    zScore = 1.2 * A + 1.4 * B + 3.3 * C + 0.6 * D + 1.0 *E
    #z-score:  Too low is bad
    acceptableZScore = zScore.percentile_between(5,100, mask=acceptableMScore)
    
    ebitTev =  Ratio(inputs = [mstar.income_statement.ebit, mstar.valuation.enterprise_value ])
    bargainBin = ebitTev.percentile_between(90,100, mask=acceptableZScore)
    
    pScore = Piotroski()
    goodPScore = bargainBin & (pScore > 5)
    #currentEarnings = CurrentEarningsGrowth()
    pipe = Pipeline(
        columns = {
            #'Extreme Accruals' : ea,
            
            'Total assets': mstar.balance_sheet.total_assets.latest,
            'Working capital': mstar.balance_sheet.working_capital.latest,
            'Z-score' : zScore,
            'EBIT/TEV': ebitTev,
            'Piotroski' : pScore
            
            
            #'M-Score': mScore
            #'Income tax payable': mstar.balance_sheet.income_tax_payable.latest,
            #'Accrued liabilities' : mstar.balance_sheet.current_accrued_expenses.latest
            #'Balance sheet bloat': balanceSheetBloat
           
            
            
           
             
           
            
        }, screen=goodPScore)
    
    return pipe

pipe = make_pipeline()

   

    
    

In [21]:
# run_pipeline will show the output of your pipeline

pipe_output = run_pipeline(pipe, start_date='2017-12-28', end_date='2017-12-28')



In [28]:
pipe_output.loc[pipe_output['Piotroski'] > 7]


Unnamed: 0,Unnamed: 1,EBIT/TEV,Piotroski,Total assets,Working capital,Z-score
2017-12-28 00:00:00+00:00,Equity(2618 [ESRX]),0.02767,8.0,51206200000.0,-4677800000.0,1.57215
2017-12-28 00:00:00+00:00,Equity(3321 [GPS]),0.028707,9.0,7895000000.0,1887000000.0,3.109457
2017-12-28 00:00:00+00:00,Equity(5551 [NYT]),0.024698,8.0,2238958000.0,349091000.0,2.687268
2017-12-28 00:00:00+00:00,Equity(5626 [OI]),0.024647,8.0,9999000000.0,490000000.0,0.573451
2017-12-28 00:00:00+00:00,Equity(6077 [PNM]),0.026038,8.0,6697254000.0,-336350000.0,0.613462
2017-12-28 00:00:00+00:00,Equity(7530 [TOL]),0.028836,9.0,9445225000.0,6308653000.0,2.67767
2017-12-28 00:00:00+00:00,Equity(8050 [VSH]),0.043132,8.0,3334254000.0,1632365000.0,1.862856
2017-12-28 00:00:00+00:00,Equity(8132 [WDC]),0.030341,8.0,30505000000.0,7351000000.0,1.760483
2017-12-28 00:00:00+00:00,Equity(8863 [RCL]),0.024731,8.0,22099310000.0,-4258960000.0,1.904681
2017-12-28 00:00:00+00:00,Equity(17991 [CAR]),0.028705,8.0,19873000000.0,527000000.0,0.261155


In [25]:
def filter_universe():  
    """
    11 filters:
        1. common stock
        2 & 3. not limited partnership - name and database check
        4. database has fundamental data
        5. not over the counter
        6. not when issued
        7. not depository receipts
        8. primary share
        9. high dollar volume
        10. Not a financial or utility stock
        11. We have the latest earnings data
    Check Scott's notebook for more details.
    """
    market_cap = MarketCap()
    
    mask = market_cap.top(3000)
    
    return mask

    

In [20]:
# The show_graph() method of pipeline objects produces a graph to show how it is being calculated.
pipe.show_graph(format='png')

RuntimeError: Error(s) while rendering graph: add_segment: error
add_segment: error
add_segment: error
add_segment: error


In [55]:
pipe_output.filter?