In [16]:
!pip3 install zipline==1.3.0

Collecting zipline==1.3.0
[?25l  Downloading https://files.pythonhosted.org/packages/be/59/8c5802a7897c1095fdc409fb557f04df8f75c37174e80d2ba58c8d8a6488/zipline-1.3.0.tar.gz (2.5MB)
[K     |████████████████████████████████| 2.5MB 5.3MB/s 
Collecting requests-file>=1.4.1
  Downloading https://files.pythonhosted.org/packages/77/86/cdb5e8eaed90796aa83a6d9f75cfbd37af553c47a291cd47bc410ef9bdb2/requests_file-1.5.1-py2.py3-none-any.whl
Collecting cyordereddict>=0.2.2
[?25l  Downloading https://files.pythonhosted.org/packages/d1/1a/364cbfd927be1b743c7f0a985a7f1f7e8a51469619f9fefe4ee9240ba210/cyordereddict-1.0.0.tar.gz (138kB)
[K     |████████████████████████████████| 143kB 29.2MB/s 
Collecting bcolz<1,>=0.12.1
[?25l  Downloading https://files.pythonhosted.org/packages/6c/8b/1ffa01f872cac36173c5eb95b58c01040d8d25f1b242c48577f4104cd3ab/bcolz-0.12.1.tar.gz (622kB)
[K     |████████████████████████████████| 624kB 26.1MB/s 
Building wheels for collected packages: zipline, cyordereddict, bcolz
 

In [29]:
import zipline

from collections import OrderedDict
import numpy as np

import pandas as pd
import sys

import os

from zipline.data import bundles
from zipline.pipeline import Pipeline
from zipline.utils.calendars import get_calendar
from zipline.pipeline.engine import SimplePipelineEngine
from zipline.pipeline.factors import CustomFactor, DailyReturns, AverageDollarVolume


from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.loaders import USEquityPricingLoader
from zipline.assets._assets import Equity
from zipline.api import symbol

In [2]:
# Colab-only: mount Google Drive so the data directory configured in the next
# cell (which lives under /content/drive) is reachable from this runtime.
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
# Directory on the mounted Drive that is exported as ZIPLINE_ROOT.
# NOTE(review): presumably this is where zipline keeps its ingested bundles and
# extension.py — confirm against the zipline docs for this version.
zipline_dir = '/content/drive/MyDrive/abnormal-distribution-project-data/zipline'
os.environ['ZIPLINE_ROOT'] = zipline_dir

Ingest Zipline

In [None]:

#!zipline ingest -b 'sep'

In [16]:


# Column names for the equities metadata frame handed to asset_db_writer.
# The order must match the tuples appended to metadata_list in from_sep_dump.
METADATA_HEADERS = ['start_date', 'end_date', 'auto_close_date',
                    'symbol', 'exchange', 'asset_name']


def check_for_abnormal_returns(df, thresh=3.0):
    """Warn on stderr about days whose close-to-close return exceeds ``thresh``.

    Parameters
    ----------
    df : pandas.DataFrame
        Price rows for a single security; must contain ``close`` and
        ``ticker`` columns. Rows are expected in chronological order because
        returns are computed with ``pct_change`` on consecutive rows.
    thresh : float, optional
        Fractional return above which a day is reported (3.0 == +300%).
        Only returns strictly greater than ``thresh`` are reported; large
        negative returns are not flagged.

    Returns
    -------
    None
        The function only writes to ``sys.stderr``.
    """
    returns = df['close'].pct_change()
    abnormal_rets = returns[returns > thresh]
    if abnormal_rets.shape[0] > 0:
        # Use positional .iloc: the old .ix indexer has been removed from
        # pandas, and the frame is date-indexed so .ix[0] was positional anyway.
        ticker = df.iloc[0]['ticker']
        sys.stderr.write('Abnormal returns for: {}\n'.format(ticker))
        sys.stderr.write('{}\n'.format(str(abnormal_rets)))


def from_sep_dump(file_name, start=None, end=None):
    """
    Build a zipline bundle ``ingest`` function for a Sharadar SEP CSV dump.

    Expected CSV layout (the code below also drops a ``closeunadj`` column,
    so the dump must contain it as well):

    ticker,date,open,high,low,close,volume,dividends,lastupdated
    A,2008-01-02,36.67,36.8,36.12,36.3,1858900.0,0.0,2017-11-01

    To use this make your ~/.zipline/extension.py look similar to this:

    from zipline.data.bundles import register
    from alphacompiler.data.loaders.sep_quandl import from_sep_dump

    register("sep",
         from_sep_dump("/path/to/your/SEP/dump/SHARADAR_SEP_69.csv"),)

    Parameters
    ----------
    file_name : str
        Path of the CSV dump; it is read twice (prices, then dividends).
    start, end : optional
        Forwarded as defaults of the returned ``ingest`` function. They are
        not referenced inside the ingest body.

    Returns
    -------
    callable
        The ``ingest`` closure, suitable for passing to ``bundles.register``.
    """
    us_calendar = get_calendar("NYSE").all_sessions
    # ticker -> sid, filled while writing prices and reused for the dividends
    ticker2sid_map = {}

    def ingest(environ,
               asset_db_writer,
               minute_bar_writer,  # unused
               daily_bar_writer,
               adjustment_writer,
               calendar,
               cache,
               show_progress,
               output_dir,
               # pass these as defaults to make them 'nonlocal' in py2
               start=start,
               end=end):
        """Read the dump and write daily bars, asset metadata and dividends."""

        print("starting ingesting data from: {}".format(file_name))

        # read in the whole dump (will require ~7GB of RAM)
        df = pd.read_csv(file_name, index_col='date',
                         parse_dates=['date'], na_values=['NA'])

        # drop unused columns, dividends will be used later
        df = df.drop(['lastupdated', 'dividends', 'closeunadj'], axis=1)

        # counter of valid securities, this will be our primary key
        sec_counter = 0
        data_list = []  # list to send to daily_bar_writer
        metadata_list = []  # list to send to asset_db_writer (metadata)

        # iterate over all the unique securities and pack data, and metadata
        # for writing
        for tkr, df_tkr in df.groupby('ticker'):
            df_tkr = df_tkr.sort_index()

            # First row by position. .iloc replaces the removed .ix indexer;
            # on this date-indexed frame .ix[0] was already positional.
            row0 = df_tkr.iloc[0]  # get metadata from row

            print(" preparing {}".format(row0["ticker"]))
            check_for_abnormal_returns(df_tkr)

            # check to see if there are missing dates in the middle
            this_cal = us_calendar[(us_calendar >= df_tkr.index[0]) & (us_calendar <= df_tkr.index[-1])]
            if len(this_cal) != df_tkr.shape[0]:
                print('MISSING interstitial dates for: %s using forward fill' % row0["ticker"])
                print('number of dates missing: {}'.format(len(this_cal) - df_tkr.shape[0]))
                # reindex onto the full session list, then carry the previous
                # row forward into every gap
                df_desired = pd.DataFrame(index=this_cal.tz_localize(None))
                df_desired = df_desired.join(df_tkr)
                df_tkr = df_desired.fillna(method='ffill')

            # update metadata; 'start_date', 'end_date', 'auto_close_date',
            # 'symbol', 'exchange', 'asset_name'
            metadata_list.append((df_tkr.index[0],
                                  df_tkr.index[-1],
                                  df_tkr.index[-1] + pd.Timedelta(days=1),
                                  row0["ticker"],
                                  "SEP",  # all have exchange = SEP
                                  row0["ticker"]  # TODO: can we delete this?
                                  )
                                 )

            # drop metadata columns
            df_tkr = df_tkr.drop(['ticker'], axis=1)

            # pack data to be written by daily_bar_writer
            data_list.append((sec_counter, df_tkr))
            ticker2sid_map[tkr] = sec_counter  # record the sid for use later
            sec_counter += 1

        print("writing data for {} securities".format(len(metadata_list)))
        daily_bar_writer.write(data_list, show_progress=False)

        # write metadata
        asset_db_writer.write(equities=pd.DataFrame(metadata_list,
                                                    columns=METADATA_HEADERS))
        print("a total of {} securities were loaded into this bundle".format(
            sec_counter))

        # read in Dividend History (second full read of the same file)
        dfd = pd.read_csv(file_name, index_col='date',
                         parse_dates=['date'], na_values=['NA'])
        # drop rows where dividends == 0.0
        dfd = dfd[dfd["dividends"] != 0.0]
        dfd = dfd.dropna()

        # every dividend date field is set to the row's own date
        dfd.loc[:, 'ex_date'] = dfd.loc[:, 'record_date'] = dfd.index
        dfd.loc[:, 'declared_date'] = dfd.loc[:, 'pay_date'] = dfd.index
        dfd.loc[:, 'sid'] = dfd.loc[:, 'ticker'].apply(lambda x: ticker2sid_map[x])
        dfd = dfd.rename(columns={'dividends': 'amount'})
        dfd = dfd.drop(['open', 'high', 'low', 'close', 'volume', 'lastupdated', 'ticker', 'closeunadj'], axis=1)

        # # format dfd to have sid
        adjustment_writer.write(dividends=dfd)

    return ingest

In [17]:

def register_data(start_date, end_date, bundle_name, address):
    """Register a csvdir-backed daily equities bundle on the NYSE calendar.

    Parameters
    ----------
    start_date, end_date
        Anything ``pd.Timestamp`` accepts; converted to UTC timestamps and
        used as the bundle's first and last session.
    bundle_name : str
        Name under which the bundle is registered.
    address : str
        Directory passed to ``csvdir_equities`` as the CSV location.
    """
    start_session = pd.Timestamp(start_date, tz='utc')
    end_session = pd.Timestamp(end_date, tz='utc')

    # The notebook only imports the `bundles` module, so go through it rather
    # than through bare `register` / `csvdir_equities` names that are never
    # imported (calling those raised NameError).
    bundles.register(
        bundle_name,
        bundles.csvdir.csvdir_equities(['daily'], address),
        calendar_name='NYSE',
        start_session=start_session,
        end_session=end_session,
    )


class PricingLoader(object):
    """Serve one shared USEquityPricingLoader for every USEquityPricing column."""

    def __init__(self, bundle_data):
        """Build the loader from the bundle's daily-bar and adjustment readers."""
        daily_bars = bundle_data.equity_daily_bar_reader
        adjustments = bundle_data.adjustment_reader
        self.loader = USEquityPricingLoader(daily_bars, adjustments)

    def get_loader(self, column):
        """Return the shared loader, or raise if ``column`` is not a pricing column."""
        if column in USEquityPricing.columns:
            return self.loader
        raise Exception('Column not in USEquityPricing')

def build_pipeline_engine(bundle_data, trading_calendar):
    """Create a SimplePipelineEngine backed by the given bundle.

    The engine resolves columns through a ``PricingLoader`` built from
    ``bundle_data``, uses every session of ``trading_calendar`` as its
    calendar, and looks assets up through the bundle's asset finder.
    """
    loader_lookup = PricingLoader(bundle_data).get_loader
    return SimplePipelineEngine(
        get_loader=loader_lookup,
        calendar=trading_calendar.all_sessions,
        asset_finder=bundle_data.asset_finder,
    )

# Loading stock list from file
def stock_list(file_name):
    """Read a text file and return its lines as a list of strings.

    Only the trailing line break is removed from each line. The previous
    implementation sliced off the last character unconditionally, which
    truncated the final entry whenever the file did not end with a newline.

    Parameters
    ----------
    file_name : str
        Path of a text file with one entry per line.

    Returns
    -------
    list of str
        One element per line, in file order (blank lines yield ``''``).
    """
    with open(file_name, 'r') as f:
        # text mode already normalises line endings to '\n'
        return [line.rstrip('\n') for line in f]

def get_universe_tickers(engine, universe, end_date):
    """Return the members of ``universe`` on ``end_date`` as a list.

    A screen-only pipeline is run for the single day ``end_date`` (UTC) and
    the second level of the resulting index is returned as a plain list.
    NOTE(review): the pipeline output shown in this notebook has asset objects
    on that level, so the elements are presumably assets rather than ticker
    strings — confirm before relying on the name.
    """
    as_of = pd.Timestamp(end_date, tz='UTC')
    screened = engine.run_pipeline(Pipeline(screen=universe), as_of, as_of)
    members = screened.index.get_level_values(1)
    return members.values.tolist()

In [None]:
def make_pipeline(factors, universe):
    """Assemble a Pipeline whose columns are ``factors``, screened by ``universe``.

    ``factors`` is a mapping of column name -> factor; its iteration order is
    preserved in the pipeline's columns.
    """
    ordered_columns = OrderedDict(
        (label, factor) for label, factor in factors.items()
    )
    return Pipeline(screen=universe, columns=ordered_columns)


def make_factors():
    """Return the factor mapping (column name -> factor) used by this notebook."""
    all_factors = dict()
    # 252-row window; presumably one trading year, matching the '1Y' label
    all_factors['1Y_return'] = DailyReturns(window_length=252)
    return all_factors

In [23]:

# Wire the already-ingested 'sep' bundle into a pipeline engine.
trading_calendar = get_calendar('NYSE') 
# NOTE(review): ingest_func is not referenced again in the visible notebook.
ingest_func = bundles.csvdir.csvdir_equities(['daily'], 'sep')
# Register the bundle name so bundles.load can resolve it. The '.' file name is
# only read if the returned ingest function is actually invoked.
bundles.register('sep', from_sep_dump('.'))
bundle_data = bundles.load('sep')
engine = build_pipeline_engine(bundle_data, trading_calendar)

  after removing the cwd from sys.path.


In [32]:
# Date range (UTC) over which the factor pipeline is evaluated.
factor_start_date = pd.Timestamp('2014-01-03', tz='UTC')
universe_end_date = pd.Timestamp('2020-02-03', tz='UTC')

# Universe screen: top 500 names by AverageDollarVolume over a 120-row window.
universe = AverageDollarVolume(window_length=120).top(500)
pipe= make_pipeline(make_factors(),universe=universe)
# Last expression of the cell, so the resulting frame is displayed as output.
engine.run_pipeline(pipeline=pipe, start_date=factor_start_date, end_date=universe_end_date)

Unnamed: 0,Unnamed: 1,1Y_return
2014-01-03 00:00:00+00:00,Equity(0 [A]),0.351158
2014-01-03 00:00:00+00:00,Equity(40 [AAPL]),0.046072
2014-01-03 00:00:00+00:00,Equity(61 [ABBV]),0.552001
2014-01-03 00:00:00+00:00,Equity(62 [ABC]),0.641407
2014-01-03 00:00:00+00:00,Equity(119 [ABT]),0.167842
2014-01-03 00:00:00+00:00,Equity(207 [ACN]),0.207506
2014-01-03 00:00:00+00:00,Equity(261 [ADBE]),0.570596
2014-01-03 00:00:00+00:00,Equity(277 [ADI]),0.172293
2014-01-03 00:00:00+00:00,Equity(286 [ADM]),0.544473
2014-01-03 00:00:00+00:00,Equity(298 [ADP]),0.397412
