In [1]:
import pandas as pd
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing
from sharadar.pipeline.factors import (
    MarketCap,
    EV,
    Fundamentals
)
from sharadar.pipeline.engine import load_sharadar_bundle, symbol, symbols, make_pipeline_engine
from zipline.pipeline.filters import StaticAssets
import time
import datetime
import math
from zipline.utils.calendars import get_calendar
from pandas.tseries.offsets import DateOffset
import numpy as np
from zipline.pipeline.factors import CustomFactor, Returns
from sharadar.pipeline.engine import WithAssetFinder


    Load benchmark returns and treasury yield curves for the given calendar and
    benchmark symbol.

    Benchmarks are downloaded as a Series from IEX Trading.  Treasury curves
    are US Treasury Bond rates and are downloaded from 'www.federalreserve.gov'
    by default.  For Canadian exchanges, a loader for Canadian bonds from the
    Bank of Canada is also available.

    Results downloaded from the internet are cached in
    ~/.zipline/data. Subsequent loads will attempt to read from the cached
    files before falling back to redownload.

    Parameters
    ----------
    trading_day : pandas.CustomBusinessDay, optional
        A trading_day used to determine the latest day for which we
        expect to have data.  Defaults to an NYSE trading day.
    trading_days : pd.DatetimeIndex, optional
        A calendar of trading days.  Also used to determine which dates we
        should expect to find in the cache. Defaults to the NYSE calendar.
    bm_symbol : str, optional
        Symbol for the benchmark index to load. Defaults to 'SPY', the ticker
        for the S&P 500, provided by IEX Trading.

    Returns
    -------
    (benchmark_returns, treasury_curves) : (pd.Series, pd.DataFrame)

    Notes
    -----

    Both return values have a DatetimeIndex whose entries are midnight UTC on
    each stored date.  The columns of `treasury_curves` are:

    '1month', '3month', '6month',
    '1year','2year','3year','5year','7year','10year','20year','30year'

In [2]:
# Open the Sharadar bundle via the helper imported from sharadar.pipeline.engine.
bundle = load_sharadar_bundle()
# Daily-bar reader of the bundle; the cells below query it for 'close' values
# by sid and date range.
prices = bundle.equity_daily_bar_reader

In [3]:
# UTC-aware bounds of the date window used by every price query below.
start_dt, end_dt = (
    pd.to_datetime(day, utc=True) for day in ('2020-06-01', '2020-06-05')
)

In [4]:
# Resolve the 'SPY' ticker to an asset object and display its metadata.
# The 'sid' field in the output is the identifier passed to the price
# reader in the later cells.
spy = symbol('SPY')
spy.to_dict()

{'sid': 118691,
 'symbol': 'SPY',
 'asset_name': 'Spdr S&P 500 Etf Trust',
 'start_date': Timestamp('1993-01-29 00:00:00+0000', tz='UTC'),
 'end_date': Timestamp('2020-06-08 00:00:00+0000', tz='UTC'),
 'first_traded': Timestamp('1993-01-29 00:00:00+0000', tz='UTC'),
 'auto_close_date': Timestamp('2020-06-09 00:00:00+0000', tz='UTC'),
 'exchange': 'Archipelago Exchange',
 'exchange_full': 'NYSEARCA',
 'tick_size': 0.01,
 'multiplier': 1.0,
 'exchange_info': ExchangeInfo('NYSEARCA', 'Archipelago Exchange', 'US')}

In [8]:
# Daily close-to-close returns of SPY over the window.
# The sid comes from the `spy` asset resolved above rather than a hard-coded
# number, so this cell cannot drift from the symbol lookup.
(
    prices.load_dataframe(['close'], start_dt, end_dt, [spy.sid])
    .sort_index()   # ensure chronological order before differencing
    .pct_change(1)
    .iloc[1:]       # the first row has no previous close, so its return is NaN
)

Unnamed: 0,0
2020-06-02 00:00:00+00:00,0.00828
2020-06-03 00:00:00+00:00,0.013308
2020-06-04 00:00:00+00:00,-0.002627
2020-06-05 00:00:00+00:00,0.025629


In [6]:
# treasury_curves '1month', '3month', '6month', '1year','2year','3year','5year','7year','10year','20year','30year'
# Each treasury sid used here is 10000 plus the tenor expressed in months,
# so the list is built from the tenors instead of being spelled out.
tenor_months = (1, 3, 6, 12, 24, 36, 60, 84, 120, 240, 360)
treasury_sids = [10000 + months for months in tenor_months]
# Output columns are positional (0..10) and follow the order of the tenors above.
prices.load_dataframe(['close'], start_dt, end_dt, treasury_sids)

[2020-06-12 21:06:44.682946] INFO: sharadar_db_bundle: Loading raw arrays for 11 assets (<class 'list'>).


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
2020-06-01 00:00:00+00:00,0.12,0.14,0.18,0.17,0.14,0.2,0.31,0.5,0.66,1.22,1.46
2020-06-02 00:00:00+00:00,0.12,0.15,0.18,0.17,0.17,0.2,0.32,0.52,0.68,1.24,1.48
2020-06-03 00:00:00+00:00,0.12,0.16,0.19,0.17,0.19,0.26,0.38,0.59,0.77,1.32,1.56
2020-06-04 00:00:00+00:00,0.13,0.15,0.18,0.17,0.19,0.26,0.4,0.63,0.82,1.38,1.61
2020-06-05 00:00:00+00:00,0.13,0.15,0.18,0.18,0.22,0.29,0.47,0.71,0.91,1.46,1.68


In [11]:
# Fetch the same SPY closes through the raw-array API of the reader.
# The sid is taken from the `spy` asset resolved above instead of repeating
# the hard-coded number.
data = prices.load_raw_arrays(['close'], start_dt, end_dt, [spy.sid])
# Trading sessions covering the same window, used below to label the raw values.
sessions = prices.trading_calendar.sessions_in_range(start_dt, end_dt)

In [21]:
# One field ('close') and one sid were requested, so take the first returned
# array and its first column to get the raw SPY closes.
# NOTE(review): assumes each array is laid out as (sessions, sids) - the five
# values shown match the five sessions in the window; confirm against the reader.
data[0][:, 0]

array([305.55, 308.08, 312.18, 311.36, 319.34])

In [22]:
# Attach the session dates to the raw SPY closes so the values can be
# compared with the DataFrame-based output shown earlier.
spy_closes = data[0][:, 0]
pd.Series(spy_closes, index=sessions)

2020-06-01 00:00:00+00:00    305.55
2020-06-02 00:00:00+00:00    308.08
2020-06-03 00:00:00+00:00    312.18
2020-06-04 00:00:00+00:00    311.36
2020-06-05 00:00:00+00:00    319.34
Freq: C, dtype: float64