In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import os
import quandl
import pandas as pd
from zipline.data.bundles import register
from zipline.data.bundles.csvdir import csvdir_equities
from zipline.data.bundles import ingest
import trading_calendars as tc

<IPython.core.display.Javascript object>

This notebook creates a bundle out of the [Wiki Continuous Futures data](https://www.quandl.com/data/CHRIS-Wiki-Continuous-Futures/) from Quandl.

Define the start and end date of the data we want to download:

In [3]:
# Date range passed to quandl.get for every market and used (inclusively) to
# slice the trading-calendar sessions the data is aligned to.
start_date = "2001-01-01"
end_date = "2020-11-06"

<IPython.core.display.Javascript object>

Set up Quandl (the API key is read from the `QUANDL_API_KEY` environment variable):

In [4]:
# Take the key from the environment instead of hard-coding it in the notebook;
# raises KeyError if QUANDL_API_KEY is not set.
quandl.ApiConfig.api_key = os.environ["QUANDL_API_KEY"]

<IPython.core.display.Javascript object>

Get the metadata:

In [5]:
%%bash
# Stop at the first failing command so a failed download is not silently
# followed by unzipping a stale or empty archive.
set -e

# The URL is quoted because it contains `?`, which the shell would otherwise
# treat as a glob character.
wget -q \
     -O CHRIS_metadata.zip \
     "https://www.quandl.com/api/v3/databases/CHRIS/metadata?api_key=${QUANDL_API_KEY}"

# -o overwrites a previously extracted CSV without prompting, so the cell can
# be re-run non-interactively.
unzip -o CHRIS_metadata.zip

Archive:  CHRIS_metadata.zip
  inflating: CHRIS_metadata.csv      


<IPython.core.display.Javascript object>

In [6]:
# Load the extracted CHRIS metadata table and preview its first five rows.
metadata = pd.read_csv("CHRIS_metadata.csv")
metadata.head()

Unnamed: 0,code,name,description,refreshed_at,from_date,to_date
0,ASX_AP1,"ASX SPI 200 Index Futures, Continuous Contract...",Historical Futures Prices: ASX SPI 200 Index F...,2020-11-08 19:30:38,2013-10-09,2020-11-06
1,ASX_AP2,"ASX SPI 200 Index Futures, Continuous Contract...",Historical Futures Prices: ASX SPI 200 Index F...,2020-11-08 19:30:38,2013-10-09,2020-11-06
2,ASX_BB1,"NZ - 90 Day Bank Bill Futures, Continuous Cont...",Historical Futures Prices: NZ - 90 Day Bank Bi...,2019-12-28 19:26:41,2013-10-09,2019-12-21
3,ASX_BB2,"NZ - 90 Day Bank Bill Futures, Continuous Cont...",Historical Futures Prices: NZ - 90 Day Bank Bi...,2019-12-28 19:26:41,2013-10-09,2019-12-21
4,ASX_IB1,"30 Day Interbank Cash Rate Futures, Continuous...",Historical Futures Prices: 30 Day Interbank Ca...,2020-11-08 19:30:39,2013-10-09,2020-11-06


<IPython.core.display.Javascript object>

In [7]:
metadata.shape

(4047, 6)

<IPython.core.display.Javascript object>

Define the codes of the markets we are interested in, grouped by category:

In [8]:
# CHRIS dataset codes for currency futures; each is requested as "CHRIS/<code>".
currencies = [
    "CME_EC1",  # Euro FX Futures
    "CME_E71",  # E-mini Euro FX Futures
    "CME_JY1",  # Japanese Yen Futures
    "CME_J71",  # E-mini Japanese Yen Futures
    "CME_AD1",  # Australian Dollar Futures
    "CME_BP1",  # British Pound Futures
    "CME_CD1",  # Canadian Dollar Futures
    "CME_SF1",  # Swiss Franc Futures
    "CME_NE1",  # New Zealand Dollar Futures
    "CME_CNH1",  # Standard-Size USD/Offshore RMB (CNH) Futures
    "CME_MP1",  # Mexican Peso Futures
    "CME_BR1",  # Brazilian Real Futures
    "CME_RA1",  # South African Rand Futures
    "CME_RU1",  # Russian Ruble Futures
]

<IPython.core.display.Javascript object>

In [9]:
# CHRIS dataset codes for grain and oilseed futures; each is requested as "CHRIS/<code>".
agricultural = [
    "CME_BO1",  # Soybean Oil Futures
    "CME_C1",  # Corn Futures
    "CME_KW1",  # KC HRW Wheat Futures
    "CME_O1",  # Oats Futures
    "CME_RR1",  # Rough Rice Futures
    "CME_S1",  # Soybean Futures
    "CME_SM1",  # Soybean Meal Futures
    "CME_W1",  # Wheat Futures
    "CME_YC1",  # Mini-Corn Futures
    "CME_YK1",  # Mini Soybean Futures
]

<IPython.core.display.Javascript object>

In [10]:
# CHRIS dataset codes for livestock futures; each is requested as "CHRIS/<code>".
livestock = [
    "CME_FC1",  # Feeder Cattle Futures
    "CME_LC1",  # Live Cattle Futures
    "CME_LN1",  # Lean Hog Futures
]

<IPython.core.display.Javascript object>

In [11]:
# CHRIS dataset codes for dairy futures; each is requested as "CHRIS/<code>".
milk = [
    "CME_CB1",  # Cash-settled Butter Futures
    "CME_CSC1",  # Cash-Settled Cheese Futures
    "CME_DA1",  # Class III Milk Futures
    "CME_DK1",  # Class IV Milk Futures
    "CME_DY1",  # Dry Whey Futures
    "CME_NF1",  # Non-fat Dry Milk Futures
]

<IPython.core.display.Javascript object>

In [12]:
# CHRIS dataset codes grouped as energy futures; each is requested as "CHRIS/<code>".
# NOTE(review): going by its name, CME_CPO1 (Crude Palm Oil) looks like an
# agricultural product rather than an energy one — confirm the grouping. The
# grouping only affects the order of the combined market list.
energy = [
    "CME_BB1",  # Brent Crude Oil Futures
    "CME_CL1",  # Crude Oil Futures
    "CME_CPO1",  # Crude Palm Oil Futures
    "CME_NG1",  # Natural Gas (Henry Hub) Physical Futures
    "CME_QG1",  # E-mini Natural Gas Futures
    "CME_QL1",  # Coal Futures
    "CME_QM1",  # E-mini Crude Oil Futures
]

<IPython.core.display.Javascript object>

In [13]:
# CHRIS dataset codes grouped as equity-index futures; each is requested as "CHRIS/<code>".
# NOTE(review): going by their names, CME_GA1 / CME_GI1 (S&P-GSCI commodity
# indices) and CME_ME1 (Gulf Coast Jet) do not look like equity-index products —
# confirm the grouping. The grouping only affects the order of the combined
# market list.
equity_index = [
    "CME_ES1",  # E-mini S&P 500 Futures
    "CME_GA1",  # S&P-GSCI ER Index Future
    "CME_GI1",  # S&P-GSCI Commodity Index Future
    "CME_IBV1",  # Ibovespa Futures
    "CME_JR1",  # Dow Jones Real Estate Futures
    "CME_MD1",  # E-mini S&P MidCap 400 Futures
    "CME_ME1",  # Gulf Coast Jet (Platts) Up-Down Futures
    "CME_N1Y1",  # Nikkei/Yen Futures
    "CME_ND1",  # NASDAQ 100 Futures
    "CME_NK1",  # Nikkei/USD Futures
    "CME_NQ1",  # E-mini NASDAQ 100 Futures
    "CME_SP1",  # S&P 500 Futures
    "CME_YM1",  # E-mini Dow ($5) Futures
]

<IPython.core.display.Javascript object>

In [14]:
# CHRIS dataset codes for metal futures; each is requested as "CHRIS/<code>".
metals = [
    "CME_ALI1",  # Aluminium Futures
    "CME_GC1",  # Gold Futures
    "CME_HG1",  # Copper Futures
    "CME_HR1",  # U.S. Midwest Domestic Hot-Rolled Coil Steel (CRU) Index Futures
    "CME_MGC1",  # E-micro Gold Futures
    "CME_SI1",  # Silver Futures
    "CME_TIO1",  # Iron Ore 62% Fe CFR China (TSI) Futures
]

<IPython.core.display.Javascript object>

In [15]:
# CHRIS dataset codes grouped as interest-rate futures; each is requested as "CHRIS/<code>".
# NOTE(review): going by their names, CME_KI1 (ISO New England Off Peak LMP Swap)
# and CME_NN1 (Henry Hub Swap) look like energy contracts rather than
# interest-rate products — confirm the grouping. The grouping only affects the
# order of the combined market list.
interest_rates = [
    "CME_ED1",  # Eurodollar Futures
    "CME_EM1",  # 1-month Eurodollar Futures
    "CME_FF1",  # 30 Day Federal Funds Futures
    "CME_I31",  # 30 Year USD Deliverable Interest Rate Swap Futures
    "CME_KI1",  # ISO New England Monthly Off Peak LMP Swap Future
    "CME_N1U1",  # 10 Year USD Deliverable Interest Rate Swap Futures
    "CME_NN1",  # Henry Hub Swap Futures
    "CME_SA1",  # 5 Year USD Deliverable Interest Rate Swap Futures
    "CME_TN1",  # Ultra 10-Year U.S. Treasury Note Futures
    "CME_UL1",  # Ultra U.S. Treasury Bond Futures
    "CME_US1",  # U.S. Treasury Bond Futures
]

<IPython.core.display.Javascript object>

Get the data for the markets we are interested in:

In [16]:
# Flatten the per-category code lists into a single download list, keeping the
# categories (and the codes inside each) in their declared order.
all_markets = [
    *currencies,
    *agricultural,
    *livestock,
    *milk,
    *energy,
    *equity_index,
    *metals,
    *interest_rates,
]

len(all_markets)

71

<IPython.core.display.Javascript object>

In [17]:
%%time

# Collects one DataFrame per market that downloads non-empty and has the
# required price/volume columns.
data = []


def has_required_columns(df):
    """Tell whether ``df`` carries every price/volume column the bundle needs.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are inspected; its rows are not looked at.

    Returns
    -------
    bool
        True when "Open", "High", "Low", "Last" and "Volume" are all present
        among the columns (extra columns are allowed), False otherwise.
    """
    missing = {"Open", "High", "Low", "Last", "Volume"}.difference(df.columns)

    return not missing


# Download every requested market and keep the usable ones in `data`.
for i, market in enumerate(all_markets):
    # Report progress for every tenth market only, to keep the output short.
    if i % 10 == 0:
        print(f"{i + 1}. Downloading {market} ...")

    df = quandl.get(f"CHRIS/{market}", start_date=start_date, end_date=end_date)
    if df.empty:
        print(f"{i+1}. {market} is empty")
        continue

    # Reject datasets lacking a required column before spending time cleaning them.
    if not has_required_columns(df):
        print(f"{i + 1}. {market} rejected ...")
        continue

    # Forward-fill first so a missing value inherits the last known one; only
    # values with nothing before them fall back to 0. (Filling with 0 first
    # would leave nothing for the forward-fill to do.)
    # NOTE(review): this also carries Volume forward across gaps — consider
    # zero-filling Volume separately if that matters downstream.
    df = df.ffill().fillna(0)

    # Tag each row with its market so the frame stays identifiable later on.
    df["code"] = market
    data.append(df)

1. Downloading CME_EC1 ...
11. Downloading CME_MP1 ...
21. Downloading CME_SM1 ...
31. Downloading CME_DK1 ...
41. Downloading CME_ES1 ...
51. Downloading CME_NQ1 ...
61. Downloading CME_ED1 ...
71. Downloading CME_US1 ...
CPU times: user 1min 26s, sys: 1.18 s, total: 1min 27s
Wall time: 3min 48s


<IPython.core.display.Javascript object>

The markets we finally end up with:

In [18]:
# Rebuild the market list from what was actually kept in `data`: distinct codes
# in first-seen order. Reading the codes frame by frame avoids concatenating
# every downloaded frame just to inspect one column, and yields an empty list
# (instead of raising) when nothing was downloaded.
all_markets = list(dict.fromkeys(code for df in data for code in df["code"].unique()))
len(all_markets)

71

<IPython.core.display.Javascript object>

In [19]:
all_markets

['CME_EC1',
 'CME_E71',
 'CME_JY1',
 'CME_J71',
 'CME_AD1',
 'CME_BP1',
 'CME_CD1',
 'CME_SF1',
 'CME_NE1',
 'CME_CNH1',
 'CME_MP1',
 'CME_BR1',
 'CME_RA1',
 'CME_RU1',
 'CME_BO1',
 'CME_C1',
 'CME_KW1',
 'CME_O1',
 'CME_RR1',
 'CME_S1',
 'CME_SM1',
 'CME_W1',
 'CME_YC1',
 'CME_YK1',
 'CME_FC1',
 'CME_LC1',
 'CME_LN1',
 'CME_CB1',
 'CME_CSC1',
 'CME_DA1',
 'CME_DK1',
 'CME_DY1',
 'CME_NF1',
 'CME_BB1',
 'CME_CL1',
 'CME_CPO1',
 'CME_NG1',
 'CME_QG1',
 'CME_QL1',
 'CME_QM1',
 'CME_ES1',
 'CME_GA1',
 'CME_GI1',
 'CME_IBV1',
 'CME_JR1',
 'CME_MD1',
 'CME_ME1',
 'CME_N1Y1',
 'CME_ND1',
 'CME_NK1',
 'CME_NQ1',
 'CME_SP1',
 'CME_YM1',
 'CME_ALI1',
 'CME_GC1',
 'CME_HG1',
 'CME_HR1',
 'CME_MGC1',
 'CME_SI1',
 'CME_TIO1',
 'CME_ED1',
 'CME_EM1',
 'CME_FF1',
 'CME_I31',
 'CME_KI1',
 'CME_N1U1',
 'CME_NN1',
 'CME_SA1',
 'CME_TN1',
 'CME_UL1',
 'CME_US1']

<IPython.core.display.Javascript object>

Align the downloaded data with the trading calendar:

In [20]:
def get_calendar(calendar_name="CMES", start_date="2001-01-01", end_date="2020-11-06"):
    """Return the sessions of a trading calendar that fall within a date range.

    Parameters
    ----------
    calendar_name : str
        Name handed to ``tc.get_calendar`` to look up the calendar.
    start_date, end_date : str
        Bounds of the range; both ends are included.

    Returns
    -------
    The subset of the calendar's ``all_sessions`` lying between ``start_date``
    and ``end_date``.
    """
    sessions = tc.get_calendar(calendar_name).all_sessions
    within_range = (sessions >= start_date) & (sessions <= end_date)

    return sessions[within_range]

<IPython.core.display.Javascript object>

In [21]:
def align_data(df, dates):
    """Reindex one market's data onto a common set of sessions.

    Parameters
    ----------
    df : pandas.DataFrame
        Data for a single market with a timezone-naive datetime index and a
        "code" column holding the market code. Must contain at least one row.
    dates : pandas.DatetimeIndex
        Sessions to align to. Presumably UTC tz-aware so it matches the
        UTC-localized data — confirm against the calendar in use.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``dates`` (index named "Date"). Sessions without data
        take the most recent earlier values; sessions before the first
        available row are filled with 0. The "code" column is set to the
        market code on every row.
    """
    # .iloc makes the "first row" lookup explicitly positional instead of
    # relying on integer fallback for a datetime-indexed Series.
    market = df["code"].iloc[0]
    aligned_df = (
        df.tz_localize("utc")
        .reindex(dates, axis=0)
        # Forward-fill before zero-filling: the reverse order would turn every
        # gap into 0 and leave the forward-fill with nothing to do.
        # NOTE(review): Volume is carried forward too — consider zero-filling
        # it separately if that matters downstream.
        .ffill()
        .fillna(0)
        .rename_axis("Date", axis="index")
    )

    # The fills above also touched the "code" column; restore it on every row.
    aligned_df["code"] = market

    return aligned_df

<IPython.core.display.Javascript object>

In [22]:
# Sessions of the CMES calendar inside the download window; every market is
# reindexed onto this same set of sessions.
cal = get_calendar(calendar_name="CMES", start_date=start_date, end_date=end_date)

aligned_data = [align_data(market_df, cal) for market_df in data]

<IPython.core.display.Javascript object>

Save the downloaded data into a temporary directory in a format expected by `csvdir_equities`:

In [23]:
%%bash
# CSVs are written to /tmp/futures_data/<timeframe>/<code>.csv; -p creates the
# parent directories as needed and does not fail if they already exist.
mkdir -p /tmp/futures_data/daily

<IPython.core.display.Javascript object>

In [24]:
def reformat_data(df):
    """Shape one market's aligned frame into the CSV layout written to disk.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by "Date" with "Open", "High", "Low", "Last" and
        "Volume" columns (other columns are dropped).

    Returns
    -------
    pandas.DataFrame
        Columns ``date, open, high, low, close, volume, dividend, split`` in
        that order: the index becomes ``date``, ``Last`` becomes ``close``,
        ``dividend`` is 0 and ``split`` is 1.0 on every row.
    """
    price_columns = ["date", "open", "high", "low", "close", "volume"]

    flat = df.reset_index()
    flat = flat.rename(columns=lambda label: label.lower())
    flat = flat.rename(columns={"last": "close"})

    # assign() builds a new frame, so the constant columns are not written
    # into a slice of `flat`.
    return flat[price_columns].assign(dividend=0, split=1.0)

<IPython.core.display.Javascript object>

In [25]:
%%time

# Sub-directory (timeframe) under /tmp/futures_data that the CSVs are written to.
tf = "daily"

# Write one CSV per market, named after the market code.
for i, df in enumerate(aligned_data):
    if df.empty:
        # An empty frame has no "code" value to read, so report it by position.
        print(f"{i + 1}. dataset is empty ...")
        continue

    # .iloc makes the "first row" lookup explicitly positional.
    market = df["code"].iloc[0]

    # Report progress for every tenth market only, to keep the output short.
    if i % 10 == 0:
        print(f"{i + 1}. Saving {market} ...")
    reformat_data(df).to_csv(f"/tmp/futures_data/{tf}/{market}.csv", index=False)


1. Saving CME_EC1 ...
11. Saving CME_MP1 ...
21. Saving CME_SM1 ...
31. Saving CME_DK1 ...
41. Saving CME_ES1 ...
51. Saving CME_NQ1 ...
61. Saving CME_ED1 ...
71. Saving CME_US1 ...
CPU times: user 14.3 s, sys: 270 ms, total: 14.5 s
Wall time: 14.9 s


<IPython.core.display.Javascript object>

Check the first and last session covered by the aligned data:

In [26]:
# Take the bounds frame by frame instead of concatenating every aligned frame
# (twice) just to look at the index.
first_session = min(df.index.min() for df in aligned_data)
last_session = max(df.index.max() for df in aligned_data)

# Express both bounds in UTC. Passing tz= to pd.Timestamp together with an
# already tz-aware value is deprecated in pandas, so localize naive values and
# convert aware ones explicitly.
start_session, end_session = (
    ts.tz_localize("utc") if ts.tzinfo is None else ts.tz_convert("utc")
    for ts in (first_session, last_session)
)

print(f"start_session: {start_session}")
print(f"end_session: {end_session}")

start_session: 2001-01-02 00:00:00+00:00
end_session: 2020-11-06 00:00:00+00:00


<IPython.core.display.Javascript object>

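Now register the bundle and ingest it. The next cell appends the registration to `~/.zipline/extension.py`, so running it more than once adds the same block again: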

In [27]:
%%writefile -a ~/.zipline/extension.py

# Registers the CSV files under /tmp/futures_data/daily as a zipline bundle.
# This block is appended to extension.py by the notebook, so it ends up in the
# file once per run of the writing cell.
from zipline.data.bundles import register
from zipline.data.bundles.csvdir import csvdir_equities

# Name used to refer to the bundle, e.g. `zipline ingest -b quandl_chris`.
bundle_name = "quandl_chris"


# "daily" is the sub-directory of /tmp/futures_data holding one CSV per market;
# the bundle is tied to the CMES trading calendar.
register(
    bundle_name, csvdir_equities(["daily"], "/tmp/futures_data"), calendar_name="CMES"
)

Appending to /root/.zipline/extension.py


<IPython.core.display.Javascript object>

In [28]:
%%bash

# Ingest the bundle that extension.py registers under the name quandl_chris.
zipline ingest -b quandl_chris

 | CME_AD1: sid 0
 | CME_ALI1: sid 1
 | CME_BB1: sid 2
 | CME_BO1: sid 3
 | CME_BP1: sid 4
 | CME_BR1: sid 5
 | CME_C1: sid 6
 | CME_CB1: sid 7
 | CME_CD1: sid 8
 | CME_CL1: sid 9
 | CME_CNH1: sid 10
 | CME_CPO1: sid 11
 | CME_CSC1: sid 12
 | CME_DA1: sid 13
 | CME_DK1: sid 14
 | CME_DY1: sid 15
 | CME_E71: sid 16
 | CME_EC1: sid 17
 | CME_ED1: sid 18
 | CME_EM1: sid 19
 | CME_ES1: sid 20
 | CME_FC1: sid 21
 | CME_FF1: sid 22
 | CME_GA1: sid 23
 | CME_GC1: sid 24
 | CME_GI1: sid 25
 | CME_HG1: sid 26
 | CME_HR1: sid 27
 | CME_I31: sid 28
 | CME_IBV1: sid 29
 | CME_J71: sid 30
 | CME_JR1: sid 31
 | CME_JY1: sid 32
 | CME_KI1: sid 33
 | CME_KW1: sid 34
 | CME_LC1: sid 35
 | CME_LN1: sid 36
 | CME_MD1: sid 37
 | CME_ME1: sid 38
 | CME_MGC1: sid 39
 | CME_MP1: sid 40
 | CME_N1U1: sid 41
 | CME_N1Y1: sid 42
 | CME_ND1: sid 43
 | CME_NE1: sid 44
 | CME_NF1: sid 45
 | CME_NG1: sid 46
 | CME_NK1: sid 47
 | CME_NN1: sid 48
 | CME_NQ1: sid 49
 | CME_O1: sid 50
 | CME_QG1: sid 51
 | CME_QL1: sid 

fatal: not a git repository (or any of the parent directories): .git
[2020-11-08 23:22:40.281430] INFO: zipline.data.bundles.core: Ingesting quandl_chris.


<IPython.core.display.Javascript object>

Check that ingestion is indeed successful:

In [29]:
%%bash
# List the known bundles with their ingestion timestamps; quandl_chris should
# show a fresh one.
zipline bundles

csvdir <no ingestions>
quandl 2020-07-01 17:26:24.698424
quandl_chris 2020-11-08 23:22:38.756757
quantopian-quandl <no ingestions>


fatal: not a git repository (or any of the parent directories): .git


<IPython.core.display.Javascript object>