In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import requests
import zipfile

<IPython.core.display.Javascript object>

In [3]:
# Scratch directory for the downloaded archive and its extracted contents.
# NOTE: "/tmp" is also hard-coded in the bash cleanup cell and in the bundle
# script's base_path, so changing this constant alone is not enough.
DATA_FOLDER = "/tmp"

<IPython.core.display.Javascript object>

In [4]:
# URL of the zip archive that the following cells download and extract.
# Disabled alternative source (the "Trading Evolved.zip" archive):
# data_url = "https://dl.dropbox.com/s/tj85sufbsi820ya/Trading%20Evolved.zip?dl=1"
data_url = "https://dl.dropbox.com/s/etocgt9zgeedo22/data.zip?dl=1"

<IPython.core.display.Javascript object>

In [5]:
# Download the archive into memory.
# - timeout: without one, requests waits indefinitely on a stalled connection
#   (the value bounds connect/read waits, not the total download time).
# - raise_for_status(): fails on 4xx/5xx with the status code and URL in the
#   message, whereas a bare `assert r.ok` reports nothing and is skipped
#   entirely when Python runs with -O.
r = requests.get(data_url, timeout=60)
r.raise_for_status()

<IPython.core.display.Javascript object>

In [6]:
%%bash
# Remove any previous download so the extraction step starts from a clean state.
# Absolute paths are used instead of `cd /tmp` followed by relative removals:
# if the `cd` ever failed, a relative `rm -rf` would run in whatever the
# current working directory happens to be.
rm -rf /tmp/random_test_data
rm -f /tmp/random_test_data.zip

<IPython.core.display.Javascript object>

In [7]:
# Save the downloaded bytes as a zip file, then unpack it next to itself.
zip_path = f"{DATA_FOLDER}/random_test_data.zip"
extract_dir = f"{DATA_FOLDER}/random_test_data"

with open(zip_path, "wb") as zip_file:
    zip_file.write(r.content)

with zipfile.ZipFile(zip_path, "r") as archive:
    archive.extractall(extract_dir)

<IPython.core.display.Javascript object>

Script that defines the ingest function for the random futures bundle (the bundle itself is registered in `extension.py` below):

In [8]:
%%writefile ~/.zipline/random_futures_data.py

import pandas as pd
from os import listdir
from tqdm import tqdm  # Used for progress bar

# Change the path to where you have your data
# base_path is the root of the extracted archive; data_path is the folder
# whose files are listed and read (one CSV per symbol) by the functions below.
base_path = "/tmp/random_test_data/data/"
data_path = base_path + "random_futures/"
# meta_path is relative to base_path.
meta_path = "futures_meta/meta.csv"
# Root-symbol lookup table. The code below relies on its "root_symbol",
# "exchange" and "minor_fx_adj" columns.
# NOTE(review): this read happens at import time, so importing this module
# fails whenever the CSV is not present under base_path.
futures_lookup = pd.read_csv(base_path + meta_path, index_col=0)

"""
The ingest function needs to have this exact signature,
meaning these arguments passed, as shown below.
"""


def random_futures_data(
    environ,
    asset_db_writer,
    minute_bar_writer,
    daily_bar_writer,
    adjustment_writer,
    calendar,
    start_session,
    end_session,
    cache,
    show_progress,
    output_dir,
):
    """Ingest the CSV files under ``data_path`` as a futures bundle.

    This is the bundle ingest entry point; the parameter list must stay
    exactly as it is because the function is called with these arguments.
    Only ``asset_db_writer``, ``daily_bar_writer``, ``adjustment_writer``,
    ``calendar``, ``start_session`` and ``end_session`` are used here; the
    remaining parameters are accepted but ignored.

    Steps:
      1. Collect one symbol per ``*.csv`` file in ``data_path``.
      2. Stream the per-symbol daily bars to ``daily_bar_writer``.
      3. Write empty splits/dividends tables to ``adjustment_writer``.
      4. Write per-contract and root-symbol metadata to ``asset_db_writer``.

    Raises:
        ValueError: if ``data_path`` contains no ``.csv`` files.
    """
    # One symbol per CSV file: 'example.csv'[:-4] == 'example'.
    # Only names ending in ".csv" are kept, so stray files (hidden files,
    # backups, ...) do not become bogus symbols that fail to load later.
    # Sorting makes the SID assignment reproducible, because the order
    # returned by listdir() is arbitrary.
    symbols = [f[:-4] for f in sorted(listdir(data_path)) if f.endswith(".csv")]

    if not symbols:
        raise ValueError("No symbols found in folder.")

    # Futures have no dividends or splits; empty frames with the expected
    # columns are written further down.
    divs = pd.DataFrame(
        columns=["sid", "amount", "ex_date", "record_date", "declared_date", "pay_date"]
    )
    splits = pd.DataFrame(columns=["sid", "ratio", "effective_date"])

    # Empty per-contract metadata table; process_futures() adds one row per
    # contract (through make_meta) while its generator is being consumed.
    metadata = pd.DataFrame(
        columns=(
            "start_date",
            "end_date",
            "auto_close_date",
            "symbol",
            "root_symbol",
            "expiration_date",
            "notice_date",
            "tick_size",
            "exchange",
        )
    )

    # Valid trading dates, according to the selected exchange calendar
    sessions = calendar.sessions_in_range(start_session, end_session)

    # Load every contract and hand the bars to the writer. This must happen
    # before the metadata is written below, since `metadata` is only filled
    # in as the generator is iterated.
    daily_bar_writer.write(process_futures(symbols, sessions, metadata))

    adjustment_writer.write(splits=splits, dividends=divs)

    # Root-level metadata: the lookup table with its index exposed as
    # "root_symbol_id" and without the helper column "minor_fx_adj".
    root_symbols = futures_lookup.copy()
    root_symbols["root_symbol_id"] = root_symbols.index.values
    del root_symbols["minor_fx_adj"]

    # Write the metadata
    asset_db_writer.write(futures=metadata, root_symbols=root_symbols)


def process_futures(symbols, sessions, metadata):
    """Yield ``(sid, DataFrame)`` pairs of daily bars, one per symbol.

    For every symbol the file ``<data_path>/<symbol>.csv`` is read, prices
    are scaled by the root symbol's ``minor_fx_adj`` factor, the rows are
    aligned to ``sessions`` and a metadata row is added to ``metadata``
    through ``make_meta``.

    Args:
        symbols: iterable of file stems (file names without ".csv").
        sessions: trading sessions to align to; must support
            ``tz_localize(None)``.
        metadata: DataFrame that receives one row per yielded contract
            (mutated in place).

    Yields:
        ``(sid, df)`` where ``sid`` is the 1-based position of the symbol in
        ``symbols`` and ``df`` is the processed frame without the
        "openinterest", "expiration_date", "root_symbol" and "symbol"
        columns. Symbols with no usable rows are skipped; their SID is left
        unused.
    """
    # Loop-invariant: sessions with the timezone removed, used to reindex the
    # CSV data (presumably the CSV dates are tz-naive - confirm).
    naive_sessions = sessions.tz_localize(None)

    # Loop the symbols with a progress bar, assigning Security IDs from 1.
    for sid, symbol in enumerate(tqdm(symbols, desc="Loading data..."), start=1):
        # Read the contract data from its csv file.
        df = pd.read_csv(
            "{}/{}.csv".format(data_path, symbol), index_col=[0], parse_dates=[0]
        )

        # An empty file has no first row to read the root symbol from.
        if df.empty:
            tqdm.write("Skipping {}: file contains no rows.".format(symbol))
            continue

        # Apply the minor currency adjustment (e.g. USc quotes) for this
        # contract's root symbol.
        root_symbol = df.iloc[0]["root_symbol"]
        adjustment_factor = futures_lookup.loc[
            futures_lookup["root_symbol"] == root_symbol, "minor_fx_adj"
        ].iloc[0]
        for column in ("open", "high", "low", "close"):
            df[column] *= adjustment_factor

        # Avoid potential high / low errors in the data set: high must be the
        # largest and low the smallest of the bar's prices.
        df["high"] = df[["high", "open", "close"]].max(axis=1)
        df["low"] = df[["low", "open", "close"]].min(axis=1)

        # Synch to the official exchange calendar, keeping only the span
        # covered by the original data.
        first_day, last_day = df.index[0], df.index[-1]
        df = df.reindex(naive_sessions)[first_day:last_day]

        # Forward fill missing data, then drop rows that are still incomplete.
        # (ffill() replaces the deprecated fillna(method="ffill").)
        df = df.ffill()
        df = df.dropna()

        # Cut dates before 2000, avoiding Zipline issue
        df = df.loc["2000-01-01":]

        # Nothing left (e.g. the contract ended before 2000 or has no session
        # inside the requested range): make_meta would fail on df.index[0],
        # so skip the contract instead of aborting the whole ingest.
        if df.empty:
            tqdm.write(
                "Skipping {}: no rows left after calendar sync and date cut.".format(
                    symbol
                )
            )
            continue

        # Prepare contract metadata (needs the "symbol" / "root_symbol"
        # columns, so it runs before they are dropped).
        make_meta(sid, metadata, df, sessions)

        yield sid, df.drop(
            ["openinterest", "expiration_date", "root_symbol", "symbol"], axis=1
        )


def make_meta(sid, metadata, df, sessions):
    """Store the metadata of one contract as row ``sid`` of ``metadata``.

    The first and last index values of ``df`` become the start and end
    dates; the end date doubles as the expiration date. The auto-close date
    is one calendar day after the end date and is also used as the notice
    date. Symbol and root symbol come from the first row of ``df``; the
    exchange is looked up in ``futures_lookup`` by root symbol. ``sessions``
    is accepted but not used.
    """
    # First and last date present in the data.
    first_day, last_day = df.index[0], df.index[-1]

    # Auto close happens the day after the last trade.
    auto_close = last_day + pd.Timedelta(days=1)

    first_row = df.iloc[0]
    contract_symbol = first_row["symbol"]
    root = first_row["root_symbol"]

    is_root = futures_lookup["root_symbol"] == root
    exchange = futures_lookup.loc[is_root, "exchange"].iloc[0]

    # Add a real notice day here if you have one.
    # Tip to improve: for commodity markets, set the notice date to one
    # month prior to expiry.
    notice_day = auto_close
    expiry = last_day
    placeholder_tick = 0.0001  # Placeholder

    # Append the row; the order must match the metadata columns.
    metadata.loc[sid] = (
        first_day,
        last_day,
        auto_close,
        contract_symbol,
        root,
        expiry,
        notice_day,
        placeholder_tick,
        exchange,
    )


Overwriting /root/.zipline/random_futures_data.py


<IPython.core.display.Javascript object>

Update `extension.py` to register this bundle:

In [9]:
%%writefile -a ~/.zipline/extension.py

# NOTE: `-a` appends, so every re-run of this notebook cell adds another copy
# of the lines below to extension.py.

from zipline.data.bundles import register

# Module written to ~/.zipline by the previous cell. It has to be importable
# when this file runs; the ingest cell sets PYTHONPATH to $HOME/.zipline.
import random_futures_data
register('random_futures', random_futures_data.random_futures_data, calendar_name='us_futures')

Appending to /root/.zipline/extension.py


<IPython.core.display.Javascript object>

Ingest the bundle:

In [10]:
%%bash

# $HOME/.zipline is put on PYTHONPATH, presumably so that the
# `import random_futures_data` line in extension.py can be resolved - confirm.
PYTHONPATH="$HOME/.zipline" zipline ingest -b 'random_futures'

fatal: not a git repository (or any of the parent directories): .git
[2020-11-15 06:34:03.232512] INFO: zipline.data.bundles.core: Ingesting random_futures.
Loading data...:   0%|          | 0/735 [00:00<?, ?it/s]Loading data...:   0%|          | 1/735 [00:00<02:42,  4.53it/s]Loading data...:   0%|          | 2/735 [00:00<02:43,  4.47it/s]Loading data...:   0%|          | 3/735 [00:01<04:08,  2.95it/s]Loading data...:   1%|          | 4/735 [00:01<03:53,  3.13it/s]Loading data...:   1%|          | 5/735 [00:01<03:23,  3.59it/s]Loading data...:   1%|          | 6/735 [00:01<03:02,  4.00it/s]Loading data...:   1%|          | 7/735 [00:01<02:45,  4.41it/s]Loading data...:   1%|          | 8/735 [00:02<02:54,  4.17it/s]Loading data...:   1%|          | 9/735 [00:02<02:58,  4.06it/s]Loading data...:   1%|▏         | 10/735 [00:02<02:42,  4.45it/s]Loading data...:   1%|▏         | 11/735 [00:02<02:33,  4.73it/s]Loading data...:   2%|▏         | 12/735 [00:02<02:33,  4.71it/s]Lo

<IPython.core.display.Javascript object>

Check that the bundle exists:

In [11]:
%%bash

# List the known bundles with their ingestion timestamps; random_futures
# should show a timestamp rather than "<no ingestions>".
zipline bundles

csvdir <no ingestions>
quandl 2020-11-15 06:14:52.280340
quantopian-quandl <no ingestions>
random_futures 2020-11-15 06:34:02.543101


fatal: not a git repository (or any of the parent directories): .git


<IPython.core.display.Javascript object>