In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import requests
import zipfile

<IPython.core.display.Javascript object>

Download the dataset from Anreas Clenow's [website](https://www.followingthetrend.com/trading-evolved/):

In [3]:
DATA_FOLDER = "/tmp"

<IPython.core.display.Javascript object>

In [4]:
data_url = "https://dl.dropbox.com/s/etocgt9zgeedo22/data.zip?dl=1"

<IPython.core.display.Javascript object>

In [5]:
r = requests.get(data_url)
assert r.ok

<IPython.core.display.Javascript object>

In [6]:
%%bash
cd /tmp
rm -rf random_test_data
rm -f random_test_data.zip

<IPython.core.display.Javascript object>

In [7]:
with open(f"{DATA_FOLDER}/random_test_data.zip", "wb") as f:
    f.write(r.content)

with zipfile.ZipFile(f"{DATA_FOLDER}/random_test_data.zip", "r") as zip_ref:
    zip_ref.extractall(f"{DATA_FOLDER}/random_test_data")

<IPython.core.display.Javascript object>

Script to register the random stock bundle:

In [8]:
%%writefile ~/.zipline/random_stock_data.py

import pandas as pd
from os import listdir

# Change the path to where you have your data
path = "/tmp/random_test_data/data/random_stocks"



"""
The ingest function needs to have this exact signature,
meaning these arguments passed, as shown below.
"""
def random_stock_data(environ,
                  asset_db_writer,
                  minute_bar_writer,
                  daily_bar_writer,
                  adjustment_writer,
                  calendar,
                  start_session,
                  end_session,
                  cache,
                  show_progress,
                  output_dir):
    
    # Get list of files from path
    # Slicing off the last part
    # 'example.csv'[:-4] = 'example'
    symbols = [f[:-4] for f in listdir(path)]
    
    if not symbols:
        raise ValueError("No symbols found in folder.")
        
        
    # Prepare an empty DataFrame for dividends
    divs = pd.DataFrame(columns=['sid', 
                                 'amount',
                                 'ex_date', 
                                 'record_date',
                                 'declared_date', 
                                 'pay_date']
    )
    
    # Prepare an empty DataFrame for splits
    splits = pd.DataFrame(columns=['sid',
                                   'ratio',
                                   'effective_date']
    )
    
    # Prepare an empty DataFrame for metadata
    metadata = pd.DataFrame(columns=('start_date',
                                              'end_date',
                                              'auto_close_date',
                                              'symbol',
                                              'exchange'
                                              )
                                     )


    # Check valid trading dates, according to the selected exchange calendar
    sessions = calendar.sessions_in_range(start_session, end_session)
    
    # Get data for all stocks and write to Zipline
    daily_bar_writer.write(
            process_stocks(symbols, sessions, metadata, divs)
            )

    # Write the metadata
    asset_db_writer.write(equities=metadata)
    
    # Write splits and dividends
    adjustment_writer.write(splits=splits,
                            dividends=divs)    
    
    
"""
Generator function to iterate stocks,
build historical data, metadata 
and dividend data
"""
def process_stocks(symbols, sessions, metadata, divs):
    # Loop the stocks, setting a unique Security ID (SID)
    for sid, symbol in enumerate(symbols):
        
        print('Loading {}...'.format(symbol))
        # Read the stock data from csv file.
        df = pd.read_csv('{}/{}.csv'.format(path, symbol), index_col=[0], parse_dates=[0]) 
        
        # Check first and last date.
        start_date = df.index[0]
        end_date = df.index[-1]        
        
        # Synch to the official exchange calendar
        df = df.reindex(sessions.tz_localize(None))[start_date:end_date]
        
        # Forward fill missing data
        df.fillna(method='ffill', inplace=True)
        
        # Drop remaining NaN
        df.dropna(inplace=True)    
        
        # The auto_close date is the day after the last trade.
        ac_date = end_date + pd.Timedelta(days=1)
        
        # Add a row to the metadata DataFrame. Don't forget to add an exchange field.
        metadata.loc[sid] = start_date, end_date, ac_date, symbol, "NYSE"
        
        # If there's dividend data, add that to the dividend DataFrame
        if 'dividend' in df.columns:
            
            # Slice off the days with dividends
            tmp = df[df['dividend'] != 0.0]['dividend']
            div = pd.DataFrame(data=tmp.index.tolist(), columns=['ex_date'])
            
            # Provide empty columns as we don't have this data for now
            div['record_date'] = pd.NaT
            div['declared_date'] = pd.NaT
            div['pay_date'] = pd.NaT            
            
            # Store the dividends and set the Security ID
            div['amount'] = tmp.tolist()
            div['sid'] = sid
            
            # Start numbering at where we left off last time
            ind = pd.Index(range(divs.shape[0], divs.shape[0] + div.shape[0]))
            div.set_index(ind, inplace=True)
            
            # Append this stock's dividends to the list of all dividends
            divs = divs.append(div)    
            
        yield sid, df

Overwriting /root/.zipline/random_stock_data.py


<IPython.core.display.Javascript object>

Update `extension.py` to be able to register this bundle:

In [9]:
%%writefile -a ~/.zipline/extension.py


from zipline.data.bundles import register

import random_stock_data
register('random_stocks', random_stock_data.random_stock_data, calendar_name='NYSE')

Appending to /root/.zipline/extension.py


<IPython.core.display.Javascript object>

Ingest the bundle:

In [10]:
%%bash

PYTHONPATH="$HOME/.zipline" zipline ingest -b 'random_stocks'

Loading AMD...
Loading BAC-199809...
Loading SPLS-201709...
Loading BBT...
Loading DWDP...
Loading WFM-201708...
Loading AON...
Loading ICE...
Loading XOM...
Loading HES...
Loading XL...
Loading RRD...
Loading COV-201501...
Loading HBI...
Loading CCE...
Loading GDW-200609...
Loading JOS-200005...
Loading IPG...
Loading CNX...
Loading AABA...
Loading AMG...
Loading UH-200005...
Loading XLNX...
Loading ASND-199906...
Loading EXPE...
Loading SEE...
Loading SWKS...
Loading KATE-201707...
Loading ATI-199906...
Loading COF...
Loading DIGI-199809...
Loading MTL-199909...
Loading SYF...
Loading TA-199907...
Loading CBH-200803...
Loading DG...
Loading FISV...
Loading AKAM...
Loading HMA-201401...
Loading BAX...
Loading KSE-200708...
Loading EA...
Loading IP...
Loading TJX...
Loading BJS-201004...
Loading BHMSQ-200401...
Loading IDXX...
Loading HCA-200611...
Loading SHW...
Loading BF.B...
Loading XEC...
Loading SIAL-201511...
Loading PX...
Loading MSFT...
Loading CPRT...
Loading CELG...
Loading 

[2020-11-21 04:00:43.183668] INFO: zipline.data.bundles.core: Ingesting random_stocks.


<IPython.core.display.Javascript object>

Check bundle exists:

In [11]:
%%bash

zipline bundles

csvdir <no ingestions>
quandl 2020-11-21 03:15:00.901882
quandl 2020-11-21 02:49:44.034626
quandl 2020-11-20 10:22:51.269033
quandl 2020-11-15 16:10:56.034164
quandl 2020-11-15 16:10:22.008242
quandl 2020-11-15 16:07:52.722994
quantopian-quandl <no ingestions>
random_futures 2020-11-20 13:49:05.277535
random_futures 2020-11-20 11:03:32.137763
random_futures 2020-11-15 16:12:43.061226
random_stocks 2020-11-21 04:00:41.273756
random_stocks 2020-11-21 02:13:14.005937


<IPython.core.display.Javascript object>