# Creating a Zipline bundle

## Required data for Zipline

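Zipline needs the following attributes for every asset. The table maps each attribute to its column name in the quotes database and to the column name used for Zipline: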

| Data                  | Database name | Zipline name  |
|-----------------------|---------------|---------------|
| symbol (or isin?)     | n/a           | symbol        |
| date                  | Date          | date          |
| unadjusted_open       | Open          | open          |
| unadjusted_high       | High          | high          |
| unadjusted_low        | Low           | low           |
| unadjusted_close      | Close         | close         |
| unadjusted_volume     | Volume        | volume        |
| splits                | Stock Splits  | split_ratio   |
| dividends             | Dividends     | dividends     | 

In [149]:

%matplotlib inline

In [None]:
import pandas as pd 
import pg8000
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, String, MetaData
from datetime import datetime, date, timedelta
import math
import numpy as np

In [None]:
# Assets to include in the bundle. Each entry carries the ISIN, the ticker
# symbol and a human-readable name.
universe = [
    {"isin": "CA0585861085", "symbol": "PO0.F", "name": "Ballard Power"},
    {"isin": "GB00B0130H42", "symbol": "IJ8.F", "name": "ITM Power"},
    {"isin": "NO0010081235", "symbol": "D7G.F", "name": "Nel"},
    {"isin": "SE0006425815", "symbol": "27W.F", "name": "Powercell Sweden"},
    {"isin": "US72919P2020", "symbol": "PLUN.F", "name": "Plug Power"},
    {"isin": "NO0003067902", "symbol": "2HX.F", "name": "Hexagon Composites"},
    {"isin": "FR0000120073", "symbol": "AIL.DE", "name": "Air Liquide"},
    {"isin": "IE00BZ12WP82", "symbol": "LIN.F", "name": "Linde"},
    {"isin": "US2310211063", "symbol": "CUM.F", "name": "Cummins"},
    {"isin": "FR0011742329", "symbol": "M6P.F", "name": "McPhy Energy S.A."},
    # Disabled entry (reason not recorded here):
    # {"isin": "US6541101050", "name": "Nikola Corporation", "symbol": "8NI.F"},
    {"isin": "DE000A0HL8N9", "name": "2G Energy", "symbol": "2GB.DE"},
]

In [None]:
import os

# NOTE(review): the default URL embeds the database password in the notebook.
# Set QUOTES_DB_URL in the environment to override it, and consider removing
# the hard-coded default once every user of this notebook has done so.
connection_def = os.environ.get(
    "QUOTES_DB_URL",
    "postgresql+pg8000://quotes:clue0QS-train@raspberrypi/quotes",
)
engine = create_engine(connection_def)

In [153]:
data_list = []

# Database column name -> column name used in the rest of this notebook.
column_renames = {
    'Date': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Volume': 'volume',
    'Dividends': 'dividends',
    'Stock Splits': 'split_ratio',
}

for asset in universe:
    isin = asset['isin']
    symbol = asset['symbol']

    # The ISIN is passed to read_sql as the table to load. The date column is
    # called 'Date' (see the rename map), so that is the key parse_dates needs.
    data_table = pd.read_sql(
        isin,
        engine,
        index_col=False,
        parse_dates={'Date': '%Y-%m-%d'},
    )

    # Rename to columns that zipline can process.
    data_table = data_table.rename(columns=column_renames)

    # Insert the symbol as the first column; every row gets the asset's symbol.
    data_table.insert(0, 'symbol', symbol)

    # sort_values returns a new frame, so the result has to be kept.
    data_table = data_table.sort_values(['symbol', 'date'], ascending=[True, True])
    data_list.append(data_table)

# Stack the per-asset frames; the original per-table row index is kept.
all_data = pd.concat(data_list, axis=0)
all_data

Unnamed: 0,symbol,date,close,high,low,open,volume,dividends,split_ratio
0,CA0585861085,2007-12-28,3.51,3.51,3.43,3.48,28084.0,0.0,0.0
1,CA0585861085,2008-01-02,3.65,3.72,3.59,3.64,19930.0,0.0,0.0
2,CA0585861085,2008-01-03,3.84,3.85,3.57,3.63,32697.0,0.0,0.0
3,CA0585861085,2008-01-04,3.64,3.93,3.62,3.84,47130.0,0.0,0.0
4,CA0585861085,2008-01-07,3.67,3.67,3.45,3.60,34835.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
3112,DE000A0HL8N9,2020-07-13,84.90,84.90,79.00,80.10,14841.0,0.0,0.0
3113,DE000A0HL8N9,2020-07-14,81.50,83.90,78.90,81.20,11623.0,0.0,0.0
3114,DE000A0HL8N9,2020-07-15,80.60,82.70,78.60,82.00,17815.0,0.0,0.0
3115,DE000A0HL8N9,2020-07-16,79.40,80.10,77.20,79.80,11078.0,0.0,0.0


### Parse metadata

In [None]:
def gen_asset_metadata(data):
    """Build one metadata row per symbol from the combined price data.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``symbol`` column and a ``date`` column.

    Returns
    -------
    pandas.DataFrame
        One row per symbol, ordered by symbol, with the flat columns
        ``symbol``, ``start_date`` (earliest date), ``end_date`` (latest date)
        and ``auto_close_date`` (``end_date`` plus one day).
    """
    # Named aggregation yields flat, explicitly named columns. Aggregating
    # with [np.min, np.max] produced MultiIndex columns whose names
    # ('amin'/'amax' vs 'min'/'max') depend on the pandas version.
    metadata = (
        data.groupby('symbol')
        .agg(start_date=('date', 'min'), end_date=('date', 'max'))
        .reset_index()
    )
    metadata['auto_close_date'] = metadata['end_date'] + pd.Timedelta(days=1)
    return metadata


In [138]:
# Build the per-symbol metadata table from the combined data and display it.
metadata = gen_asset_metadata(all_data)
metadata

Unnamed: 0,symbol,start_date,end_date,auto_close_date
,,,,
0.0,CA0585861085,2007-12-28,2020-07-17,2020-07-18
1.0,DE000A0HL8N9,2007-07-31,2020-07-17,2020-07-18
2.0,FR0000120073,2007-12-28,2020-07-17,2020-07-18
3.0,FR0011742329,2014-03-26,2020-07-17,2020-07-18
4.0,GB00B0130H42,2006-03-02,2020-07-17,2020-07-18
5.0,IE00BZ12WP82,2000-09-18,2020-07-17,2020-07-18
6.0,NO0003067902,2014-03-10,2020-07-17,2020-07-18
7.0,NO0010081235,2017-10-09,2020-07-17,2020-07-18
8.0,SE0006425815,2017-11-09,2020-07-17,2020-07-18


### Parse dividends

In [139]:

def parse_dividends(data, show_progress):
    """Extract the dividend rows from the combined price data.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``symbol``, ``date`` and ``dividends`` columns.
    show_progress : bool
        Accepted but not used by this function.

    Returns
    -------
    pandas.DataFrame
        The rows whose ``dividends`` value is not 0.0, with the columns
        ``symbol``, ``ex_date`` (from ``date``), ``amount`` (from
        ``dividends``) plus ``record_date``, ``declared_date`` and
        ``pay_date``, all set to ``NaT``.
    """
    has_payout = data['dividends'] != 0.0
    payouts = data.loc[has_payout, ['symbol', 'date', 'dividends']]
    payouts = payouts.rename(columns={'date': 'ex_date', 'dividends': 'amount'})

    # These dates are not available in the source data; fill them with NaT.
    for column in ('record_date', 'declared_date', 'pay_date'):
        payouts[column] = pd.NaT

    return payouts


In [140]:
    # Display the dividend rows extracted from the combined data.
    parse_dividends(all_data, False)

Unnamed: 0,symbol,ex_date,amount,record_date,declared_date,pay_date
30,NO0003067902,2014-04-23,0.33,NaT,NaT,NaT
282,NO0003067902,2015-04-22,0.62,NaT,NaT,NaT
1046,NO0003067902,2018-04-20,0.30,NaT,NaT,NaT
86,FR0000120073,2008-05-14,2.25,NaT,NaT,NaT
235,FR0000120073,2009-05-13,2.25,NaT,NaT,NaT
...,...,...,...,...,...,...
2095,DE000A0HL8N9,2016-07-06,0.37,NaT,NaT,NaT
2355,DE000A0HL8N9,2017-07-12,0.40,NaT,NaT,NaT
2605,DE000A0HL8N9,2018-07-05,0.42,NaT,NaT,NaT
2849,DE000A0HL8N9,2019-06-26,0.45,NaT,NaT,NaT


### Parse splits

In [141]:
def parse_splits(data, show_progress):
    """Extract the split rows from the combined price data.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``symbol``, ``date`` and ``split_ratio`` columns.
        It is not modified.
    show_progress : bool
        When true, print a summary (``info()``) and the first rows of the
        result.

    Returns
    -------
    pandas.DataFrame
        The rows whose ``split_ratio`` is not 0.0, with the columns
        ``symbol``, ``effective_date`` (from ``date``) and ``ratio``
        (``1.0 / split_ratio``). The row index of ``data`` is kept.
    """
    has_split = data['split_ratio'] != 0.0

    # Take an explicit copy: adding a column to a slice of `data` would
    # otherwise trigger pandas' SettingWithCopy warning.
    splits = data.loc[has_split, ['symbol', 'date', 'split_ratio']].copy()

    # The stored value is inverted to obtain the ratio.
    splits['ratio'] = 1.0 / splits['split_ratio']
    splits = splits.drop(columns='split_ratio').rename(
        columns={'date': 'effective_date'}
    )

    if show_progress:
        # info() prints its own report and returns None, so it must not be
        # wrapped in print().
        splits.info()
        print(splits.head())

    return splits

In [142]:
# Extract the split rows from the combined data (show_progress=False, so
# nothing is printed by the function) and display them.
splits = parse_splits(all_data, False)
splits

Unnamed: 0,symbol,effective_date,ratio
2749,US72919P2020,2011-05-20,10.0
695,FR0000120073,2012-05-31,0.909091
2556,FR0000120073,2019-10-07,0.909091
2558,FR0000120073,2019-10-09,0.909091
813,IE00BZ12WP82,2003-12-16,0.5
