# Notebook to save Custom CSVs for Zipline

In [39]:
import os
import datetime

import matplotlib
import pandas as pd
import pyfolio as pf
import yfinance as yf

from deep_rl_asset_allocation.configs import data_config, paths_config

In [40]:
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): with no category/module filter this also hides deprecation
# warnings from the libraries imported above — consider narrowing it.
warnings.filterwarnings('ignore')

In [41]:
# "Agg" is a non-interactive backend: it renders to files, not to a window.
# NOTE(review): the `%matplotlib inline` magic below selects the notebook's
# inline backend afterwards, so this call is probably overridden — confirm
# whether it is still needed.
matplotlib.use('Agg')

# render figures inline in the notebook output
%matplotlib inline

In [42]:
# Load zipline's IPython extension (re-running this cell only prints an "already loaded" notice).
%load_ext zipline

The zipline extension is already loaded. To reload it, use:
  %reload_ext zipline


### Import Data as a df or CSV

In [43]:
# Date window passed to the price-history download further down.
# It spans from the start of the training period to the end of the testing period.
START = data_config.TRAINING_START  # alternative: data_config.TESTING_START (test window only)
END = data_config.TESTING_END

In [44]:
# # load DJIA df
# DJIA_FILENAME = os.path.join(paths_config.data_csv_dir, "DJIA.csv")
# djia = pd.read_csv(DJIA_FILENAME)
# # djia.head()

# def convert_datadate_to_datetime(df):
#     datadate1 = str(df['Date'])
#     # expects an ISO-style date string such as '2009-01-02' (YYYY-MM-DD)
#     year, month, day = datadate1[:4], datadate1[5:7], datadate1[8:]
#     return datetime.date(year=int(year), month=int(month), day=int(day))

# djia["date"] = djia.apply(convert_datadate_to_datetime, axis=1)
# djia = djia.drop(columns=['Date'])
# djia = djia[["date", "Open", "High", "Low", "Close", "Volume"]]
# djia.columns = map(str.lower, djia.columns)
# djia = djia.rename(columns={"date": "Date"})
# djia = djia.dropna()
# # djia.head()

# djia = djia.loc[(djia['Date'] >= START) & (djia['Date'] < END)]
# djia.reset_index(drop=True, inplace=True)
# # djia.head()

# # save csv
# CSV_FILENAME = os.path.join(paths_config.data_csv_dir, "daily/DJIA.csv")
# djia.to_csv(CSV_FILENAME)
# print(f"Saved to: {CSV_FILENAME}")

In [45]:
# The ^DJI index followed by 30 individual stocks; one CSV is written per symbol.
TICKERS = [
    "^DJI",
    'AAPL', 'AXP', 'BA', 'CAT', 'CSCO', 'CVX', 'DD', 'DIS', 'GS', 'HD',
    'IBM', 'INTC', 'JNJ', 'JPM', 'KO', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE',
    'PFE', 'PG', 'RTX', 'TRV', 'UNH', 'V', 'VZ', 'WBA', 'WMT', 'XOM',
]

# Columns kept from the yfinance frame (dividends / splits are dropped).
OHLCV_COLUMNS = ["Open", "High", "Low", "Close", "Volume"]

# All CSVs go into the "daily" sub-directory; create it up front so a fresh
# checkout does not fail on the first to_csv() call.
DAILY_CSV_DIR = os.path.join(paths_config.data_csv_dir, "daily")
os.makedirs(DAILY_CSV_DIR, exist_ok=True)

for ticker in TICKERS:
    yf_ticker = yf.Ticker(ticker)

    df = yf_ticker.history(start=START, end=END)
    if df.empty:
        # A failed / empty download would otherwise raise on the column
        # selection or leave a header-only CSV behind; report it and move on.
        print(f"No data returned for {ticker}; skipping")
        continue

    # Keep OHLCV only, lower-case the column names, drop incomplete rows.
    df = df[OHLCV_COLUMNS].rename(columns=str.lower).dropna()

    # Newer yfinance releases return a tz-aware index; strip the timezone so
    # the CSV holds plain dates (no UTC offsets). No-op for a tz-naive index.
    if getattr(df.index, "tz", None) is not None:
        df.index = df.index.tz_localize(None)

    # save csv
    CSV_FILENAME = os.path.join(DAILY_CSV_DIR, f"{ticker}.csv")
    df.to_csv(CSV_FILENAME)
    print(f"Saved to: {CSV_FILENAME}")

Saved to: /Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_allocation/data/csv/daily/^DJI.csv
Saved to: /Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_allocation/data/csv/daily/AAPL.csv
Saved to: /Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_allocation/data/csv/daily/AXP.csv
Saved to: /Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_allocation/data/csv/daily/BA.csv
Saved to: /Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_allocation/data/csv/daily/CAT.csv
Saved to: /Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_allocation/data/csv/daily/CSCO.csv
Saved to: /Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_allocation/data/csv/daily/CVX.csv
Saved to: /Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_allocation/data/csv/daily/DD.csv
Saved to: /Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_allocation/data/csv/daily/DIS.csv
Saved to: /Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_all

### Zipline
Note: before ingesting, open `~/.zipline/extension.py` and add (or update) the `register()` call for the bundle, for example:
```
import pandas as pd
from zipline.data.bundles import register
from zipline.data.bundles.csvdir import csvdir_equities

start_session = pd.Timestamp('2016-1-4', tz='utc')
end_session = pd.Timestamp('2020-5-7', tz='utc')

register(
    'djia-csvdir-bundle',
    csvdir_equities(
        ['daily'],
        '/Users/akeaveny/git/deep-rl-asset-allocation/deep_rl_asset_allocation/data/csv',
    ),
    calendar_name='NYSE',  # US equities
    start_session=start_session,
    end_session=end_session)

```

In [46]:
# Remove previous ingestions of the bundle: `--after` clears everything ingested
# after the given timestamp, and 2009-01-01 predates any ingestion, so this wipes them all.
!zipline clean -b djia-csvdir-bundle --after=2009-01-01
# List all registered bundles with their remaining ingestion timestamps.
!zipline bundles

[2022-03-09 18:14:58.641903] INFO: zipline.data.bundles.core: Cleaning 2022-03-09T18;13;49.227461.
csvdir <no ingestions>
djia-csvdir-bundle <no ingestions>
quandl 2022-02-08 15:05:16.803138
quandl 2022-02-08 15:02:12.920795
quantopian-quandl <no ingestions>


In [49]:
# Clear any stale ingestions again, then ingest the bundle from its CSV directory.
# NOTE(review): requires 'djia-csvdir-bundle' to be registered in ~/.zipline/extension.py first.
!zipline clean -b djia-csvdir-bundle --after=2009-01-01
!zipline ingest -b djia-csvdir-bundle