In [1]:
from __future__ import print_function

from abc import ABCMeta, abstractmethod
import datetime
import os, os.path

import numpy as np
import pandas as pd

try:
    import Queue as queue
except ImportError:
    import queue

import time

from event import MarketEvent


In [48]:
class HistoricCSVDataHandler(object):
    
    def __init__(self,
                 events,
                 csv_dir,
                 symbol_list):
        '''
        Initializes the DataHandler by getting the location of the csv files (csv_dir) and a list of symbols to track.

        It assumes all the files are named 'symbol.csv' where 'symbol' is a string in the symbol_list

        Parameters:
        events - The Event Queue
        csv_dir - Absolute directory path to the csv files
        symbol_list - A list of symbol strings
        '''

        self.events = events
        self.csv_dir = csv_dir
        self.symbol_list = symbol_list

        self.symbol_data = {}
        self.latest_symbol_data = {}
        self.continue_backtest = True

        self._open_convert_csv_files()
        
    def _open_convert_csv_files(self):
            '''
            Opens the CSV files from the data directory, converting them into pandas DataFrames within a symbol dictionary
            This handler assumes the data was taken from my database using the following query and then copied into a CSV file
            with the name <<symbol>>.csv.

            SELECT
                p.price_date
                , p.open_price
                , p.high_price
                , p.low_price
                , p.close_price
                , p.adj_close_price
                , p.volume
            FROM dbo.daily_price p
            JOIN dbo.symbol s ON p.symbol_id = s.id
            WHERE
                ticker = 'ATVI'
            order by price_date
            '''

            comb_index = None
            for s in self.symbol_list: # for each and every symbol we care about
                # load the csv file with no head information, indexed on the date
                self.symbol_data[s] = pd.io.parsers.read_csv(
                    os.path.join(self.csv_dir, '%s.csv' % s),
                    header = 0, index_col = 0, parse_dates = True,
                    names = [
                        'price_date',
                        'open_price',
                        'high_price',
                        'low_price',
                        'close_price',
                        'adj_close_price',
                        'volume'
                    ]
                ).sort_values(by = 'price_date')   # .sort()

                # combine the index to pad forward values
                if comb_index is None: # if it's the first symbol, set the index to the dates of the first symbol
                    comb_index = self.symbol_data[s].index
                else: # if it's not the first symbol, combine the dates of all of the symbols
                    comb_index.union(self.symbol_data[s].index)

                # set the latest symbol data to None
                self.latest_symbol_data[s] = []

            # Reindex the dataframes and turn them into row generators instead of actual data frames
            for s in self.symbol_list:
                self.symbol_data[s] = self.symbol_data[s].reindex(index=comb_index, method = 'pad').iterrows()
                
    def _get_new_bar(self, symbol):
        '''
        Returns the latest bar from the data feed.
        '''
        for b in self.symbol_data[symbol]:
            yield b # return a new bar but don't store it in memory, (return it and then throw it away)
            
    def get_latest_bar(self, symbol):
        '''
        Returns the last bar from the latest_symbol list
        '''
        try:
            bars_list = self.latest_symbol_data[symbol]
        except KeyError:
            print('That symbol is not in the historical data set.')
            raise
        else:
            return bars_list[-1]
        
    def update_bars(self):
        '''
        Pushes the latest bar to the latest_symbol_data structure for all symbols in the symbol list
        '''

        for s in self.symbol_list:
            try:
                print('here1')
                bar = next(self._get_new_bar(s)) # grab the new bar
            except StopIteration:
                print('here2')
                self.continue_backtest = False # of there is no next bar then the backtest is over
            else:
                print('here3')
                if bar is not None:
                    print('here4')
                    self.latest_symbol_data[s].append(bar) # tack the next bar onto the latest_symbol_data
        self.events.put(MarketEvent())

In [65]:
csv_dir = r'C:\Users\jonat\Documents\Projects\Quantstrat\First_Backtester\Data\\' #r'C:\Users\jonat\Documents\Projects\Quantstrat\\'
symbol_list = ['ATVI']
events = queue.Queue()

datahandler = HistoricCSVDataHandler(csv_dir=csv_dir, events=events, symbol_list=symbol_list)


In [66]:
datahandler.symbol_data['ATVI']

<generator object DataFrame.iterrows at 0x0000027715E18678>

In [67]:
# manually go through backtest

if datahandler.continue_backtest == True:
    print('hey')
    datahandler.update_bars()
else:
    print('yo')
    pass

hey
here1
here3
here4


In [34]:
events.qsize()

3

In [44]:
datahandler.symbol_data['ATVI'].__next__()

(Timestamp('2000-01-12 00:00:00'), open_price             17.2500
 high_price             17.2500
 low_price              16.3700
 close_price            16.9400
 adj_close_price         1.2904
 volume             224017.0000
 Name: 2000-01-12 00:00:00, dtype: float64)

In [55]:
csv_dir = r'C:\Users\jonat\Documents\Projects\Quantstrat\First_Backtester\Data\\'

symbol_data = {}
latest_symbol_data = {}

comb_index = None
s = 'ATVI'

symbol_data[s] = pd.io.parsers.read_csv(
    os.path.join(csv_dir, '%s.csv' % s),
    header = 0, index_col = 0, parse_dates = True,
    names = [
        'price_date',
        'open_price',
        'high_price',
        'low_price',
        'close_price',
        'adj_close_price',
        'volume'
    ]
).sort_values(by = 'price_date')   # .sort()

# combine the index to pad forward values
if comb_index is None: # if it's the first symbol, set the index to the dates of the first symbol
    comb_index = symbol_data[s].index
else: # if it's not the first symbol, combine the dates of all of the symbols
    comb_index.union(symbol_data[s].index)

# set the latest symbol data to None
latest_symbol_data[s] = []