In [1]:
import os
import pickle
import sys
sys.path.append('..')

import datetime
from pathlib import Path
import pandas as pd
from dateutil import parser

from utils import GCStorage
from constants import *

In [2]:
def str_to_datetime(function):
    def wrapper(*args, **kwargs):
        
        all_kwargs = {k: kwargs[k] for k in kwargs}
        if type(kwargs['start_time']) == str:
            start_time = parser.parse(kwargs['start_time'], dayfirst=False)
            all_kwargs['start_time'] = start_time
        
        if 'end_time' in kwargs and type(kwargs['end_time']) == str:
            start_time = parser.parse(kwargs['end_time'], dayfirst=False)
            all_kwargs['end_time'] = start_time

        result = function(*args, **all_kwargs)
        return result
    return wrapper


def get_fpath(pair, lp_idx, start_time):
    whole_day = start_time.replace(minute=0, hour=0,
                                          second=0, microsecond=0)
    nice_date = whole_day.strftime('%Y%m%d')

    # fname = f'{nice_date}-{pair}-{lp_idx}.csv'
    fname = f'{nice_date}-{pair}-{lp_idx}.pickle'
    return Path(pair) / nice_date / fname, whole_day

In [55]:
class CandleDataAPI:
    '''Assumes that data actually exists'''

    MONO = None
    
    @staticmethod
    def get_DataAPI(*args, **kwargs):
        if CandleDataAPI.MONO is not None:
            return CandleDataAPI.MONO
        else:
            CandleDataAPI.MONO = CandleDataAPI(*args, **kwargs)
            print('Unique instance for CandleDataAPI has been created')
            return CandleDataAPI.MONO
    
    def __init__(self, candle_interval):
        self.processed_storage = f'candle_{candle_interval}_data'
        self.candle_interval = datetime.timedelta(seconds=candle_interval)
        self.data_path = Path(TEMP_FOLDER) / self.processed_storage
        self.storage = GCStorage(PROJECT_NAME, GC_BUCKET, CREDENTIAL_PATH)

        self.local_storage = []

    def describe_cache(self):
        for item in self.local_storage:
            print(f'{item["pair"]}[{item["lp_idx"]}]: {item["start_time"]} ({len(item["data"])})')

    @str_to_datetime
    def get(self, pair, lp_idx, start_time, length, verbose):
        
        if verbose:
            print(f'Length of cache is {len(self.local_storage)}')
        
        fpath, whole_day = get_fpath(pair, lp_idx, start_time)

        data = None
        for item in self.local_storage:
            if item['pair'] == pair and item['lp_idx'] == lp_idx and \
                item['start_time'] == whole_day:
                data = item['data']
                break

        if data is None:
            target_file = self.data_path / fpath
            data = pickle.load(open(target_file, 'rb'))

            self.local_storage.append({'pair': pair, 'lp_idx': lp_idx,
                                       'start_time': whole_day,
                                       'data': data})
    
        start_index = int((start_time - whole_day) / self.candle_interval)
                  
        if start_index + length > len(data['candle_starts']):
            raise IndexError(f'Requested data exceeds day length')

        
        print(start_index, start_time, whole_day, self.candle_interval)
        bids_df = {k: data['candle_bids'][k][start_index: start_index + length] \
                                    for k in data['candle_bids']}
        bids_df = pd.DataFrame(bids_df)
        asks_df = {k: data['candle_asks'][k][start_index: start_index + length] \
                                    for k in data['candle_asks']}
        asks_df = pd.DataFrame(asks_df)
        result = {'candle_starts': data['candle_starts'][start_index: start_index + length],
                  'candle_bids': bids_df, 'candle_asks': asks_df}
        return result



In [56]:
api = CandleDataAPI(10)

In [61]:
data = api.get(pair='AUDUSD', lp_idx=1, start_time='20190203 17:00:00', length=1000, verbose=True)

Length of cache is 1
6120 2019-02-03 17:00:00 2019-02-03 00:00:00 0:00:10


In [19]:
df = pd.read_csv('/sailhome/jingbo/CXR_RELATED/temp_store/organized_data/AUDUSD/20190203/20190203-AUDUSD-1.csv', parse_dates=['time'])

In [22]:
df

Unnamed: 0,provider,stream,currency pair,time,bid price,bid volume,ask price,ask volume,guid,tier,status,quote type
0,LP-1,STRM-1,AUDUSD,2019-02-03 17:51:44.591,0.72333,1000000,0.72833,1000000,G-605762d-168b47ccdcf-LP-1C-1,1,Active,MT
1,LP-1,STRM-1,AUDUSD,2019-02-03 17:51:44.592,0.72333,1000000,0.72833,1000000,G-605762d-168b47ccdd0-LP-1C-2,1,Active,MT
2,LP-1,STRM-1,AUDUSD,2019-02-03 17:51:44.591,0.72333,1000000,0.72833,1000000,G-605762d-168b47ccdcf-LP-1C-3,1,Active,MT
3,LP-1,STRM-1,AUDUSD,2019-02-03 17:54:13.886,0.72281,1000000,0.72589,1000000,G-605762d-168b47f14fe-LP-1C-4,1,Active,MT
4,LP-1,STRM-1,AUDUSD,2019-02-03 17:54:13.887,0.72281,1000000,0.72589,1000000,G-605762d-168b47f14ff-LP-1C-5,1,Active,MT
5,LP-1,STRM-1,AUDUSD,2019-02-03 17:54:13.887,0.72281,1000000,0.72589,1000000,G-605762d-168b47f14ff-LP-1C-6,1,Active,MT
6,LP-1,STRM-1,AUDUSD,2019-02-03 17:54:13.893,0.72230,1000000,0.72640,1000000,G-605762d-168b47f1505-LP-1C-7,1,Active,MT
7,LP-1,STRM-1,AUDUSD,2019-02-03 17:54:13.893,0.72230,1000000,0.72640,1000000,G-605762d-168b47f1505-LP-1C-8,1,Active,MT
8,LP-1,STRM-1,AUDUSD,2019-02-03 17:54:13.893,0.72230,1000000,0.72640,1000000,G-605762d-168b47f1505-LP-1C-9,1,Active,MT
9,LP-1,STRM-1,AUDUSD,2019-02-03 17:54:13.947,0.72241,1000000,0.72629,1000000,G-605762d-168b47f153b-LP-1C-a,1,Active,MT


In [60]:
data['candle_starts']

[datetime.datetime(2019, 2, 3, 17, 0),
 datetime.datetime(2019, 2, 3, 17, 0, 10),
 datetime.datetime(2019, 2, 3, 17, 0, 20),
 datetime.datetime(2019, 2, 3, 17, 0, 30),
 datetime.datetime(2019, 2, 3, 17, 0, 40),
 datetime.datetime(2019, 2, 3, 17, 0, 50),
 datetime.datetime(2019, 2, 3, 17, 1),
 datetime.datetime(2019, 2, 3, 17, 1, 10),
 datetime.datetime(2019, 2, 3, 17, 1, 20),
 datetime.datetime(2019, 2, 3, 17, 1, 30),
 datetime.datetime(2019, 2, 3, 17, 1, 40),
 datetime.datetime(2019, 2, 3, 17, 1, 50),
 datetime.datetime(2019, 2, 3, 17, 2),
 datetime.datetime(2019, 2, 3, 17, 2, 10),
 datetime.datetime(2019, 2, 3, 17, 2, 20),
 datetime.datetime(2019, 2, 3, 17, 2, 30),
 datetime.datetime(2019, 2, 3, 17, 2, 40),
 datetime.datetime(2019, 2, 3, 17, 2, 50),
 datetime.datetime(2019, 2, 3, 17, 3),
 datetime.datetime(2019, 2, 3, 17, 3, 10),
 datetime.datetime(2019, 2, 3, 17, 3, 20),
 datetime.datetime(2019, 2, 3, 17, 3, 30),
 datetime.datetime(2019, 2, 3, 17, 3, 40),
 datetime.datetime(2019, 2,