In [None]:
import sys
sys.path.append('..')

import time
import os
import re
from collections import defaultdict
import pprint as pp
import copy

from tqdm import tqdm
# Pattern for CSV names shaped like `LP-<digit>-STRM-<digit>-<UPPERCASE>...csv`.
# Named group `idx` captures the first digit, `pair` the run of capital letters.
# NOTE(review): no cell visible in this notebook references RE_FNAME.
RE_FNAME = re.compile(r'LP\-(?P<idx>[0-9])\-STRM\-[0-9]\-(?P<pair>[A-Z]+).*\.csv')

from utils import GCStorage
from constants import CREDENTIAL_PATH, ALL_DAYS

import pandas as pd

import shutil
import csv
import numpy as np
from subprocess import call
import pickle

import datetime
from dateutil.parser import parse
from pathlib import Path

from utils import GCStorage
from constants import *
from multiprocessing import Pool, Manager, Value

In [None]:
# --- Notebook-wide settings -------------------------------------------------
# `time_interval` is expressed in seconds (it is fed to timedelta(seconds=...)).
time_interval = 60
candle_interval = datetime.timedelta(seconds=time_interval)

# Working folders underneath TEMP_FOLDER; the joint folder is created on demand.
organized_path = TEMP_FOLDER / 'organized_data'
jfolder = f'joint_{time_interval}_data'
joint_path = TEMP_FOLDER / jfolder
joint_path.mkdir(exist_ok=True, parents=True)

# Project storage helper. NOTE(review): the meaning of the two string
# arguments is defined by utils.GCStorage, which is not visible here.
storage = GCStorage('FX_Trading', 'integral_data', CREDENTIAL_PATH)

In [None]:
# Currency pairs iterated by the `for pair in valid_pairs` loop in the last cell.
# Exactly one assignment is active; the commented-out lines are alternative selections.
# valid_pairs = ['AUDUSD', 'EURUSD', 'GBPUSD', 'NZDUSD', 'USDCAD', 'USDCHF', 'USDJPY']
# valid_pairs = ['AUDUSD']
# valid_pairs = ['EURUSD']
valid_pairs = ['NZDUSD']

In [None]:
def get_daily_pair_df(day, pair):
    """Load one day of quotes for ``pair`` from five LP pickles and align cursors.

    The files are read from
    ``TEMP_FOLDER/pickled_data/{pair}/{day}/{day}-{pair}-{lp}.pickle`` for
    ``lp`` in 1..5, where ``day`` has every '-' removed. Rows whose ask/bid
    price or ask/bid volume equals 0 are dropped.

    Parameters
    ----------
    day : str
        Day string; dashes are stripped before it is used in the path.
    pair : str
        Pair name used for the folder and file names.

    Returns
    -------
    tuple (lp_df, lp_index)
        ``lp_df`` is the list of filtered frames (position 0 holds LP 1).
        ``lp_index[i]`` is the cursor for frame ``i``: the position of the
        first row whose time is not earlier than the starting time, where the
        starting time is the latest first-row time across all LPs. A frame
        whose first row is exactly at the starting time gets cursor 1, so
        ``lp_index[i] - 1`` is always a valid, non-negative position.
        A frame whose rows all precede the starting time gets
        ``lp_index[i] == len(lp_df[i])``.

    Raises
    ------
    ValueError
        If a frame has no rows left after the invalid rows are removed.
    """
    print(f'Checking {pair} {day}')

    day = day.replace('-', '')
    lp_df = []
    for lp in range(1, 6):
        fname = TEMP_FOLDER / f'pickled_data/{pair}/{day}/{day}-{pair}-{lp}.pickle'

        # NOTE(review): pickle.load can execute arbitrary code; only point this
        # at files produced by a trusted pipeline.
        with open(fname, 'rb') as handle:
            df = pickle.load(handle)

        # Remove invalid rows
        df = df[(df['ask price'] != 0) & (df['bid price'] != 0) & (df['ask volume'] != 0) & (df['bid volume'] != 0)]
        if len(df) == 0:
            raise ValueError(f'No valid rows for {pair} {day} (LP {lp})')

        lp_df.append(df)
        print(len(df))

    # Common starting point: the moment at which every LP has quoted at least once.
    cur_max_time = max(df.iloc[0]['time'] for df in lp_df)

    lp_index = []
    for df in lp_df:
        n_rows = len(df)
        cursor = 0
        # Bounds check first so a short frame cannot raise IndexError.
        while cursor < n_rows and df.iloc[cursor]['time'] < cur_max_time:
            cursor += 1

        # A cursor still at 0 means the first row sits exactly on the starting
        # time. Step past it so that `cursor - 1` refers to that row instead of
        # wrapping around to the last row of the frame.
        if cursor == 0:
            cursor = 1
        lp_index.append(cursor)

    print(f'Starting time: {cur_max_time}')
    return lp_df, lp_index

In [None]:
# Load a single AUDUSD day; `lp_df` / `lp_index` stay at notebook level because
# the later `direct_increment` cell reads them.
lp_df, lp_index = get_daily_pair_df(day='2019-02-01', pair='AUDUSD')

In [None]:
def get_head_time(lp_df, lp_index):
    """Return the latest "current row" time across LPs and the LP that owns it.

    The current row of LP ``i`` is ``lp_df[i].iloc[lp_index[i] - 1]``.

    Parameters
    ----------
    lp_df : sequence of DataFrame
        One frame per LP, each with a ``'time'`` column.
    lp_index : sequence of int
        Cursor per LP; must be >= 1, otherwise ``- 1`` wraps to the last row.

    Returns
    -------
    tuple (head_time, head_lp)
        The maximum current-row time and the position of the first LP holding
        it. Existing callers that ignore the return value are unaffected.
    """
    times = [lp_df[i].iloc[lp_index[i] - 1]['time'] for i in range(len(lp_df))]
    max_time_lp = int(np.argmax(times))
    return times[max_time_lp], max_time_lp
    
def _dump_inversions(inversions, pair, day_string):
    """Pickle ``inversions`` to ``inversions/{pair}/{pair}_{day_string}.pickle``."""
    os.makedirs(f'inversions/{pair}', exist_ok=True)
    with open(f'inversions/{pair}/{pair}_{day_string}.pickle', 'wb') as handle:
        pickle.dump(inversions, handle)


def direct_increment(args):
    """Replay one day of quotes tick by tick and record bid/ask inversions.

    An inversion opens when the best bid across LPs is above the best ask
    across LPs (both rounded to 8 decimals). It is tracked under the key
    ``(bid lp, ask lp)`` and closes on the first tick where that bid LP's
    current bid is no longer above that ask LP's current ask.

    Parameters
    ----------
    args : tuple
        ``(day_string, pair, lp_df, lp_index)``, packed into one tuple so the
        function can be used with ``Pool.map``. ``lp_df`` is a list of frames
        with ``'ask price'``, ``'bid price'`` and ``'time'`` columns;
        ``lp_index`` holds one cursor per frame (>= 1) and is not mutated.

    Returns
    -------
    list of dict
        One dict per closed inversion with the keys ``'time to occur'``
        (milliseconds between the two opening quotes), ``'price diff'``
        (price difference times 10000, one decimal), ``'bid'``, ``'ask'``,
        ``'bid time'``, ``'ask time'``, ``'bid lp'``, ``'ask lp'``,
        ``'last valid bid'``/``'ask'`` (+ times) and ``'exit bid'``/``'ask'``
        (+ times). Inversions still open when any LP runs out of rows are
        not included.

    Side effects
    ------------
    Writes the list to ``inversions/{pair}/{pair}_{day_string}.pickle`` every
    50000 ticks and once more at the end (relative to the working directory).
    """
    day_string, pair, lp_df, lp_index = args
    print(f'Processing {pair} {day_string}')

    n_lps = len(lp_df)
    lp_index = copy.copy(lp_index)  # keep the caller's cursor list untouched
    max_lens = [len(df) for df in lp_df]
    print(max_lens)

    count = 0
    inversions = []
    inversion_status = {}

    # Stop as soon as any LP has no "next" row left to read.
    while all(lp_index[lp] < max_lens[lp] for lp in range(n_lps)):

        # Cursor points at the NEXT row, so the current quote is one behind it.
        cur_rows = [lp_df[lp].iloc[lp_index[lp] - 1] for lp in range(n_lps)]

        cur_asks = [row['ask price'] for row in cur_rows]
        cur_bids = [row['bid price'] for row in cur_rows]
        cur_times = [row['time'] for row in cur_rows]

        cur_lowest_ask = round(np.min(cur_asks), 8)
        cur_highest_bid = round(np.max(cur_bids), 8)

        cur_lowest_ask_lp = int(np.argmin(cur_asks))
        cur_highest_bid_lp = int(np.argmax(cur_bids))

        cur_lowest_ask_time = cur_times[cur_lowest_ask_lp]
        cur_highest_bid_time = cur_times[cur_highest_bid_lp]

        # Refresh or close every open inversion (iterate over a snapshot of
        # the keys because entries are removed inside the loop).
        for key in list(inversion_status):
            status = inversion_status[key]
            bid_lp, ask_lp = status['bid lp'], status['ask lp']
            tracked_bid = cur_bids[bid_lp]
            # The ask side must be read from the ASK quotes of the ask LP.
            tracked_ask = cur_asks[ask_lp]

            if tracked_bid > tracked_ask:
                status['last valid bid'] = tracked_bid
                status['last valid bid time'] = cur_times[bid_lp]
                status['last valid ask'] = tracked_ask
                status['last valid ask time'] = cur_times[ask_lp]
            else:
                status['exit bid'] = tracked_bid
                status['exit bid time'] = cur_times[bid_lp]
                status['exit ask'] = tracked_ask
                status['exit ask time'] = cur_times[ask_lp]

                inversions.append(inversion_status.pop(key))

        if cur_highest_bid > cur_lowest_ask:
            key = (cur_highest_bid_lp, cur_lowest_ask_lp)

            if key not in inversion_status:
                diff = round((cur_highest_bid - cur_lowest_ask) * 10000, 1)
                # Absolute gap between the two quotes in milliseconds.
                # total_seconds() keeps whole seconds, which the
                # `.microseconds` attribute alone would drop.
                gap_seconds = (cur_highest_bid_time - cur_lowest_ask_time).total_seconds()
                time_diff = round(abs(gap_seconds) * 1000, 2)

                inversion_status[key] = {
                    'time to occur': time_diff, 'price diff': diff,
                    'bid': cur_highest_bid, 'ask': cur_lowest_ask,
                    'bid time': cur_highest_bid_time, 'ask time': cur_lowest_ask_time,
                    'bid lp': cur_highest_bid_lp, 'ask lp': cur_lowest_ask_lp,
                    'last valid bid': cur_highest_bid, 'last valid ask': cur_lowest_ask,
                    'last valid bid time': cur_highest_bid_time, 'last valid ask time': cur_lowest_ask_time,
                    'exit bid': None, 'exit ask': None,
                    'exit bid time': None, 'exit ask time': None,
                }

        # Advance the LP whose next quote is the earliest (first LP on ties).
        next_times = [lp_df[lp].iloc[lp_index[lp]]['time'] for lp in range(n_lps)]
        lp_index[int(np.argmin(next_times))] += 1

        count += 1
        if count % 1000 == 0:
            print(f'Progress for {day_string}: {lp_index}/{max_lens}')

        if count % 50000 == 0:
            print(f'Temporarily storing {day_string}: {lp_index}/{max_lens}')
            _dump_inversions(inversions, pair, day_string)

    print(f'Finished processing for {pair} {day_string}')
    _dump_inversions(inversions, pair, day_string)
    return inversions

In [None]:
# direct_increment takes ONE tuple: (day_string, pair, lp_df, lp_index).
# `lp_df` / `lp_index` come from the AUDUSD load cell above, so the pair is
# 'AUDUSD'; the output folder must exist before the function pickles into it.
os.makedirs('inversions/AUDUSD', exist_ok=True)
inversions = direct_increment(('2019-02-01', 'AUDUSD', lp_df, lp_index))

In [None]:
# Build one (day, pair, lp_df, lp_index) work item per day that can be loaded.
# NOTE: this rebinds the notebook-level `lp_df` / `lp_index` on every iteration.
pair = 'NZDUSD'
os.makedirs(f'inversions/{pair}', exist_ok=True)

arguments = []
for day in ALL_DAYS:
    try:
        lp_df, lp_index = get_daily_pair_df(day, pair)
    except Exception as exc:
        # Skip days that cannot be loaded, but say why; `Exception` (not a bare
        # except) lets KeyboardInterrupt stop the cell.
        print(f'Unable to get {day}: {exc!r}')
    else:
        arguments.append((day, pair, lp_df, lp_index))

In [None]:
# One task per loaded day. Workers are capped at the CPU count (and at the
# number of tasks) instead of spawning one process per entry in ALL_DAYS;
# at least one worker is requested because Pool(0) raises ValueError.
n_workers = max(1, min(len(arguments), os.cpu_count() or 1))

# The context manager shuts the pool down once the blocking map() returns.
# Each worker pickles its own result, so the return value is not kept here.
with Pool(n_workers) as pl:
    pl.map(direct_increment, arguments)

In [None]:
# Text report: one section per recorded inversion, separated by '----'.
for inv in inversions:

    # Opening state of the inversion.
    print('Diff', inv["price diff"])
    print(f'Starting spread: {round(inv["last valid bid"], 8)} - {round(inv["last valid ask"], 8)}')
    print(f'Bid time: {inv["bid time"]}')
    print(f'Ask time: {inv["ask time"]}')

    # Last tick on which it still held versus the tick on which it closed.
    print('Last => Exit bid time', inv["last valid bid time"], inv["exit bid time"])
    print('Last => Exit ask time', inv["last valid ask time"], inv["exit ask time"])

    # Window = from the later of the two opening quotes to the earlier of the
    # two last-valid quotes, in whole milliseconds.
    start_time = max(inv[field] for field in ("bid time", "ask time"))
    last_valid = min(inv[field] for field in ("last valid bid time", "last valid ask time"))
    trading_window = int(round((last_valid - start_time).total_seconds() * 1000))

    print('Trading Window (Miliseconds)', trading_window, f'{start_time} - {last_valid}')

    exit_time = min(inv[field] for field in ("exit bid time", "exit ask time"))
    print(exit_time)

    print('----')

In [None]:

            #print(f'Dif: {diff} pip\t{time_diff} milisec')
            #print(f'Bid: {cur_highest_bet}\t{cur_highest_bet_time}')
            #print(f'Ask: {cur_lowest_ask}\t{cur_lowest_ask_time}')
            #print('--------')

In [None]:
def daily_increment(day, pair, max_steps=10 * 1000):
    """Collect synchronized per-LP quote snapshots for one day of ``pair``.

    Five pickles (LP 1..5) are loaded and rows with a zero ask/bid price or
    volume are dropped. Starting from the latest first-row time, every step
    advances each LP past all rows at or before the current time, records the
    most recent row of every LP, then moves the current time to the latest
    "next row" time.

    Parameters
    ----------
    day : str
        Day string; dashes are stripped before it is used in the path.
    pair : str
        Pair name used for the folder and file names.
    max_steps : int, optional
        Upper bound on the number of steps (default 10000, the previous
        hard-coded cap).

    Returns
    -------
    list of list
        One entry per step; each entry holds one row (Series) per LP.
        Collection stops after the step in which any LP runs out of rows.

    Raises
    ------
    ValueError
        If a frame has no rows left after the invalid rows are removed.
    """
    print(f'Checking {pair} {day}')

    lp_df = []
    day = day.replace('-', '')
    for lp in range(1, 6):
        # NOTE(review): absolute, user-specific path. get_daily_pair_df reads the
        # same layout from TEMP_FOLDER / 'pickled_data'; confirm whether the two
        # locations are meant to be identical before unifying them.
        fname = f'/sailhome/jingbo/CXR_RELATED/temp_store/pickled_data/{pair}/{day}/{day}-{pair}-{lp}.pickle'

        # NOTE(review): pickle.load can execute arbitrary code; trusted files only.
        with open(fname, 'rb') as handle:
            df = pickle.load(handle)

        # Remove invalid rows
        df = df[(df['ask price'] != 0) & (df['bid price'] != 0) & (df['ask volume'] != 0) & (df['bid volume'] != 0)]
        if len(df) == 0:
            raise ValueError(f'No valid rows for {pair} {day} (LP {lp})')

        lp_df.append(df)
        print(len(df))

    n_lps = len(lp_df)
    lengths = [len(df) for df in lp_df]
    lp_index = [0] * n_lps
    cur_max_time = max(df.iloc[0]['time'] for df in lp_df)

    all_increments = []
    count = 0
    while count < max_steps and all(lp_index[i] < lengths[i] for i in range(n_lps)):

        for lp in range(n_lps):
            df = lp_df[lp]
            # Bounds check first: the last rows of a frame may all be <= cur_max_time.
            while lp_index[lp] < lengths[lp] and df.iloc[lp_index[lp]]['time'] <= cur_max_time:
                lp_index[lp] += 1

        # -1 to get the most recent (not the next)
        cur_rows = [lp_df[i].iloc[lp_index[i] - 1] for i in range(n_lps)]
        all_increments.append(cur_rows)

        # With an exhausted LP there is no "next row" to derive the next time from.
        if any(lp_index[i] >= lengths[i] for i in range(n_lps)):
            break

        cur_max_time = max(lp_df[i].iloc[lp_index[i]]['time'] for i in range(n_lps))
        count += 1

        if count % 250 == 0:
            print(count)

    return all_increments

In [None]:
# Snapshot list for one AUDUSD day, kept at notebook level for later inspection.
incremental_info = daily_increment(day='2019-02-01', pair='AUDUSD')

In [None]:
def analyze_increments(increments):
    """Print every snapshot in which the best bid is above the best ask.

    Parameters
    ----------
    increments : iterable
        Each item is a sequence of rows (one per LP); a row must support
        ``row['ask price']``, ``row['bid price']`` and ``row['time']``.

    Returns
    -------
    None
        Output is printed: for each inverted snapshot one line with the
        highest bid and its time, and one line with the lowest ask and its
        time. Prices are rounded to 8 decimals before the comparison.
    """
    for inc in increments:
        if len(inc) == 0:
            # Nothing to compare; np.min / np.max would raise on an empty list.
            continue

        cur_asks = [row['ask price'] for row in inc]
        cur_bids = [row['bid price'] for row in inc]

        cur_lowest_ask = round(np.min(cur_asks), 8)
        cur_highest_bet = round(np.max(cur_bids), 8)

        cur_lowest_ask_lp = np.argmin(cur_asks)
        cur_highest_bet_lp = np.argmax(cur_bids)

        cur_lowest_ask_time = inc[cur_lowest_ask_lp]['time']
        cur_highest_bet_time = inc[cur_highest_bet_lp]['time']

        if cur_highest_bet > cur_lowest_ask:
            print(f'Higest Bet: {cur_highest_bet}\tBet Time: {cur_highest_bet_time}')
            # The ask line reports the ASK quote's own timestamp.
            print(f'Lowest Ask: {cur_lowest_ask}\tAsk Time: {cur_lowest_ask_time}')

In [None]:
# `analyze_spread` / `spread_info` are not defined in any visible cell; the
# function and data defined just above are analyze_increments / incremental_info.
analyze_increments(incremental_info)

In [None]:
# Scratch version of the per-tick best bid/ask computation.
# NOTE(review): `cur_rows`, `cur_max_time` and `all_spread` are not assigned at
# notebook level in any visible cell, so this cell raises NameError on a fresh
# kernel unless they are defined first.
cur_asks = [cur_rows[i]['ask price'] for i in range(5)]
cur_bids = [cur_rows[i]['bid price'] for i in range(5)]
cur_times = [cur_rows[i]['time'] for i in range(5)]

cur_lowest_ask = np.min(cur_asks)
cur_highest_bet = np.max(cur_bids)

cur_lowest_ask_lp = np.argmin(cur_asks)
cur_highest_bet_lp = np.argmax(cur_bids)

# Timestamps of the winning quotes: take the 'time' field of the selected row
# (not the whole row, and not the LP position).
cur_lowest_ask_time = cur_rows[cur_lowest_ask_lp]['time']
cur_highest_bet_time = cur_rows[cur_highest_bet_lp]['time']

all_spread.append([cur_max_time, cur_lowest_ask_lp, cur_highest_bet_lp,
                   round(cur_lowest_ask, 8), round(cur_highest_bet, 8)])

In [None]:
# Run `check_day` for every selected pair on every day in ALL_DAYS.
# NOTE(review): `check_day` is not defined in any cell visible in this notebook;
# confirm it is provided elsewhere (e.g. via `from constants import *`) or
# this cell fails with NameError on a fresh kernel.
for pair in valid_pairs:
    for day in ALL_DAYS:
        check_day(pair, day)
            