In [1]:
import datetime
import json
import pprint as pp
import time

import numpy as np
import pandas as pd
import pytz
import requests

In [2]:
# Time zone applied when candle timestamps are turned into ISO strings, weekday and hour
timezone = pytz.timezone('America/Toronto')

# JSON file that holds the API connection settings read further down
# NOTE(review): absolute, machine-specific path -- consider making this configurable
config_file = '/home/emily/Desktop/projects/test/badass-data-science/badassdatascience/forex/data/DEVELOPMENT.json'

# Location of the previously stored candles; the file read is <data_directory>/<granularity>.parquet
data_directory = '../database/populate_and_update/mongodb/output'
granularity = 'M15' # This data contains currency pair closing prices in 15-minute intervals

# Sent as the `price` query parameter of the candles request
price_types = 'BAM'

In [3]:
# Read the stored candles for this granularity, order them by instrument and time,
# and keep only the rows whose `complete` flag is true
df_previous = (
    pd.read_parquet(f'{data_directory}/{granularity}.parquet')
    .sort_values(by = ['instrument', 'time'])
    .loc[lambda frame: frame['complete']]
    .copy()
)

# Two-column view used to look up the newest stored time per instrument
df = df_previous.loc[:, ['instrument', 'time']].copy()
df.head()

Unnamed: 0,instrument,time
2553676,AUD_USD,1020717900
2553677,AUD_USD,1020804300
2553678,AUD_USD,1020890700
2553679,AUD_USD,1020977100
2553680,AUD_USD,1021063500


In [4]:
# Newest stored candle time for each instrument, keyed by instrument name
records_max_time = (
    df.groupby('instrument')['time']
    .agg('max')
    .reset_index()
    .to_dict(orient = 'records')
)
dict_max_time_we_already_have_per_instrument = {
    record['instrument']: record['time'] for record in records_max_time
}

pp.pprint(dict_max_time_we_already_have_per_instrument)

{'AUD_USD': 1744385400,
 'EUR_USD': 1744385400,
 'GBP_USD': 1744385400,
 'NZD_USD': 1744385400,
 'USD_CAD': 1744385400,
 'USD_CHF': 1744385400,
 'USD_JPY': 1744385400}


In [5]:
# Current time as whole seconds since the Unix epoch (fractional part floored away)
seconds_since_epoch = datetime.datetime.now().timestamp()
now = int(np.floor(seconds_since_epoch))
now

1744976898

In [6]:
#
# load config
#
# Parse the whole JSON settings file into the `config` dictionary
with open(config_file) as config_handle:
    config = json.loads(config_handle.read())

In [7]:
#
# define a function to produce the headers from the given config file
#
def get_oanda_headers(config):
    """
    Build the HTTP headers sent with each API request.

    Parameters
    ----------
    config : mapping
        Must provide 'token' (appended to 'Bearer ' for the Authorization
        header) and 'oanda_date_time_format' (used verbatim as the
        Accept-Datetime-Format header).

    Returns
    -------
    dict
        Headers 'Content-Type', 'Authorization' and 'Accept-Datetime-Format'.

    Raises
    ------
    KeyError
        If either required key is missing from `config`.
    """
    bearer_token = 'Bearer ' + config['token']
    date_time_format = config['oanda_date_time_format']

    return dict(
        [
            ('Content-Type', 'application/json'),
            ('Authorization', bearer_token),
            ('Accept-Datetime-Format', date_time_format),
        ]
    )

In [8]:
#
# send a request to Oanda for historical candlestick values
#
def get_instrument_candlesticks(
    instrument,
    price_types,
    granularity,
    start_date,
    end_date,
    config,
    error_retry_interval = 3,
    max_retries = None,
    timeout = 30,
):
    """
    Request candlesticks for one instrument and return the decoded JSON body.

    Parameters
    ----------
    instrument : str
        Instrument name inserted into the URL path (e.g. 'AUD_USD').
    price_types : str
        Value of the `price` query parameter.
    granularity : str
        Value of the `granularity` query parameter.
    start_date, end_date :
        Values of the `from` and `to` query parameters; converted with str().
    config : mapping
        Must provide 'server' plus the keys read by get_oanda_headers().
    error_retry_interval : number
        Seconds to sleep between attempts after a transport-level failure.
    max_retries : int or None
        Maximum number of retries after the first failed attempt. None (the
        default) retries indefinitely, which is what the original loop intended.
    timeout : number or None
        Per-request timeout in seconds handed to requests.get(); a timed-out
        request counts as a failure and is retried. None disables the timeout.

    Returns
    -------
    The parsed JSON payload of the response. The HTTP status code is not
    checked here, so an error payload is returned to the caller as-is.

    Raises
    ------
    requests.exceptions.RequestException
        Re-raised once `max_retries` is exhausted.
    """
    headers = get_oanda_headers(config)
    url = config['server'] + '/v3/instruments/' + instrument + '/candles?price=' + price_types + '&granularity=' + granularity + '&from=' + str(start_date) + '&to=' + str(end_date)

    failed_attempts = 0
    while True:
        try:
            r = requests.get(url, headers = headers, timeout = timeout)
            break
        except requests.exceptions.RequestException:
            # Only transport-level problems are retried; anything else (including
            # KeyboardInterrupt) propagates instead of being swallowed.
            failed_attempts += 1
            if max_retries is not None and failed_attempts > max_retries:
                raise
            time.sleep(error_retry_interval)

    return r.json()

In [9]:
def booger(
    api_results,
    exclude_time = None,
):
    """
    Pick the completed candlesticks out of a candles API response.

    Each kept candle has its 'time' value replaced, in place, by that value
    converted to float and rounded to the nearest integer. The returned list
    holds the same dictionary objects found in `api_results['candles']`.

    Parameters
    ----------
    api_results : mapping
        Must contain a 'candles' list whose items have 'time' and 'complete'.
    exclude_time : int or None
        A candle whose integer time equals this value is dropped. The caller in
        this notebook uses the newest time already stored, so that candle is
        not added a second time. When None, the notebook-level `start_time`
        variable is used, which preserves the behavior of calling
        `booger(results)` with a single argument.

    Returns
    -------
    list of dict
        Completed candles whose time differs from `exclude_time`.

    Raises
    ------
    KeyError
        If 'candles', 'time' or 'complete' is missing.
    NameError
        If `exclude_time` is None and no notebook-level `start_time` exists.
    """
    if exclude_time is None:
        # Backward compatibility with the original implicit global lookup
        exclude_time = start_time

    candlesticks = []

    for c in api_results['candles']:
        time_as_int = int(round(float(c['time']), 0))
        if c['complete'] and time_as_int != exclude_time:
            c['time'] = time_as_int
            candlesticks.append(c)

    return candlesticks

In [10]:
# from get_candles.py

#
# alters the dictionary in place; not my favorite design idiom
#
def deal_with_candlestick_format_and_time(candle, tz = None):
    """
    Flatten one candlestick dictionary in place and add time-derived fields.

    The dictionary is modified in place:
      * 'time' becomes an int (via float, truncating any fractional part);
      * 'time_iso', 'weekday' and 'hour' are derived from that timestamp
        interpreted in `tz`;
      * for each of 'bid', 'mid', 'ask' that is present, every entry of the
        nested price dictionary is stored as a float under
        '<price_type>_<component>', '<price_type>_return' (c - o) and
        '<price_type>_volatility' (h - l) are added, and the nested
        dictionary is removed.

    Price types that are absent from `candle` are skipped, so a response that
    does not carry all three no longer raises KeyError.

    Parameters
    ----------
    candle : dict
        Must contain 'time'; each nested price dictionary that is present must
        contain the components 'o', 'h', 'l' and 'c'.
    tz : datetime.tzinfo or None
        Time zone for the derived fields. When None, the notebook-level
        `timezone` variable is used (the original behavior).

    Returns
    -------
    None
    """
    if tz is None:
        # Backward compatibility with the original implicit global lookup
        tz = timezone

    candle['time'] = int(float(candle['time']))
    time_dt = datetime.datetime.fromtimestamp(candle['time'], tz = tz)
    candle['time_iso'] = time_dt.isoformat()
    candle['weekday'] = time_dt.weekday()
    candle['hour'] = time_dt.hour

    #
    # prices arrive as strings nested under their price type; convert them to
    # float and move them to flat '<price_type>_<component>' keys
    #
    for price_type in ['bid', 'mid', 'ask']:
        if price_type not in candle:
            continue

        prices = candle.pop(price_type)
        for candlestick_component, value in prices.items():
            candle[price_type + '_' + candlestick_component] = float(value)

        candle[price_type + '_return'] = candle[price_type + '_c'] - candle[price_type + '_o']
        candle[price_type + '_volatility'] = candle[price_type + '_h'] - candle[price_type + '_l']

    return None


In [11]:
# Flattened candles from every refreshed instrument end up in this list
to_dataframe = []

for instrument in ['AUD_USD']:

    # Query window: from the newest stored candle up to `now`.
    # `start_time` has to remain a notebook-level name because booger() reads it.
    start_time, end_time = dict_max_time_we_already_have_per_instrument[instrument], now

    results = get_instrument_candlesticks(
        instrument,
        price_types,
        granularity,
        start_time,
        end_time,
        config,
    )
    candlesticks = booger(results)

    # flatten each candle in place, then tag it with its instrument and granularity
    for candle in candlesticks:
        deal_with_candlestick_format_and_time(candle)
        candle.update(instrument = instrument, granularity = granularity)

    to_dataframe.extend(candlesticks)

df_new = pd.DataFrame(to_dataframe)

In [12]:
# Put the columns of both frames into alphabetical order
columns_new = sorted(df_new.columns)
columns_previous = sorted(df_previous.columns)

df_previous = df_previous.loc[:, columns_previous]
df_new = df_new.loc[:, columns_new]

In [13]:
# Stack the stored candles and the newly fetched ones row-wise (original indexes are kept)
df_full = pd.concat((df_previous, df_new), axis = 0)

In [14]:
# Preview the first five rows of the combined frame
df_full.head(5)

Unnamed: 0,ask_c,ask_h,ask_l,ask_o,ask_return,ask_volatility,bid_c,bid_h,bid_l,bid_o,...,mid_c,mid_h,mid_l,mid_o,mid_return,mid_volatility,time,time_iso,volume,weekday
2553676,0.5399,0.5399,0.5399,0.5399,0.0,0.0,0.5395,0.5395,0.5395,0.5395,...,0.5397,0.5397,0.5397,0.5397,0.0,0.0,1020717900,2002-05-06T16:45:00-04:00,1,0
2553677,0.5397,0.5397,0.5397,0.5397,0.0,0.0,0.5393,0.5393,0.5393,0.5393,...,0.5395,0.5395,0.5395,0.5395,0.0,0.0,1020804300,2002-05-07T16:45:00-04:00,1,1
2553678,0.5399,0.5399,0.5399,0.5399,0.0,0.0,0.5395,0.5395,0.5395,0.5395,...,0.5397,0.5397,0.5397,0.5397,0.0,0.0,1020890700,2002-05-08T16:45:00-04:00,1,2
2553679,0.5442,0.5442,0.5442,0.5442,0.0,0.0,0.5438,0.5438,0.5438,0.5438,...,0.544,0.544,0.544,0.544,0.0,0.0,1020977100,2002-05-09T16:45:00-04:00,1,3
2553680,0.5453,0.5453,0.5453,0.5453,0.0,0.0,0.5443,0.5443,0.5443,0.5443,...,0.5448,0.5448,0.5448,0.5448,0.0,0.0,1021063500,2002-05-10T16:45:00-04:00,1,4


In [33]:
# Sanity check: seconds between the newest stored AUD_USD candle and the oldest newly fetched one
df_test_previous = df_previous.groupby('instrument')['time'].max().reset_index()
df_test_new = df_new.groupby('instrument')['time'].min().reset_index()

first_new_time = df_test_new.loc[df_test_new['instrument'] == 'AUD_USD', 'time'].values[0]
last_previous_time = df_test_previous.loc[df_test_previous['instrument'] == 'AUD_USD', 'time'].values[0]

print(first_new_time - last_previous_time)

900


In [15]:
# Do the keys of a freshly flattened candle match the stored frame's column names?
sorted(candlesticks[0]) == sorted(df_previous.columns)

True

In [16]:
# Sorted candle keys next to sorted stored column names, pair by pair
list(zip(sorted(candlesticks[0]), sorted(df_previous.columns)))

[('ask_c', 'ask_c'),
 ('ask_h', 'ask_h'),
 ('ask_l', 'ask_l'),
 ('ask_o', 'ask_o'),
 ('ask_return', 'ask_return'),
 ('ask_volatility', 'ask_volatility'),
 ('bid_c', 'bid_c'),
 ('bid_h', 'bid_h'),
 ('bid_l', 'bid_l'),
 ('bid_o', 'bid_o'),
 ('bid_return', 'bid_return'),
 ('bid_volatility', 'bid_volatility'),
 ('complete', 'complete'),
 ('granularity', 'granularity'),
 ('hour', 'hour'),
 ('instrument', 'instrument'),
 ('mid_c', 'mid_c'),
 ('mid_h', 'mid_h'),
 ('mid_l', 'mid_l'),
 ('mid_o', 'mid_o'),
 ('mid_return', 'mid_return'),
 ('mid_volatility', 'mid_volatility'),
 ('time', 'time'),
 ('time_iso', 'time_iso'),
 ('volume', 'volume'),
 ('weekday', 'weekday')]