In [1]:
import json
import requests
import datetime
import time
import pytz
import pandas as pd
import pprint as pp
import matplotlib.pyplot as plt

In [2]:
from badassdatascience.forex.utilities.oanda_tools import get_oanda_headers
from badassdatascience.forex.utilities.oanda_tools import price_type_map

In [3]:
# This cell is tagged `parameters`
# Keep every entry a plain literal assignment, one per line.
# NOTE(review): config_file is an absolute, machine-specific path; override it
# when running this notebook anywhere else.

# JSON file loaded by CandlePull.get_headers(); it must contain a 'server' entry.
config_file = '/home/emily/Desktop/projects/test/badass-data-science/badassdatascience/forex/data/DEVELOPMENT.json'
# Number of candles requested per API call.
count = 5000 
# Candle granularity code sent to the API.
granularity = 'D'

#instruments = 'EUR_USD,USD_CAD,USD_JPY,USD_CHF,AUD_USD,GBP_USD,NZD_USD'
# Single instrument to pull (the multi-instrument list above is disabled).
instrument = 'EUR_USD'

# One letter per price type; each letter is looked up in price_type_map.
price_types = 'BAM'
# Seconds to wait before retrying a failed HTTP request.
error_retry_interval = 5

In [4]:
# Echo the page size and granularity in effect for this run, one per line.
print(count, granularity, sep='\n')

5000
D


In [13]:
class CandlePull():
    """Pull historical candlesticks for one instrument from the Oanda REST API.

    Candles are requested in pages of ``count``, walking backwards in time
    from the moment the object was created until ``start_time`` is reached or
    the server returns a short page.  Every candle is flattened into a single
    dict (one key per price type and component, plus derived ``_return`` and
    ``_volatility`` values), accumulated in ``insert_many_list`` and finally
    loaded into the DataFrame ``df``.

    Typical use::

        puller = CandlePull(config_file, 5000, 'D', 'EUR_USD', 'BAM')
        puller.fit()
        puller.df
    """

    #
    # Constructor
    #
    def __init__(
        self,
        config_file,
        count,
        granularity,
        instrument,
        price_types,
        error_retry_interval = 5,
        keep_complete_only = True
    ):
        """Store the pull parameters and derive the fixed settings.

        Parameters
        ----------
        config_file : str
            Path to a JSON file; it must contain a ``'server'`` entry and
            whatever ``get_oanda_headers`` needs.
        count : int
            Number of candles requested per API call (page size).
        granularity : str
            Granularity code sent to the API (e.g. ``'D'``).
        instrument : str
            Instrument name sent to the API (e.g. ``'EUR_USD'``).
        price_types : str
            One letter per price type (e.g. ``'BAM'``).  Sent verbatim as the
            ``price`` query parameter; each letter is also looked up in
            ``price_type_map`` to find the matching key in the response.
        error_retry_interval : int or float, optional
            Seconds to sleep before retrying a failed HTTP request.
        keep_complete_only : bool, optional
            When true, candles whose ``'complete'`` flag is false are dropped.
        """

        # command line arguments
        self.config_file = config_file
        self.count = count
        self.granularity = granularity
        self.instrument = instrument
        self.price_types = price_types
        self.error_retry_interval = error_retry_interval
        self.keep_complete_only = keep_complete_only

        # initialize (hard-coded)
        self.timezone_to_use = 'America/Toronto'   # Don't change this!
        # Earliest candle time to keep: 2010-01-01 00:00:00 UTC, as epoch seconds.
        self.start_time = int(datetime.datetime(2010, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc).timestamp())

        # Used to localize candle timestamps when deriving time_iso/weekday/hour.
        self.timezone = pytz.timezone(self.timezone_to_use)
        self.price_type_list = [price_type_map[q] for q in self.price_types]

        # Paging starts at "now" (epoch seconds) and walks backwards.
        self.end_date_original = int(time.time())


    def get_headers(self):
        """Load the JSON config into ``self.config`` and build ``self.headers``."""
        with open(self.config_file) as f:
            self.config = json.load(f)
        self.headers = get_oanda_headers(self.config)

    def get_instrument_candlesticks(self, end_date):
        """Fetch one page of candles ending at ``end_date``.

        Parameters
        ----------
        end_date : int or float
            Upper time bound sent as the ``to`` query parameter.

        Returns
        -------
        dict
            The decoded JSON body of the response.

        Notes
        -----
        Transport-level failures (``requests.exceptions.RequestException``)
        are retried indefinitely, sleeping ``self.error_retry_interval``
        seconds between attempts.  Any other exception, including
        ``KeyboardInterrupt``, propagates so the loop can be interrupted.
        """
        url = self.config['server'] + '/v3/instruments/' + self.instrument + '/candles'
        params = {
            'count': self.count,
            'price': self.price_types,
            'granularity': self.granularity,
            'to': end_date,
        }

        while True:
            try:
                r = requests.get(url, headers = self.headers, params = params)
                break
            except requests.exceptions.RequestException:
                time.sleep(self.error_retry_interval)

        return r.json()

    def _flatten_candle(self, candle):
        """Annotate one raw candle dict in place and flatten its nested prices.

        Adds instrument/granularity tags and time-derived fields, copies each
        nested price component to ``<price_type>_<component>`` as a float,
        derives ``_return`` (close - open) and ``_volatility`` (high - low),
        then removes the nested price dicts.  Returns the same dict.
        """
        candle['instrument'] = self.instrument
        candle['granularity'] = self.granularity

        # The API value must parse as a (possibly fractional) epoch-seconds number.
        candle['time'] = int(float(candle['time']))
        time_dt = datetime.datetime.fromtimestamp(candle['time'], tz = self.timezone)
        candle['time_iso'] = time_dt.isoformat()
        candle['weekday'] = time_dt.weekday()
        candle['hour'] = time_dt.hour

        for price_type in self.price_type_list:
            for component, value in candle[price_type].items():
                candle[price_type + '_' + component] = float(value)
            candle[price_type + '_return'] = candle[price_type + '_c'] - candle[price_type + '_o']
            candle[price_type + '_volatility'] = candle[price_type + '_h'] - candle[price_type + '_l']

        # Drop the nested dicts only after every price type has been flattened.
        for price_type in self.price_type_list:
            del candle[price_type]

        return candle

    def compute_candle_features(self):
        """Page backwards through the candle history and fill ``insert_many_list``.

        Stops when a page comes back with fewer than ``self.count`` candles or
        when the oldest candle of a page is older than ``self.start_time``.
        """
        end_date = self.end_date_original
        self.insert_many_list = []

        # loop through the timestamp ranges for each set of n=count values
        while True:

            # retrieve the instrument candlesticks from the Oanda server
            rj = self.get_instrument_candlesticks(end_date)
            candlesticks = rj['candles']

            date_list = []
            for candle in candlesticks:
                self._flatten_candle(candle)

                if (not self.keep_complete_only) or candle['complete']:
                    self.insert_many_list.append(candle)

                # Incomplete candles still count toward the paging bookkeeping.
                date_list.append(candle['time'])

            # Are we done?
            if (not date_list) or (len(date_list) < self.count) or (min(date_list) < self.start_time):
                break

            # prepare for the next iteration: end just before the oldest candle seen
            end_date = min(date_list) - 0.1


    def create_dataframe(self):
        """Build ``self.df`` from the collected candles.

        Rows are sorted by instrument and time, candles older than
        ``self.start_time`` are dropped, and the index is reset (the previous
        index is kept as an ``'index'`` column).
        """
        self.df = pd.DataFrame(self.insert_many_list).sort_values(by = ['instrument', 'time'])
        self.df = self.df[self.df['time'] >= int(self.start_time)]
        self.df = self.df.reset_index().copy()

    def qa(self):
        """Print two checks that every row of ``self.df`` has a unique ``time``."""
        row_count = len(self.df.index)
        print(row_count == len(self.df[['time']].drop_duplicates()))
        print(row_count == len(self.df['time'].unique()))

    def fit(self):
        """Run the whole pipeline: headers, download, DataFrame, QA checks."""
        self.get_headers()
        self.compute_candle_features()
        self.create_dataframe()
        self.qa()

In [14]:
# Build the puller from the notebook parameters and run the full pipeline
# (headers -> download -> DataFrame -> QA prints).
t = CandlePull(
    config_file = config_file,
    count = count,
    granularity = granularity,
    instrument = instrument,
    price_types = price_types,
)
t.fit()

True
True


In [15]:
# Display the assembled candle DataFrame (one row per candle, sorted by time).
t.df

Unnamed: 0,index,complete,volume,time,instrument,granularity,time_iso,weekday,hour,bid_o,...,ask_l,ask_c,ask_return,ask_volatility,mid_o,mid_h,mid_l,mid_c,mid_return,mid_volatility
0,738,True,1027,1262469600,EUR_USD,D,2010-01-02T17:00:00-05:00,5,17,1.43070,...,1.42944,1.43056,-0.00114,0.00581,1.43120,1.43425,1.42926,1.43036,-0.00084,0.00499
1,739,True,42031,1262556000,EUR_USD,D,2010-01-03T17:00:00-05:00,6,17,1.43010,...,1.42583,1.44136,0.01086,0.01982,1.43030,1.44560,1.42576,1.44127,0.01097,0.01984
2,740,True,45159,1262642400,EUR_USD,D,2010-01-04T17:00:00-05:00,0,17,1.44114,...,1.43473,1.43660,-0.00472,0.01372,1.44123,1.44839,1.43468,1.43650,-0.00473,0.01371
3,741,True,45142,1262728800,EUR_USD,D,2010-01-05T17:00:00-05:00,1,17,1.43655,...,1.42840,1.44087,0.00412,0.01510,1.43665,1.44346,1.42832,1.44078,0.00413,0.01514
4,742,True,42005,1262815200,EUR_USD,D,2010-01-06T17:00:00-05:00,2,17,1.44065,...,1.43001,1.43089,-0.00994,0.01475,1.44074,1.44464,1.42996,1.43079,-0.00995,0.01468
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4256,4994,True,238236,1744664400,EUR_USD,D,2025-04-14T17:00:00-04:00,0,17,1.13429,...,1.12648,1.12829,-0.00700,0.01152,1.13479,1.13792,1.12640,1.12819,-0.00660,0.01152
4257,4995,True,289974,1744750800,EUR_USD,D,2025-04-15T17:00:00-04:00,1,17,1.12802,...,1.12825,1.13994,0.01132,0.01313,1.12832,1.14130,1.12810,1.13986,0.01154,0.01320
4258,4996,True,255008,1744837200,EUR_USD,D,2025-04-16T17:00:00-04:00,2,17,1.13979,...,1.13361,1.13662,-0.00347,0.00742,1.13994,1.14094,1.13352,1.13646,-0.00348,0.00742
4259,4997,True,198710,1744923600,EUR_USD,D,2025-04-17T17:00:00-04:00,3,17,1.13623,...,1.13602,1.13997,0.00274,0.00395,1.13673,1.13978,1.13592,1.13950,0.00277,0.00386


In [None]:
# Each row should carry a distinct candle time; both prints are expected to show True.
row_count = len(t.df.index)
print(row_count == len(t.df[['time']].drop_duplicates()))
print(row_count == len(t.df['time'].unique()))