# Set up

- Install the oandapyV20 library (e.g. `pip install oandapyV20`); see https://oanda-api-v20.readthedocs.io/en/latest/installation.html

In [None]:
import pandas as pd
from oandapyV20 import API
import oandapyV20.endpoints.instruments as instruments
from dateutil import parser

In [None]:
# You need a demo account on Oanda to obtain an access token and an account id.
# Both are read from the environment so that no secret is stored in the notebook:
# export OANDA_ACCESS_TOKEN and OANDA_ACCOUNT_ID before starting Jupyter.
# SECURITY: any token that was ever committed in this notebook should be treated
# as leaked and revoked in the Oanda account settings.
import os
import warnings

ACCESS_TOKEN = os.environ.get('OANDA_ACCESS_TOKEN', '')
ACCOUNT_ID = os.environ.get('OANDA_ACCOUNT_ID', '')

if not ACCESS_TOKEN:
    # Surface the misconfiguration here rather than at the first API call.
    warnings.warn('OANDA_ACCESS_TOKEN is not set; set it before calling the Oanda API.')

In [None]:
# API client for the 'practice' environment, authenticated with ACCESS_TOKEN.
api = API(environment='practice', access_token=ACCESS_TOKEN)

In [None]:
# Query parameters describing which candles to download.
# To add a start bound, set params['from'] = '2019-01-01T00:00:00.000000000Z'
# after this statement ('from' is a Python keyword, so it cannot be passed to
# dict() as a keyword argument).
params = dict(
    to='2019-01-22T18:40:00.000000000Z',
    count=5000,  # number of candles requested; 5000 is the maximum
    granularity='M5',  # 5-minute candles
)

In [None]:
%%time
# Get data from the Oanda API: build a candles request for the USD_JPY
# instrument using the query in `params`, then send it through the `api` client.
# The `%%time` cell magic above must stay on the first line of the cell.
r = instruments.InstrumentsCandles(instrument="USD_JPY", params=params)
# The candles are read back from `r.response` after this call.
api.request(r)

In [None]:
# No need to change.
# Flatten every candle of the response into one row:
# [time, volume, mid open, mid high, mid low, mid close].
data = [
    [candle['time'], candle['volume'], *(candle['mid'][key] for key in ('o', 'h', 'l', 'c'))]
    for candle in r.response['candles']
]

In [None]:
# No need to change.
def f(x):
    """Return the part of *x* before its first '.' (drops the fractional seconds)."""
    return x.split('.')[0]


# Build the frame from the flattened candle rows and name its columns.
df = pd.DataFrame(data)
df.columns = ['Time', 'Volume', 'Open', 'High', 'Low', 'Close']

# Truncate the timestamps, index by them, and put the columns in OHLCV order.
df['Time'] = df['Time'].map(f)
df = df.set_index('Time')[['Open', 'High', 'Low', 'Close', 'Volume']]
df.head()

In [None]:
# No need to change.
# Convert the 'Time' index labels from strings to pandas datetimes, then show
# the last rows to check the result.
df.index = pd.to_datetime(df.index)
df.tail()

In [None]:
# Display the first rows of the downloaded candles.
df.head()

In [None]:
# Display the index object itself to inspect its labels and dtype.
df.index

In [None]:
# Persist the candles just downloaded from Oanda as a bz2-compressed CSV.
# The file name encodes the first and last candle times, so update it by hand
# whenever `params` changes; otherwise an earlier download is overwritten.
# NOTE(review): the path says JPYUSD while the requested instrument is USD_JPY.
output_path = '../data/raw/JPYUSD_5MIN/JPYUSD_5Min_CandleStick_20181227_092500_20190122_183500_OHLCV.csv.bz2'
df.to_csv(output_path, compression='bz2')

# Concat data

In [None]:
import os

# Directory holding one CSV per download.
RAW_DIR = '../data/raw/JPYUSD_5MIN/'
# The concatenated output is written into this same directory. It must be
# excluded here, or re-running the notebook would concatenate it into itself.
MASTER_NAME = 'master.csv.bz2'

# Sorted full paths of the raw download files. Entries that are not regular
# files (e.g. checkpoint directories) are skipped so that only readable CSV
# inputs reach pd.read_csv.
file_list = sorted(
    RAW_DIR + file_name
    for file_name in os.listdir(RAW_DIR)
    if file_name != MASTER_NAME and os.path.isfile(RAW_DIR + file_name)
)
file_list

In [None]:
# Read every raw file and stack them in file_list order with a fresh 0..n-1
# index. All frames are concatenated in a single call: calling pd.concat inside
# the loop would re-copy the accumulated frame on every iteration.
concat_df = pd.concat(
    [pd.read_csv(path) for path in file_list],
    ignore_index=True,
)

In [None]:
# Display the first rows of the concatenated frame.
concat_df.head()

In [None]:
# Display the last rows of the concatenated frame.
concat_df.tail()

In [None]:
# Write the concatenated frame as a bz2-compressed CSV; index=False keeps the
# synthetic row numbers out of the file.
concat_df.to_csv('../data/raw/JPYUSD_5MIN/master.csv.bz2', compression='bz2', index=False)

# Split raw data into train and test

In [None]:
# Reload the master file, using its 'Time' column as the index.
# NOTE(review): parse_dates is not passed, so the index labels presumably stay
# plain strings and the later comparisons against date strings are
# lexicographic; confirm this is intended.
raw_df = pd.read_csv('../data/raw/JPYUSD_5MIN/master.csv.bz2', index_col='Time')
raw_df.head()

In [None]:
# Keep only the rows whose 'Time' label compares greater than '2019-01-01',
# dropping everything before that cutoff.
# NOTE(review): an earlier comment described this step as removing a partial
# October; the cutoff actually applied is 2019-01-01. Confirm which is intended.
updated_df = raw_df.loc[raw_df.index > '2019-01-01']
updated_df.head()

In [None]:
# Display (rows, columns) of the filtered frame.
updated_df.shape

In [None]:
# First day that belongs to the test set; every row whose 'Time' label
# compares strictly less than it goes to the training set.
test_start = '2019-11-01'
train_df = updated_df.loc[updated_df.index < test_start]
train_df.tail()

In [None]:
# Test split: every row from test_start onward. Using `>=` makes this the exact
# complement of a `< test_start` training mask, so a row whose label equals
# test_start cannot fall between the two sets.
test_df = updated_df[updated_df.index >= test_start]
test_df.head()

In [None]:
# Report how many rows ended up in each split.
n_train, n_test = len(train_df), len(test_df)
print('Num samples of train: {}'.format(n_train))
print('Num samples of test: {}'.format(n_test))

In [None]:
# Fraction of the split rows that belong to the test set, computed from the
# actual frames so it stays correct when the data or test_start changes.
test_df.shape[0] / (train_df.shape[0] + test_df.shape[0])

In [None]:
# Save train_df and test_df as bz2-compressed CSVs. The index is written too,
# so the 'Time' labels are kept in both files.
train_df.to_csv('../data/JPYUSD_5MIN_2019/train.csv.bz2', compression='bz2')
test_df.to_csv('../data/JPYUSD_5MIN_2019/test.csv.bz2', compression='bz2')