In [1]:
# Standard stuff
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Machine Learning
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
import pickle
from sklearn.preprocessing import StandardScaler

# Algorithmic trading
import tpqoa
from datetime import datetime, timedelta
import time

# My designs
import Agents
import Environments
import Models

In [2]:
def get_scaler(env, epochs):
    """Fit a ``StandardScaler`` on states visited by a uniformly random agent.

    Args:
        env: Environment exposing ``reset()`` (returning the initial state),
            ``step(action)`` (returning ``(state, reward, done, info)``) and
            an ``action_space`` sequence to sample actions from.
        epochs: Number of complete random episodes to play.

    Returns:
        A ``StandardScaler`` fitted on every state observed, including the
        initial state of each episode.

    Raises:
        ValueError: If ``epochs`` is less than 1, because there would be no
            samples to fit the scaler on.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1 to collect sample states")

    states = []

    for _ in range(epochs):
        # Start every episode from a fresh environment. Without this, any
        # episode after the first would call step() on an environment that
        # has already reported done. The reset state is sampled too, because
        # it is the first state the agent will be asked to act on.
        states.append(env.reset())
        done = False
        while not done:   # play as random agent to generate sample space of states
            action = np.random.choice(env.action_space)
            state, _reward, done, _info = env.step(action)
            states.append(state)

    scaler = StandardScaler()
    scaler.fit(states)
    return scaler

def maybe_make_dir(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory.
    """
    # exist_ok=True makes this a single call instead of a check followed by a
    # create, so it cannot fail when something else creates the directory
    # between the two steps.
    os.makedirs(directory, exist_ok=True)

def play_one_episode(env, agent, scaler):
    """Run one full episode, training the agent after every step.

    Args:
        env: Environment providing ``reset()`` and ``step(action)``; ``step``
            returns ``(next_state, reward, done, info)``.
        agent: Object providing ``act(state)`` and
            ``train(state, action, reward, next_state, done)``.
        scaler: Object whose ``transform`` is applied to each state, wrapped
            in a one-element list, before the agent sees it.

    Returns:
        ``info["cur_val"]`` from the final step of the episode.
    """
    current = scaler.transform([env.reset()])
    finished = False

    while not finished:
        chosen = agent.act(current)
        observed, reward, finished, info = env.step(chosen)
        upcoming = scaler.transform([observed])
        # The agent learns from the scaled transition, not the raw one.
        agent.train(current, chosen, reward, upcoming, finished)
        current = upcoming

    return info["cur_val"]

def _fetch_resampled_close(api, instrument, start, end, granularity, price, bar_length, name):
    """Download close prices for one price side and resample them into bars."""
    closes = api.get_history(
        instrument = instrument,
        start = start,
        end = end,
        granularity = granularity,
        price = price,
        localize = False
    ).c.rename(name)

    # label="right" stamps each bar with the right edge of its bin, and
    # last() keeps the final quote that falls inside the bin.
    return closes.resample(pd.to_timedelta(bar_length), label = "right").last()


def get_data(api, instrument, bar_length, months = 6, granularity = "S5"):
    """Download recent Ask/Bid close prices and resample them into bars.

    Args:
        api: Client exposing ``get_history(instrument, start, end,
            granularity, price, localize)`` that returns a frame with a
            close column ``c`` (a ``tpqoa`` client in this notebook).
        instrument: Instrument name passed through to the API, e.g. "EUR_USD".
        bar_length: Bar size as accepted by ``pd.to_timedelta``, e.g. "20min".
        months: How far back to fetch; a month is approximated as 30 days.
        granularity: Granularity of the raw history requested from the API.

    Returns:
        DataFrame with columns "Ask" and "Bid", one row per bar in which both
        sides have a value, with the last row removed.
    """
    from datetime import timezone  # local import: only this function needs it

    # Naive UTC "now" truncated to whole seconds. This yields the same value
    # the deprecated datetime.utcnow() produced, and keeps the timestamps
    # naive exactly as before.
    now = datetime.now(timezone.utc).replace(tzinfo = None, microsecond = 0)
    past = now - timedelta(days = 30 * months)

    sides = [("A", "Ask"), ("B", "Bid")]
    columns = [
        _fetch_resampled_close(api, instrument, past, now, granularity, price, bar_length, name)
        for price, name in sides
    ]

    # dropna() removes bars where either side has no quote. The final row is
    # discarded as well, presumably because the most recent bar may still be
    # incomplete (TODO confirm).
    df = pd.concat(columns, axis = 1).dropna().iloc[ : -1]
    return df


In [3]:
# Build the tpqoa client from the local "oanda.cfg" configuration file.
api = tpqoa.tpqoa("oanda.cfg")
# Current account balance as a float; passed to the simulator below as its
# initial investment.
balance = float(api.get_account_summary()["balance"])
balance

99872.6231

In [4]:
%%time
# Download EUR/USD history over get_data's default window (months = 6) and
# resample it to 20-minute Ask/Bid bars. This is slow: the recorded run took
# about 4.5 minutes.
data = get_data(api, "EUR_USD", "20min")
data

Wall time: 4min 37s


Unnamed: 0_level_0,Ask,Bid
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-08-15 21:20:00+00:00,1.17983,1.17935
2021-08-15 21:40:00+00:00,1.17982,1.17926
2021-08-15 22:00:00+00:00,1.17965,1.17925
2021-08-15 22:20:00+00:00,1.17978,1.17962
2021-08-15 22:40:00+00:00,1.17953,1.17936
...,...,...
2022-02-10 13:00:00+00:00,1.14370,1.14357
2022-02-10 13:20:00+00:00,1.14332,1.14318
2022-02-10 13:40:00+00:00,1.14107,1.14091
2022-02-10 14:00:00+00:00,1.14090,1.14077


In [5]:
# Save the bars to a CSV file in the current working directory.
# NOTE(review): nothing in this notebook reads the file back; consider loading
# it instead of repeating the slow download.
data.to_csv("eur_usd_data.csv")

In [9]:
# Scratch check: count the total number of scalar weights in a fresh MLP by
# flattening every array returned by model.model.get_weights().
# NOTE(review): the execution counts show this cell was run out of order
# relative to the cells below it.
model = Models.MLP(input_dim = 4)
flat_weights = [layer_weights.flatten() for layer_weights in model.model.get_weights()]
np.concatenate(flat_weights).size

2371

In [15]:
# Layer widths assembled from the model's dimension attributes, listed from
# the output side to the input side.
[model.output_dim] + [model.hidden_dim] * model.hidden_layers + [model.input_dim]

[3, 32, 32, 32, 4]

In [10]:
# MLP.get_weights() (unlike model.model.get_weights()) returns one flat array,
# as the recorded output shows.
# NOTE(review): this rebinds flat_weights, which an earlier cell used for a
# list of per-layer arrays.
flat_weights = model.get_weights()
flat_weights

array([-0.15617898,  0.18407118, -0.07328022, ...,  0.        ,
        0.        ,  0.        ], dtype=float32)

In [11]:
# Snapshot the per-layer weights before calling set_weights, to serve as the
# reference for the round-trip comparison below.
old_weights = model.model.get_weights()

In [12]:
# Load the flat weight vector back into the model. It was read from this same
# model, so the weights are expected to come out unchanged.
model.set_weights(flat_weights)

In [16]:
# Check that the weights survived the get_weights() -> set_weights() round
# trip. Each array is compared element by element with np.array_equal.
# Comparing `a.all() != b.all()` would only compare two booleans (whether
# every entry is non-zero), which passes for almost any pair of arrays.
restored_weights = model.model.get_weights()
flag = len(restored_weights) == len(old_weights) and all(
    np.array_equal(restored, original)
    for restored, original in zip(restored_weights, old_weights)
)
flag

True

In [6]:
# Build the training simulator over the downloaded bars, starting from the
# live account balance. bar_length is the same "20min" value that was passed
# to get_data, so the two need to be kept in sync by hand.
# NOTE(review): the meaning of time_horizon and units is defined in
# Environments.Simulator, which is not visible here; confirm there.
train_sim = Environments.Simulator(
        data = data, 
        initial_investment = balance,
        bar_length = "20min",
        time_horizon = "12hour",
        units = 100000
)

In [7]:
# The state returned by train_sim.reset() has 4 entries (see its recorded
# output), which is where this value comes from.
state_size = 4
# This rebinds `model`, replacing the scratch MLP created in the weight
# round-trip cells above.
model = Models.MLP(input_dim = state_size)
agent = Agents.QDayTrader(state_size = state_size, model = model)
# NOTE(review): absolute, machine-specific path; the notebook will not run on
# another machine without editing it. Consider a configurable path.
agent.load(r"C:\Users\Jeff\Documents\Projects\Algorithmic Trader\day_trader_models\q_day_trader")



In [8]:
# Reset the simulator and display the initial state. In the recorded output
# the last three values match the account balance and the first Ask/Bid row;
# the first value is presumably the open position (confirm in Environments).
train_sim.reset()

array([0.00000000e+00, 9.98726231e+04, 1.17983000e+00, 1.17935000e+00])

In [9]:
# Fit the state scaler from one episode played with random actions.
scaler = get_scaler(train_sim, 1)
# get_scaler steps the simulator to the end of an episode, so put it back at
# the initial state before any further use.
train_sim.reset()

array([0.00000000e+00, 9.98726231e+04, 1.17983000e+00, 1.17935000e+00])

In [11]:
%%time
# Play one full episode, training the agent at every step; the value shown is
# info["cur_val"] from the final step. The recorded run took about 20 minutes
# and ended below the starting balance.
play_one_episode(env = train_sim, agent = agent, scaler = scaler)

Wall time: 20min 27s


86172.62310000029