In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

import environment
from collections import deque
import random

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
MAX_MEMORY = 2000
BATCH_SIZE = 256
LR = 0.001

class Agent:
    def __init__(self, model, batch_size, data_path):
        self.model = model
        self.memory = deque(maxlen=MAX_MEMORY)
        self.batch_size = batch_size
        raw_data = pd.read_csv(data_path)
        self.env = environment.Environment(raw_data)

    def get_state(self):
        return self.env.get_state()

    def get_action(self, state):
        return torch.argmax(self.model(state))
    
    def remember(self, state, action, reward):
        self.memory.append((state, action, reward))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        states, actions, rewards = zip(*mini_sample)
        self.train_step(states, actions, rewards)

    def train_short_memory(self, state, action, reward):
        self.train_step(state, action, reward)


    def get_target(self, actions, rewards):
        # trick:
        # if action is "sell" and reward is positive, then target is "sell"
        # if action is "sell" and reward is negative, then target is "buy"
        # if action is "sell" and reward is zero, then target is "hold"
        # if action is "buy" and reward is positive, then target is "buy"
        # if action is "buy" and reward is negative, then target is "sell"
        # if action is "buy" and reward is zero, then target is "hold"
        # if action is "hold" and reward is positive, then target is "buy"
        # if action is "hold" and reward is negative, then target is "sell"
        # if action is "hold" and reward is zero, then target is "hold"
        # "buy"->0, "sell"->1, "hold"->2
        target = torch.zeros(len(actions))
        for i, (a, r) in enumerate(zip(actions, rewards)):
            if r==0:
                target[i] = 2
                continue
            if a==1:
                target[i] = int(r>0)
            elif a==0:
                target[i] = int(r<0)
            else:
                target[i] = int(r<0)

        target.reshape(-1, 1)
        return target
    
    def train_step(self, states, actions, rewards):
        states = torch.tensor(states, dtype=torch.float)
        actions = torch.tensor(actions, dtype=torch.int32)
        rewards = torch.tensor(rewards, dtype=torch.int32)
        target = self.get_target(actions, rewards)

        if len(states.shape) == 1:
            states = torch.unsqueeze(states, 0)
            actions = torch.unsqueeze(actions, 0)
            rewards = torch.unsqueeze(rewards, 0)
            target = torch.unsqueeze(target, 0)

        self.model.optimizer.zero_grad()
        output = self.model(states)
        loss = self.model.loss(actions, target)
        loss.backward()
        self.model.optimizer.step()

In [4]:
class ESN(nn.Module):
    def __init__(self, input_size, reservoir_size, output_size, spectral_radius=0.9, LR=0.001):
        super(ESN, self).__init__()
        self.input_size = input_size
        self.reservoir_size = reservoir_size

        # Initialize reservoir weights
        self.Win = nn.Parameter(torch.randn(reservoir_size, input_size))
        self.W = nn.Parameter(torch.randn(reservoir_size, reservoir_size))

        # Scaling W to have spectral radius = spectral_radius
        self.W.data *= spectral_radius / torch.max(torch.abs(torch.linalg.eigvals(self.W)))

        # Output layer
        self.Wout = nn.Linear(reservoir_size, output_size)

        # lr, loss and optimizer
        self.lr=LR
        self.loss = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=LR)

    def forward(self, input_data, initial_state=None):
        if initial_state is None:
            state = torch.zeros((input_data.size(0), self.reservoir_size)).to(device)
        else:
            state = initial_state

        state = torch.tanh(torch.matmul(input_data, self.Win.t()) + torch.matmul(state, self.W.t()))
        state = torch.tanh(self.Wout(state))
        return state

In [5]:
class LSTM(nn.Module):
    def __init__(self, input_size, num_hidden, num_layers, output_size, esn):
        super().__init__()

        # store parameters
        self.input_size = input_size
        self.num_hidden = num_hidden
        self.num_layers = num_layers

        # RNN Layer (notation: LSTM \in RNN)
        self.lstm = nn.LSTM(input_size, num_hidden, num_layers)

        # linear layer for output
        self.out = nn.Linear(num_hidden, output_size)

        # esn
        self.ESN = esn

    def forward(self, x):
        print(f"Input: {list(x.shape)}")
        # pass the input through the ESN
        x = self.ESN(x)
        print(f"Output-ESN: {list(x.shape)}")

        # run through the RNN layer
        y, hidden = self.lstm(x)
        print(f"RNN-cell: {list(hidden[1].shape)}")
        print(f"RNN-hidden: {list(hidden[0].shape)}")
        print(f"RNN-out: {list(y.shape)}")

        # pass the RNN output through the linear output layer
        o = self.out(y)
        print(f"Output: {list(o.shape)}")

        return o[-1]

In [6]:
esn_input_size = 4
esn_reservoir_size = 1024
esn_output_size = 128
lstm_input_size = 128
lstm_num_hidden = 512
lstm_num_layers = 3
lstm_output_size = 3

# test
esn = ESN(esn_input_size, esn_reservoir_size, esn_output_size)
lstm = LSTM(lstm_input_size, lstm_num_hidden, lstm_num_layers, lstm_output_size, esn)
input_data = torch.randn(1, 1, esn_input_size)
action = torch.argmax(lstm(input_data)).item()

Input: [1, 1, 4]
Output-ESN: [1, 1, 128]
RNN-cell: [3, 1, 512]
RNN-hidden: [3, 1, 512]
RNN-out: [1, 1, 512]
Output: [1, 1, 3]


In [7]:
if __name__ == "__main__":
    esn = ESN(esn_input_size, esn_reservoir_size, esn_output_size)
    lstm = LSTM(lstm_input_size, lstm_num_hidden, lstm_num_layers, lstm_output_size, esn)
    agent = Agent(lstm, BATCH_SIZE, "crypto_data/train_data.csv")
    state = agent.env.reset()
    action = agent.get_action(state)
    new_state, reward, done = agent.env.step(action)
    agent.remember(state, action, reward)
    print(f"state: {state}, action: {action}, reward: {reward}")

Input: [10, 4]
Output-ESN: [10, 128]
RNN-cell: [3, 512]
RNN-hidden: [3, 512]
RNN-out: [10, 512]
Output: [10, 3]
state: tensor([[ -9.0040, -11.3140, -39.3180, -32.8940],
        [-32.7570, -29.0250, -28.5720, -29.6440],
        [-29.4300,  -4.5390,   5.3510,  14.1080],
        [ 13.4120, -10.8700,   3.2980, -10.0830],
        [ -8.9850,  -5.5100,   3.9490,   3.3310],
        [  2.9920,  34.6410,  -0.9330,  33.6390],
        [ 33.6590,  -5.4450,  24.9350, -12.5860],
        [-12.5950, -12.5920, -11.6640, -11.6310],
        [-11.7270,  -8.5820,  -9.4590,  -7.1490],
        [ -7.8730,  -8.3150,  -2.6370,  -4.9050]]), action: 2, reward: -1


In [8]:
new_state.shape

torch.Size([10, 4])