In [8]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import read_csv, set_option
from pandas.plotting import scatter_matrix
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import datetime
import math
from numpy.random import choice
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#Import Model Packages for reinforcement learning
#from keras import layers, models, optimizers
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader
#from keras import backend as K
from collections import namedtuple, deque
import torch

device = "cpu" if not torch.cuda.is_available() else 'cuda'
#Diable the warnings
import warnings
warnings.filterwarnings('ignore')

In [18]:
from torch.utils.data import Dataset, DataLoader

class AIFinanceDataloader(Dataset):
    def __init__(self, file_csv):
        self.dataset = read_csv(file_csv, index_col=0, parse_dates=[0], header=0)
        self.dataset.isnull().values.any()
        self.dataset=self.dataset.fillna(method='ffill')
        # self.dataset['day'] = self.dataset[0].apply(lambda x : x)
        self.grouped_by_day = self.dataset.groupby(pd.Grouper(freq='D'))
        self.days = []
        for date, group in self.grouped_by_day:
            if(len(group) > 0):
                self.days.append(group)

    def __len__(self):
        return len(self.days)

    def __getitem__(self, idx):
        day_selected = self.days[idx]
        purged = pd.DataFrame(day_selected).drop(columns=[ 'Volume' , 'Open' ])
        return purged.to_numpy()

train_path = "lib/data/IVV_1m_training.csv"
validation_path = "lib/data/IVV_1m_validation.csv"
dataset = AIFinanceDataloader(file_csv = train_path)
dataloader_train = DataLoader(dataset, batch_size=1, shuffle=False)

#dataset = AIFinanceDataloader(file_csv = validation_path)
#dataloader_val = DataLoader(dataset, batch_size=2, shuffle=False)

In [19]:
# Day zero
print(dataset[0].shape)
print(dataset[1].shape)

feature_size = dataset[0].shape[1]

(390, 3)
(390, 3)


In [15]:
from tqdm import tqdm
from lib.Agent import AgentCNNNetwork

class Agent():
    def __init__(self, feature_size, window_size, is_eval=False, model_name="", dataset_train=None, dataset_val=None):
        super(Agent, self).__init__()
        self.feature_size = feature_size
        self.window_size = window_size
        self.action_size = 3
        self.memory = deque(maxlen=1000)
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.dataset_train = dataset_train
        self.dataset_val = dataset_val
        self.model = AgentCNNNetwork(self.feature_size, self.window_size, self.action_size)
        self.model.to(device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)

    def act(self, state): 
        #If it is test and self.epsilon is still very high, once the epsilon become low, there are no random
        #actions suggested.
        if not self.is_eval and random.random() <= self.epsilon:
            return random.randrange(self.action_size) 
        self.model.eval()
        with torch.no_grad():
            options = self.model(state.float()).reshape(-1).cpu().numpy()   
        
        #set_trace()
        #action is based on the action that has the highest value from the q-value function.
        return np.argmax(options)

    def expReplay(self, batch_size):
        mini_batch = []
        l = len(self.memory)

        self.model.train()
        for i in range(l - batch_size + 1, l):
            mini_batch.append(self.memory[i])
        
        for state, action, reward, next_state, done in mini_batch:
            target = reward 
            if not done:
                target = reward + self.gamma * torch.max(self.model(next_state.float()).reshape(-1)).cpu().detach().numpy()   
            
            self.optimizer.zero_grad()
 
            target_f = self.model(state.float()).reshape(-1)   
            target_f[action] = target

            loss = nn.MSELoss()
            output = loss(target_f, self.model(state.float()).reshape(-1))
            output.backward()
            self.optimizer.step()
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [12]:
import numpy as np


# prints formatted price
def formatPrice(n):
    return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))

# returns an an n-day state representation ending at time t
def getState(data, t, n):    
    d = t - n + 1

    if d>=0:
        block = data[d:t + 1, :]
    else:
        block = torch.cat([ data[0].repeat(-d, 1), data[0:t + 1, :] ])

    res = []
    for i in range(n - 1):
        res.append(block[i + 1].cpu().numpy() - block[i].cpu().numpy())

    return torch.transpose(torch.tensor(res).to(device), 0, 1)

# Plots the behavior of the output
def plot_behavior(data_input, states_buy, states_sell, profit):
    fig = plt.figure(figsize = (15,5))
    plt.plot(data_input, color='r', lw=2.)
    plt.plot(data_input, '^', markersize=10, color='m', label = 'Buying signal', markevery = states_buy)
    plt.plot(data_input, 'v', markersize=10, color='k', label = 'Selling signal', markevery = states_sell)
    plt.title('Total gains: %f'%(profit))
    plt.legend()
    #plt.savefig('output/'+name+'.png')
    plt.show()

In [17]:
from IPython.core.debugger import set_trace



window_size = 10
batch_size = 16

# Initialize agent
agent = Agent(feature_size, window_size, dataset_train=dataloader_train)
episode_count = 0


for day_episode in dataloader_train:
    day_episode = day_episode.squeeze()
    print("Running episode " + str(episode_count) + "/" + str(len(dataloader_train)))
    episode_count += 1
    
    #set_trace()
    total_profit = 0
    agent.inventory = []
    states_sell = []
    states_buy = []

    buy_sell_order = []
    print(day_episode.shape)
    for t in range(day_episode.size(0)-1):
        # print(f"Step: {t}")
        state = getState(day_episode, t, window_size + 1)
        action = agent.act(state)
        # sit
        next_state = getState(day_episode, t + 1, window_size + 1)
        reward = 0

        if action == 1: # buy
            agent.inventory.append(day_episode[t][2])
            states_buy.append(t)
            buy_sell_order.append('BUY')
            #print("Buy: " + formatPrice(day_episode[t]))

        elif action == 2 and len(agent.inventory) > 0: # sell
            bought_price = agent.inventory.pop(0)      
            reward = day_episode[t][0] - bought_price
            total_profit += day_episode[t][2] - bought_price
            states_sell.append(t)
            buy_sell_order.append('SELL')
            
        done = True if t == day_episode.size(0) - 1 else False
        #appends the details of the state action etc in the memory, which is used further by the exeReply function
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")
            #set_trace()
            #pd.DataFrame(np.array(agent.memory)).to_csv("Agent"+str(e)+".csv")
            #Chart to show how the model performs with the stock goin up and down for each 
            plot_behavior(day_episode,states_buy, states_sell, total_profit)
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)    

    print(f"Total profit: {total_profit}, BUY trades: {len(states_buy)}, SELL trades: {len(states_sell)}")
    print(buy_sell_order)
            

    if episode_count % 2 == 0:
        print('Ciao sono episodio pari')
        #agent.model.save("model_ep" + str(episode_count))
        #torch.save(agent.model.state_dict(), "model_ep" + str(episode_count))

Running episode 0/2518
torch.Size([390, 3])


RuntimeError: expand(torch.DoubleTensor{[3]}, size=[]): the number of sizes provided (0) must be greater or equal to the number of dimensions in the tensor (1)