In [5]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import read_csv, set_option
from pandas.plotting import scatter_matrix
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import datetime
import math
from numpy.random import choice
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#Import Model Packages for reinforcement learning
#from keras import layers, models, optimizers
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader
#from keras import backend as K
from collections import namedtuple, deque
import torch

In [6]:
from torch.utils.data import Dataset, DataLoader

class AIFinanceDataloader(Dataset):
    def __init__(self, file_csv):
        self.dataset = read_csv(file_csv, index_col=0)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        date = self.dataset.index[idx]
        date = date.replace(':', '-').replace('T', '-')
        year, month, day, hour, minute = date.split('-')[:-1]
        
        return {
            'Year': int(year),
            'Month': int(month),
            'Day': int(day),
            'Hour': int(hour),
            'Minute': int(minute),
            'Low': self.dataset.iloc[idx]['Low'],
            'Volume': self.dataset.iloc[idx]['Volume'],
            'Open': self.dataset.iloc[idx]['Open'],
            'High': self.dataset.iloc[idx]['High'],
            'Close': self.dataset.iloc[idx]['Close'],
        }

train_path = "IVV_1m_training.csv"
validation_path = "IVV_1m_validation.csv"
dataset = AIFinanceDataloader(file_csv = train_path)
dataloader_train = DataLoader(dataset, batch_size=2, shuffle=True)

dataset = AIFinanceDataloader(file_csv = validation_path)
dataloader_val = DataLoader(dataset, batch_size=2, shuffle=False)

In [7]:
#Diable the warnings
import warnings
warnings.filterwarnings('ignore')

In [8]:
# peek at data
set_option('display.width', 100)
dataset.head(5)

AttributeError: 'AIFinanceDataloader' object has no attribute 'head'

In [33]:
# describe data
#set_option('precision', 3)
#dataset.describe()

In [None]:
dataset['Close'].plot()

In [35]:
#Checking for any null values and removing the null values'''
print('Null Values =',dataset.isnull().values.any())

Null Values = False


In [36]:
# Fill the missing values with the last value available in the dataset. 
dataset=dataset.fillna(method='ffill')
dataset.head(2)

Unnamed: 0_level_0,Low,Volume,Open,High,Close
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-01-03T14:30:00.000Z,142.1,133200,142.53,142.53,142.26
2007-01-03T14:31:00.000Z,142.28,4400,142.28,142.45,142.45


In [37]:
X=list(dataset["Close"])
X=[float(x) for x in X]

In [14]:
import torch
import torch.nn as nn
import numpy as np
import random
from collections import deque

from IPython.core.debugger import set_trace

class AgentModel(nn.Module):
    def __init__(self, state_size, action_size):
        super(AgentModel, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.fc1 = nn.Linear(self.state_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 8)
        self.fc4 = nn.Linear(8, self.action_size)

    def forward(self, x):
        x = self.fc1(x)
        x = nn.ReLU(x)
        x = self.fc2(x)
        x = nn.ReLU(x)
        x = self.fc3(x)
        x = nn.ReLU(x)
        x = self.fc4(x)
        return x

In [24]:
from tqdm import tqdm

class Agent():
    def __init__(self, window_size, is_eval=False, model_name="", dataset_train=None, dataset_val=None):
        super(Agent, self).__init__()
        self.state_size = window_size
        self.action_size = 3
        self.memory = deque(maxlen=1000)
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.dataset_train = dataset_train
        self.dataset_val = dataset_val
        self.model = AgentModel(self.state_size, self.action_size)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)

    def act(self, state): 
        #If it is test and self.epsilon is still very high, once the epsilon become low, there are no random
        #actions suggested.
        if not self.is_eval and random.random() <= self.epsilon:
            return random.randrange(self.action_size) 
        self.model.eval()
        with torch.no_grad():
            options = self.model(state)
        
        #set_trace()
        #action is based on the action that has the highest value from the q-value function.
        return np.argmax(options[0])

    def expReplay(self, batch_size):
        mini_batch = []
        l = len(self.memory)
        for i in range(l - batch_size + 1, l):
            mini_batch.append(self.memory[i])
        
        # the memory during the training phase. 
        for state, action, reward, next_state, done in mini_batch:
            target = reward # reward or Q at time t    
            #update the Q table based on Q table equation
            #set_trace()
            if not done:
                #set_trace()
                #max of the array of the predicted. 
                self.model.eval()
                with torch.no_grad():
                    target = reward + self.gamma * np.amax(self.model(next_state)[0])     
                
            # Q-value of the state currently from the table   
            self.model.eval()
            with torch.no_grad(): 
                target_f = self.model(state)
            # Update the output Q table for the given action in the table     
            target_f[0][action] = target
            #train and fit the model where state is X and target_f is Y, where the target is updated. 
            self.model.fit(state, target_f, epochs=1, verbose=0)
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def training_loop(self, epochs = 1):
        for _ in range(epochs):
            for data in tqdm(self.dataset_train):
                # Every data instance is an input + label pair
                year, month, day, hour, minute, low, volume, open, high, close = data

                # Zero your gradients for every batch!
                self.optimizer.zero_grad()

                # Make predictions for this batch
                outputs = self.model(volume)

                # Compute the loss and its gradients
                loss = nn.MSELoss()
                output = loss(outputs, close)
                output.backward()

                # Adjust learning weights
                self.optimizer.step()

In [39]:
import numpy as np
import math

# prints formatted price
def formatPrice(n):
    return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))

# # returns the vector containing stock data from a fixed file 
# def getStockData(key):
#     vec = []
#     lines = open("data/" + key + ".csv", "r").read().splitlines()

#     for line in lines[1:]:
#         vec.append(float(line.split(",")[4])) #Only Close column

#     return vec

# returns the sigmoid
def sigmoid(x):
    return 1 / (1 + math.exp(-x))

# returns an an n-day state representation ending at time t

def getState(data, t, n):    
    d = t - n + 1
    block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1] # pad with t0
    #block is which is the for [1283.27002, 1283.27002]
    res = []
    for i in range(n - 1):
        res.append(sigmoid(block[i + 1] - block[i]))
    return np.array([res])

# Plots the behavior of the output
def plot_behavior(data_input, states_buy, states_sell, profit):
    fig = plt.figure(figsize = (15,5))
    plt.plot(data_input, color='r', lw=2.)
    plt.plot(data_input, '^', markersize=10, color='m', label = 'Buying signal', markevery = states_buy)
    plt.plot(data_input, 'v', markersize=10, color='k', label = 'Selling signal', markevery = states_sell)
    plt.title('Total gains: %f'%(profit))
    plt.legend()
    #plt.savefig('output/'+name+'.png')
    plt.show()

In [25]:
from IPython.core.debugger import set_trace
window_size = 1
agent = Agent(window_size, dataset_train=dataloader_train, dataset_val=dataloader_val)
#In this step we feed the closing value of the stock price 
#data = X
#l = len(data) - 1
#
batch_size = 16
#An episode represents a complete pass over the data.
episode_count = 10


for e in range(episode_count + 1):
    print("Running episode " + str(e) + "/" + str(episode_count))
    state = getState(data, 0, window_size + 1)
    #set_trace()
    total_profit = 0
    agent.inventory = []
    states_sell = []
    states_buy = []
    for t in range(l):
        action = agent.act(state)    
        # sit
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0

        if action == 1: # buy
            agent.inventory.append(data[t])
            states_buy.append(t)
            #print("Buy: " + formatPrice(data[t]))

        elif action == 2 and len(agent.inventory) > 0: # sell
            bought_price = agent.inventory.pop(0)      
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            states_sell.append(t)
            #print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

        done = True if t == l - 1 else False
        #appends the details of the state action etc in the memory, which is used further by the exeReply function
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")
            #set_trace()
            #pd.DataFrame(np.array(agent.memory)).to_csv("Agent"+str(e)+".csv")
            #Chart to show how the model performs with the stock goin up and down for each 
            plot_behavior(data,states_buy, states_sell, total_profit)
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)    
            

    if e % 2 == 0:
        agent.model.save("model_ep" + str(e))
        torch.save(agent.model.state_dict(), "model_ep" + str(e))

Running episode 0/10


NameError: name 'getState' is not defined