<a href="https://colab.research.google.com/github/ali-rabiee/Portfolio-Formation/blob/DQN/DQN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Requirements & Packages

In [None]:
!pip3 install Keras
!pip3 install keras-rl
!pip install tensorflow==1.15
!pip install keras
!pip install keras-rl
!pip install gym
!pip install pandas
!pip install rl
!pip install keras-rl2
!pip install callbacks 
!pip install tf-nightly

# Environment

## MergedDataStructure

In [None]:
'''
MergedDataStructure.py
'''
# Library used to manipulate the CSV Dataset
# organize the dataset for the Enviroment
import pandas

#Library used to manipulate dates
import datetime

class MergedDataStructure():
    """Date-indexed access to the rows that precede a calendar date in an OHLC CSV.

    The CSV must have the columns Date (formatted ``%m/%d/%Y``), Time, Open,
    High, Low and Close.

    Attributes:
        delta: maximum number of rows returned by :meth:`get`.
        list: every CSV row except the last one, as dictionaries keyed by
            column name, in file order.
        dict: maps a ``%m/%d/%Y`` date string to an index ``i`` into ``list``.
            Every calendar day in the half-open interval
            ``(Date[i], Date[i+1]]`` maps to ``i``, so days missing from the
            file (weekends, holidays, gaps between weekly rows) can still be
            looked up.
    """

    def __init__(self, delta=4, filename="sp500Week.csv"):
        """Load ``filename`` and build the row list and the date lookup.

        Args:
            delta: number of past rows that :meth:`get` should return.
            filename: path of the CSV file to read.
        """
        # Imported locally so the class keeps working in a shared notebook
        # namespace where another cell rebinds the name ``datetime`` to the
        # class (``from datetime import datetime``).
        from datetime import datetime as _datetime, timedelta as _timedelta

        self.delta = delta

        #Read the CSV and turn each column into a plain Python list
        timeserie = pandas.read_csv(filename)
        columns = ('Date', 'Time', 'Open', 'High', 'Low', 'Close')
        values = [timeserie.loc[:, name].tolist() for name in columns]
        rows = [dict(zip(columns, row)) for row in zip(*values)]

        #The last row is never stored: no date maps to an index that would need it
        self.list = rows[:-1]
        self.dict = {}

        dateFormat = "%m/%d/%Y"
        dates = values[0]

        #Each date is parsed exactly once and carried over to the next iteration
        previous = _datetime.strptime(dates[0], dateFormat) if len(dates) > 1 else None
        for i in range(len(dates) - 1):
            current = _datetime.strptime(dates[i + 1], dateFormat)

            #Walk backwards from the next row's date down to (but excluding)
            #this row's date, so the days in between resolve to index i
            for back in range((current - previous).days):
                day = current - _timedelta(days=back)
                self.dict[day.strftime(dateFormat)] = i

            previous = current

    def get(self, date):
        """Return up to ``delta`` rows that end just before the index mapped to ``date``.

        Args:
            date: the date to look up; it is converted with ``str()`` and must
                match the ``%m/%d/%Y`` keys stored in ``self.dict``.

        Returns:
            A list of row dictionaries, oldest first. Fewer than ``delta`` rows
            are returned when the date is close to the beginning of the file.

        Raises:
            KeyError: if the date is not covered by the file.
        """
        end = self.dict[str(date)]
        #Clamp at 0: a negative start would be interpreted by the slice as an
        #offset from the end of the list instead of "not enough history"
        start = max(0, end - self.delta)
        return self.list[start:end]

## Callback

In [None]:
'''
Callback.py
'''

#Callbacks are functions used to give a feedback about each epoch calculated metrics
from rl.callbacks import Callback

class ValidationCallback(Callback):
    """Accumulates per-episode trading statistics reported by the environment.

    Action ids follow the environment's convention: 0 is hold, 1 is long and
    2 is short. The environment calls :meth:`on_episode_end` once per episode;
    :meth:`getInfo` turns the running counters into ratios.
    """

    def __init__(self):
        #A new callback starts with every counter at zero
        self.reset()

    def reset(self):
        """Set every counter back to zero (done before each epoch)."""
        #Episodes seen and reward accumulated
        self.episodes = 0
        self.rewardSum = 0
        #Hits among non-hold actions, and number of non-hold actions
        self.accuracy = 0
        self.coverage = 0
        #Number of short / long actions
        self.short = 0
        self.long = 0
        #Short / long actions with a non-negative reward
        self.shortAcc = 0
        self.longAcc = 0
        #Long actions on rising markets / short actions on falling markets
        self.longPrec = 0
        self.shortPrec = 0
        #Episodes in which the market rose / fell
        self.marketRise = 0
        self.marketFall = 0

    def on_episode_end(self, action, reward, market):
        """Update the counters with the outcome of one episode.

        Args:
            action: action id chosen by the agent (0 hold, 1 long, 2 short).
            reward: reward obtained for that action.
            market: market movement of the episode; positive means it rose,
                negative means it fell.
        """
        wentLong = (action == 1)
        wentShort = (action == 2)
        #A reward of exactly zero also counts as a hit
        hit = (reward >= 0)

        self.episodes += 1
        self.rewardSum += reward

        #Anything other than a hold is a decision taken by the agent
        if action != 0:
            self.coverage += 1
            if hit:
                self.accuracy += 1

        if wentShort:
            self.short += 1
            if hit:
                self.shortAcc += 1

        if wentLong:
            self.long += 1
            if hit:
                self.longAcc += 1

        #A flat market (market == 0) updates neither group
        if market > 0:
            self.marketRise += 1
            if wentLong:
                self.longPrec += 1
        elif market < 0:
            self.marketFall += 1
            if wentShort:
                self.shortPrec += 1

    def getInfo(self):
        """Return the aggregated metrics collected since the last reset.

        Returns:
            A tuple ``(episodes, coverage, accuracy, rewardSum, long, short,
            longAcc, shortAcc, longPrec, shortPrec)``. Every ratio is 0 when
            its denominator is 0:

            * coverage, long, short: share of episodes with a non-hold, long
              or short action;
            * accuracy: hits divided by the number of non-hold actions;
            * longAcc, shortAcc: hits divided by the number of long / short
              actions;
            * longPrec: long actions taken on rising markets divided by the
              number of rising markets;
            * shortPrec: short actions taken on falling markets divided by the
              number of falling markets.
        """
        #Holds are excluded from the accuracy
        acc = self.accuracy/self.coverage if self.coverage > 0 else 0

        if self.episodes > 0:
            cov = self.coverage/self.episodes
            short = self.short/self.episodes
            long = self.long/self.episodes
        else:
            cov = short = long = 0

        shortAcc = self.shortAcc/self.short if self.short > 0 else 0
        longAcc = self.longAcc/self.long if self.long > 0 else 0
        longPrec = self.longPrec/self.marketRise if self.marketRise > 0 else 0
        shortPrec = self.shortPrec/self.marketFall if self.marketFall > 0 else 0

        return (self.episodes, cov, acc, self.rewardSum, long, short,
                longAcc, shortAcc, longPrec, shortPrec)

## spEnv

In [None]:
'''
spEnv.py
'''
#Environment used for spenv 
#gym is the library of videogames used by reinforcement learning
import gym
from gym import spaces
#Numpy is the library to deal with matrices
import numpy
#Pandas is the library used to deal with the CSV dataset
import pandas
#datetime is the library used to manipulate time and date
from datetime import datetime
#Library created by Tonio to merge data used as feature vectors
# from mergedDataStructure import MergedDataStructure
#Callback is the library used to show metrics 
# import callback

#Prefix of the dataset files opened by the environment: './datasets/'+MK+'Hour.csv',
#'./datasets/'+MK+'Week.csv' and './datasets/'+MK+'Day.csv'. It selects the market whose data is loaded.
MK = "dax"


class SpEnv(gym.Env):
    """Gym environment in which every episode is a single trading day.

    The agent observes relative (close-open)/open variations and picks one of
    three actions: 0 (hold), 1 (long) or 2 (short). ``step`` scores the action
    against the movement between the open of the current row and the close of
    the last row that carries the same date, and always ends the episode.
    ``reset`` then moves the cursor to the first row of the following date.
    """

    #Just for the gym library: the action set is discrete (three actions)
    continuous = False

    def __init__(self, minLimit=None, maxLimit=None, operationCost = 0, observationWindow = 40, ensamble = None, callback = None, isOnlyShort=False, columnName = "iteration-1"):
        """Load the datasets and place the cursor at row ``observationWindow``.

        Args:
            minLimit: first row (inclusive) of the hourly dataset to use.
            maxLimit: last row (exclusive) of the hourly dataset to use.
            operationCost: amount subtracted from the reward of every long or
                short action.
            observationWindow: number of hourly rows that precede the cursor
                and are included in each observation.
            ensamble: optional pandas DataFrame indexed by date; when given,
                the action taken on each date is stored in ``columnName``.
            callback: optional object whose
                ``on_episode_end(action, reward, market)`` is called at the
                end of every step.
            isOnlyShort: when true, incoming actions are doubled so that
                action 1 is executed as a short (2).
            columnName: column of ``ensamble`` that receives the actions.
        """
        #Declare the episode as the first episode
        self.episode = 1
        self.isOnlyShort = isOnlyShort

        #Hourly dataset restricted to the requested rows
        spTimeserie = pandas.read_csv('./datasets/'+MK+'Hour.csv')[minLimit:maxLimit]
        fields = ('Date', 'Time', 'Open', 'High', 'Low', 'Close')
        columns = [spTimeserie.loc[:, name].tolist() for name in fields]

        #Weekly and daily data, queried by date: 8 past weeks and 20 past days
        self.weekData = MergedDataStructure(delta=8, filename='./datasets/'+MK+"Week.csv")
        self.dayData = MergedDataStructure(delta=20, filename='./datasets/'+MK+"Day.csv")

        #When an ensamble table is given, every decision is recorded in it:
        #one line per date, one column per epoch
        self.output = False
        if ensamble is not None:
            self.output = True
            self.ensamble = ensamble
            self.columnName = columnName
            self.ensamble[self.columnName] = 0

        #The range of (close-open)/open is not known in advance, so it is left unbounded
        self.low = numpy.array([-numpy.inf])
        self.high = numpy.array([+numpy.inf])

        #The action space is just 0,1,2 which means hold,long,short
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(self.low, self.high, dtype=numpy.float32)

        self.observationWindow = observationWindow
        #The cursor starts right after the first observation window
        self.currentObservation = observationWindow
        self.operationCost = operationCost
        self.done = False

        #The hourly dataset as a list of dictionaries, one per row
        self.history = [dict(zip(fields, row)) for row in zip(*columns)]
        self.limit = len(self.history)

        #Number of rows from the cursor that belong to the cursor's date
        self.nextObservation = sum(1 for _ in self._sameDayOffsets())

        #Values produced by step()
        self.reward = None
        self.possibleGain = 0
        self.openValue = 0
        self.closeValue = 0
        self.callback = callback

    def _sameDayOffsets(self):
        """Yield 0, 1, 2, ... while the row at cursor+offset has the cursor's date.

        Indexes wrap around the end of ``self.history``. At most ``self.limit``
        offsets are produced, so the scan also terminates when every row has
        the same date.
        """
        today = self.history[self.currentObservation]['Date']
        for offset in range(self.limit):
            if self.history[(self.currentObservation + offset) % self.limit]['Date'] != today:
                return
            yield offset

    def step(self, action):
        """Apply ``action`` to the current day and finish the episode.

        Args:
            action: 0 (hold), 1 (long) or 2 (short). With ``isOnlyShort`` the
                value is doubled first.

        Returns:
            A tuple ``(observation, reward, done, info)``. ``done`` is always
            True and ``info`` is an empty dictionary. The cursor is not moved
            here; that happens in ``reset``.
        """
        self.reward = 0

        #Only-short agent: its action 1 must be executed as a short (2)
        if self.isOnlyShort:
            action *= 2

        #The close value of the day is the close of the last row with today's date
        self.nextObservation = 0
        for offset in self._sameDayOffsets():
            self.closeValue = self.history[(self.currentObservation + offset) % self.limit]['Close']
            self.nextObservation = offset + 1

        #The open value of the day is the open of the row under the cursor
        self.openValue = self.history[self.currentObservation]['Open']

        #Relative growth (or decrease) of the day
        self.possibleGain = (self.closeValue - self.openValue)/self.openValue

        if action == 1:
            #Long: earns the growth, minus the cost of the transaction
            self.reward = self.possibleGain - self.operationCost
        elif action == 2:
            #Short: earns the decrease, minus the cost of the transaction
            self.reward = (-self.possibleGain) - self.operationCost
        else:
            #Hold: no reward
            self.reward = 0

        #Every step closes the episode
        self.done = True

        if self.callback is not None:
            self.callback.on_episode_end(action, self.reward, self.possibleGain)

        #Store the decision for this date (line) in the column of the current epoch
        if self.output:
            self.ensamble.at[self.history[self.currentObservation]['Date'], self.columnName] = action

        return self.getObservation(self.history[self.currentObservation]['Date']), self.reward, self.done, {}

    def reset(self):
        """Move the cursor to the first row of the next date and return its observation.

        When the cursor would move past the last row, it is placed back at
        ``observationWindow``.
        """
        if self.currentObservation < self.observationWindow:
            self.currentObservation = self.observationWindow

        self.episode += 1

        #Count the rows up to the first hour of the next day
        self.nextObservation = 0
        for offset in self._sameDayOffsets():
            self.nextObservation = offset + 1
            #Report every index that exceeds the limits
            if (self.currentObservation + self.nextObservation) >= self.limit:
                print("Resetted: episode " + str(self.episode) +"; Index " + str(self.currentObservation+self.nextObservation) + " over the limit (" + str(self.limit) + ")" )

        #Reset the values used in the step() function
        self.done = False
        self.reward = None
        self.possibleGain = 0
        self.openValue = 0
        self.closeValue = 0

        #Advance the cursor, going back to the start once the data is exhausted
        self.currentObservation += self.nextObservation
        if self.currentObservation >= self.limit:
            self.currentObservation = self.observationWindow

        return self.getObservation(self.history[self.currentObservation]['Date'])

    def getObservation(self, date):
        """Build the feature vector for the cursor position.

        Args:
            date: date used to query the daily and weekly data.

        Returns:
            A numpy array with a single row holding (close-open)/open for the
            ``observationWindow`` hourly rows before the cursor, followed by
            the rows returned by the daily and by the weekly data for ``date``.
        """
        hours = self.history[self.currentObservation - self.observationWindow:self.currentObservation]
        rows = hours + self.dayData.get(date) + self.weekData.get(date)
        return numpy.array([[(row["Close"] - row["Open"])/row["Open"] for row in rows]])

    def resetEnv(self):
        """Rewind the environment: cursor back to ``observationWindow``, episode back to 1."""
        self.currentObservation = self.observationWindow
        self.episode = 1

# Trading

The code needs three positional parameters to be correctly executed: \\
python main.py [numberOfActions, isOnlyShort, ensembleFolder]


* To run the FULL agent you need to run: python main.py 3 0 ensembleFolder
* To run the ONLY LONG agent you need to run: python main.py 2 0 ensembleFolder
* To run the ONLY SHORT agent you need to run: python main.py 2 1 ensembleFolder \\
where the parameter ensembleFolder sets the name of the folder in which the results are saved.

## DeepQTrading

In [None]:
'''
deepQTrading.py
'''
#Imports the SPEnv library, which will perform the Agent actions themselves
# from spEnv import SpEnv

#Callback used to print the results at each episode
# from callback import ValidationCallback

#Keras library for the NN considered
from keras.models import Sequential

#Keras libraries for layers, activations and optimizers used
from keras.layers import Dense, Activation, Flatten
from keras.layers.advanced_activations import LeakyReLU, PReLU
from tensorflow.keras.optimizers import Adam

#RL Agent 
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy

#Mathematical operations used later
from math import floor

#Library to manipulate the dataset in a csv file
import pandas as pd

#Library used to manipulate time
import datetime


#Prefix of the market dataset file loaded by this cell ('./datasets/'+MK+'Hour.csv')
MK="dax"

class DeepQTrading:
    """Walk-forward training and evaluation of a DQN trading agent.

    The period between ``begin`` and ``end`` is covered by consecutive walks.
    Each walk is split into a training, a validation and a testing interval;
    after a walk the starting point advances by ``testSize``. For every walk
    a CSV with one line of metrics per epoch is written, together with two
    ensemble CSVs holding the action taken on every validation and testing
    date at every epoch.
    """

    #First line of every per-walk metrics CSV
    _CSV_HEADER = ",".join([
        "Iteration",

        "trainAccuracy",
        "trainCoverage",
        "trainReward",
        "trainLong%",
        "trainShort%",
        "trainLongAcc",
        "trainShortAcc",
        "trainLongPrec",
        "trainShortPrec",

        "validationAccuracy",
        "validationCoverage",
        "validationReward",
        "validationLong%",
        "validationShort%",
        "validationLongAcc",
        "validationShortAcc",
        "validLongPrec",
        "validShortPrec",

        "testAccuracy",
        "testCoverage",
        "testReward",
        "testLong%",
        "testShort%",
        "testLongAcc",
        "testShortAcc",
        "testLongPrec",
        "testShortPrec",
    ]) + "\n"

    def __init__(self, model, explorations, trainSize, validationSize, testSize, outputFile, begin, end, nbActions, isOnlyShort, ensembleFolderName, operationCost=0):
        """Create the agent and load the timestamps used to delimit the walks.

        Args:
            model: Keras model used by the DQN agent.
            explorations: sequence of ``(eps, epochs)`` pairs; ``eps`` is the
                probability of a random action and ``epochs`` the number of
                train/validate/test rounds run with it.
            trainSize: length of the training interval (its ``.days`` is
                used, so a ``datetime.timedelta`` is expected).
            validationSize: length of the validation interval.
            testSize: length of the testing interval.
            outputFile: prefix of the per-walk metrics files; the walk number
                and ".csv" are appended.
            begin: date and time at which the first walk starts.
            end: date and time after which no walk may finish.
            nbActions: number of actions available to the agent.
            isOnlyShort: forwarded to the environments.
            ensembleFolderName: folder under ./Output/ensemble/ that receives
                the ensemble CSVs.
            operationCost: cost charged for every long or short action.
        """
        self.isOnlyShort = isOnlyShort
        self.ensembleFolderName = ensembleFolderName

        #Define the policy, explorations, actions and model as received by parameters
        self.policy = EpsGreedyQPolicy()
        self.explorations = explorations
        self.nbActions = nbActions
        self.model = model

        #Create the memory and the agent, then save the initial weights so that
        #every walk can start again from them
        self._buildAgent()
        self.agent.save_weights("q.weights", overwrite=True)

        #Define the current starting point as the initial date
        self.currentStartingPoint = begin

        self.trainSize = trainSize
        self.validationSize = validationSize
        self.testSize = testSize

        #The walk size is simply summing up the train, validation and test sizes
        self.walkSize = trainSize+validationSize+testSize

        self.endingPoint = end

        #Hourly dataset; its rows are sliced by position to build the ensemble tables
        self.dates = pd.read_csv('./datasets/'+MK+'Hour.csv')
        #Only the date-and-time index of the same rows is needed to locate
        #the limits of each walk
        stamps = pd.to_datetime(self.dates['Date'] + ' ' + self.dates['Time'])
        self.sp = pd.DatetimeIndex(stamps, name='Datetime')

        #Operation cost is the cost for long and short
        self.operationCost = operationCost

        #Callbacks collecting the results of training, validation and test
        self.trainer = ValidationCallback()
        self.validator = ValidationCallback()
        self.tester = ValidationCallback()
        self.outputFileName = outputFile

    def _buildAgent(self):
        """Create a fresh replay memory and a compiled DQN agent around ``self.model``."""
        self.memory = SequentialMemory(limit=10000, window_length=1)
        self.agent = DQNAgent(model=self.model, policy=self.policy, nb_actions=self.nbActions, memory=self.memory, nb_steps_warmup=200, target_model_update=1e-1,
                              enable_double_dqn=True, enable_dueling_network=True)
        #NOTE(review): newer Keras optimizers call this argument learning_rate - confirm against the installed version
        self.agent.compile(Adam(lr=1e-3), metrics=['mae'])

    def _locate(self, offset=None):
        """Return the row position of ``currentStartingPoint`` (+ ``offset``) in the hourly index.

        While the timestamp is not in the index, ``currentStartingPoint`` is
        moved forward by one hour and the lookup is repeated.

        Raises:
            ValueError: if the timestamp lies after the last one in the
                dataset, where moving forward can never succeed.
        """
        lastStamp = self.sp.max()
        while True:
            target = self.currentStartingPoint if offset is None else self.currentStartingPoint + offset
            try:
                return self.sp.get_loc(target)
            except KeyError as err:
                if target > lastStamp:
                    raise ValueError("No data at or after " + str(target) + " in the hourly dataset") from err
                self.currentStartingPoint += datetime.timedelta(hours=1)

    @staticmethod
    def _episodes(size):
        """Number of steps/episodes run over an interval: 80% of its days, rounded down."""
        return floor(size.days - size.days*0.2)

    def _report(self, epoch, label, callback):
        """Print the main metrics of one phase and return its values in CSV column order."""
        (_, coverage, accuracy, reward, longPerc, shortPerc, longAcc, shortAcc, longPrec, shortPrec) = callback.getInfo()
        print(str(epoch) + " " + label + ":  acc: " + str(accuracy) + " cov: " + str(coverage) + " rew: " + str(reward))
        return [accuracy, coverage, reward, longPerc, shortPerc, longAcc, shortAcc, longPrec, shortPrec]

    def _runWalk(self):
        """Train, validate and test over the walk that starts at ``currentStartingPoint``.

        One line of metrics per epoch is written to ``self.outputFile``.

        Returns:
            The validation and the testing ensemble DataFrames of the walk.
        """
        #Start from a new memory and a new agent carrying the initial weights
        del self.memory
        del self.agent
        self._buildAgent()
        self.agent.load_weights("q.weights")

        #Row limits of the three intervals. A missing timestamp shifts the
        #starting point forward by one hour until a row is found.
        trainMinLimit = self._locate()
        trainMaxLimit = self._locate(self.trainSize)
        validMinLimit = trainMaxLimit+1
        validMaxLimit = self._locate(self.trainSize+self.validationSize)
        testMinLimit = validMaxLimit+1
        testMaxLimit = self._locate(self.trainSize+self.validationSize+self.testSize)

        #Tables with one line per validation/testing date; every epoch adds a column
        ensambleValid = pd.DataFrame(index=self.dates[validMinLimit:validMaxLimit].loc[:,'Date'].drop_duplicates().tolist())
        ensambleTest = pd.DataFrame(index=self.dates[testMinLimit:testMaxLimit].loc[:,'Date'].drop_duplicates().tolist())
        ensambleValid.index.name = 'Date'
        ensambleTest.index.name = 'Date'

        for eps in self.explorations:
            #Probability of taking a random action
            self.policy.eps = eps[0]

            #eps[1] is the number of epochs
            for i in range(0, eps[1]):
                #Drop the environments of the previous epoch before building new ones
                trainEnv = validEnv = testEnv = None
                trainEnv = SpEnv(operationCost=self.operationCost, minLimit=trainMinLimit, maxLimit=trainMaxLimit, callback=self.trainer, isOnlyShort=self.isOnlyShort)
                validEnv = SpEnv(operationCost=self.operationCost, minLimit=validMinLimit, maxLimit=validMaxLimit, callback=self.validator, isOnlyShort=self.isOnlyShort, ensamble=ensambleValid, columnName="iteration"+str(i))
                testEnv = SpEnv(operationCost=self.operationCost, minLimit=testMinLimit, maxLimit=testMaxLimit, callback=self.tester, isOnlyShort=self.isOnlyShort, ensamble=ensambleTest, columnName="iteration"+str(i))

                self.trainer.reset()
                self.validator.reset()
                self.tester.reset()

                #Train the agent
                trainEnv.resetEnv()
                self.agent.fit(trainEnv, nb_steps=self._episodes(self.trainSize), visualize=False, verbose=0)
                trainRow = self._report(i, "TRAIN", self.trainer)

                #Evaluate the agent on validation data
                validEnv.resetEnv()
                self.agent.test(validEnv, nb_episodes=self._episodes(self.validationSize), visualize=False, verbose=0)
                validRow = self._report(i, "VALID", self.validator)

                #Evaluate the agent on testing data; the number of episodes
                #follows the size of the testing interval
                testEnv.resetEnv()
                self.agent.test(testEnv, nb_episodes=self._episodes(self.testSize), visualize=False, verbose=0)
                testRow = self._report(i, "TEST", self.tester)
                print(" ")

                #Write the metrics of the epoch on the walk file
                self.outputFile.write(",".join(str(value) for value in [i] + trainRow + validRow + testRow) + "\n")

        return ensambleValid, ensambleTest

    def run(self):
        """Run every walk that fits between the starting point and the ending point."""
        iteration = -1

        #walk size is train+validation+test size
        while(self.currentStartingPoint+self.walkSize <= self.endingPoint):
            #Iteration is the current walk
            iteration += 1

            #The file is closed even when the walk fails half-way
            with open(self.outputFileName+str(iteration+1)+".csv", "w+") as outputFile:
                self.outputFile = outputFile
                self.outputFile.write(self._CSV_HEADER)
                ensambleValid, ensambleTest = self._runWalk()

            #The next walk starts one testing interval later
            self.currentStartingPoint += self.testSize

            #Save the decisions of every epoch for the ensemble processing done later
            ensambleValid.to_csv("./Output/ensemble/"+self.ensembleFolderName+"/walk"+str(iteration)+"ensemble_valid.csv")
            ensambleTest.to_csv("./Output/ensemble/"+self.ensembleFolderName+"/walk"+str(iteration)+"ensemble_test.csv")

    #Function to end the Agent
    def end(self):
        """Print a message marking the end of the experiment."""
        print("END")

## main

In [None]:
'''
main.py
'''
#os library is used to define the GPU to be used by the code, needed only in certain situations (Better not to use it, use only if the main gpu is Busy)
#import os
#os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID";
#os.environ["CUDA_VISIBLE_DEVICES"]="0";

#This is the class call for the Agent which will perform the experiment
# from deepQTrading import DeepQTrading

#Date library to manipulate time in the source code
import datetime

#Keras library to define the NN to be used
from keras.models import Sequential

#Layers used in the NN considered
from keras.layers import Dense, Activation, Flatten

#Activation Layers used in the source code
from keras.layers.advanced_activations import LeakyReLU, PReLU, ReLU

#Optimizer used in the NN
from tensorflow.keras.optimizers import Adam

#Libraries used for the Agent considered
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy


#Library used for showing the exception in the case of error 
import sys

#import tensorflow as tf
#from keras.backend.tensorflow_backend import set_session
#config = tf.ConfigProto()
#config.gpu_options.per_process_gpu_memory_fraction = 0.3
#set_session(tf.Session(config=config))



#Remember when the experiment was launched
startingTime = datetime.datetime.now()

#The agent chooses among three actions:
#Hold (id 0): stay out of the market.
#Long (id 1): expects the price to rise during the day, so buy at the open and sell at the close.
#Short (id 2): expects the price to fall during the day, so sell at the open and buy back at the close.
nb_actions = 3
isOnlyShort = 0
ensembleFolderName = "ensembleFolder"

#Small feed-forward network, declared as one list of layers:
#Flatten turns the (1,1,68) input into a 68-dimensional vector
#(per the original notes: 20 daily, 8 weekly and 40 hourly past observations),
#a 35-unit Dense layer followed by LeakyReLU,
#and a final linear Dense layer with one output per action.
model = Sequential([
    Flatten(input_shape=(1, 1, 68)),
    Dense(35, activation='linear'),
    LeakyReLU(alpha=.001),
    Dense(nb_actions),
    Activation('linear'),
])

#Configure the DeepQTrading experiment:
#explorations: one (0.2, 3) pair - per the original notes, 0.2 of the operations are random and the second value is the number of epochs
#trainSize: 5 stock market years of 360 days, i.e. 1800 days
#validationSize: 6 stock market months of 30 days, i.e. 180 days
#testSize: 6 stock market months of 30 days, i.e. 180 days
#outputFile: prefix of the files where the walk results are written
#begin: January 1st of 2001, where the walks start from
#end: February 28th of 2019, where the walks finish
dqt = DeepQTrading(
    model=model,
    explorations=[(0.2, 3)],
    trainSize=datetime.timedelta(days=5 * 360),
    validationSize=datetime.timedelta(days=6 * 30),
    testSize=datetime.timedelta(days=6 * 30),
    outputFile="./Output/csv/walks/walks",
    begin=datetime.datetime(2001, 1, 1),
    end=datetime.datetime(2019, 2, 28),
    nbActions=nb_actions,
    isOnlyShort=isOnlyShort,
    ensembleFolderName=ensembleFolderName,
)

#Run every walk, then let the agent announce that it is done
dqt.run()
dqt.end()

  super(Adam, self).__init__(name, **kwargs)


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
./Output/csv/walks/walks1.csv
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `ex

  updates=self.state_updates,


Resetted: episode 1241; Index 14899 over the limit (14899)
0 TRAIN:  acc: 0.5085836909871244 cov: 0.6472222222222223 rew: 0.42672165329658834
Resetted: episode 120; Index 1672 over the limit (1672)
0 VALID:  acc: 0.4375 cov: 0.4444444444444444 rew: -0.0459412439842766
Resetted: episode 127; Index 1952 over the limit (1952)
0 TEST:  acc: 0.5 cov: 0.2777777777777778 rew: 0.021415606148076714
 
Resetted: episode 1241; Index 14899 over the limit (14899)
1 TRAIN:  acc: 0.629950495049505 cov: 0.5611111111111111 rew: 3.2053990103932177
Resetted: episode 120; Index 1672 over the limit (1672)
1 VALID:  acc: 0.4056603773584906 cov: 0.7361111111111112 rew: -0.09820788719873001
Resetted: episode 127; Index 1952 over the limit (1952)
1 TEST:  acc: 0.5632183908045977 cov: 0.6041666666666666 rew: 0.1426291878337838
 
Resetted: episode 1241; Index 14899 over the limit (14899)
2 TRAIN:  acc: 0.6902050113895216 cov: 0.6097222222222223 rew: 4.842655218479314
Resetted: episode 120; Index 1672 over the lim

# Ensemble

## ensemble.py

In [None]:
import pandas as pd
import numpy as np



def full_ensemble(df):
    """Collapse per-model decisions into one unanimous-vote decision per row.

    A row becomes 1 when every column equals 1, 2 when every column equals 2,
    and 0 otherwise. The input frame is left untouched; the returned frame
    keeps the same index and holds a single ``ensemble`` column.
    """
    everyone_long = (df == 1).all(axis=1)
    everyone_short = (df == 2).all(axis=1)

    result = df.copy()
    result['ensemble'] = np.select([everyone_long, everyone_short], [1, 2], default=0)

    # Keep only the aggregated decision, dropping every per-model column
    leftovers = result.columns.difference(['ensemble'])
    return result.drop(columns=leftovers)

def perc_ensemble(df, thr = 0.7):
    """Collapse per-model decisions into one decision per row by vote share.

    A row becomes 1 when the fraction of columns equal to 1 is strictly
    greater than ``thr``, otherwise 2 when the fraction of columns equal to 2
    is strictly greater than ``thr``, otherwise 0. The result has the same
    index as ``df`` and a single ``ensemble`` column.
    """
    model_count = df.shape[1]

    def vote_share(action):
        # Fraction of columns that picked `action` on each row
        return (df == action).sum(axis=1) / model_count

    enough_long = vote_share(1) > thr
    enough_short = vote_share(2) > thr

    # The long condition is listed first, so it wins if both shares pass the threshold
    decision = np.select([enough_long, enough_short], [1, 2], default=0)
    return pd.DataFrame(decision, index=df.index, columns=['ensemble'])




def ensemble(numWalks,perc,type,numDel):
    """Score the ensemble decisions of every walk against daily open/close prices.

    For each walk ``j`` in ``range(numWalks)`` the file
    ``./Output/ensemble/walk<j>ensemble_<type>.csv`` is read, the columns
    ``iteration1`` .. ``iteration<numDel-1>`` are removed, and the remaining
    columns are merged into one ``ensemble`` decision per date
    (1 = long, 2 = short, anything else = no trade). Every traded date that
    also exists in ``./datasets/sp500Day.csv`` is scored with that day's
    open-to-close move.

    Args:
        numWalks: number of walk files to process.
        perc: 0 selects ``full_ensemble`` (unanimous vote); any other value is
            handed to ``perc_ensemble`` as its threshold.
        type: suffix used in the walk file name. The name shadows the builtin
            but is kept so existing callers keep working.
        numDel: exclusive upper bound of the ``iteration<k>`` columns (starting
            at ``iteration1``) that are dropped before merging.

    Returns:
        ``(values, columns)``: ``columns`` is the list of header names and
        ``values`` holds one list of strings per walk followed by a final
        ``"sum"`` row. Coverage and accuracy are empty strings when their
        denominator is zero.
    """
    # NOTE(review): the writer seen elsewhere in this file stores walk files under
    # "./Output/ensemble/<ensembleFolderName>/"; this reader looks directly in
    # "./Output/ensemble/" - confirm which location is intended.
    columns = ["Iteration","Reward%","#Wins","#Losses","Dollars","Coverage","Accuracy"]
    dax=pd.read_csv("./datasets/sp500Day.csv",index_col='Date')

    def ratio(numerator, denominator):
        # Blank instead of ZeroDivisionError when nothing was counted
        # (empty walk file, no walks at all, or no trades)
        return str(round(numerator/denominator,2)) if denominator>0 else ""

    values=[]
    dollSum=0
    rewSum=0
    posSum=0
    negSum=0
    covSum=0
    numSum=0

    for j in range(0,numWalks):
        df=pd.read_csv("./Output/ensemble/walk"+str(j)+"ensemble_"+type+".csv",index_col='Date')

        for deleted in range(1,numDel):
            del df['iteration'+str(deleted)]

        if perc==0:
            df=full_ensemble(df)
        else:
            df=perc_ensemble(df,perc)

        num=len(df)
        rew=0
        pos=0
        neg=0
        doll=0
        cov=0
        for date, decision in df['ensemble'].items():
            if decision not in (1, 2) or date not in dax.index:
                continue

            # Look the prices up once per traded date
            dayOpen=dax.at[date,'Open']
            change=dax.at[date,'Close']-dayOpen
            if decision==2:
                # A short position earns the opposite of the day's move
                change=-change
            gain=change/dayOpen

            # A flat day (gain == 0) is counted as a loss, as before
            if gain>0:
                pos+=1
            else:
                neg+=1
            rew+=gain
            # 50 is the per-point multiplier used by the original code
            doll+=change*50
            cov+=1

        values.append([str(round(j,2)),str(round(rew,2)),str(round(pos,2)),str(round(neg,2)),str(round(doll,2)),ratio(cov,num),ratio(pos,cov)])

        dollSum+=doll
        rewSum+=rew
        posSum+=pos
        negSum+=neg
        covSum+=cov
        numSum+=num

    values.append(["sum",str(round(rewSum,2)),str(round(posSum,2)),str(round(negSum,2)),str(round(dollSum,2)),ratio(covSum,numSum),ratio(posSum,covSum)])
    return values,columns




def evaluate(csvname=""):
    """Score a merged ensemble CSV against daily prices and write a one-row report.

    ``csvname`` must contain a ``date`` column and an ``ensemble`` column in
    which 1 means long and -1 means short; any other value is not traded.
    Dates are reformatted to ``MM/DD/YYYY`` text and looked up in the ``Date``
    index of ``./datasets/sp500Day.csv``. The result (header plus one data row)
    is written to ``resultsSPFinal.csv`` in the current directory.

    Args:
        csvname: path of the signal file to evaluate.

    Returns:
        None. The report is written to disk and the signal table is printed.
    """
    df=pd.read_csv(csvname)
    dax=pd.read_csv("./datasets/sp500Day.csv",index_col='Date')

    # Convert the dates to MM/DD/YYYY strings before using them as lookup keys
    df['date'] = pd.to_datetime(df['date']).dt.strftime('%m/%d/%Y')
    df.set_index('date', inplace=True)
    print(df)

    num=len(df)
    rew=0
    pos=0
    neg=0
    doll=0
    cov=0
    for date, decision in df['ensemble'].items():
        if decision not in (1, -1) or date not in dax.index:
            continue

        # Look the prices up once per traded date
        dayOpen=dax.at[date,'Open']
        change=dax.at[date,'Close']-dayOpen
        if decision==-1:
            # A short position earns the opposite of the day's move
            change=-change
        gain=change/dayOpen

        # A flat day (gain == 0) is counted as a loss, as before
        if gain>0:
            pos+=1
        else:
            neg+=1
        rew+=gain
        # 50 is the per-point multiplier used by the original code
        doll+=change*50
        cov+=1

    # Both ratios are left blank when their denominator is zero
    coverage = str(round(cov/num,2)) if (num>0) else ""
    accuracy = str(round(pos/cov,2)) if (cov>0) else ""

    # The context manager guarantees the report is flushed and the handle closed
    with open("resultsSPFinal.csv","w") as output:
        output.write("Iteration,Reward%,#Wins,#Losses,Euro,Coverage,Accuracy\n")
        output.write(str(0)+ "," + str(round(rew,2))+ "," + str(round(pos,2))+ "," + str(round(neg,2))+ "," + str(round(doll,2))+ "," + coverage + "," + accuracy + "\n")




#evaluate(".\Output\results\finalEnsembleSP500.csv")

## splitEnsemble.py

In [None]:
import pandas as pd



# Load the long-only and short-only ensemble results
longs=pd.read_csv("./Output/results/spLong.csv")
shorts=pd.read_csv("./Output/results/spShort.csv")

# Each table is unpacked into two parallel lists:
# index 0 holds the dates, index 1 holds the ensemble decisions.
# DataFrame.ix was removed in pandas 1.0; label-based .loc selects the same columns.
long = [longs.loc[:,"Date"].tolist(), longs.loc[:,"ensemble"].tolist()]
short = [shorts.loc[:,"Date"].tolist(), shorts.loc[:,"ensemble"].tolist()]

output = open("finalEnsemble.csv", "w+")
output.write("date,ensemble\n")

for i in range(0,len(long[0])):
    if(long[0][i]==short[0][i]):
        output.write(str(long[0][i]) + "," + str(long[1][i]+short[1][i]) + "\n")