In [23]:
# Agent
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam

import numpy as np
import random
from collections import deque

class Agent:
    def __init__(self, state_size, is_eval=False, model_name=""):
        self.state_size = state_size # normalized previous days
        self.action_size = 3 #  buy_1, sell_1,DO Nothing
        self.memory = deque(maxlen=2000)
        self.inventory1 = []
        self.inventory2 = []
        self.model_name = model_name
        self.is_eval = is_eval
        self.gamma = 0.95 #gamma is the discount factor. It quantifies how much importance we give for future rewards.
        self.epsilon = 1.0 #Exploration and Exploitation — Epsilon (ε)
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = load_model("models/" + model_name) if is_eval else self._model()

    def _model(self):
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=8, activation="relu"))
        model.add(Dense(self.action_size, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(lr=0.0001))
        return model

    def act(self, state):
        if not self.is_eval and random.random() <= self.epsilon:
#             print("random action")
            return random.randrange(self.action_size)
#         print("Calculating using model")
        print(self.model.predict(state))
        options = self.model.predict(state)
#         print(str(options))
        return np.argmax(options[0])
    
    def getPredict(self, state):
        print("Predict using model")
        options = self.model.predict(state)
        print(str(options))
        return np.argmax(options[0])

    def expReplay(self, batch_size):
        mini_batch = []
#         print("expReplay")
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
#             print("For loop")
            target = reward
#             print("target = "+str(target))
#             print("Done = "+str(done))
            if not done:
#                 print("Not Done")
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
#             print("target_f")
#             print(target_f)
#             print(target_f[0][action])
            target_f[0][action] = target
#             print(target_f)
            self.model.fit(state, target_f, epochs=1, verbose=0)
#             print("Self model")
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
#         print("Wtf model")


In [2]:
import numpy as np
import math

# prints formatted price
def formatPrice(n):
    return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))

# returns the vector containing stock data from a fixed file
def getStockDataVec(key):
    vec = []
    lines = open("data/" + key + ".txt", "r").read().splitlines()

    for line in lines[1:]:
        vec.append(float(line.split(",")[4]))

    return vec


# returns the vector containing stock data from a fixed file
def getStockVolVec(key):
    vol = []
    lines = open("data/" + key + ".csv", "r").read().splitlines()

    for line in lines[1:]:
        vol.append(float(line.split(",")[5]))

    return vol

# returns the sigmoid
def sigmoid(x):
    return 1 / (1 + math.exp(-x))



In [3]:
# State
import numpy as np
import random
import math, random 
import gym 
import numpy as np 


class State:
    def __init__(self, data1, Bal_stock1, open_cash, timestep):
        self.Stock1Price=data1[timestep] #stock 1 open price
        self.Stock1Blnc=Bal_stock1 #stock 1 balance
        self.open_cash=open_cash #cash balance
        self.fiveday_stock1=self.five_day_window(data1, timestep)
        #self.volume1=volume1[timestep]
        #self.volume2=volume2[timestep]
        self.portfolio_value=self.portfolio_value()

    def portfolio_value(self):
        pvalue=0
        #print("In portfolio func")
        #print("self.Stock1Price",self.Stock1Price, type(self.Stock1Price))
        #print("self.Stock1Blnc",self.Stock1Blnc[0], type(self.Stock1Blnc))

        v1=self.Stock1Price * float(self.Stock1Blnc)
        v2=float(self.open_cash)
        return (v1+v2)
    
    def next_opening_price(self):
        return data1[timestep+1]
    
    def five_day_window(self,data, timestep):
        step = timestep
        if step < 5:
            return data[0]
        
        stock_5days = np.mean(data[step-5:step])
        #print("stock_5days=" + str(stock_5days))
        #print(stock_5days)

        #print(type(stock_5days))

        return stock_5days
    
    def reset(self):
        #self.state = torch.FloatTensor(torch.zeros(8)).cuda()
        self.Stock1Price=151.25 #stock 1 open price Google
#         self.Stock2Price=21.845 #stock 2 open price Walmart
        self.Stock1Blnc=34 #stock 1 balance Google
#         self.Stock2Blnc=221 #stock 2 balance Walmart
        self.open_cash=10000 #cash balance
        self.fiveday_stock1=151.25
#         self.fiveday_stock2=21.845
        self.portfolio_value=10000
        
    def getState(self):
        #print("In get state")
        res=[]
        res.append(self.Stock1Price) #stock 1 open price
#         res.append(self.Stock2Price) #stock 2 open price
        res.append(self.Stock1Blnc) #stock 1 balance
#         res.append(self.Stock2Blnc) #stock 2 balance
        res.append(self.open_cash) #cash balance
        res.append(self.fiveday_stock1)
#         res.append(self.fiveday_stock2)        
        res.append(self.portfolio_value)
        #res.append(self.volume1)
        #res.append(self.volume2)


        
        #print(res)
        res1=np.array([res])
        #print("res array"+np.array([res]))
        return res1

In [4]:
import pandas as pd
import sys

import pandas as pd

import math, random 
import numpy as np 
from datetime import datetime, timedelta

#stock_name, window_size, episode_count = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])

stock_name1, episode_count, start_balance, training, test = 'AOT.BK', 100,10000,731,100


pd_data1=pd.read_csv('data/AOT.BK.csv', sep=",", header=0)
# pd_data2=pd.read_csv('data/amzn.us.txt', sep=",", header=0)


In [5]:
# if (pd_data1['Date'][0]>pd_data2['Date'][0]): 
#     #print("Date1 is older than Date2")
#     pd_data1=pd_data1[pd_data1.Date>=pd_data2['Date'][0]]
#     pd_data1=pd_data1.reset_index(drop=True)
# else:
#     #print("Date2>Date1")
#     pd_data2=pd_data2[pd_data2.Date>=pd_data1['Date'][0]]
#     pd_data2=pd_data2.reset_index(drop=True)
    #print("Date2>Date1  and date2 is" + str(pd_data2['Date'][0]) +" Date 1 is : "+ str(pd_data1['Date'][0]))
print("^^")

^^


In [6]:
# Pre -Processing the Datasheet ...Drop Data that is not in both stock data- some days data is missing in Apple and some in Amazon
import datetime
#timestamp = data1_date[10]
#print(timestamp.strftime('%Y-%m-%d'))
#Convert  Date to Date format
pd_data1['Date']=pd.to_datetime(pd_data1['Date'], format='%Y/%m/%d')
# pd_data2['Date']=pd.to_datetime(pd_data2['Date'], format='%Y/%m/%d')

list1= pd_data1['Date']
# list2= pd_data2['Date']
# diff_pd1_data = list(set(list1) - set(list2))
# diff_pd2_data = list(set(list2) - set(list1))
#x11=x[0].strftime('%Y-%m-%d 00:00:00')
#p=datetime.datetime.strptime(x11, "%Y-%m-%d 00:00:00")
#print(p)
# for k in range(len(diff_pd1_data)):
#     pd1_dat_format=diff_pd1_data[k].strftime('%Y-%m-%d 00:00:00')
#     date_format_pd1=datetime.datetime.strptime(pd1_dat_format, "%Y-%m-%d 00:00:00")
#     for i, j in enumerate(list1):
#         if j == date_format_pd1:
#             #print(i)
#             pd_data1=pd_data1.drop([i])            
# pd_data1=pd_data1.reset_index(drop=True)

# for k in range(len(diff_pd2_data)):
#     pd2_dat_format=diff_pd2_data[k].strftime('%Y-%m-%d 00:00:00')
#     date_format_pd2=datetime.datetime.strptime(pd2_dat_format, "%Y-%m-%d 00:00:00")
#     for M, N in enumerate(list2):
#         if N == date_format_pd2:
#             #print(M)
#             pd_data2=pd_data2.drop([M])
            
# pd_data2=pd_data2.reset_index(drop=True) 



In [7]:
#Training Data
pd_data1_train=pd_data1[0:training]
# pd_data2_train=pd_data2[0:training]
#Test Data
pd_data1_test=pd_data1[training:training+test]
# pd_data2_test=pd_data2[training:training+test]



vol1_train=getStockVolVec(stock_name1)
# vol2_train=getStockVolVec(stock_name2)



In [8]:
#Initialize state and set benchmarking model



# total_Prof=[]
# done=False

#Benchmark Model
#In this model, we would divide 


#Initialize state and set benchmarking model


#print(df_data1)
total_Prof=[]
done=False

Act_datasize = training
batch_size = 64

#Benchmark Model

data1_train=pd_data1_train['Open']
# data2_train=pd_data2_train['Open']

data1_date=pd_data1_train['Date']
# Start with half of money bought stock1
Act_Bench_Stock1_Bal=int(np.floor((start_balance/2)/data1_train[0]))
# Act_Bench_Stock2_Bal=int(np.floor((start_balance/4)/data2_train[0]))
Act_Bench_Open_cash=start_balance/2




### Program to calculate benchmark profit


#sell 10% of stock in 10 intervals
#Example 100 days of data set.Sold every 10 days of 10%.

interval=int(Act_datasize/10)
Total_Stock1_Amount= 0
# Total_Stock2_Amount= 0
stocks2Value = 0
# stocks1Value = 0

Act_stocks1=np.floor(Act_Bench_Stock1_Bal /10)
# Act_stocks2=np.floor(Act_Bench_Stock2_Bal /10)
print("Buy Stock "+stock_name1+" = "+str(Act_stocks1)+" unit")
# print(str(Act_stocks2))

remaining_stock1=Act_Bench_Stock1_Bal
# remaining_stock2=Act_Bench_Stock2_Bal
ttl=0

Benchmark_Port_Value=[]


for j in range (interval,Act_datasize+1,interval):
        #print("closing prices : " + str(data1_train[j-1]) )
        Price_closing_Stock1=data1_train[j-1]
#         Price_closing_Stock2=data2_train[j-1]
        
        date_stock1=data1_date[j-1].strftime('%Y-%m-%d')
        #print(date_stock1)
                
        stocks1Value= Act_stocks1 * Price_closing_Stock1
#         stocks2Value= Act_stocks2 * Price_closing_Stock2
        remaining_stock1=remaining_stock1-Act_stocks1
#         remaining_stock2=remaining_stock2-Act_stocks2
        #print("J is:"+ str(j))
        
        
        
        Stock1_Port_value=remaining_stock1*Price_closing_Stock1
#         Stock2_Port_value=remaining_stock2*Price_closing_Stock2
        Act_Bench_Open_cash=Act_Bench_Open_cash+stocks1Value+stocks2Value #Adding 10% sold value into open cash
        
        Total_Portfolio_value=Act_Bench_Open_cash+Stock1_Port_value
        Benchmark_Port_Value.append([date_stock1,Total_Portfolio_value])
        



#print ("total_Test_Benchmark_amount : " +  str(Total_Portfolio_value))

Training_Benchmark_Portfolio_Value= Total_Portfolio_value




print("Benchmark_Profit is  " + str(Training_Benchmark_Portfolio_Value) +"with Apple Stocks:  " + str(remaining_stock1))


#Define arrays to store per episode values 
total_Prof=[]
total_stock1bal=[]
# total_stock2bal=[]
total_open_cash=[]
total_port_value=[]
total_days_played=[]

Buy Stock AOT.BK = 17.0 unit
Benchmark_Profit is  12640.599898with Apple Stocks:  8.0


In [9]:
# #Initialize Agent
# agent = Agent(5)
# Bal_stock1=int(0)
# open_cash=start_balance
    
# datasize=training
# reward = 0
# state_class_obj= State(data1_train, Bal_stock1, open_cash,0)
# state_array_obj=state_class_obj.getState()
# action = agent.act(state_array_obj)

W0901 19:14:22.149538 4429239744 deprecation_wrapper.py:119] From /Users/watwattanagaroon/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0901 19:14:22.162090 4429239744 deprecation_wrapper.py:119] From /Users/watwattanagaroon/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0901 19:14:22.164623 4429239744 deprecation_wrapper.py:119] From /Users/watwattanagaroon/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0901 19:14:22.220829 4429239744 deprecation_wrapper.py:119] From /Users/watwattanagaroon/anaconda3/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Opti

In [10]:
# # #Initialize Agent
# # agent = Agent(5)
# # Bal_stock1=int(0)
# # open_cash=start_balance
    
# # datasize=training
# # reward = 0
# # state_class_obj= State(data1_train, Bal_stock1, open_cash,0)
# # state_array_obj=state_class_obj.getState()
# # action = agent.act(state_array_obj)
# print("Agent memory = "+ str(len(agent.memory)))
# print("Action ="+ str(action))
# print(agent.getPredict(state_array_obj))
# next_state_class_obj=State(data1_train, Bal_stock1, open_cash,1)
# next_state_array_obj=next_state_class_obj.getState()
# print("..........")   
# #                          state, action, reward, next_state, done
# agent.memory.append((state_array_obj, 1, -500000, next_state_array_obj, True))
# # agent.expReplay(len(agent.memory))
# print(agent.getPredict(next_state_array_obj))
# print("Agent memory = "+ str(len(agent.memory)))

W0901 19:14:22.238878 4429239744 deprecation_wrapper.py:119] From /Users/watwattanagaroon/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:2741: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

W0901 19:14:22.240993 4429239744 deprecation_wrapper.py:119] From /Users/watwattanagaroon/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.



Agent memory = 0
Action =2
Predict using model
[[ -504.17358 -1849.4805   1784.0701 ]]
2
..........
Predict using model
[[ -504.18762 -1849.519    1784.1592 ]]
2
Agent memory = 1


In [11]:
#Training run

import csv


for e in range(episode_count + 1):
    print("..........")
    print("Episode " + str(e) + "/" + str(episode_count))
    
    Bal_stock1=int(0)
#     Bal_stock1 = int(np.floor((start_balance/2)/data1_train[0]))
#     Bal_stock2=int(np.floor((start_balance/4)/data2_train[0]))
    open_cash=start_balance
    datasize=training
    done=False
    total_profit = 0
    reward = 0
    
    #Initialize Agent
    agent = Agent(5)
    agent.inventory1 =[]
    open_cash_t1=open_cash
#     agent.inventory2 =[]
    for i in range(Bal_stock1):
        agent.inventory1.append(data1_train[0])
    Bal_stock1_t1 = len(agent.inventory1)
#     for i in range(Bal_stock2):
#         agent.inventory2.append(data2_train[0]) 
    
    
    #Timestep delta to make sure that with time reward increases for taking action
    #timestep_delta=0
    
    #Running episode over all days in the datasize
    for t in range(datasize):
#         print(datasize)
        #print(pd_data1_train.iloc[t,0])
        state_class_obj= State(data1_train, Bal_stock1, open_cash,t)
        state_array_obj=state_class_obj.getState()
        action = agent.act(state_array_obj)
                   
        change_percent_stock1=(state_class_obj.Stock1Price-state_class_obj.fiveday_stock1)/state_class_obj.fiveday_stock1*100
#         change_percent_stock2=(state_class_obj.Stock2Price-state_class_obj.fiveday_stock2)/state_class_obj.fiveday_stock2*100
        
        #print("change_percent_stock1:  "+str(change_percent_stock1))
        #print("change_percent_stock2:  "+str(change_percent_stock2))
        
        
        if action == 0:  #buy stock 1
            if state_class_obj.Stock1Price > state_class_obj.open_cash:
                '''
                print("Buy stock 1 when it did not have cash, so bankrupt, end of episode")
                reward=-reward_timedelta*10
                done = True
                '''
                #If agent is trying to buy when it has no cash but has stock1 and stock2 balance then, 
                #it should pick from other actions
                #if (state_class_obj.Stock1Blnc>1) and  (state_class_obj.Stock2Blnc>1):
                 #   action=random.sample([1, 2, 4, 5, 6],  1)  # Choose 1 elements from sell actions
                #else:    
                #print("Bankrupt")
                reward=-200000
                done = True
                #end episode
                     
            else:
                #print("In Buy stock 1")
                agent.inventory1.append(data1_train[t])
                Bal_stock1_t1= len(agent.inventory1)
#                 Bal_stock2_t1=len(agent.inventory2)
                open_cash_t1=state_class_obj.open_cash-state_class_obj.Stock1Price #Here we are buying 1 stock
                
                #needs to be reviewed
                if(Bal_stock1_t1 == 0):
                    reward = 300000
                elif(state_class_obj.open_cash<500):
                    reward=-100000
                elif (state_class_obj.Stock1Price > state_class_obj.fiveday_stock1):
                    reward+=1000
#                 elif (abs(change_percent_stock1)<=2):
#                     reward=-10000
#                 else:  
#                     reward=-change_percent_stock1*100
                

               
                
        if action == 1:  #sell stock 1
            if state_class_obj.Stock1Blnc <1 :
               # print("sold stock 2 when it did not have stock 2, so bankrupt, end of episode")
                reward=-5000000
                done = True
                #end episode
            else:
                #print("In sell stock 1")
                bought_price1=agent.inventory1.pop(0)
                Bal_stock1_t1= len(agent.inventory1)
                Bal_stock2_t1=len(agent.inventory2)
                open_cash_t1=state_class_obj.open_cash+state_class_obj.Stock1Price #State[0] is the price of stock 1. Here we are buying 1 stoc
          
                if(state_class_obj.Stock1Blnc<10):
                    reward=-100000
                elif (abs(change_percent_stock1)<=2):
                    reward=-10000
                else:
                    reward=change_percent_stock1*100 #State[0] is the price of stock 1. Here we are buying 1 stock
                
                #total_profit += data1_train[t] - bought_price1
            #print("reward for sell stock1 " + str(reward))
                
        


#        TODO Config logic in this Action 
        if action == 2:             # Do nothing action    
                if (abs(change_percent_stock1)<=2):
                    reward=10000
                elif (state_class_obj.open_cash<0.1*start_balance):
                    reward=-1000000
                elif (abs(change_percent_stock1)<=2):
                    reward=1000
#                 else:
#                     reward=-100000
                
                Bal_stock1_t1= len(agent.inventory1)
#                 Bal_stock2_t1=len(agent.inventory2)
                open_cash_t1=open_cash
               # print("Do nothing")
        
        
#         if action == 3:  #buy stock 2
#             if state_class_obj.Stock2Price > state_class_obj.open_cash:
#                 '''
#                 print("Buy stock 2 when it did not have cash, so bankrupt, end of episode")
#                 reward=-reward_timedelta*10
#                 done = True
                
#                 '''
#                 #If agent is trying to buy when it has no cash but has stock1 and stock2 balance then, 
#                 #it should pick from other actions
#                 #if (state_class_obj.Stock1Blnc>1) and  (state_class_obj.Stock2Blnc>1):
#                  #   action=random.sample([1, 2, 4, 5, 6],  1)  # Choose 1 elements from sell actions
#                 #else:    
#                 #print("Bankrupt")
#                 reward=-200000
#                 done = True
#                      #end episode   
#             else:
#                 #print("In Buy stock 2")
#                 agent.inventory2.append(data2_train[t])
#                 Bal_stock1_t1= len(agent.inventory1)
#                 Bal_stock2_t1=len(agent.inventory2)
#                 open_cash_t1=state_class_obj.open_cash-state_class_obj.Stock2Price
                
#                 if(state_class_obj.open_cash<5000):
#                     reward=-100000
#                 elif (abs(change_percent_stock2)<=2):
#                     reward=-10000
#                 else:
#                     reward=-change_percent_stock2*100
 
        
#         if action == 4:  #sell stock 2
#             if state_class_obj.Stock2Blnc <1 :
#                     #print("sold stock 2 when it did not have stock 2, so bankrupt, end of episode")
#                     reward=-200000
#                     done = True
#                 #end episode
#             else:
#                 #print("In sell stock 2")
#                 bought_price2=agent.inventory2.pop(0)
#                 Bal_stock1_t1= len(agent.inventory1)
#                 Bal_stock2_t1=len(agent.inventory2)
#                 open_cash_t1=state_class_obj.open_cash+state_class_obj.Stock2Price
    
              
#                 if(state_class_obj.Stock2Blnc<10):
#                     reward=-100000
#                 elif (abs(change_percent_stock2)<=2):
#                     reward=-10000
#                 else:
#                     reward=change_percent_stock2*100 
                
                
#                 total_profit += state_class_obj.Stock2Price - bought_price2

#                # print("reward for selling stock2: " + str(reward))
            
        
        
        #print("reward:  "+str(reward))
        #if done!= False:done = True if t == datasize
        if t == datasize-1:
            #print("t==datasize")
            done=True
            next_state_class_obj=State(data1_train, Bal_stock1_t1, open_cash_t1,t)
            next_state_array_obj=next_state_class_obj.getState()
        else:
            next_state_class_obj=State(data1_train, Bal_stock1_t1, open_cash_t1,t+1)
            next_state_array_obj=next_state_class_obj.getState()
            
        agent.memory.append((state_array_obj, action, reward, next_state_array_obj, done))
        #print("Action is "+str(action)+" reward is" + str(reward))
         
        Bal_stock1=Bal_stock1_t1
#         Bal_stock2= Bal_stock2_t1
        open_cash=open_cash_t1
        
        
      #  print("total_profit on day basis " + str(total_profit) +"on day"+str(t) + "stock 1 number: " + 
        #      str(len(agent.inventory1))+"/"+str(next_state_class_obj.Stock1Blnc)+" stock2 number:"+
         #         str(len(agent.inventory2)) +"/"+str(next_state_class_obj.Stock2Blnc)+
          #        "open cash: "+str(next_state_class_obj.open_cash))
        
       # print("doneAction" + str(done))
       # print("--------------------------------") 
       
        
        
        if done==True:
            #print("--------------------------------")
           # print("Total Profit: " + formatPrice(total_profit))
           # print("Total No. of days played: " + str(t)+ "  out of overall days:  " + str(datasize))
           # print("Total portfolio value: " + str(next_state_class_obj.portfolio_value)+ 
             #     "  stock 1 number: " + str(len(agent.inventory1))
            #      +"  stock 2 number: "+str(len(agent.inventory2))+"  open cash"+str(next_state_class_obj.open_cash))

            total_Prof.append(total_profit)
            total_stock1bal.append(len(agent.inventory1))
#             total_stock2bal.append(len(agent.inventory2))
            total_open_cash.append(state_class_obj.open_cash)
            total_port_value.append(state_class_obj.portfolio_value)
            total_days_played.append(t)
            if len(agent.memory) <= batch_size:
                print("^_^")
                agent.expReplay(len(agent.memory))


            print("--------------------------------")
#             state_class_obj.reset()
            break
           
          

        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)


    if e % 10 == 0:
        agent.model.save("models/model_ep" + str(e))
        

        
#print("Total Apple stocks in episodes"+ str(total_stock1bal))
#print("Total Amazon stocks in episodes"+ str(total_stock2bal))
#print("Total Open cash in episodes"+ str(total_open_cash))
#print("Total Portfolio value in episodes"+ str(total_port_value))
#print("Total Days in episodes"+ str(total_days_played))
#print("Benchmark_Profit is  " + str(int(Benchmark_Portfolio_Value)) +"   with Apple Stocks: " + str(Bench_Stock1_Bal) + 
    #  "   and Amazon stocks: "+ str(Bench_Stock2_Bal) )






..........
Episode 0/100
^_^
--------------------------------
..........
Episode 1/100
^_^
--------------------------------
..........
Episode 2/100
^_^
--------------------------------
..........
Episode 3/100
^_^
--------------------------------
..........
Episode 4/100
^_^
--------------------------------
..........
Episode 5/100
^_^
--------------------------------
..........
Episode 6/100
--------------------------------
..........
Episode 7/100
^_^
--------------------------------
..........
Episode 8/100
--------------------------------
..........
Episode 9/100
^_^
--------------------------------
..........
Episode 10/100
^_^
--------------------------------
..........
Episode 11/100
^_^
--------------------------------
..........
Episode 12/100
^_^
--------------------------------
..........
Episode 13/100
^_^
--------------------------------
..........
Episode 14/100
^_^
--------------------------------
..........
Episode 15/100
^_^
--------------------------------
..........

In [12]:
print("Total Apple stocks in episodes"+ str(total_stock1bal))
print("///////")
# print("Total Amazon stocks in episodes"+ str(total_stock2bal))
print("Total Open cash in episodes"+ str(total_open_cash))
print("Total Portfolio value in episodes"+ str(total_port_value))
print("------------------------------")
print("Total Days in episodes"+ str(total_days_played))

print("Benchmark_Profit is  " + str(int(Training_Benchmark_Portfolio_Value)) +"   with remaining Apple Stocks: " + str(remaining_stock1))

Total Apple stocks in episodes[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 0, 0, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 10, 0, 0, 0]
///////
Total Open cash in episodes[9999.9, 10000, 10000, 10000, 10003.500002999997, 10000.199999999999, 9994.300001000001, 9998.9, 10002.700003999997, 10000, 9998.9, 10000.3, 10005.300003999995, 9987.600006999995, 10000, 9998.9, 9259.350011999994, 10000, 10000, 10004.100002, 10000, 8704.00000899999, 10000, 10000, 10000.300006999994, 10000, 10000, 10000, 10000, 10000.9, 9996.599998999998, 10000, 9998.9, 10001.4, 10001.5, 10000.6, 10000.700004999997, 10000, 10006.300003, 10000, 10000, 10000, 9999.700002, 9999.9, 10004.4, 10000, 10005.2, 9998.9, 10000, 10000, 10000, 10002.2, 10000, 9606.450003999997, 10000.9, 10000, 10000, 10011.999998000001, 10000, 100

In [26]:
pd_data1_test=pd_data1_test.reset_index(drop=True)
data1_test=pd_data1_test['Close']
#Training run

import csv

from keras.models import load_model

model_name='model_ep90'

model = load_model("models/" + model_name)

initial_cash = state_class_obj.portfolio_value
#Define arrays to store per episode values 
total_Prof=[]
total_stock1bal=[]
# total_stock2bal=[]
total_open_cash=[]
total_port_value=[]
total_days_played=[]

for e in range(1):
    print("..........")
    
    Bal_stock1=int(0)
    open_cash=start_balance
    datasize=test
    done=False
    total_profit = 0
    reward = 0
    
    #Initialize Agent
#     agent = Agent(5)
    agent = Agent(5, is_eval=True, model_name=model_name)
    agent.inventory1 =[]
    open_cash_t1=open_cash
    for i in range(Bal_stock1):
        agent.inventory1.append(data1_train[0])
        
    Bal_stock1_t1 = len(agent.inventory1)
    
    #Running episode over all days in the datasize
    for t in range(datasize):
#         print(datasize)
        #print(pd_data1_train.iloc[t,0])
        state_class_obj= State(data1_train, Bal_stock1, open_cash,t)
        state_array_obj=state_class_obj.getState()
        action = agent.act(state_array_obj)
        print(action)           
        change_percent_stock1=(state_class_obj.Stock1Price-state_class_obj.fiveday_stock1)/state_class_obj.fiveday_stock1*100
#         change_percent_stock2=(state_class_obj.Stock2Price-state_class_obj.fiveday_stock2)/state_class_obj.fiveday_stock2*100
        
        #print("change_percent_stock1:  "+str(change_percent_stock1))
        #print("change_percent_stock2:  "+str(change_percent_stock2))
        
        
        if action == 0:  #buy stock 1
            if state_class_obj.Stock1Price > state_class_obj.open_cash:
                '''
                print("Buy stock 1 when it did not have cash, so bankrupt, end of episode")
                reward=-reward_timedelta*10
                done = True
                '''
                #If agent is trying to buy when it has no cash but has stock1 and stock2 balance then, 
                #it should pick from other actions
                #if (state_class_obj.Stock1Blnc>1) and  (state_class_obj.Stock2Blnc>1):
                 #   action=random.sample([1, 2, 4, 5, 6],  1)  # Choose 1 elements from sell actions
                #else:    
                #print("Bankrupt")
#                 reward=-200000
                done = True
                #end episode
                     
            else:
                #print("In Buy stock 1")
                agent.inventory1.append(data1_train[t])
                Bal_stock1_t1= len(agent.inventory1)
#                 Bal_stock2_t1=len(agent.inventory2)
                open_cash_t1=state_class_obj.open_cash-state_class_obj.Stock1Price #Here we are buying 1 stock
                
                #needs to be reviewed
#                 if(Bal_stock1_t1 == 0):
#                     reward = 300000
#                 elif(state_class_obj.open_cash<500):
#                     reward=-100000
#                 elif (state_class_obj.Stock1Price > state_class_obj.fiveday_stock1):
#                     reward+=1000
#                 elif (abs(change_percent_stock1)<=2):
#                     reward=-10000
#                 else:  
#                     reward=-change_percent_stock1*100
                

               
                
        if action == 1:  #sell stock 1
            if state_class_obj.Stock1Blnc <1 :
               # print("sold stock 2 when it did not have stock 2, so bankrupt, end of episode")
#                 reward=-5000000
                done = True
                #end episode
            else:
                #print("In sell stock 1")
                bought_price1=agent.inventory1.pop(0)
                Bal_stock1_t1= len(agent.inventory1)
                Bal_stock2_t1=len(agent.inventory2)
                open_cash_t1=state_class_obj.open_cash+state_class_obj.Stock1Price #State[0] is the price of stock 1. Here we are buying 1 stoc
                #total_profit += data1_train[t] - bought_price1
            #print("reward for sell stock1 " + str(reward))
                
        


#        TODO Config logic in this Action 
        if action == 2:             # Do nothing action    
#                 if (abs(change_percent_stock1)<=2):
# #                     reward=10000
#                 elif (state_class_obj.open_cash<0.1*start_balance):
#                     reward=-1000000
#                 elif (abs(change_percent_stock1)<=2):
#                     reward=1000
#                 else:
#                     reward=-100000
                
                Bal_stock1_t1= len(agent.inventory1)
#                 Bal_stock2_t1=len(agent.inventory2)
                open_cash_t1=open_cash
               # print("Do nothing")
        
            
        
        
        #print("reward:  "+str(reward))
        #if done!= False:done = True if t == datasize
        if t == datasize-1:
            #print("t==datasize")
            done=True
            next_state_class_obj=State(data1_train, Bal_stock1_t1, open_cash_t1,t)
            next_state_array_obj=next_state_class_obj.getState()
        else:
            next_state_class_obj=State(data1_train, Bal_stock1_t1, open_cash_t1,t+1)
            next_state_array_obj=next_state_class_obj.getState()
            
        agent.memory.append((state_array_obj, action, reward, next_state_array_obj, done))
        #print("Action is "+str(action)+" reward is" + str(reward))
         
        Bal_stock1=Bal_stock1_t1
#         Bal_stock2= Bal_stock2_t1
        open_cash=open_cash_t1
        
        
      #  print("total_profit on day basis " + str(total_profit) +"on day"+str(t) + "stock 1 number: " + 
        #      str(len(agent.inventory1))+"/"+str(next_state_class_obj.Stock1Blnc)+" stock2 number:"+
         #         str(len(agent.inventory2)) +"/"+str(next_state_class_obj.Stock2Blnc)+
          #        "open cash: "+str(next_state_class_obj.open_cash))
        
       # print("doneAction" + str(done))
       # print("--------------------------------") 
       
        
        
        if done==True:
            #print("--------------------------------")
           # print("Total Profit: " + formatPrice(total_profit))
           # print("Total No. of days played: " + str(t)+ "  out of overall days:  " + str(datasize))
           # print("Total portfolio value: " + str(next_state_class_obj.portfolio_value)+ 
             #     "  stock 1 number: " + str(len(agent.inventory1))
            #      +"  stock 2 number: "+str(len(agent.inventory2))+"  open cash"+str(next_state_class_obj.open_cash))

            print("Total Apple stocks in episodes"+ str(Bal_stock1))
            print("///////")
            # print("Total Amazon stocks in episodes"+ str(total_stock2bal))
            print("Total Open cash in episodes"+ str(open_cash))
            print("Total Portfolio value in episodes"+ str(state_class_obj.portfolio_value))
            print("------------------------------")
            print("Total Days in episodes"+ str(t))

#             print("Benchmark_Profit is  " + str(int(Training_Benchmark_Portfolio_Value)) +"   with remaining Apple Stocks: " + str(remaining_stock1))
# #             if len(agent.memory) <= batch_size:
#                 print("^_^")
#                 agent.expReplay(len(agent.memory))


            print("--------------------------------")
#             state_class_obj.reset()
            break
           
          




  







..........
[[-2631.9841 -1070.4501  2292.6062]]
2
[[-2632.0227 -1070.5747  2292.6143]]
2
[[-2632.0017 -1070.5066  2292.61  ]]
2
[[-2631.952  -1070.3483  2292.5984]]
2
[[-2631.9458 -1070.3254  2292.5981]]
2
[[-2631.9546 -1070.3367  2292.606 ]]
2
[[-2631.972  -1070.3473  2292.6243]]
2
[[-2631.9866 -1070.3014  2292.6577]]
2
[[-2632.0366 -1070.3801  2292.695 ]]
2
[[-2632.0325 -1070.3802  2292.69  ]]
2
[[-2632.0076 -1070.324   2292.677 ]]
2
[[-2632.0059 -1070.3125  2292.6782]]
2
[[-2632.01   -1070.3125  2292.683 ]]
2
[[-2632.0085 -1070.3124  2292.6816]]
2
[[-2631.9995 -1070.2555  2292.689 ]]
2
[[-2631.9763 -1070.1306  2292.7002]]
2
[[-2631.9795 -1070.0619  2292.7263]]
2
[[-2632.011  -1070.0612  2292.766 ]]
2
[[-2632.0557 -1070.1053  2292.8088]]
2
[[-2632.081  -1070.1047  2292.8418]]
2
[[-2632.0718 -1070.0138  2292.8594]]
2
[[-2632.051  -1069.9001  2292.8704]]
2
[[-2632.0886 -1069.9564  2292.899 ]]
2
[[-2632.1328 -1070.058   2292.9219]]
2
[[-2632.1313 -1070.0349  2292.9277]]
2
[[-2632.1255 -

2
[[-2632.065  -1070.0826  2292.8276]]
2
[[-2632.061  -1070.0483  2292.8342]]
2
[[-2632.0645 -1070.0367  2292.842 ]]
2
[[-2632.0732 -1070.0138  2292.861 ]]
2
[[-2632.064  -1069.934   2292.8752]]
2
[[-2632.1038 -1069.9786  2292.9114]]
2
[[-2632.12   -1069.9896  2292.9282]]
2
[[-2632.1519 -1070.0687  2292.9426]]
2
[[-2632.154  -1070.0917  2292.938 ]]
2
[[-2632.158  -1070.137   2292.9285]]
2
[[-2632.105  -1070.0471  2292.8906]]
2
[[-2632.0793 -1069.9908  2292.876 ]]
2
[[-2632.0718 -1069.9683  2292.8738]]
2
[[-2632.0955 -1070.0017  2292.8923]]
2
[[-2632.1028 -1069.9902  2292.906 ]]
2
[[-2632.1213 -1069.9896  2292.9292]]
2
[[-2632.1284 -1069.9894  2292.9392]]
2
[[-2632.1252 -1069.9781  2292.939 ]]
2
[[-2632.1162 -1069.9553  2292.9346]]
2
[[-2632.1155 -1069.9327  2292.9404]]
2
[[-2632.0947 -1069.8422  2292.9436]]
2
[[-2632.1055 -1069.8186  2292.9653]]
2
[[-2632.1196 -1069.7955  2292.991 ]]
2
[[-2632.0974 -1069.6477  2293.0107]]
2
[[-2632.1284 -1069.6241  2293.0579]]
2
[[-2632.1912 -1069.7023