In [4]:
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam

import numpy as np
import random
from collections import deque

class Agent:
	def __init__(self, state_size, is_eval=False, model_name=""):
		self.state_size = state_size # normalized previous days
		self.action_size = 3 # sit, buy, sell
		self.memory = deque(maxlen=1000)
		self.inventory = []
		self.model_name = model_name
		self.is_eval = is_eval

		self.gamma = 0.95
		self.epsilon = 1.0
		self.epsilon_min = 0.01
		self.epsilon_decay = 0.995
	
		self.model = load_model("model/" + model_name) if is_eval else self._model()

	def _model(self):
		model = Sequential()
		model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
		model.add(Dense(units=32, activation="relu"))
		model.add(Dense(units=8, activation="relu"))
		model.add(Dense(self.action_size, activation="linear"))
		model.compile(loss="mse", optimizer=Adam(lr=0.001))

		return model

	def act(self, state):
		if not self.is_eval and np.random.rand() <= self.epsilon:
			return random.randrange(self.action_size)

		options = self.model.predict(state)
		return np.argmax(options[0])

	def expReplay(self, batch_size):
		mini_batch = []
		l = len(self.memory)
		for i in range(l - batch_size + 1, l):
			mini_batch.append(self.memory[i])

		for state, action, reward, next_state, done in mini_batch:
			target = reward
			if not done:
				target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])

			target_f = self.model.predict(state)
			target_f[0][action] = target
			self.model.fit(state, target_f, epochs=1, verbose=0)

		if self.epsilon > self.epsilon_min:
			self.epsilon *= self.epsilon_decay 

import numpy as np
import math
import io
import gym
import pandas as pd

class State:
	def __init__(self, data1, timestep):
		self.Stock1Price=data1[timestep] #stock 1 open price
		self.fiveday_stock1=self.five_day_window(data1, timestep)
# prints formatted price
	def formatPrice(n):
		return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))

	# returns the vector containing stock data from a fixed file

	# returns the sigmoid
	# def sigmoid(x):
	#     return 1 / (1 + math.exp(-x))

	# returns an an n-day state representation ending at time t
	# def getState(data, t, n):
	# 	d = t - n + 1
	# 	block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1] # pad with t0
	# 	res = []
	# 	for i in range(n - 1):
	# 		res.append(sigmoid(block[i + 1] - block[i]))

	# 	return np.array([res])
	def getState(self):
		#print("In get state")
		res=[]
		res.append(self.Stock1Price) #stock 1 open price
# 		res.append(self.Stock1Blnc) #stock 1 balance
# 		res.append(self.open_cash) #cash balance
		res.append(self.fiveday_stock1)
# 		res.append(self.portfolio_value)
		#res.append(self.volume1)
		#res.append(self.volume2)


		
		#print(res)
		res1=np.array([res])
		#print("res array"+np.array([res]))
		return res1
	    
	def five_day_window(self,data, timestep):
		step = timestep
		if step < 5:
			return data[0]
		stock_5days = np.mean(data[step-5:step])
		#print("stock_5days=" + str(stock_5days))
		#print(stock_5days)

		#print(type(stock_5days))

		return stock_5days

def getStockDataVec(key):
	vec = []
	lines=pd.read_csv('data/'+key+'.csv', sep=",", header=0).values
	for line in lines[1:]:
		vec.append(line[4])
	return vec

def getStockData(key):
	return pd.read_csv('data/'+key+'.csv', sep=",", header=0)

def formatPrice(n):
	return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))

In [5]:
import sys

sys.argv = ["AOT.BK",2,50]

stock_name, window_size, episode_count = sys.argv[0], int(sys.argv[1]), int(sys.argv[2])

agent = Agent(window_size)
data = getStockDataVec(stock_name)
rawData = getStockData(stock_name)
# l = len(data) - 1
l = 245
batch_size = 32

In [None]:
import matplotlib.pyplot as plt
import datetime
import numpy as np

%matplotlib inline
# rawData['Date']=pd.to_datetime(rawData['Date'], format='%Y/%m/%d')
x2 = np.array(rawData['Date'])
y2 = rawData['Open']
y22= rawData['Volume']


plt.title(stock_name+" Stock Performance years")
plt.xlabel("Year")
plt.ylabel("Price in $")

plt.plot(x2,y2)


ax2 = plt.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:red'
ax2.set_ylabel('volume', color=color)  # we already handled the x-label with ax1
ax2.plot(x2, y22, color=color)
ax2.tick_params(axis='y', labelcolor=color)




plt.show()

In [8]:
# import sys

# sys.argv = ["AOT.BK",10,1]

# stock_name, window_size, episode_count = sys.argv[0], int(sys.argv[1]), int(sys.argv[2])

# agent = Agent(window_size)
# data = getStockDataVec(stock_name)
# l = len(data) - 1
# # l = 245
# batch_size = 32

for e in range(episode_count + 1):

	print("Episode " + str(e) + "/" + str(episode_count))
	state_class_obj= State(data, e)
# 	state = getState(data, 0, window_size + 1)
	state = state_class_obj.getState()
# 	print(state)  
	total_profit = 0
	agent.inventory = []
	done = False

	for t in range(l):
		action = agent.act(state)
		# sit
		reward = 0

		if action == 1: # buy
			agent.inventory.append(data[t])
			#print("Buy: " + formatPrice(data[t]))

		elif action == 2: # sell
			if len(agent.inventory) == 0:
				done = True
				print("Nothing left to sell")
				reward = -100000                
			else:           
				bought_price = agent.inventory.pop(0)
				reward = max(data[t] - bought_price, 0)
				total_profit += data[t] - bought_price
			#print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

		if t == l - 1:
			done = True 
        
		nextState_class_obj= State(data, e+1)
		next_state = nextState_class_obj.getState()
# 		print(next_state)        
		agent.memory.append((state, action, reward, next_state, done))
		state = next_state

		if done:
			print("--------------------------------")
			print("Total Profit: " + formatPrice(total_profit))
			print("--------------------------------")
			break
# 		print(len(agent.memory))
		if len(agent.memory) > batch_size:
			agent.expReplay(batch_size)

	if e % 10 == 0:
		name = 'model/model_ep' + str(e)
		agent.model.save(name)

Episode 0/50
--------------------------------
Total Profit: $5.70
--------------------------------
Episode 1/50
--------------------------------
Total Profit: $0.00
--------------------------------
Episode 2/50
--------------------------------
Total Profit: $0.20
--------------------------------
Episode 3/50
Nothing left to sell
--------------------------------
Total Profit: $0.00
--------------------------------
Episode 4/50
--------------------------------
Total Profit: $2.10
--------------------------------
Episode 5/50
--------------------------------
Total Profit: $0.00
--------------------------------
Episode 6/50
--------------------------------
Total Profit: $6.00
--------------------------------
Episode 7/50
--------------------------------
Total Profit: $0.00
--------------------------------
Episode 8/50
--------------------------------
Total Profit: $111.70
--------------------------------
Episode 9/50
Nothing left to sell
--------------------------------
Total Profit: $0.00

In [3]:
import keras
from keras.models import load_model

import sys

if len(sys.argv) != 3:
	print("Usage: python evaluate.py [stock] [model]")
	exit()
sys.argv = ['AOT.BK','model_ep50']
stock_name, model_name = sys.argv[0], sys.argv[1]
model = load_model("model/" + model_name)
window_size = model.layers[0].input.shape.as_list()[1]

agent = Agent(window_size, True, model_name)
data = getStockDataVec(stock_name)
l = 100
batch_size = 32
print(window_size)
# print(state)
total_profit = 0
agent.inventory = []
print(l)
for t in range(l):
	state_class_obj= State(data, t)
# 	state = getState(data, 0, window_size + 1)
	state = state_class_obj.getState()
	action = agent.act(state)
	print(str(t)+" "+str(action))
	# sit
# 	next_state = getState(data, t + 1, window_size + 1)
	nextState_class_obj= State(data, t+1)
	next_state = nextState_class_obj.getState()
	reward = 0

	if action == 1: # buy
		agent.inventory.append(data[t])
		print("Buy: " + formatPrice(data[t]))

	elif action == 2 and len(agent.inventory) > 0: # sell
		bought_price = agent.inventory.pop(0)
		reward = max(data[t] - bought_price, 0)
		total_profit += data[t] - bought_price
		print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

	done = True if t == l - 1 else False
	agent.memory.append((state, action, reward, next_state, done))
	state = next_state

	if done:
		print(stock_name + " Total Profit: " + formatPrice(total_profit))
		print("--------------------------------")
		print(agent.inventory)


Usage: python evaluate.py [stock] [model]
2
100
0 1
Buy: $27.20
1 1
Buy: $28.30
2 1
Buy: $28.90
3 1
Buy: $29.10
4 1
Buy: $29.10
5 1
Buy: $29.00
6 1
Buy: $28.40
7 1
Buy: $28.90
8 1
Buy: $28.90
9 1
Buy: $29.00
10 1
Buy: $29.00
11 1
Buy: $29.00
12 1
Buy: $29.50
13 1
Buy: $31.00
14 1
Buy: $31.40
15 1
Buy: $31.30
16 1
Buy: $31.10
17 1
Buy: $30.90
18 1
Buy: $32.40
19 1
Buy: $32.40
20 1
Buy: $32.20
21 1
Buy: $31.40
22 1
Buy: $31.60
23 1
Buy: $32.00
24 1
Buy: $31.40
25 1
Buy: $31.50
26 1
Buy: $31.90
27 1
Buy: $31.60
28 1
Buy: $31.20
29 1
Buy: $31.20
30 1
Buy: $30.70
31 1
Buy: $31.30
32 1
Buy: $30.70
33 1
Buy: $31.10
34 1
Buy: $30.70
35 1
Buy: $30.60
36 1
Buy: $30.40
37 1
Buy: $31.40
38 1
Buy: $30.90
39 1
Buy: $30.30
40 1
Buy: $29.70
41 1
Buy: $29.30
42 1
Buy: $30.10
43 1
Buy: $29.70
44 1
Buy: $29.50
45 1
Buy: $29.70
46 1
Buy: $29.60
47 1
Buy: $29.50
48 1
Buy: $29.40
49 1
Buy: $29.10
50 1
Buy: $29.80
51 1
Buy: $29.90
52 1
Buy: $29.70
53 1
Buy: $30.30
54 1
Buy: $29.80
55 1
Buy: $30.10
56 1
Buy: 

E0829 23:07:24.579704 4565046720 alias.py:221] Invalid alias: The name clear can't be aliased because it is another magic command.
E0829 23:07:24.580564 4565046720 alias.py:221] Invalid alias: The name more can't be aliased because it is another magic command.
E0829 23:07:24.581372 4565046720 alias.py:221] Invalid alias: The name less can't be aliased because it is another magic command.
E0829 23:07:24.582450 4565046720 alias.py:221] Invalid alias: The name man can't be aliased because it is another magic command.
