In [1]:
import numpy as np
import math

# prints formatted price
def formatPrice(n):
	return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))

# returns the vector containing stock data from a fixed file
def getStockDataVec(key):
	vec = []
	lines = open(key + ".csv", "r").read().splitlines()
	for line in lines[1:]:
		vec.append(float(line.split(",")[5]))
	return vec

# returns the sigmoid
def sigmoid(x):
	return 1 / (1 + math.exp(-x))

# returns an an n-day state representation ending at time t
def getState(data, t, n):
	d = t - n + 1
	block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1] # pad with t0
	res = []
	for i in range(n - 1):
		res.append(sigmoid(block[i + 1] - block[i]))

	return np.array([res])

In [2]:
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
from sklearn.multioutput import MultiOutputRegressor

In [4]:
import numpy as np
import random
from collections import deque

In [23]:
class Agent:
	def __init__(self, state_size, is_eval=False, model_name=""):
		self.state_size = state_size # normalized previous days
		self.action_size = 3 # sit, buy, sell
		self.memory = deque(maxlen=1000)
		self.inventory = []
		self.model_name = model_name
		self.is_eval = is_eval
		self.gamma = 0.95
		self.epsilon = 1.0
		self.epsilon_min = 0.01
		self.epsilon_decay = 0.995
		self.isFit = False#change1
		filename= model_name +'.pkl'
		self.model =joblib.load(filename) if is_eval else self._model()
		#self.model = load_model("model/" + model_name) if is_eval else self._model()

	def _model(self):
		model = KNeighborsRegressor(n_jobs=-1)#change#2
		return model
    
	def act(self, state):
		if not self.is_eval and random.random() <= self.epsilon:
			return random.randrange(self.action_size)
		if self.isFit == True:#change3
			options= self.model.predict(state)
		else:
			options = np.zeros(self.action_size).reshape(1, -1)

		return np.argmax(options[0])

	def expReplay(self, batch_size):
		mini_batch = []
		X =[]
		targets =[]
		l = len(self.memory)
		for i in range(l - batch_size + 1, l):
			mini_batch.append(self.memory[i])
		for state, action, reward, next_state, done in mini_batch:
			target = reward
			if not done:
				if self.isFit:
					target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
				else:
					target=reward
			if self.isFit:
				target_f = self.model.predict(state)
			else:
				target_f  = np.zeros(self.action_size).reshape(1, -1)
			target_f[0][action] = target
            #print(state)
            #print(action)
            #print(q_values)
			X.append(list(state[0]))
			targets.append(target_f[0])
        #print(X)
        #print(targets)
		self.model.fit(X, targets)
		self.isFit = True
            

		if self.epsilon > self.epsilon_min:
			self.epsilon *= self.epsilon_decay 

In [24]:
agent.model

MultiOutputRegressor(estimator=LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
       importance_type='split', learning_rate=0.1, max_depth=-1,
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
           n_jobs=None)

In [27]:
from lightgbm import LGBMRegressor
from sklearn.neighbors import KNeighborsRegressor

In [28]:
from sklearn.externals import joblib
from sklearn.linear_model import LogisticRegression
stock_name= "^GSPC" #(past two months)
window_size=10
episode_count = 10
df = pd.read_csv("^GSPC.csv")
df['Profit']=0
df['Action']=0
agent = Agent(window_size)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32
pd.options.mode.chained_assignment = None

for e in range(episode_count + 1):
	print("---------------------------------------------------------------------------------------")
	print("Episode " + str(e) + "/" + str(episode_count))
	state = getState(data, 0, window_size + 1)

	total_profit = 0
	agent.inventory = []

	for t in range(l):
		action = agent.act(state)

		# sit
		next_state = getState(data, t + 1, window_size + 1)
		reward = 0

		if action == 1: # buy
			agent.inventory.append(data[t])
			print("Buy: " + formatPrice(data[t]))
			df['Action'][t] = 1
			df.to_csv('^GSPC.csv')

		elif action == 2 and len(agent.inventory) > 0: # sell
			bought_price = agent.inventory.pop(0)
			reward = max(data[t] - bought_price, 0)
			total_profit += data[t] - bought_price
			print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
			df['Profit'][t] = data[t] - bought_price
			df.to_csv('^GSPC.csv')

		done = True if t == l - 1 else False
		agent.memory.append((state, action, reward, next_state, done))
		state = next_state

		if done:
			print("--------------------------------")
			print("Total Profit: " + formatPrice(total_profit))
			print("--------------------------------")

		if len(agent.memory) > batch_size:
			agent.expReplay(batch_size)

	if e % 10 == 0:
		filename="model_ep" + str(e) +'.pkl'
		joblib.dump(agent.model, filename)
		m1=agent.model
		#agent.model.save("model/model_ep" + str(e))

plt.plot(df["Date"], df["Profit"])
plt.show()
plt.plot(df["Date"], df["Action"])
plt.show() 

---------------------------------------------------------------------------------------
Episode 0/10
Buy: $1283.27
Buy: $1347.56
Buy: $1333.34
Buy: $1298.35
Sell: $1326.82 | Profit: $43.55
Sell: $1318.55 | Profit: -$29.01
Sell: $1326.65 | Profit: -$6.69
Sell: $1329.47 | Profit: $31.12
Buy: $1347.97
Buy: $1342.90
Sell: $1364.30 | Profit: $16.33
Buy: $1357.51
Buy: $1364.17
Sell: $1366.01 | Profit: $23.11
Buy: $1373.47
Buy: $1349.47
Sell: $1354.31 | Profit: -$3.20
Buy: $1352.26
Buy: $1340.89
Sell: $1314.76 | Profit: -$49.41
Buy: $1318.80
Sell: $1326.61 | Profit: -$46.86
Buy: $1301.53
Sell: $1278.94 | Profit: -$70.53
Sell: $1252.82 | Profit: -$99.44
Sell: $1267.65 | Profit: -$73.24
Sell: $1239.94 | Profit: -$78.86
Sell: $1241.23 | Profit: -$60.30
Buy: $1180.16
Buy: $1197.66
Buy: $1166.71
Sell: $1150.53 | Profit: -$29.63
Buy: $1142.62
Sell: $1122.14 | Profit: -$75.52
Sell: $1117.58 | Profit: -$49.13
Buy: $1139.83
Sell: $1152.69 | Profit: $10.07
Sell: $1182.17 | Profit: $42.34
Buy: $1128.43


KeyboardInterrupt: 

In [10]:
joblib.load('model_ep0.pkl')

MultiOutputRegressor(estimator=LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
       importance_type='split', learning_rate=0.1, max_depth=-1,
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
           n_jobs=None)

In [12]:
joblib.load('model_ep10.pkl')

MultiOutputRegressor(estimator=LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
       importance_type='split', learning_rate=0.1, max_depth=-1,
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
           n_jobs=None)

In [14]:
agent.model


MultiOutputRegressor(estimator=LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
       importance_type='split', learning_rate=0.1, max_depth=-1,
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
           n_jobs=None)

In [18]:
from sklearn.externals import joblib
print("Evaluating......")
stock_name= "^GSPC_2011" #From 01/01/19 to 28/05/19(this year)


model_name = "model_ep10"
filename=model_name+'.pkl'

model = joblib.load(filename)

window_size = 10

agent = Agent(window_size, True, model_name)

data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32
state = getState(data, 0, window_size + 1)
total_profit = 0
agent.inventory = []
for t in range(l):
	#print('here')
	action = agent.act(state)
	print(action)
	# sit
	next_state = getState(data, t + 1, window_size + 1)
	print(next_state)    
	reward = 0

	if action == 1: # buy
		agent.inventory.append(data[t])
		print("Buy: " + formatPrice(data[t]))

	elif action == 2 and len(agent.inventory) > 0: # sell
		bought_price = agent.inventory.pop(0)
		reward = max(data[t] - bought_price, 0)
		total_profit += data[t] - bought_price
		print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
        
	done = True if t == l - 1 else False
	agent.memory.append((state, action, reward, next_state, done))
	state = next_state
	if done:
		print("--------------------------------")
		print(stock_name + " Total Profit: " + formatPrice(total_profit))
		print("--------------------------------")

Evaluating......
0
[[0.5        0.5        0.5        0.5        0.5        0.5
  0.5        0.5        0.5        0.15841831]]
0
[[0.5        0.5        0.5        0.5        0.5        0.5
  0.5        0.5        0.15841831 0.9982738 ]]
0
[[0.5        0.5        0.5        0.5        0.5        0.5
  0.5        0.15841831 0.9982738  0.062381  ]]
0
[[0.5        0.5        0.5        0.5        0.5        0.5
  0.15841831 0.9982738  0.062381   0.08706768]]
0
[[0.5        0.5        0.5        0.5        0.5        0.15841831
  0.9982738  0.062381   0.08706768 0.1480472 ]]
0
[[0.5        0.5        0.5        0.5        0.15841831 0.9982738
  0.062381   0.08706768 0.1480472  0.99125058]]
0
[[0.5        0.5        0.5        0.15841831 0.9982738  0.062381
  0.08706768 0.1480472  0.99125058 0.99998967]]
0
[[0.5        0.5        0.15841831 0.9982738  0.062381   0.08706768
  0.1480472  0.99125058 0.99998967 0.09975489]]
0
[[0.5        0.15841831 0.9982738  0.062381   0.08706768 0.1480472
 