In [None]:
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam
import math
import numpy as np
import random
from collections import deque

In [None]:
class Agent:
    """Deep Q-learning agent that picks one of three actions: sit, buy or sell.

    Attributes:
        state_size: Number of features in one state vector (network input width).
        action_size: Number of discrete actions (3: sit=0, buy=1, sell=2).
        memory: Bounded replay buffer (most recent 1000 transitions) of
            ``(state, action, reward, next_state, done)`` tuples.
        inventory: List initialised empty here; the class itself never reads it
            (presumably filled by the caller with purchase prices).
        model_name: Path handed to ``load_model`` when ``is_eval`` is true.
        is_eval: When true, a saved model is loaded and ``act`` never explores.
        gamma: Discount factor applied to the best next-state Q-value.
        epsilon / epsilon_min / epsilon_decay: Exploration probability, its
            floor, and the multiplicative decay applied after each replay.
        model: The Keras network mapping a state to one Q-value per action.
    """

    def __init__(self, state_size, is_eval=False, model_name=""):
        """Create the agent, loading ``model_name`` if ``is_eval`` else building a new net."""
        self.state_size = state_size  # normalized previous days
        self.action_size = 3  # sit, buy, sell
        self.memory = deque(maxlen=1000)
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = load_model(model_name) if is_eval else self._model()

    def _model(self):
        """Build and compile the 64-32-8 ReLU network with a linear Q-value head."""
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=8, activation="relu"))
        model.add(Dense(self.action_size, activation="linear"))
        # `learning_rate` is the supported keyword; `lr` is deprecated and
        # rejected by recent Keras releases.
        model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))
        return model

    def act(self, state):
        """Return an action index for ``state`` (epsilon-greedy unless evaluating)."""
        if not self.is_eval and random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        options = self.model.predict(state)
        return np.argmax(options[0])

    def expReplay(self, batch_size):
        """Fit the model on the ``batch_size`` most recent transitions in memory.

        Each transition is fitted individually against a target in which only
        the Q-value of the action taken is replaced by
        ``reward + gamma * max(Q(next_state))`` (or just ``reward`` when done).
        Afterwards epsilon is decayed once if it is still above ``epsilon_min``.
        """
        memory_len = len(self.memory)
        # Start exactly batch_size entries from the end; clamp at 0 so a memory
        # shorter than batch_size is replayed in full instead of indexing
        # out of range.
        first = max(memory_len - batch_size, 0)
        mini_batch = [self.memory[i] for i in range(first, memory_len)]

        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            # Keep the network's own predictions for the other actions so only
            # the taken action contributes to the loss.
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [None]:
def formatPrice(n):
    """Render ``n`` as a rupee amount with two decimals, e.g. ``-Rs.2.49``.

    The sign is carried by the prefix; the digits are always the absolute value.
    """
    if n < 0:
        prefix = "-Rs."
    else:
        prefix = "Rs."
    amount = format(abs(n), ".2f")
    return prefix + amount
def getStockDataVec(key):
    """Load one numeric column from ``<key>.csv`` as a list of floats.

    The first line of the file is skipped as a header. For every remaining
    non-blank line the fifth comma-separated field (index 4) is parsed as a
    float (presumably the closing price in a Date,Open,High,Low,Close layout;
    confirm against the data file).

    Args:
        key: Path of the CSV file without the ``.csv`` extension.

    Returns:
        List of floats in file order.

    Raises:
        FileNotFoundError: If ``<key>.csv`` does not exist.
        IndexError: If a non-blank data line has fewer than five fields.
        ValueError: If the fifth field is not a valid float.
    """
    # The context manager closes the handle even when parsing fails below.
    with open(key + ".csv", "r") as csv_file:
        lines = csv_file.read().splitlines()

    vec = []
    for line in lines[1:]:
        if not line.strip():
            # Tolerate blank lines (e.g. trailing newlines at the end of the file).
            continue
        vec.append(float(line.split(",")[4]))
    return vec
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of ``x``.

    Evaluated in a numerically stable way: ``math.exp`` is only ever called
    with a non-positive argument, so very negative inputs (below roughly -709)
    return 0.0 instead of raising ``OverflowError``.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For x < 0, use the equivalent form e**x / (1 + e**x); e**x underflows
    # harmlessly to 0.0 rather than overflowing.
    z = math.exp(x)
    return z / (1 + z)
def getState(data, t, n):
    """Build the state for time ``t`` from the ``n`` values of ``data`` ending at ``t``.

    The window covers indices ``t - n + 1 .. t``. When it would start before
    index 0, it is left-padded with copies of ``data[0]``. Each of the ``n - 1``
    consecutive differences in the window is passed through ``sigmoid``.

    Returns:
        A NumPy array holding a single row of ``n - 1`` values.
    """
    start = t - n + 1
    if start >= 0:
        window = data[start:t + 1]
    else:
        # Not enough history yet: pad with t0
        window = -start * [data[0]] + data[0:t + 1]

    deltas = [sigmoid(window[k + 1] - window[k]) for k in range(n - 1)]
    return np.array([deltas])


In [None]:
import sys

# Training cell: read the run configuration, then train the agent for
# episode_count + 1 episodes (episodes are numbered 0..episode_count).
# Three separate inputs are read: stock CSV basename, window size, episode count.
stock_name = input("Enter stock_name, window_size, Episode_count").strip()
window_size = int(input())
episode_count = int(input())

agent = Agent(window_size)
data = getStockDataVec(stock_name)
l = len(data) - 1  # steps per episode; the last index is only ever used as next_state
batch_size = 32

for e in range(episode_count + 1):
    print("Episode " + str(e) + "/" + str(episode_count))
    # window_size + 1 prices give window_size differences, matching the
    # state_size the agent was built with.
    state = getState(data, 0, window_size + 1)
    total_profit = 0
    agent.inventory = []

    for t in range(l):
        action = agent.act(state)

        # sit (action 0, or a sell with nothing held): reward stays 0
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0

        if action == 1:  # buy
            agent.inventory.append(data[t])
            print("Buy: " + formatPrice(data[t]))
        elif action == 2 and len(agent.inventory) > 0:  # sell
            # Sell the oldest holding first (FIFO).
            bought_price = agent.inventory.pop(0)
            # The reward is clipped at zero, while total_profit tracks the real P/L.
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

        done = t == l - 1
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")

        # Replay on every step once the buffer holds more than one batch.
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)

    # Checkpoint every 10th episode (including episode 0); the path is the
    # episode number as a string.
    if e % 10 == 0:
        agent.model.save(str(e))

Episode 0/10
Buy: Rs.55.89
Sell: Rs.53.40 | Profit: -Rs.2.49
Buy: Rs.52.80
Sell: Rs.53.12 | Profit: Rs.0.32
Buy: Rs.54.44
Sell: Rs.52.90 | Profit: -Rs.1.54
Buy: Rs.56.88
Sell: Rs.58.94 | Profit: Rs.2.06
Buy: Rs.58.43
Buy: Rs.58.00
Buy: Rs.52.65
Buy: Rs.52.69


  super().__init__(name, **kwargs)


Streaming output truncated to the last 5000 lines.
Buy: Rs.63.39
Buy: Rs.64.86
Buy: Rs.66.06
Sell: Rs.66.08 | Profit: Rs.2.66
Buy: Rs.66.67
Buy: Rs.66.76
Sell: Rs.66.96 | Profit: Rs.3.57
Sell: Rs.68.49 | Profit: Rs.3.63
Sell: Rs.67.67 | Profit: Rs.1.61
Sell: Rs.69.76 | Profit: Rs.3.09
Sell: Rs.69.46 | Profit: Rs.2.70
Buy: Rs.69.66
Buy: Rs.69.60
--------------------------------
Total Profit: Rs.173.04
--------------------------------
Episode 1/10
Buy: Rs.52.04
Buy: Rs.50.70
Buy: Rs.53.00
Sell: Rs.52.90 | Profit: Rs.0.86
Sell: Rs.53.80 | Profit: Rs.3.10
Buy: Rs.53.64
Sell: Rs.54.25 | Profit: Rs.1.25
Sell: Rs.56.35 | Profit: Rs.2.71
Buy: Rs.52.69
Sell: Rs.52.84 | Profit: Rs.0.15
Buy: Rs.57.60


In [None]:
# Evaluation cell: load a saved model and run it once over a price series,
# reporting each trade and the total profit. Two inputs are read: the stock
# CSV basename, then the saved model path.
stock_name = input("Enter Stock_name, Model_name").strip()
model_name = input().strip()

# The model is loaded here only to recover the input width it was trained with;
# Agent loads it again itself because is_eval is True.
model = load_model(model_name)
window_size = model.layers[0].input.shape.as_list()[1]
agent = Agent(window_size, True, model_name)

data = getStockDataVec(stock_name)
l = len(data) - 1  # number of steps; the last index is only ever used as next_state
batch_size = 32  # not used in this cell
state = getState(data, 0, window_size + 1)
total_profit = 0
agent.inventory = []

for t in range(l):
    action = agent.act(state)

    # sit (action 0, or a sell with nothing held): reward stays 0
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0

    if action == 1:  # buy
        agent.inventory.append(data[t])
        print("Buy: " + formatPrice(data[t]))
    elif action == 2 and len(agent.inventory) > 0:  # sell
        # Sell the oldest holding first (FIFO).
        bought_price = agent.inventory.pop(0)
        reward = max(data[t] - bought_price, 0)
        total_profit += data[t] - bought_price
        print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

    done = t == l - 1
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
        print("--------------------------------")
        print(stock_name + " Total Profit: " + formatPrice(total_profit))
        print("--------------------------------")
        print ("Total profit is:",formatPrice(total_profit))

Enter Stock_name, Model_nameHDB
0
[55.209999, 55.529999, 55.889999, 55.25, 53.400002, 52.040001, 50.700001, 53.73, 53.349998, 52.799999, 53.119999, 53.0, 54.439999, 52.900002, 52.639999, 53.799999, 53.639999, 54.25, 56.349998, 56.880001, 58.939999, 57.57, 58.310001, 58.91, 58.34, 58.310001, 58.43, 58.68, 58.0, 55.369999, 52.650002, 52.689999, 52.84, 51.290001, 53.48, 55.98, 55.650002, 56.57, 57.810001, 56.84, 55.16, 55.560001, 54.959999, 55.509998, 55.810001, 57.130001, 59.470001, 59.759998, 59.299999, 58.669998, 57.25, 57.02, 58.0, 56.360001, 57.18, 57.599998, 58.27, 58.610001, 59.150002, 58.610001, 59.950001, 62.080002, 62.799999, 63.080002, 62.080002, 63.09, 62.43, 62.810001, 63.07, 63.07, 65.269997, 65.0, 66.169998, 65.760002, 64.940002, 64.610001, 64.620003, 63.049999, 62.130001, 62.200001, 62.849998, 62.75, 61.0, 60.700001, 61.34, 61.049999, 61.77, 61.98, 61.959999, 63.279999, 64.400002, 64.449997, 65.300003, 65.25, 67.82, 66.949997, 64.919998, 66.07, 65.379997, 63.84, 61.119999,