In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

import environment.trading_environment as env
from agent.dqn import DQN

In [2]:
# Build the trading environment. The returned object is bound to the
# notebook-global `portfolio`, which the cells below read and mutate.
portfolio = env.setup_environment()
# Sanity check of the starting state: the balance and the number of entries
# in `portfolio.stocks`.
print("Portfolio balance:", portfolio.balance)
print("Stocks in portfolio:", len(portfolio.stocks))

Portfolio balance: 100000
Stocks in portfolio: 0


In [9]:
# Label for every discrete action index; `step` interprets the indices the
# same way (0 = Hold, 1 = Buy, 2 = Sell).
actions = dict(enumerate(("Hold", "Buy", "Sell")))

# Agent over the 3 possible actions listed above.
agent = DQN(action_space=len(actions))

def step(actions):
    """
    Transition method: apply one action, then advance the market once.

    Parameters
    ----------
    actions : array-like
        Values indexed by action; the position of the largest entry
        (``np.argmax``) selects what is done:
        0 = Hold, 1 = Buy 1 share of AAPL, 2 = Sell 1 share of AAPL.

    Returns
    -------
    tuple
        ``(state, reward)``: ``portfolio.get_state()`` read after the price
        update, and the change in ``portfolio.get_value()`` over the step.

    Notes
    -----
    Operates on the notebook-global ``portfolio``. An order that raises an
    ordinary exception is ignored on purpose (best effort), so the step then
    behaves like Hold. Only ``Exception`` subclasses are suppressed:
    ``KeyboardInterrupt`` and ``SystemExit`` propagate, so a long training
    run can still be interrupted.
    """
    action = np.argmax(actions)
    value_before = portfolio.get_value()

    # action == 0 (Hold) needs no order, so only Buy and Sell are handled.
    if action == 1:
        # Buy
        try:
            portfolio.buy("AAPL", 1)
        except Exception:
            # The order failed; carry on as if the action had been Hold.
            pass
    elif action == 2:
        # Sell
        try:
            portfolio.sell("AAPL", 1)
        except Exception:
            # The order failed; carry on as if the action had been Hold.
            pass

    # Move the market forward before measuring the outcome of the action.
    portfolio.market.update_prices()

    state = portfolio.get_state()
    reward = portfolio.get_value() - value_before
    return state, reward

# Wall-clock reference for the elapsed-time column of the progress log
start_time = time.time()

# Training loop
num_episodes = 1
for episode in range(num_episodes):
    # Start every episode from a reset portfolio and the same market offset
    portfolio.reset()
    portfolio.market.time_offset = 90

    total_reward = 0
    done = False

    # 0-6046 Training/Val - 90
    num_training_period = 6177
    for period in range(5927, num_training_period):
        # Progress line whenever the period index is a multiple of 20
        if not period % 20:
            print(f"Periode: {period}, Total Reward: {total_reward}, {time.time()-start_time}")

        # Observe, let the agent choose, then apply the choice to the environment
        state = portfolio.get_state()
        action = agent.act(state)
        next_state, reward = step(action)

        # Only the final period of the range is flagged as done
        done = period == num_training_period - 1

        # Store the transition, then run the agent's replay step
        agent.remember(state, action, reward, next_state, done)
        total_reward += reward
        agent.replay()

    print(f"Episode: {episode+1}, Total Reward: {total_reward}")

Periode: 5941, Total Reward: -0.22121892869472504, 0.0029985904693603516
Periode: 5961, Total Reward: -0.9703559726476669, 0.007000923156738281
Periode: 5981, Total Reward: -1.5826144218444824, 0.012000083923339844
Periode: 6001, Total Reward: -1.7424651384353638, 80.27382206916809
Periode: 6021, Total Reward: -2.894345313310623, 240.0141463279724
Periode: 6041, Total Reward: -4.413776531815529, 403.83090591430664
Periode: 6061, Total Reward: -4.475036859512329, 568.679196357727
Periode: 6081, Total Reward: -5.76616433262825, 731.7647500038147
Periode: 6101, Total Reward: -5.733551666140556, 891.1326291561127
Periode: 6121, Total Reward: -5.0299995839595795, 1053.8594057559967
Periode: 6141, Total Reward: -5.345068246126175, 1215.5389816761017
Periode: 6161, Total Reward: -4.678825616836548, 1377.9056468009949
Episode: 1, Total Reward: -4.009752452373505


In [15]:
# Save the agent's current model weights under models/aapl/dqn_1.
agent.model.save_weights("models/aapl/dqn_1")

In [12]:
# Show the current portfolio balance (rendered as the cell output).
portfolio.balance

49983.60980850458

In [13]:
# Show the current holdings mapping (rendered as the cell output).
portfolio.stocks

{'AAPL': 40}

## Apple 2019

In [3]:
# Label for every discrete action index; `step` interprets the indices the
# same way (0 = Hold, 1 = Buy, 2 = Sell).
actions = dict(enumerate(("Hold", "Buy", "Sell")))

# Fresh agent over the 3 possible actions listed above.
agent = DQN(action_space=len(actions))

# Counters of successfully placed orders; `step` increments them.
num_buys = num_sells = 0

def step(actions):
    """
    Transition method: apply one action, then advance the market once.

    Parameters
    ----------
    actions : array-like
        Values indexed by action; the position of the largest entry
        (``np.argmax``) selects what is done:
        0 = Hold, 1 = Buy 10 shares of AAPL, 2 = Sell 10 shares of AAPL.

    Returns
    -------
    tuple
        ``(state, reward)``: ``portfolio.get_state()`` read after the price
        update, and the change in ``portfolio.get_value()`` over the step.

    Notes
    -----
    Operates on the notebook-globals ``portfolio``, ``num_buys`` and
    ``num_sells``; a counter is incremented only when its order call returns
    without raising. An order that raises an ordinary exception is ignored
    on purpose (best effort), so the step then behaves like Hold. Only
    ``Exception`` subclasses are suppressed: ``KeyboardInterrupt`` and
    ``SystemExit`` propagate, so a long training run can still be
    interrupted.
    """
    global num_buys, num_sells

    action = np.argmax(actions)
    value_before = portfolio.get_value()

    # action == 0 (Hold) needs no order, so only Buy and Sell are handled.
    if action == 1:
        # Buy
        try:
            portfolio.buy("AAPL", 10)
        except Exception:
            # The order failed; carry on as if the action had been Hold.
            pass
        else:
            num_buys += 1
    elif action == 2:
        # Sell
        try:
            portfolio.sell("AAPL", 10)
        except Exception:
            # The order failed; carry on as if the action had been Hold.
            pass
        else:
            num_sells += 1

    # Move the market forward before measuring the outcome of the action.
    portfolio.market.update_prices()

    state = portfolio.get_state()
    reward = portfolio.get_value() - value_before
    return state, reward

# Wall-clock reference for the elapsed-time column of the progress log
start_time = time.time()

# Training loop
num_episodes = 3
for episode in range(num_episodes):
    # Start every episode from a reset portfolio and the same market offset
    portfolio.reset()
    portfolio.market.time_offset = 6429

    total_reward = 0
    done = False

    # 0-6046 Training/Val - 90
    num_training_period = 6680
    for period in range(6429, num_training_period):
        # Progress line whenever the period index is a multiple of 20
        if not period % 20:
            print(f"Periode: {period}, Total Reward: {total_reward}, {time.time()-start_time}")

        # Observe, let the agent choose, then apply the choice to the environment
        state = portfolio.get_state()
        action = agent.act(state)
        next_state, reward = step(action)

        # Only the final period of the range is flagged as done
        done = period == num_training_period - 1

        # Store the transition, then run the agent's replay step
        agent.remember(state, action, reward, next_state, done)
        total_reward += reward
        agent.replay()

    # Per-episode report. num_buys / num_sells are not reset in this loop, so
    # they are running totals across episodes, while the portfolio is reset.
    # NOTE(review): confirm the running totals are intended.
    print(f"Episode: {episode+1}, Total Reward: {total_reward}")
    print(num_buys, num_sells, portfolio.balance, portfolio.stocks, sep="\n")
    print("*" * 40)

# 6047-7555 Test
print(num_buys, num_sells, portfolio.balance, portfolio.stocks, sep="\n")

# Save the agent's model weights once all episodes have run
agent.model.save_weights("models/aapl/dqn_2")

Periode: 6441, Total Reward: -733.6652374267578, 0.0010235309600830078
Periode: 6461, Total Reward: -1467.6183319091797, 0.004232645034790039
Periode: 6481, Total Reward: -1711.104507446289, 0.007359504699707031
Periode: 6501, Total Reward: -2066.84871673584, 63.620696783065796
Periode: 6521, Total Reward: -5560.7037353515625, 222.1989140510559
Periode: 6541, Total Reward: -5053.833427429199, 381.1625506877899
Periode: 6561, Total Reward: -4820.616874694824, 540.8446314334869
Periode: 6581, Total Reward: -5376.410140991211, 700.1521980762482
Periode: 6601, Total Reward: -6053.29891204834, 859.9111132621765
Periode: 6621, Total Reward: -5177.950401306152, 1015.0899639129639
Periode: 6641, Total Reward: -2624.9535369873047, 1168.1283831596375
Periode: 6661, Total Reward: -1582.3918151855469, 1321.6982922554016
Episode: 1, Total Reward: 4987.09342956543
108
0
42227.31643676758
{'AAPL': 830}
****************************************
Periode: 6441, Total Reward: 50.116310119628906, 1560.2655

## Procter & Gamble 2019

In [3]:
# Label for every discrete action index; `step` interprets the indices the
# same way (0 = Hold, 1 = Buy, 2 = Sell).
actions = dict(enumerate(("Hold", "Buy", "Sell")))

# Fresh agent over the 3 possible actions listed above.
agent = DQN(action_space=len(actions))

# Counters of successfully placed orders; `step` increments them.
num_buys = num_sells = 0

def step(actions):
    """
    Transition method: apply one action, then advance the market once.

    Parameters
    ----------
    actions : array-like
        Values indexed by action; the position of the largest entry
        (``np.argmax``) selects what is done:
        0 = Hold, 1 = Buy 10 shares of PG, 2 = Sell 10 shares of PG.

    Returns
    -------
    tuple
        ``(state, reward)``: ``portfolio.get_state()`` read after the price
        update, and the change in ``portfolio.get_value()`` over the step.

    Notes
    -----
    Operates on the notebook-globals ``portfolio``, ``num_buys`` and
    ``num_sells``; a counter is incremented only when its order call returns
    without raising. An order that raises an ordinary exception is ignored
    on purpose (best effort), so the step then behaves like Hold. Only
    ``Exception`` subclasses are suppressed: ``KeyboardInterrupt`` and
    ``SystemExit`` propagate, so a long training run can still be
    interrupted.
    """
    global num_buys, num_sells

    action = np.argmax(actions)
    value_before = portfolio.get_value()

    # action == 0 (Hold) needs no order, so only Buy and Sell are handled.
    if action == 1:
        # Buy
        try:
            portfolio.buy("PG", 10)
        except Exception:
            # The order failed; carry on as if the action had been Hold.
            pass
        else:
            num_buys += 1
    elif action == 2:
        # Sell
        try:
            portfolio.sell("PG", 10)
        except Exception:
            # The order failed; carry on as if the action had been Hold.
            pass
        else:
            num_sells += 1

    # Move the market forward before measuring the outcome of the action.
    portfolio.market.update_prices()

    state = portfolio.get_state()
    reward = portfolio.get_value() - value_before
    return state, reward

# Wall-clock reference for the elapsed-time column of the progress log
start_time = time.time()

# Training loop
num_episodes = 3
for episode in range(num_episodes):
    # Start every episode from a reset portfolio and the same market offset
    portfolio.reset()
    portfolio.market.time_offset = 6429

    total_reward = 0
    done = False

    # 0-6046 Training/Val - 90
    num_training_period = 6680
    for period in range(6429, num_training_period):
        # Progress line whenever the period index is a multiple of 20
        if not period % 20:
            print(f"Periode: {period}, Total Reward: {total_reward}, {time.time()-start_time}")

        # Observe, let the agent choose, then apply the choice to the environment
        state = portfolio.get_state()
        action = agent.act(state)
        next_state, reward = step(action)

        # Only the final period of the range is flagged as done
        done = period == num_training_period - 1

        # Store the transition, then run the agent's replay step
        agent.remember(state, action, reward, next_state, done)
        total_reward += reward
        agent.replay()

    # Per-episode report. num_buys / num_sells are not reset in this loop, so
    # they are running totals across episodes, while the portfolio is reset.
    # NOTE(review): confirm the running totals are intended.
    print(f"Episode: {episode+1}, Total Reward: {total_reward}")
    print(num_buys, num_sells, portfolio.balance, portfolio.stocks, sep="\n")
    print("*" * 40)

# 6047-7555 Test
print(num_buys, num_sells, portfolio.balance, portfolio.stocks, sep="\n")

# Save the agent's model weights once all episodes have run
agent.model.save_weights("models/pg/dqn_1")

Periode: 6441, Total Reward: 20.55511474609375, 0.00099945068359375
Periode: 6461, Total Reward: -3360.005111694336, 0.004015922546386719
Periode: 6481, Total Reward: -6907.150115966797, 0.0060155391693115234
Periode: 6501, Total Reward: -10623.841018676758, 64.60846042633057
Periode: 6521, Total Reward: -10512.779922485352, 222.98028349876404
Periode: 6541, Total Reward: -14078.035507202148, 381.69393014907837
Periode: 6561, Total Reward: -15612.514114379883, 540.1401376724243
Periode: 6581, Total Reward: -16755.307693481445, 698.7172634601593
Periode: 6601, Total Reward: -17808.06182861328, 857.5554149150848
Periode: 6621, Total Reward: -18988.347091674805, 1011.5089685916901
Periode: 6641, Total Reward: -19174.928894042954, 1162.832004070282
Periode: 6661, Total Reward: -20560.03387451172, 1314.194213628769
Episode: 1, Total Reward: -20828.045349121094
96
0
139.61441040039062
{'PG': 700}
****************************************
Periode: 6441, Total Reward: 134.43603515625, 1549.5809