In [1]:
from agent.agent import Agent
from envs import TradingEnv
from functions import *
import torch
import yaml, os
import csv

In [11]:
def write_to_csv(writer, environment, epsilon, t):
    """
    Writes three rows to csv: first is profit, max_staked, and epsilon; 
        second is buy indices; third is sell indices
    """
    writer.writerow([environment.net_profit(t),environment.max_spent,epsilon])
    writer.writerow(environment.buys)
    writer.writerow(environment.sells)

In [None]:
profits_list = [] # Will hold list of all profits as we go through training
with open(os.path.join(os.getcwd(), 'config.yml'), 'r') as stream:
    config = yaml.load(stream)

test_name = "^HSI_2018"
stock_name, window_size, episode_count = config['stock_name'], config['window_size'], config["num_epochs"]

num_tech_indicators = config['num_tech_indicators']
agent = Agent(window_size + num_tech_indicators, config)
data = getStockDataVec(stock_name)
test_data = getStockDataVec(test_name)
env = TradingEnv(data, window_size)
env_test = TradingEnv(test_data, window_size)
l = len(data) - 1
l1 = len(test_data) - 1

test_portfolio = []  # test_portfolio[i] will hold a tuple of the list of all buys and sells
out_filename = "test_progression.csv"
out_writer = open(out_filename,'a')
writer = csv.writer(out_writer, delimiter=',')
train_out_writer = open("train_progression.csv",'a')
train_writer = csv.writer(train_out_writer,delimiter=',')

for e in range(500 + 1):
    print("Episode " + str(e) + "/" + str(episode_count))
    state = env.get_state(0)

    env.reset_holdings()

    for t in range(l):
        action = agent.act(state)

        # sit
        next_state = env.get_state(t + 1)
        reward = 0

        if action == 1: # buy
            #remembers the price bought at t, and the time bought
            env.buy(t)
            # print("Buy: " + formatPrice(data[t]))

        elif action == 2: # sell
            reward, profit = env.sell(t)
            # print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(profit))

        done = True if t == l - 1 else False
        # Push all values to memory
        agent.memory.push(state, action, next_state, reward)
        state = next_state
        total_profit = env.net_profit(t)
        max_staked = env.max_spent
        

        if done:
            # First we test the agent
            state1 = env_test.get_state(0)

            for t1 in range(l1):
                action = agent.act(state1)
                if action == 1:
                    env_test.buy(t1)
                elif action == 2:
                    env_test.sell(t1)
                state = env_test.get_state(t1+1)

            # then we record our progress in the csv's
            write_to_csv(writer, env_test, agent.epsilon, t1)
            write_to_csv(train_writer, env, agent.epsilon, t)
            
            
            
            percent_return = total_profit / max_staked * 100
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("Max staked: " + formatPrice(max_staked))
            print("Percent return: " + "{0:.2f}%".format(percent_return))
            print("--------------------------------")
            profits_list.append((total_profit, percent_return))
#             print(profits_list)
        agent.optimize()

    if e % config['save_freq'] == 0:
        agent.target_net.load_state_dict(agent.policy_net.state_dict())
        torch.save(agent.policy_net, config['policy_model'])
        torch.save(agent.target_net, config['target_model'])
        
out_writer.close()
train_out_writer.close()

  config = yaml.load(stream)


Episode 0/100
--------------------------------
Total Profit: $1758003.01
Max staked: $1290708.08
Percent return: 136.20%
--------------------------------
Episode 1/100


  reward = max(profit, .0001) // (np.log(delta_t) + 1)


--------------------------------
Total Profit: $1687063.29
Max staked: $1100720.61
Percent return: 153.27%
--------------------------------
Episode 2/100
--------------------------------
Total Profit: $973702.82
Max staked: $847693.18
Percent return: 114.87%
--------------------------------
Episode 3/100
--------------------------------
Total Profit: $874529.32
Max staked: $1033632.09
Percent return: 84.61%
--------------------------------
Episode 4/100
--------------------------------
Total Profit: $605040.42
Max staked: $1105503.19
Percent return: 54.73%
--------------------------------
Episode 5/100
--------------------------------
Total Profit: $583366.00
Max staked: $1171232.86
Percent return: 49.81%
--------------------------------
Episode 6/100
--------------------------------
Total Profit: $448703.91
Max staked: $210694.30
Percent return: 212.96%
--------------------------------
Episode 7/100
--------------------------------
Total Profit: $342597.49
Max staked: $365381.17
Perce

In [None]:
print(profits_list)