In [6]:
import csv
import gym
from gym import spaces
import numpy as np
from environment.environment import Environment
from agents.mab_agent import MAB_Agent
from agents.mc_agent import MC_Agent
from agents.sarsa_agent import SARSA_Agent
from train.train import train_agent
import matplotlib.pyplot as plt

In [7]:
# Ticker symbols whose columns are read from the price CSV.
tickers = ["AAPL", "AMZN", "GOOGL", "MSFT", "NVDA", "TSLA"]

# Build {row_index: {ticker: price_as_float}} from the CSV, one entry per data row
# (row_index is the 0-based position of the row after the header).
# The file is opened in a `with` block so the handle is closed deterministically
# instead of being left to the garbage collector; `newline=""` is what the csv
# module asks for so that it can handle line endings itself.
with open("data/nasdaq_stock_prices.csv", mode="r", newline="") as price_file:
    data = {
        i: {t: float(row[t]) for t in tickers}
        for i, row in enumerate(csv.DictReader(price_file, delimiter=","))
    }

In [None]:
# Baseline experiment: one SARSA agent (epsilon=0.1, alpha=0.1, gamma=0.9)
# trained on an environment with a 2-step window.
INITIAL_BALANCE = 10_000
WINDOW_SIZE = 2
EPISODES = 100
VERBOSE = False

environment = Environment(
    data,
    window_size=WINDOW_SIZE,
    initial_balance=INITIAL_BALANCE,
    verbose=VERBOSE,
)
sarsa_agent_optimal = SARSA_Agent(
    environment,
    epsilon=0.1,
    alpha=0.1,
    gamma=0.9,
)

# NOTE(review): the value returned by train_agent is presumably the per-episode
# total reward (the plotting cell treats such results that way) — confirm in
# train/train.py.
training = train_agent(
    sarsa_agent_optimal,
    environment,
    episodes=EPISODES,
    verbose=VERBOSE,
)
# Bare last expression so the notebook renders the result as the cell output.
training

In [None]:
# Comparison experiment: three agent families (MAB, MC, SARSA), each at three
# exploration rates (epsilon = 0.1, 0.5, 1.0), all trained on one shared
# environment with a 5-step window.
INITIAL_BALANCE = 10_000
WINDOW_SIZE = 5
EPISODES = 100
VERBOSE = False
GAMMA = 0.7
ALPHA = 0.1

environment = Environment(
    data,
    window_size=WINDOW_SIZE,
    initial_balance=INITIAL_BALANCE,
    verbose=VERBOSE,
)

# --- Agents (the *_greedy / *_optimal / *_random suffixes map to epsilon 0.1 / 0.5 / 1.0) ---
mab_agent_greedy = MAB_Agent(environment, epsilon=0.1)
mab_agent_optimal = MAB_Agent(environment, epsilon=0.5)
mab_agent_random = MAB_Agent(environment, epsilon=1.0)

mc_agent_greedy = MC_Agent(environment, epsilon=0.1, gamma=GAMMA)
mc_agent_optimal = MC_Agent(environment, epsilon=0.5, gamma=GAMMA)
mc_agent_random = MC_Agent(environment, epsilon=1.0, gamma=GAMMA)

sarsa_agent_greedy = SARSA_Agent(environment, epsilon=0.1, alpha=ALPHA, gamma=GAMMA)
sarsa_agent_optimal = SARSA_Agent(environment, epsilon=0.5, alpha=ALPHA, gamma=GAMMA)
sarsa_agent_random = SARSA_Agent(environment, epsilon=1.0, alpha=ALPHA, gamma=GAMMA)


def _run_training(agent):
    """Train ``agent`` on the shared ``environment`` using this cell's EPISODES/VERBOSE."""
    return train_agent(agent, environment, episodes=EPISODES, verbose=VERBOSE)


# --- Training runs, executed in the same order the agents were created ---
results_mab_agent_greedy = _run_training(mab_agent_greedy)
# NOTE(review): the doubled "mab_" prefix is kept on purpose — the plotting cell
# reads this exact name.
results_mab_mab_agent_optimal = _run_training(mab_agent_optimal)
results_mab_agent_random = _run_training(mab_agent_random)

results_mc_agent_greedy = _run_training(mc_agent_greedy)
results_mc_agent_optimal = _run_training(mc_agent_optimal)
results_mc_agent_random = _run_training(mc_agent_random)

results_sarsa_agent_greedy = _run_training(sarsa_agent_greedy)
results_sarsa_agent_optimal = _run_training(sarsa_agent_optimal)
results_sarsa_agent_random = _run_training(sarsa_agent_random)


In [None]:
# One row per training run: (family label, nominal epsilon text, agent, results).
# Both the plot and the printed summary are driven from this table so their
# labels cannot drift apart (the MAB rows were previously printed as "MC").
reward_runs = [
    ("MAB", "0.1", mab_agent_greedy, results_mab_agent_greedy),
    ("MAB", "0.5", mab_agent_optimal, results_mab_mab_agent_optimal),
    ("MAB", "1.0", mab_agent_random, results_mab_agent_random),
    ("MC", "0.1", mc_agent_greedy, results_mc_agent_greedy),
    ("MC", "0.5", mc_agent_optimal, results_mc_agent_optimal),
    ("MC", "1.0", mc_agent_random, results_mc_agent_random),
    ("SARSA", "0.1", sarsa_agent_greedy, results_sarsa_agent_greedy),
    ("SARSA", "0.5", sarsa_agent_optimal, results_sarsa_agent_optimal),
    ("SARSA", "1.0", sarsa_agent_random, results_sarsa_agent_random),
]

# Overlay every run's result curve; the legend entry uses the agent's current
# `epsilon` attribute, exactly as the individual plot calls did.
for run_family, _, run_agent, run_rewards in reward_runs:
    plt.plot(run_rewards, label=f"{run_family} {run_agent.epsilon}")

plt.legend()
plt.title("Total rewards over episodes")
plt.xlabel("Episodes")
plt.ylabel("Total rewards")
plt.show()

# Mean of each run's results, labelled with the correct agent family.
for run_family, run_epsilon, _, run_rewards in reward_runs:
    print(f"{run_family} with epsilon = {run_epsilon}: ", np.mean(run_rewards))