In [1]:
import csv
import gym
from gym import spaces
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

In [2]:
from environment.environment import Environment

from train.train import train_agent

from agents.mab_agent import MAB_Agent
from agents.mc_agent import MC_Agent
from agents.sarsa_agent import SARSA_Agent
from agents.dqn_agent import DQN_Agent

In [3]:
tickers = ["AAPL", "AMZN", "GOOGL", "MSFT", "NVDA", "TSLA"]

data = {i: {t: float(row[t]) for t in tickers} \
    for i, row in enumerate(csv.DictReader( \
    open("data/nasdaq_stock_prices.csv", mode='r'), delimiter=','))
}

In [8]:
INITIAL_BALANCE = 10000
WINDOW_SIZE = 5
EPISODES = 2
VERBOSE = False
LOW_EPSILON = 0.1
HIGH_EPSILON = 0.5
GAMMA = 0.95
ALPHA = 0.3

environment = Environment(data, window_size=WINDOW_SIZE, initial_balance=INITIAL_BALANCE, verbose=VERBOSE)

In [9]:
dqn_agent_low = DQN_Agent(environment, epsilon=LOW_EPSILON, gamma=GAMMA)
results_dqn_agent_low = train_agent(dqn_agent_low, environment, episodes=EPISODES, verbose=VERBOSE)

Device: mps


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [9]:
mab_agent_low = MAB_Agent(environment, epsilon=LOW_EPSILON)
mab_agent_high = MAB_Agent(environment, epsilon=HIGH_EPSILON)

mc_agent_low = MC_Agent(environment, epsilon=LOW_EPSILON, gamma=GAMMA)
mc_agent_high = MC_Agent(environment, epsilon=HIGH_EPSILON, gamma=GAMMA)

sarsa_agent_low = SARSA_Agent(environment, epsilon=LOW_EPSILON, gamma=GAMMA)
sarsa_agent_high = SARSA_Agent(environment, epsilon=HIGH_EPSILON, alpha=ALPHA, gamma=GAMMA)

results_mab_agent_low = train_agent(mab_agent_low, environment, episodes=EPISODES, verbose=VERBOSE)
results_mab_agent_high = train_agent(mab_agent_high, environment, episodes=EPISODES, verbose=VERBOSE)

results_mc_agent_low = train_agent(mc_agent_low, environment, episodes=EPISODES, verbose=VERBOSE)
results_mc_agent_high = train_agent(mc_agent_high, environment, episodes=EPISODES, verbose=VERBOSE)

results_sarsa_agent_low = train_agent(sarsa_agent_low, environment, episodes=EPISODES, verbose=VERBOSE)
results_sarsa_agent_high = train_agent(sarsa_agent_high, environment, episodes=EPISODES, verbose=VERBOSE)

KeyboardInterrupt: 

In [9]:
import pandas as pd
import numpy as np
import plotly.express as px

# 1. Résultats formatés dans un DataFrame
results_df = pd.DataFrame({
    f"MAB {mab_agent_low.epsilon}": results_mab_agent_low,
    f"MAB {mab_agent_high.epsilon}": results_mab_agent_high,
    f"MC {mc_agent_low.epsilon}": results_mc_agent_low,
    f"MC {mc_agent_high.epsilon}": results_mc_agent_high,
    f"SARSA {sarsa_agent_low.epsilon}": results_sarsa_agent_low,
    f"SARSA {sarsa_agent_high.epsilon}": results_sarsa_agent_high
})

# 2. Lissage via moyenne glissante
window = 20
ma_df = results_df.rolling(window).mean()
ma_df.columns = [f"{col} (MA)" for col in ma_df.columns]

# 3. Fusion et passage au format long
merged_df = pd.concat([results_df, ma_df], axis=1).reset_index().rename(columns={"index": "Episode"})
long_df = merged_df.melt(id_vars="Episode", var_name="Agent", value_name="Reward")
long_df["Type"] = long_df["Agent"].apply(lambda x: "Moving Average" if "(MA)" in x else "Raw")

# 4. Graphique interactif
fig = px.line(
    long_df[long_df["Type"] == "Moving Average"],
    x="Episode",
    y="Reward",
    color="Agent",
    title="Agent Performance Comparison (Moving Average)",
    log_y=True,
    labels={"Reward": "Total Rewards ($)", "Episode": "Episode"},
    color_discrete_sequence=px.colors.qualitative.Set2
)

# 5. Ajout de phases d'entraînement
n = len(results_df)
fig.add_vrect(x0=0, x1=n//3, fillcolor="blue", opacity=0.05, line_width=0, annotation_text="Early Training", annotation_position="top left")
fig.add_vrect(x0=n//3, x1=2*n//3, fillcolor="green", opacity=0.05, line_width=0, annotation_text="Middle Training", annotation_position="top left")
fig.add_vrect(x0=2*n//3, x1=n, fillcolor="red", opacity=0.05, line_width=0, annotation_text="Late Training", annotation_position="top left")

# 6. Annotation du meilleur agent
best_col = ma_df.iloc[-1].idxmax()
best_val = ma_df[best_col].iloc[-1]
fig.add_annotation(
    x=n, y=best_val,
    text=f"Best: {best_col.replace(' (MA)', '')}<br>${best_val:,.0f}",
    showarrow=True, arrowhead=2, ax=-40, ay=-40,
    bgcolor="rgba(0,0,0,0.7)", font=dict(color="white")
)

fig.update_layout(
    font=dict(family="Arial", size=14),
    title_font=dict(size=20),
    xaxis_title="Episode",
    yaxis_title="Total Rewards ($)",
    legend_title_text="Agent",
    hovermode="x unified",
    margin=dict(t=60, l=20, r=20, b=20)
)

fig.show()

# 7. Résumé statistique
print("\n📊 PERFORMANCE SUMMARY")
print(f"{'Agent':<15} {'Mean':>10} {'Std':>10} {'Median':>10} {'Min':>10} {'Max':>10}")
for name, data in zip(results_df.columns, results_df.values.T):
    print(f"{name:<15} {np.mean(data):>10.2f} {np.std(data):>10.2f} {np.median(data):>10.2f} {np.min(data):>10.2f} {np.max(data):>10.2f}")


📊 PERFORMANCE SUMMARY
Agent                 Mean        Std     Median        Min        Max
MAB 0.1           15779.77   30743.22    6049.41   -6422.77  473555.28
MAB 0.5           24824.90   28854.06   17500.54   -5651.51  504748.94
MC 0.1          171731150.03 219488436.50 97296570.73    3758.85 2101152859.62
MC 0.5          10807747.88 11729839.09 7249961.39    1265.90 96490320.63
SARSA 0.1       2132929.08 1506854.25 2086543.03    4268.76 8644749.55
SARSA 0.5       46564028.45 75580828.99 22591756.31    2668.91 1139944827.88
