TEAM 37

In [3]:
!pip install gymnasium




[notice] A new release of pip is available: 25.2 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import gymnasium as gym
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import matplotlib.pyplot as plt


In [5]:
def set_seed(seed):
    """Seed every random number generator this notebook draws from.

    The same value is handed, in order, to PyTorch's default generator,
    NumPy's legacy global generator and Python's ``random`` module.

    Args:
        seed: Seed value forwarded unchanged to all three libraries.
    """
    for seeder in (torch.manual_seed, np.random.seed, random.seed):
        seeder(seed)


In [6]:
class ActorCritic(nn.Module):
    """Two-headed MLP with a shared trunk for actor-critic learning.

    The trunk is two 64-unit ReLU layers. The ``actor`` head maps the trunk
    features to one unnormalised score per action and the ``critic`` head
    maps the same features to a single scalar.
    """

    def __init__(self, state_dim, action_dim):
        """Build the shared trunk and both heads.

        Args:
            state_dim: Size of the input vector fed to the trunk.
            action_dim: Number of outputs of the actor head.
        """
        super().__init__()
        hidden = 64

        # Creation order (trunk, actor head, critic head) determines how a
        # seeded RNG initialises the weights, so it is kept fixed.
        trunk_layers = [
            nn.Linear(state_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
        ]
        self.shared = nn.Sequential(*trunk_layers)
        self.actor = nn.Linear(hidden, action_dim)
        self.critic = nn.Linear(hidden, 1)

    def forward(self, x):
        """Return ``(actor_output, critic_output)`` computed from ``x``."""
        features = self.shared(x)
        policy_scores = self.actor(features)
        state_value = self.critic(features)
        return policy_scores, state_value


In [7]:
def train(optimizer_name="adam", seed=0, episodes=500, gamma=0.99, lr=1e-3):
    """Train a one-step actor-critic agent on CartPole-v1.

    After every environment step the network is updated from a single
    transition: the critic regresses towards the one-step TD target and the
    actor follows the policy gradient weighted by the TD error.

    Args:
        optimizer_name: ``"adam"`` or ``"rmsprop"``.
        seed: Seed for the Python/NumPy/PyTorch generators and for the
            environment's first reset.
        episodes: Number of episodes to run.
        gamma: Discount factor used in the TD target.
        lr: Learning rate passed to the optimizer.

    Returns:
        List with the total (undiscounted) reward of each episode.

    Raises:
        ValueError: If ``optimizer_name`` is not a supported optimizer.
    """
    optimizer_classes = {"adam": optim.Adam, "rmsprop": optim.RMSprop}
    if optimizer_name not in optimizer_classes:
        # Fail early instead of hitting an unbound `optimizer` mid-training.
        raise ValueError(
            f"Unknown optimizer_name {optimizer_name!r}; "
            f"expected one of {sorted(optimizer_classes)}"
        )

    set_seed(seed)

    env = gym.make("CartPole-v1")
    try:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.n

        model = ActorCritic(state_dim, action_dim)
        optimizer = optimizer_classes[optimizer_name](model.parameters(), lr=lr)

        rewards_history = []

        for episode in range(episodes):
            # Seed the environment only on the first reset. Passing the seed
            # on every reset restarts the environment's RNG, so every episode
            # would begin from exactly the same initial state.
            state, _ = env.reset(seed=seed if episode == 0 else None)
            done = False
            total_reward = 0

            while not done:
                state_tensor = torch.as_tensor(state, dtype=torch.float32)
                logits, value = model(state_tensor)

                # Building the distribution from logits avoids a separate,
                # less numerically stable softmax.
                dist = torch.distributions.Categorical(logits=logits)
                action = dist.sample()

                next_state, reward, terminated, truncated, _ = env.step(action.item())
                done = terminated or truncated

                total_reward += reward

                # The bootstrap value is a fixed regression target: computing
                # it without gradients keeps the critic loss from pushing
                # V(next_state) towards V(state).
                with torch.no_grad():
                    if terminated:
                        # Only a true terminal state has zero future value; a
                        # time-limit truncation still bootstraps.
                        next_value = torch.zeros_like(value)
                    else:
                        next_state_tensor = torch.as_tensor(next_state, dtype=torch.float32)
                        _, next_value = model(next_state_tensor)

                target = reward + gamma * next_value
                advantage = target - value

                # The actor treats the advantage as a constant weight.
                actor_loss = -dist.log_prob(action) * advantage.detach()
                critic_loss = advantage.pow(2)

                loss = actor_loss + critic_loss

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                state = next_state

            rewards_history.append(total_reward)
    finally:
        # Release the environment even if training raises.
        env.close()

    return rewards_history


In [8]:
def run_experiment(optimizer_name, seeds=5):
    """Train once per seed and collect the reward histories.

    ``train`` is called with seeds ``0, 1, ..., seeds - 1`` in that order.

    Args:
        optimizer_name: Forwarded to ``train`` as ``optimizer_name``.
        seeds: Number of consecutive seeds (starting at 0) to run.

    Returns:
        ``np.ndarray`` built from the list of per-seed reward histories
        (one row per seed when every history has the same length).
    """
    per_seed_rewards = [
        train(optimizer_name=optimizer_name, seed=run_seed)
        for run_seed in range(seeds)
    ]
    return np.array(per_seed_rewards)


In [None]:
# Train with Adam first, then RMSprop, using five seeds for each optimizer.
adam_results, rms_results = (
    run_experiment(name, seeds=5) for name in ("adam", "rmsprop")
)


In [None]:
def plot_results(adam, rms):
    """Plot the mean reward curve of each optimizer with a +/- 1 std band.

    Mean and standard deviation are taken over axis 0 of each array, and the
    resulting curves are drawn against their index (labelled "Episodes").

    Args:
        adam: Array of rewards for the Adam runs.
        rms: Array of rewards for the RMSprop runs.
    """
    # Compute all statistics before touching matplotlib.
    curves = [
        (label, runs.mean(axis=0), runs.std(axis=0))
        for label, runs in (("Adam", adam), ("RMSprop", rms))
    ]

    plt.figure(figsize=(10, 6))

    for label, mean_curve, std_curve in curves:
        plt.plot(mean_curve, label=label)
        plt.fill_between(
            range(len(mean_curve)),
            mean_curve - std_curve,
            mean_curve + std_curve,
            alpha=0.2,
        )

    plt.xlabel("Episodes")
    plt.ylabel("Total Reward")
    plt.title("Adam vs RMSprop on CartPole")
    plt.legend()
    plt.show()

# Needs `adam_results` and `rms_results` to exist already: run the experiment
# cell before this one, otherwise this call raises NameError.
plot_results(adam_results, rms_results)


NameError: name 'adam_results' is not defined

In [None]:
import numpy as np
from scipy.stats import ttest_ind

# Reuse the results of the earlier experiment cell when they are already in
# the kernel: training is seeded, so running it again only repeats the same
# expensive work. Fall back to training here if that cell was skipped.
try:
    adam_results, rms_results
except NameError:
    adam_results = run_experiment("adam", seeds=5)
    rms_results = run_experiment("rmsprop", seeds=5)

# ---------------------------
# AUC (Area Under the Curve)
# ---------------------------
def calculate_auc(results):
    """Return the area under each reward curve.

    The area is the plain sum of the values along axis 1, i.e. one total per
    row of ``results``.
    """
    return np.sum(results, axis=1)

# One AUC value per run, for each optimizer.
adam_auc, rms_auc = calculate_auc(adam_results), calculate_auc(rms_results)

# ---------------------------
# 1. Convergence Speed
# ---------------------------
def convergence_speed(results, threshold=475):
    """Find, for each run, the first episode whose reward reaches ``threshold``.

    Args:
        results: Iterable of runs, each a sequence of per-episode rewards.
        threshold: Reward an episode must reach (``>=``) to count as a hit.

    Returns:
        ``np.ndarray`` with one entry per run: the index of the first episode
        that hits the threshold, or ``len(run)`` if no episode does.
    """
    first_hits = [
        next(
            (idx for idx, ep_reward in enumerate(run) if ep_reward >= threshold),
            len(run),  # sentinel: the threshold was never reached
        )
        for run in results
    ]
    return np.array(first_hits)

# First-hit episode index per run (default threshold), for each optimizer.
adam_speed, rms_speed = convergence_speed(adam_results), convergence_speed(rms_results)

# ---------------------------
# 2. Final Performance
# ---------------------------
def final_performance(results, window=50):
    """Return each row's mean over its last ``window`` columns.

    When a row has fewer than ``window`` columns the slice simply covers the
    whole row.
    """
    tail = results[:, -window:]
    return np.mean(tail, axis=1)

adam_final, rms_final = final_performance(adam_results), final_performance(rms_results)

# ---------------------------
# 3. Stability (Variance)
# ---------------------------
# Variance of the rewards within each run (taken along axis 1).
adam_variance, rms_variance = (
    runs.var(axis=1) for runs in (adam_results, rms_results)
)

# ---------------------------
# 4. Early Learning
# ---------------------------
def early_performance(results, window=100):
    """Return each row's mean over its first ``window`` columns.

    When a row has fewer than ``window`` columns the slice simply covers the
    whole row.
    """
    head = results[:, :window]
    return np.mean(head, axis=1)

adam_early = early_performance(adam_results)
rms_early = early_performance(rms_results)

# ---------------------------
# 5. Statistical Test
# ---------------------------
# Welch's t-test (equal_var=False): nothing guarantees that the two
# optimizers' final rewards share a variance, and with only a handful of
# seeds the pooled-variance Student test can misstate the p-value.
t_stat, p_value = ttest_ind(adam_final, rms_final, equal_var=False)

# ---------------------------
# PRINT RESULTS
# ---------------------------
print("----- METRIC VALUES -----")

# (label, per-run Adam values, per-run RMSprop values); each row prints the means.
metric_rows = (
    ("AUC", adam_auc, rms_auc),
    ("Final Reward", adam_final, rms_final),
    ("Convergence Episodes", adam_speed, rms_speed),
    ("Variance", adam_variance, rms_variance),
    ("Early Learning", adam_early, rms_early),
)
for metric_label, adam_values, rms_values in metric_rows:
    print(metric_label + ": Adam =", adam_values.mean(), " | RMSprop =", rms_values.mean())

print("P-value (final reward):", p_value)

# ---------------------------
# Determine Winner Per Metric
# ---------------------------
score_adam = 0
score_rms = 0

print("\n----- WINNER PER METRIC -----")

# (label, Adam mean, RMSprop mean, True when a larger value is better)
metric_comparisons = [
    ("AUC", adam_auc.mean(), rms_auc.mean(), True),
    ("Final Reward", adam_final.mean(), rms_final.mean(), True),
    ("Convergence Speed", adam_speed.mean(), rms_speed.mean(), False),
    ("Stability (Variance)", adam_variance.mean(), rms_variance.mean(), False),
    ("Early Learning", adam_early.mean(), rms_early.mean(), True),
]

for metric_name, adam_mean, rms_mean, higher_is_better in metric_comparisons:
    if higher_is_better:
        adam_wins, rms_wins = adam_mean > rms_mean, rms_mean > adam_mean
    else:
        adam_wins, rms_wins = adam_mean < rms_mean, rms_mean < adam_mean

    if adam_wins:
        print(f"{metric_name}: Adam better")
        score_adam += 1
    elif rms_wins:
        print(f"{metric_name}: RMSprop better")
        score_rms += 1
    else:
        # Equal (or non-comparable, e.g. NaN) means: nobody scores. This
        # matters when, for instance, neither optimizer ever reaches the
        # convergence threshold and both speeds equal the episode count.
        print(f"{metric_name}: Tie")

# ---------------------------
# Overall Winner
# ---------------------------
print("\n----- OVERALL RESULT -----")

# Pick the verdict from the per-metric scores, then print it once.
if score_adam == score_rms:
    overall_message = "Overall Result: Tie / Comparable Performance"
elif score_adam > score_rms:
    overall_message = "Overall Winner on CartPole: ADAM"
else:
    overall_message = "Overall Winner on CartPole: RMSPROP"
print(overall_message)

print("\nScore -> Adam:", score_adam, "| RMSprop:", score_rms)
