In [1]:
from agents.QLearning import QLearning
from agents.SARSA import SARSA
from agents.MonteCarlo import MonteCarlo
from alayna_agents.perfect_agent import PerfectAgent
import metrics
from pongEnv import PongEnv
from pongVisualizer import PongVisualizer
from main import generate_episode
from main import reset_environment
from main import run_trials
from main import run_trials_with_hyperparams
from main import save_agent
from main import createDict

import os
import argparse
# Accumulator shared across cells: each agent cell appends one createDict(...)
# entry, and the final comparison cell reads fields such as 'label' from them.
results: list = []

pygame 2.6.1 (SDL 2.28.4, Python 3.11.7)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Monte Carlo run: build the argument namespace, run the trials, record the summary.
# Keyword order mirrors the original attribute-assignment order so vars(args)
# is unchanged.
args = argparse.Namespace(
    viz=False,
    save=False,
    right=True,
    left=False,
    gamma=0.9,
    alpha=0.8,
    epsilon=0.01,
)
monte_metrics = run_trials(MonteCarlo, args)

# Re-running this cell used to append a second "Monte Carlo" entry, so every
# comparison plot built from `results` showed the agent twice. Drop any earlier
# entry first so the cell is idempotent.
# NOTE(review): assumes createDict stores its first argument under 'label'
# (the comparison cell reads x['label']) — confirm in main.createDict.
results[:] = [entry for entry in results if entry['label'] != "Monte Carlo"]
results.append(createDict("Monte Carlo", MonteCarlo, monte_metrics))

Running trials for <class 'agents.MonteCarlo.MonteCarlo'> with non-default args {'gamma': 0.9, 'alpha': 0.8, 'epsilon': 0.01}

Experiment Metrics:
Average Rewards (last 30 episodes): 195.19
Average Win Rate (last 30 episodes): 61.00%
Percentage of State-Action Pairs Visited (States 0 to 450): 3.11%
Percentage of State-Action Pairs Visited (States 450 to 900): 3.29%



In [3]:
# SARSA run: build the argument namespace, run the trials, record the summary.
# Keyword order mirrors the original attribute-assignment order so vars(args)
# is unchanged.
args = argparse.Namespace(
    viz=False,
    save=False,
    right=True,
    left=False,
    gamma=0.5,
    alpha=0.1,
    epsilon=0.01,
)
sarsa_metrics = run_trials(SARSA, args)

# Re-running this cell used to append a second "SARSA" entry, so every
# comparison plot built from `results` showed the agent twice. Drop any earlier
# entry first so the cell is idempotent.
# NOTE(review): assumes createDict stores its first argument under 'label'
# (the comparison cell reads x['label']) — confirm in main.createDict.
results[:] = [entry for entry in results if entry['label'] != "SARSA"]
results.append(createDict("SARSA", SARSA, sarsa_metrics))

Running trials for <class 'agents.SARSA.SARSA'> with non-default args {'gamma': 0.5, 'alpha': 0.1, 'epsilon': 0.01}

Experiment Metrics:
Average Rewards (last 30 episodes): 245.31
Average Win Rate (last 30 episodes): 82.00%
Percentage of State-Action Pairs Visited (States 0 to 450): 0.44%
Percentage of State-Action Pairs Visited (States 450 to 900): 1.10%



In [4]:
# Q-Learning run: build the argument namespace, run the trials, record the summary.
# Keyword order mirrors the original attribute-assignment order so vars(args)
# is unchanged.
args = argparse.Namespace(
    viz=False,
    save=False,
    right=True,
    left=False,
    gamma=0.8,
    alpha=0.95,
    epsilon=0.01,
)
qlearning_metrics = run_trials(QLearning, args)

# Re-running this cell used to append a second "Q-Learning" entry, so every
# comparison plot built from `results` showed the agent twice. Drop any earlier
# entry first so the cell is idempotent.
# NOTE(review): assumes createDict stores its first argument under 'label'
# (the comparison cell reads x['label']) — confirm in main.createDict.
results[:] = [entry for entry in results if entry['label'] != "Q-Learning"]
results.append(createDict("Q-Learning", QLearning, qlearning_metrics))

Running trials for <class 'agents.QLearning.QLearning'> with non-default args {'gamma': 0.8, 'alpha': 0.95, 'epsilon': 0.01}

Experiment Metrics:
Average Rewards (last 30 episodes): 128.67
Average Win Rate (last 30 episodes): 83.67%
Percentage of State-Action Pairs Visited (States 0 to 450): 12.07%
Percentage of State-Action Pairs Visited (States 450 to 900): 25.77%



In [5]:
import shutil

# Directory that the plotting cells pass as save_path.
# Resolved against the current working directory rather than a hardcoded
# /Users/... location, so the notebook runs on any checkout of the project.
METRICS_PATH = os.path.join(os.getcwd(), 'final_metrics')

if METRICS_PATH:
    # Start every run from an empty directory so figures left over from a
    # previous run never mix with fresh ones.
    if os.path.isdir(METRICS_PATH):
        shutil.rmtree(METRICS_PATH)
    os.makedirs(METRICS_PATH)

In [6]:
# Per-agent figures for the Monte Carlo run; every call passes METRICS_PATH as save_path.
metrics.plot_agent_scores(
    agent_name="Monte Carlo",
    agent_scores=monte_metrics["avg_scores"],
    save_path=METRICS_PATH,
)
metrics.plot_state_visitation(
    monte_metrics["state_visit_percentages"],
    "Monte Carlo",
    save_path=METRICS_PATH,
)

# The remaining three plots all take the same state-action visit counts.
metrics.plot_visit_percentage(
    agent_name="Monte Carlo",
    visit_count=monte_metrics["state_action_visit_count"],
    save_path=METRICS_PATH,
)
metrics.plot_mean_visited_states_per_action(
    agent_name="Monte Carlo",
    visit_count=monte_metrics["state_action_visit_count"],
    save_path=METRICS_PATH,
)
metrics.plot_state_action_distribution_logscale(
    agent_name="Monte Carlo",
    visit_count=monte_metrics["state_action_visit_count"],
    save_path=METRICS_PATH,
)

In [7]:
# Per-agent figures for the SARSA run; every call passes METRICS_PATH as save_path.
metrics.plot_agent_scores(
    agent_name="SARSA",
    agent_scores=sarsa_metrics["avg_scores"],
    save_path=METRICS_PATH,
)
metrics.plot_state_visitation(
    sarsa_metrics["state_visit_percentages"],
    "SARSA",
    save_path=METRICS_PATH,
)

# The remaining three plots all take the same state-action visit counts.
metrics.plot_visit_percentage(
    agent_name="SARSA",
    visit_count=sarsa_metrics["state_action_visit_count"],
    save_path=METRICS_PATH,
)
metrics.plot_mean_visited_states_per_action(
    agent_name="SARSA",
    visit_count=sarsa_metrics["state_action_visit_count"],
    save_path=METRICS_PATH,
)
metrics.plot_state_action_distribution_logscale(
    agent_name="SARSA",
    visit_count=sarsa_metrics["state_action_visit_count"],
    save_path=METRICS_PATH,
)

In [8]:
# Per-agent figures for the Q-Learning run; every call passes METRICS_PATH as save_path.
metrics.plot_agent_scores(
    agent_name="Q-Learning",
    agent_scores=qlearning_metrics["avg_scores"],
    save_path=METRICS_PATH,
)
metrics.plot_state_visitation(
    qlearning_metrics["state_visit_percentages"],
    "Q-Learning",
    save_path=METRICS_PATH,
)

# The remaining three plots all take the same state-action visit counts.
metrics.plot_visit_percentage(
    agent_name="Q-Learning",
    visit_count=qlearning_metrics["state_action_visit_count"],
    save_path=METRICS_PATH,
)
metrics.plot_mean_visited_states_per_action(
    agent_name="Q-Learning",
    visit_count=qlearning_metrics["state_action_visit_count"],
    save_path=METRICS_PATH,
)
metrics.plot_state_action_distribution_logscale(
    agent_name="Q-Learning",
    visit_count=qlearning_metrics["state_action_visit_count"],
    save_path=METRICS_PATH,
)


In [9]:
def _field(key):
    """Return a fresh list holding `key` from every entry of `results`, in order."""
    return [entry[key] for entry in results]


# Cross-agent comparison figures: each call gets the agent labels plus one
# per-agent series pulled from `results`, and passes METRICS_PATH as save_path.
labels = _field('label')
metrics.plot_winning_percentage(labels, _field('win_rates'), save_path=METRICS_PATH)
metrics.plot_cumulative_return(_field('rewards'), labels, save_path=METRICS_PATH)
metrics.plot_mean_visited_states_percentage(_field('visits'), labels, save_path=METRICS_PATH)
metrics.plot_all_agents_scores(_field('scores'), labels, save_path=METRICS_PATH)
metrics.plot_all_agents_scores_smoothed(_field('scores'), labels, save_path=METRICS_PATH)
metrics.plot_winning_percentage_over_episodes(_field('win_statuses'), labels, save_path=METRICS_PATH)
metrics.plot_state_action_distribution_all_logscale(_field('visits'), labels, save_path=METRICS_PATH)
metrics.plot_state_visitation_all(_field('visit_percentages'), labels, save_path=METRICS_PATH)