In [1]:
pip install stable_baselines3

Collecting stable_baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl.metadata (5.1 kB)
Collecting gymnasium<0.30,>=0.28.1 (from stable_baselines3)
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable_baselines3)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading stable_baselines3-2.3.2-py3-none-any.whl (182 kB)
Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium, stable_baselines3
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1 stable_baselines3-2.3.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from typing import List
import matplotlib.pyplot as plt
import numpy as np
from stable_baselines3 import SAC

from src.game.ml.ml_environment import create_environment
from stable_baselines3.common.evaluation import evaluate_policy

In [6]:

# Roll the trained policy out for `num_episodes` episodes and collect:
#   total_rewards   - summed reward of each episode
#   bet_sizes       - action[0] taken at each of the first `num_tracked_rounds` steps
#   final_bankrolls - obs[0] of the last observation seen in each episode

# NOTE(review): the checkpoint name says "ppo" but it is loaded with SAC.load
# below - confirm the file was actually saved by a SAC model.
model_name = "ppo_trained_model"

# os.getcwd() instead of the IPython-only `%pwd` magic: same value, but the
# cell stays plain Python (lintable, runnable when exported as a script).
path = os.getcwd()
output_path = os.path.join(path, "output")
model_path = os.path.join(output_path, model_name)

initial_bankroll = 68000
num_episodes = 1000
# Only the first N steps of every episode have their bet recorded.
num_tracked_rounds = 3

model = SAC.load(model_path)

total_rewards = []
bet_sizes = [[] for _ in range(num_tracked_rounds)]  # One list per tracked round
final_bankrolls = []

env = create_environment(initial_bankroll)

for _ in range(num_episodes):
    obs = env.reset()
    if isinstance(obs, tuple):  # New Gymnasium API returns (obs, info)
        obs = obs[0]
    done = False
    episode_reward = 0
    round_count = 0

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        step_result = env.step(action)

        if len(step_result) == 5:  # New Gymnasium API
            obs, reward, terminated, truncated, info = step_result
            done = terminated or truncated
        else:  # Old Gym API
            obs, reward, done, info = step_result

        episode_reward += reward

        if round_count < num_tracked_rounds:
            # Assuming action[0] is the bet percentage - TODO confirm against
            # the environment's action space.
            bet_sizes[round_count].append(action[0])
        round_count += 1

    total_rewards.append(episode_reward)
    # Assuming obs[0] is the bankroll. NOTE(review): the printed average is
    # ~0.29 while initial_bankroll is 68000, so this value is presumably
    # normalized/scaled by the environment - verify before reading it as money.
    final_bankrolls.append(obs[0])

In [7]:
# Summarize the evaluation run: print reward statistics, then save two figures
# (average bet per round, distribution of final bankrolls) into `output_path`.
print(f"Average reward over {num_episodes} episodes: {np.mean(total_rewards)}")
print(f"Standard deviation of rewards: {np.std(total_rewards)}")
print(f"Best reward: {np.max(total_rewards)}")
print(f"Worst reward: {np.min(total_rewards)}")
print(f"Average final bankroll: {np.mean(final_bankrolls)}")

# Plot average bet sizes per round.
# Labels are derived from bet_sizes so they always match the number of bars.
rounds = [f'Round {index + 1}' for index in range(len(bet_sizes))]
# A round that was never reached has no samples; use NaN explicitly rather than
# np.mean([]), which yields the same NaN but emits a RuntimeWarning.
avg_bets = [np.mean(bets) if len(bets) > 0 else np.nan for bets in bet_sizes]
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(rounds, avg_bets)
ax.set_title('Average Bet Size per Round')
ax.set_ylabel('Bet Size (%)')
fig.savefig(os.path.join(output_path, 'average_bets.png'))
plt.close(fig)  # figure is only written to disk, not shown inline

# Plot distribution of final bankrolls.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(final_bankrolls, bins=50)
ax.set_title('Distribution of Final Bankrolls')
ax.set_xlabel('Bankroll')
ax.set_ylabel('Frequency')
fig.savefig(os.path.join(output_path, 'final_bankrolls.png'))
plt.close(fig)

Average reward over 1000 episodes: 1.1311862468719482
Standard deviation of rewards: 1.1550878286361694
Best reward: 2.470588207244873
Worst reward: -1.4117647409439087
Average final bankroll: 0.2916666865348816
