In [2]:
!git clone https://github.com/jezknee/StartupsGame.git

Cloning into 'StartupsGame'...
remote: Enumerating objects: 259, done.
remote: Counting objects: 100% (259/259), done.
remote: Compressing objects: 100% (180/180), done.
remote: Total 259 (delta 166), reused 167 (delta 76), pack-reused 0 (from 0)
Receiving objects: 100% (259/259), 2.52 MiB | 6.48 MiB/s, done.
Resolving deltas: 100% (166/166), done.


In [3]:
import sys
import os

# Locate the cloned repository relative to the current working directory.
# Assumes the clone is named 'StartupsGame' and sits directly under the CWD.
current_dir = os.getcwd()
repo_path = os.path.join(current_dir, 'StartupsGame')

# Make the repository's .py files importable. The membership check keeps this
# cell idempotent: re-running it no longer appends a duplicate sys.path entry.
if repo_path not in sys.path:
    sys.path.append(repo_path)
    print(f"Added {repo_path} to the system path.")
else:
    print(f"{repo_path} is already on the system path.")

Added /content/StartupsGame to the system path.


In [4]:
from ai_agent import Agent
import numpy as np
import gymnasium as gym
import matplotlib.pyplot as plt
import traceback
import sys
import startups_AI_game as sg
import startups_RL_environment as sr
import pandas as pd
from datetime import datetime
import time

def plotLearning(x, scores, eps_history, filename):
    """Plot training scores and epsilon values, save the figure, then show it.

    Two stacked panels are drawn against the same x values: `scores` on top
    and `eps_history` below. The figure is written to `filename` at 300 dpi
    and then displayed.

    Args:
        x: x-axis values (labelled 'Episode') shared by both panels.
        scores: one value per entry of `x`, drawn in the top panel.
        eps_history: one value per entry of `x`, drawn in the bottom panel.
        filename: destination passed to `plt.savefig`.

    Returns:
        None. Any exception raised while plotting or saving is caught,
        reported on stdout together with its traceback, and not re-raised.
    """
    print("Creating plot...")

    # One row per panel: (series, line format, legend label, y label, title).
    panel_specs = (
        (scores, 'b-', 'Score', 'Score', 'Training Scores'),
        (eps_history, 'r-', 'Epsilon', 'Epsilon', 'Epsilon Decay'),
    )

    try:
        fig, (top_ax, bottom_ax) = plt.subplots(2, 1, figsize=(10, 8))

        for axis, spec in zip((top_ax, bottom_ax), panel_specs):
            series, line_fmt, legend_label, y_label, panel_title = spec
            axis.plot(x, series, line_fmt, alpha=0.7, label=legend_label)
            axis.set_xlabel('Episode')
            axis.set_ylabel(y_label)
            axis.set_title(panel_title)
            axis.grid(True)
            axis.legend()

        plt.tight_layout()
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        print(f"Plot saved as {filename}")
        plt.show()
    except Exception as err:
        # Best-effort: a plotting failure is reported but never propagated.
        print(f"Error in plotting: {err}")
        traceback.print_exc()

print("Script starting...")


if __name__ == '__main__':
    # Training driver: build the environment and the agent, play
    # `num_episodes` episodes, then persist the model, a per-episode CSV
    # history and a learning-curve plot.
    from pathlib import Path  # imported here because only this block uses it

    print("Main block entered")
    # [company name, number] pairs handed to the environment as
    # `default_company_list`.
    default_companies = [["Giraffe Beer", 5],["Bowwow Games",6],["Flamingo Soft",7],["Octo Coffee", 8],["Hippo Powertech", 9],["Elephant Mars Travel", 10]]
    player_actions_pick_up = ["pickup_deck", "pickup_market"]
    player_actions_put_down = ["putdown_shares", "putdown_market"]

    try:
        # Output locations are derived from where the game module was imported
        # from instead of a hard-coded Windows user directory, so the script
        # also works on Colab/Linux. The model lives inside the repository
        # directory; the history CSV and the plot go one level above it.
        repo_dir = Path(sr.__file__).resolve().parent
        model_path = repo_dir / 'startup_model4.keras'
        history_path = repo_dir.parent / 'game_history.csv'
        plot_path = repo_dir.parent / 'startups_plot.png'

        print("Creating environment...")
        env = sr.StartupsEnv(total_players=4, num_humans=0, default_company_list=default_companies)
        print(f"Environment created successfully. Action space: {env.action_space}, Observation space: {env.observation_space}")

        print("Creating agent...")
        # n_actions / input_dims are read from the environment's spaces so
        # they stay in sync with it without hardcoding.
        agent = Agent(
            alpha=0.0005,
            gamma=0.99,
            n_actions=env.action_space.n,
            epsilon=1.0,
            batch_size=64,
            input_dims=env.observation_space.shape[0],
            epsilon_dec=0.996,
            epsilon_end=0.01,
            mem_size=1000000,
            fname=str(model_path),
        )
        print("Agent created successfully")

        game_history = []   # one summary dict per completed episode
        action_history = []

        scores = []
        eps_history = []
        num_episodes = 1
        max_steps = 50      # hard cap on env.step() calls per episode

        for i in range(num_episodes):
            print(f"Starting episode {i}")
            done = False
            score = 0
            episode_start_time = time.time()  # wall-clock start, for 'runtime'

            try:
                observation, info = env.reset()
                print(f"Episode {i} - Initial observation shape: {observation.shape}")
            except Exception as e:
                # Skip this episode entirely; nothing is appended to
                # scores / eps_history / game_history for it.
                print(f"Error during env.reset(): {e}")
                traceback.print_exc()
                continue

            step_count = 0
            rl_actions_taken = 0

            while not done and step_count < max_steps:
                try:
                    current_phase = env.state_controller.get_current_phase()
                    print(f"Step {step_count}, Phase: {current_phase}")

                    if current_phase in (sr.TurnPhase.RL_PUTDOWN, sr.TurnPhase.RL_PICKUP):
                        action = agent.choose_action(observation, env)
                        print(f"RL agent choosing action {action}")
                        rl_actions_taken += 1

                        observation_, reward, terminated, truncated, info = env.step(action)
                        done = terminated or truncated

                        if info.get('invalid_action'):
                            print(f"Invalid action taken: {action}")

                        # Store the transition, then advance to the new state.
                        agent.remember(observation, action, reward, observation_, done)
                        observation = observation_
                        score += reward

                        # Train only once the replay memory holds more than
                        # one batch of transitions.
                        if agent.memory.mem_cntr > agent.batch_size:
                            agent.learn()

                        print(f"Action {action}, Reward: {reward}, Score: {score}")

                    else:
                        # Not the RL agent's turn - step anyway to advance the
                        # game state. A dummy action (0) is passed; the reward
                        # of this step is neither scored nor remembered.
                        # NOTE(review): the captured run log shows an RL_PICKUP
                        # action being executed inside this very step() call,
                        # which suggests the dummy 0 may be applied as the
                        # agent's own pickup, bypassing choose_action() and the
                        # replay memory - confirm against StartupsEnv.step().
                        observation_, reward, terminated, truncated, info = env.step(0)
                        done = terminated or truncated
                        observation = observation_
                        print("Other players' turn, game state advanced")

                    step_count += 1

                except Exception as e:
                    print(f"Error during step {step_count} of episode {i}: {e}")
                    traceback.print_exc()
                    break

            # Report the cap only when it actually cut the episode short, not
            # when the game happened to finish on the last allowed step.
            if not done and step_count >= max_steps:
                print(f"Episode {i} exceeded {max_steps} steps, ending...")

            episode_runtime = time.time() - episode_start_time
            game_history.append({
                'episode': i,
                'score': score,
                'runtime': episode_runtime,
                'steps': step_count,
                'rl_actions': rl_actions_taken,
                'epsilon': agent.epsilon,
                'rl_rank': env._calculate_player_rank()+1,
                'rl_coins': env._get_coins_for_score()
            })
            eps_history.append(agent.epsilon)
            scores.append(score)

            # Rolling mean over the most recent (up to 101) recorded scores.
            # Slicing from the end, rather than by episode number, stays
            # correct when earlier episodes were skipped by a failed reset and
            # `scores` is therefore shorter than i + 1.
            avg_score = np.mean(scores[-101:])
            print(f'episode {i}, score {score:.2f}, average score {avg_score:.2f}, epsilon {agent.epsilon:.3f}')

            # Periodic checkpoint every 100 episodes (episode 0 excluded).
            if i % 100 == 0 and i > 0:
                try:
                    agent.save_model()
                    print(f"Model saved at episode {i}")
                except Exception as e:
                    print(f"Error saving model: {e}")

        # Final model save
        try:
            agent.save_model()
            print("Final model saved")
        except Exception as e:
            print(f"Error saving final model: {e}")

        # After training loop, before plotting
        print("Saving game history...")
        pd.DataFrame(game_history).to_csv(history_path, index=False)
        print(f"Saved history for {len(game_history)} episodes")

        print("Training completed, creating plot...")
        filename = str(plot_path)
        x = [i+1 for i in range(len(scores))]
        plotLearning(x, scores, eps_history, filename)
        print("Script completed successfully")

    except Exception as e:
        print(f"Error occurred: {e}")
        traceback.print_exc()
        sys.exit(1)

print("Script finished")

Script starting...
Main block entered
Creating environment...
Environment created successfully. Action space: Discrete(19), Observation space: Box(-inf, inf, (71,), float32)
Creating agent...
No saved model found. Building new model...
Agent created successfully
Starting episode 0
Episode 0 - Initial observation shape: (71,)
Step 0, Phase: TurnPhase.OTHER_PLAYERS
Player 1 picks up from deck.
Player 1 puts down Flamingo Soft putdown_shares.
The market is now {}
Player 1's shares are now: {'Flamingo Soft': 1}
Player 1's anti-monopoly chips are now {'Flamingo Soft'}
RL agent's turn. Hand size: 3
Step starting - Current phase: TurnPhase.RL_PICKUP, Hand size: 3
Executing action Action Type: pickup_deck, Card: None in phase TurnPhase.RL_PICKUP
Hand size before action: 3
Pickup executed. New hand size: 4
Changing phase. Current phase: TurnPhase.RL_PICKUP, Hand size: 4
New phase: TurnPhase.RL_PUTDOWN, Hand size: 4
Other players' turn, game state advanced
Step 1, Phase: TurnPhase.RL_PUTDOWN
RL 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
The market is now {'Flamingo Soft': 1}
Player 2's shares are now: {}
Player 2's anti-monopoly chips are now set()
Putdown executed. New hand size: 3
Changing phase. Current phase: TurnPhase.RL_PUTDOWN, Hand size: 3
New phase: TurnPhase.OTHER_PLAYERS, Hand size: 3
Player 3 picks up from deck.
Player 3 puts down Elephant Mars Travel putdown_shares.
The market is now {'Flamingo Soft': 1}
Player 3's shares are now: {'Hippo Powertech': 1, 'Elephant Mars Travel': 1}
Player 3's anti-monopoly chips are now {'Hippo Powertech', 'Elephant Mars Travel'}
Player 4 puts down Elephant Mars Travel Action Type: putdown_shares, Card: Elephant Mars Travel.
The market is now {'Flamingo Soft': 1}
Player 4's shares are now: {'Elephant Mars Travel': 1}
Player 4's anti-monopoly chips are now set()
Player 1 picks up from deck.
Player 1 puts down Hippo Powertech putdown_shares.
The market is now {'Flamingo Soft': 1}
Player 1's shares are now: {'Fla

KeyboardInterrupt: 