# Multi Agent Reinforcement Learning
In this notebook we create several reinforcement learning environments based on OpenAI Gym's FrozenLake game:
- a single-agent frozen lake environment
- a multi-agent / single-goal environment
- a multi-agent / 4-goal environment

In [None]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import matplotlib.pyplot as plt
import time
import random
import sys
import pygame
from collections import defaultdict
import os
import colorsys

from utils import run_simulation, visualizePolicyCommonGoal
from environments import MAPS, FrozenLakeOneGoal, createMap,FrozenLake4goals
from algorithms import SingleGoalCentralQLearning, RandomPolicy, IndependentQLearning, AlternatingIQL

## Single agent

## Multi agents
### Common goal

In [None]:
def map_size_from_name(map_name, default=4):
    """Return the integer side length encoded in a map name such as '4x4'.

    Args:
        map_name: A name of the form '<rows>x<cols>' (e.g. '4x4', '10x10'),
            or None when no named map is requested.
        default: Size returned when `map_name` is None.

    Returns:
        The number before the 'x' as an int, or `default` for None.

    Raises:
        ValueError: If the part before the 'x' is not an integer.
    """
    if map_name is None:
        return default
    # Parse the whole leading number: indexing `map_name[0]` would yield the
    # *string* '4' and would read '1' for a name like '10x10'.
    return int(str(map_name).lower().split("x")[0])


if __name__ == "__main__":
    print("Training the agents...")

    num_agent = 2

    # Training hyper-parameters, passed positionally to run_simulation below.
    n_ep            = int(10e4)     # 10e4 == 1e5, i.e. 100 000 episodes
    learning_rate   = 0.1
    discount_factor = 0.9
    # NOTE(review): the initial exploration rate is above 1. If it is used as an
    # epsilon probability, the agents act purely at random until the decay
    # brings it below 1 — confirm this warm-up is intended.
    explo_rate      = 5.0
    explo_decay     = 0.999
    min_explo_rate  = 0.05
    map_name        = '4x4'

    # Integer grid size derived from the map name (falls back to 4 for None).
    map_size = map_size_from_name(map_name)

    seed            = 0

    # NOTE(review): createMap is still called with map_name=None, so only the
    # size taken from `map_name` is used — confirm whether the named map should
    # be passed instead.
    map_    = createMap(num_agent, map_size, seed=seed, map_name=None)
    env     = FrozenLakeOneGoal(map_=map_, max_steps=100, num_agents=num_agent)

    trained_agent   = run_simulation(map_, num_agent, learning_rate, discount_factor, explo_rate, explo_decay, min_explo_rate, num_episodes=n_ep)
    print("Training complete!")

    # Visualize the learned policy
    print("Visualizing the learned policy...")
    visualizePolicyCommonGoal(env, map_, trained_agent, num_episodes=4, num_agents=num_agent)
    

### 4 goals

In [4]:
# Build the 4-goal FrozenLake environment from a single keyword bundle.
n_agents = 4

env_params = dict(
    num_agents=n_agents,
    grid_size=(8, 8),
    slip_prob=0.,
    hole_prob=0.3,
    seed=25,
    collaboration_bonus=0,
    collision_penalty=30,
)

env = FrozenLake4goals(**env_params)

# Display the resulting grid.
env.render()

G H . . . . . G
. . . H . . H H
. H H H . H . .
. H . A . . . A
. . . A . . . H
. H H A H . . .
H H H H . . H .
G . . . . H . G



In this environment there are 4 goals; the maximum reward is attained when the agents are evenly distributed among them:
- we add a collision penalty of 30 every time 2 agents are on the same tile
- each goal is worth 100 at first, and each time an agent reaches a goal, its next reward is halved

In [None]:
# Hyper-parameters forwarded to IndependentQLearning as keyword arguments.
iql_params = dict(
    learning_rate=0.3,            # alpha: how quickly new information is incorporated
    discount_factor=0.99,         # gamma: how much future rewards are valued
    exploration_rate=1.0,         # epsilon: initial exploration rate
    min_exploration_rate=0.05,    # floor for the exploration rate
    exploration_decay=0.999,      # how quickly exploration decreases
)

# Training budget
max_steps = 200
max_episodes = 10000

iql = IndependentQLearning(env, **iql_params)
results = iql.train(
    episodes=max_episodes,
    max_steps=max_steps,
    verbose=True,
)

print("\nPolitiques indépendantes apprises:")
iql.render_policy()

