In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from decpomdp import DecPOMDP
import Classes
from Classes import Belief
from constant import Constants
import random
import time
import os


In [2]:
problems = {'dectiger','longFireFight','relay4',
            '2generals','oneDoor','prisoners',
            'boxPushing','GridSmall',
            'recycling','wirelessDelay','broadcastChannel'
            }

In [3]:
file_name = '2generals'
planning_horizon = 4
num_iterations = 1


In [4]:
#import problem
problem = DecPOMDP("wirelessDelay", 1,horizon=planning_horizon)
# constant = Constants(problem)
# utilities = Utilities(constant)
# Classes.set_problem(problem)

# # print(f"game of initiated with SOTA set to = {sota_} with horizon {planning_horizon}")
print("GAME DESCRIPTION :")
print(f"game size :\n\t|S| = {len(problem.states)}")
print(f"\t|Z| = {problem.num_joint_observations}\n\t|U| = {problem.num_joint_actions} with |U_i| = {problem.num_actions[0]}")
print(f"intiial_belief : {problem.b0}")
print(f"cooperative reward matrix :\n {problem.reward_fn_sa}\n\n\n")

print(f"{problem.num_actions[0]},{problem.num_actions[0]}")

GAME DESCRIPTION :
game size :
	|S| = 64
	|Z| = 36
	|U| = 4 with |U_i| = 2
intiial_belief : [0.25 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.25 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   0.   0.25 0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   0.   0.   0.   0.25 0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   0.   0.   0.   0.   0.  ]
cooperative reward matrix :
 [[ 0. -1. -2. -3. -1. -2. -3. -4. -2. -3. -4. -5. -3. -4. -5. -6.  0. -1.
  -2. -3. -1. -2. -3. -4. -2. -3. -4. -5. -3. -4. -5. -6.  0. -1. -2. -3.
  -1. -2. -3. -4. -2. -3. -4. -5. -3. -4. -5. -6.  0. -1. -2. -3. -1. -2.
  -3. -4. -2. -3. -4. -5. -3. -4. -5. -6.]
 [ 0. -1. -2. -3. -1. -2. -3. -4. -2. -3. -4. -5. -3. -4. -5. -6.  0. -1.
  -2. -3. -1. -2. -3. -4. -2. -3. -4. -5. -3. -4. -5. -6.  0. -1. -2. -3.
  -1. -2. -3. -4. -2. -3. -4. -5. -3. -4. -5. -6.  0. -1. -2. -3. -1. -2.
  -3. -4. -2. -3. -4. -5. -3. -4. -5. -6.]
 [ 0. -1. -2. -3. -1. -2. -3. -

In [5]:
# solve
def SOLVE(game,iterations):
    start_time = time.time()
    policy,leader_values,follower_values = game.solve(iterations,0.9)
    end_time = time.time()
    solve_time = end_time - start_time
    return policy,leader_values,follower_values,solve_time,


In [6]:
def initialize_storage():
    database = {"gametype":[],
                "SOTA" : [],
                "horizon": [],
                    "num_iterations" : [],
                    "average_time" : [],
                    "number_of_beliefs" : [],
                    "ave_leader_value_b0":[],
                    "ave_follower_value_b0":[]
                    # "density" = []
                    # "gap":[]
                   
                    }
    policies = {"cooperative" : [] ,"zerosum":[],"stackelberg":[]}
    policy_comparison_matrix = {"cooperative" : [] ,"zerosum":[],"stackelberg":[]}
    return database,policies,policy_comparison_matrix

def add_to_database(database,horizon,game_type,num_iterations,average_time,num_beliefs,V0_B0,V1_B0,SOTA):
    sota = {True:"State of the Art" , False:"Stackelberg"}
    database["gametype"].append(game_type)
    database["horizon"].append(horizon)
    database["SOTA"].append(sota[SOTA])
    database["num_iterations"].append(num_iterations)
    database["average_time"].append(average_time)
    database["number_of_beliefs"].append(num_beliefs)
    database["ave_leader_value_b0"].append(V0_B0)
    database["ave_follower_value_b0"].append(V1_B0)
    # database["gap"].append(abs(V0_B0-V1_B0))
    # database["density"].append(density)
    return

def export_database(database):
    database = pd.DataFrame(database)
    path = "Results/"
    file= f"{file_name}_{planning_horizon}_{num_iterations}"
    database.to_csv(path+file, index=False)
    return


In [7]:
def policy_comparison_matrix(game):
    weak_leader_value = game.DP(game.belief_space.initial_belief,game.policies["weak"][0],game.policies["strong"][1])
    strong_leader_value = game.DP(game.belief_space.initial_belief,game.policies["strong"][0],game.policies["weak"][1])
    
    return weak_leader_value,strong_leader_value
        


In [8]:
problem = DecPOMDP(file_name,1,horizon=planning_horizon)
database,policies,policy_comparison_matrix = initialize_storage()

for gametype in ["cooperative","zerosum","stackelberg"]:
    for sota_ in [False,True]:
        for horizon in range(1,planning_horizon+1):
            print(f"\n============= {gametype} GAME WITH HORIZON {horizon} , SOTA {sota_} ===========")
            #initialize game with fixed planning horizon
            game = Classes.PBVI(problem=problem.set_horizon(horizon),horizon=horizon,density=0.1,gametype=gametype,sota=sota_)
            #solve game with num_iterations
            policy,leader_values,follower_values, time_  = SOLVE(game,num_iterations)
            #add values to database
            for iters in range(num_iterations):
                add_to_database(database,horizon,gametype,iters+1,time_,game.belief_space.belief_size(),leader_values[iters],follower_values[iters],sota_)
        policies[gametype].append(policy)

    

game.build_comparison_matrix(policy_comparison_matrix,policies)
print("Calculations done... exporting to csv....")
export_database(database)
policy_comparison_matrix



iteration : 1
	belief expansion done


	backup at timestep 1 done
	backup at timestep 0 done
	backward induction done
    extracting policy...
policy extraction done

iteration : 1
	belief expansion done
	backup at timestep 2 done
	backup at timestep 1 done
	backup at timestep 0 done
	backward induction done
    extracting policy...
policy extraction done

iteration : 1
	belief expansion done
	backup at timestep 3 done
	backup at timestep 2 done
	backup at timestep 1 done
	backup at timestep 0 done
	backward induction done
    extracting policy...
policy extraction done

iteration : 1
	belief expansion done
	backup at timestep 4 done
	backup at timestep 3 done
	backup at timestep 2 done
	backup at timestep 1 done
	backup at timestep 0 done
	backward induction done
    extracting policy...
policy extraction done

iteration : 1
	belief expansion done
	backup at timestep 1 done
	backup at timestep 0 done
	backward induction done
    extracting policy...
policy extraction done

iteration : 1
	belief expansion done
	backup at ti

{'cooperative': array([[-40., -10.],
        [-10., -40.]]),
 'zerosum': array([[-40.        ,  -1.        ],
        [ -7.5       , -30.15291916]]),
 'stackelberg': array([[-40., -10.],
        [-10., -40.]])}

game.value_function

In [25]:
games = ["cooperative","zerosum","stackelberg"] 
columns = pd.MultiIndex.from_product([games, ['Strong Follower', 'Weak Follower']], names=['Gametype', 'Follower type'])

for gametype in games:
    database = pd.DataFrame(policy_comparison_matrix[gametype],columns=["Strong F","Weak F"],index=["Strong L","Weak L"])
    path = "policy_matrix/"
    file= f"{file_name}_{gametype}_{horizon}_{num_iterations}"
    database.to_csv(path+file, index=False)

# Create a DataFrame from the list of lists
