In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from decpomdp import DecPOMDP
from constant import Constants
import time
import os



In [2]:
# Experiment configuration read by the cells below.
# Problem identifier handed to DecPOMDP; also the prefix of the results CSV name.
file_name = "dectiger"
# Appears in the start-up message and in the results CSV name only; the
# experiment sweep iterates over its own list of game types.
game_type = "cooperative"
# Horizon passed to DecPOMDP and the largest horizon covered by the sweep.
planning_horizon = 10
# First argument that SOLVE passes to game.solve.
num_iterations = 2
# Appears in the start-up message only; the sweep runs both sota=False and sota=True.
sota_ = True


In [3]:
# Load the problem and initialise the shared Constants with it.
problem = DecPOMDP(file_name,horizon=planning_horizon)
Constants.initialize(problem)
# NOTE(review): imported here instead of in the top import cell, presumably
# because Classes depends on Constants already being initialised — confirm
# before moving it.
import Classes

# Summary of the loaded problem.
print(f"game of type {game_type} initiated with SOTA set to = {sota_} with horizon {planning_horizon}")
print(f"game size :\n\t|S| = {len(problem.states)}")
print(f"\t|Z| = {problem.num_joint_observations}\n\t|U| = {problem.num_joint_actions} with |U_i| = {problem.num_actions[0]}")
print(f"intiial_belief : {problem.b0}")


# Action counts of both agents. The second entry previously repeated index 0,
# so it could never differ from the first.
# NOTE(review): assumes num_actions holds one entry per agent — confirm.
print(f"{problem.num_actions[0]},{problem.num_actions[1]}")

game of type cooperative initiated with SOTA set to = True with horizon 10
game size :
	|S| = 2
	|Z| = 4
	|U| = 9 with |U_i| = 3
intiial_belief : [0.5 0.5]
3,3


In [4]:
# solve
def SOLVE(game, iterations=None):
    """Run ``game.solve`` once and measure how long the call takes.

    Parameters
    ----------
    game : object exposing ``solve(iterations, value)``; the call must return
        a pair whose second element is handed back to the caller.
    iterations : iteration count forwarded to ``game.solve``. When omitted,
        the notebook-level ``num_iterations`` setting is used, which keeps
        existing ``SOLVE(game)`` calls working unchanged.

    Returns
    -------
    (point_value, solve_time) : the second element returned by ``game.solve``
        and the elapsed time of that call in seconds.
    """
    if iterations is None:
        iterations = num_iterations
    # perf_counter is monotonic, so the measurement cannot be distorted by
    # system clock adjustments the way time.time() can.
    started = time.perf_counter()
    # NOTE(review): the meaning of the 0.8 argument is not visible in this
    # notebook — see the solve() implementation in Classes.
    _, point_value = game.solve(iterations, 0.8)
    solve_time = time.perf_counter() - started
    return point_value, solve_time

In [5]:
def initialize_database():
    """Create an empty, column-oriented results store.

    Returns a dict that maps every result column name to its own fresh empty
    list; rows are added by appending one value to each list.
    """
    column_names = (
        "gametype",
        "SOTA",
        "horizon",
        "num_iterations",
        "average_time",
        "number_of_beliefs",
        "ave_leader_value_b0",
        "ave_follower_value_b0",
        # "density" and "gap" columns are currently disabled
    )
    # The comprehension gives each column a distinct list object.
    return {column: [] for column in column_names}

def add_to_database(database,horizon,game_type,num_iterations,average_time,num_beliefs,V0_B0,V1_B0,SOTA):
    """Append one experiment result to the column-oriented ``database`` in place.

    Parameters
    ----------
    database : dict mapping column names to lists, as produced by
        ``initialize_database``.
    horizon, game_type, num_iterations, average_time, num_beliefs :
        stored unchanged in the "horizon", "gametype", "num_iterations",
        "average_time" and "number_of_beliefs" columns.
    V0_B0, V1_B0 : stored in "ave_leader_value_b0" and
        "ave_follower_value_b0" respectively.
    SOTA : True or False; stored in the "SOTA" column as a readable label.

    Raises
    ------
    KeyError : if ``SOTA`` is not a key of the label table or ``database``
        lacks one of the columns. Nothing is appended in that case, so all
        columns keep the same length.
    """
    sota = {True:"State of the Art" , False:"Stackelberg"}
    # Build the whole row first: a bad SOTA value fails here, before any
    # column has been touched.
    row = {
        "gametype": game_type,
        "horizon": horizon,
        "SOTA": sota[SOTA],
        "num_iterations": num_iterations,
        "average_time": average_time,
        "number_of_beliefs": num_beliefs,
        "ave_leader_value_b0": V0_B0,
        "ave_follower_value_b0": V1_B0,
        # "gap": abs(V0_B0-V1_B0)   (disabled)
        # "density": density         (disabled)
    }
    missing = [column for column in row if column not in database]
    if missing:
        raise KeyError(f"database is missing columns: {missing}")
    for column, value in row.items():
        database[column].append(value)
    return


In [6]:
# Sweep every game type / solver mode / horizon combination, solve each game
# once and record one result row per run.
database = initialize_database()
for gametype in ["stackelberg","cooperative","zerosum"]:
    for sota in [False,True]:
        for horizon in range(1,planning_horizon+1):
            print(f"\n===== GAME WITH HORIZON {horizon} , SOTA {sota} =====")
            game = Classes.PBVI(problem=problem,horizon=horizon,density=0.1,gametype=gametype,limit=200,sota=sota)
            point_value, time_ = SOLVE(game)
            num_beliefs = game.belief_space.belief_size()
            # Record the configured iteration count (the value SOLVE uses)
            # instead of a hard-coded 2, so the column stays truthful when
            # num_iterations is changed in the config cell.
            add_to_database(database,horizon,gametype,num_iterations,time_,num_beliefs,point_value[0][0],point_value[1][0],sota)
#POLICY PRINTING (disabled):
# print("\nLEADER POLICY\n")
# policy[0].print_trees()
# print("\nFOLLOWER POLICY\n")
# policy[1].print_trees()



database = pd.DataFrame(database)
path = "Results/"
# to_csv does not create missing directories; make sure the target exists.
os.makedirs(path, exist_ok=True)
# Use the configured planning_horizon rather than the loop variable left over
# from the sweep: same value after a full sweep, and still defined when the
# horizon range is empty.
# NOTE(review): file_name is rebound here and a later cell prints it, so the
# rebinding is kept. Re-running this cell without re-running the config cell
# appends the suffix again. The name also carries the configured game_type
# although the file holds rows for every swept game type.
file_name = f"{file_name}_{game_type}_{planning_horizon}_experiment_results.csv"
database.to_csv(path+file_name, index=False)




===== GAME WITH HORIZON 1 , SOTA False =====
	belief expansion done, belief space size = 3

iteration : 0
payoffs :
Leader
 [  -2.  -46.  -46.  -46.  -15. -100.  -46. -100.  -15.] 
Follower
 [-53.5 -52.5 -68.  -78.5 -47.5 -11.   -9.5 -58.  -72. ]



Game stackelberg  ::  Original: -2.888888888888885  --  Reconstructed: (-2.888888888888885, -52.611111111111114)   --  belief [0.5 0.5]  -- DR [0.9797979797979799, 0, 0, 0, 0, 0, 0.02020202020202011, 0, 0]
[array([-1.77777778, -4.        ]), array([-19.33333333, -85.88888889])]
payoffs :
Leader
 [  -2.  -46.  -46.  -46.  -15. -100.  -46. -100.  -15.] 
Follower
 [-53.5 -52.5 -68.  -78.5 -47.5 -11.   -9.5 -58.  -72. ]

	alpha backup at timestep 0 done
	backward induction done
iteration : 1
payoffs :
Leader
 [  -2.  -46.  -46.  -46.  -15. -100.  -46. -100.  -15.] 
Follower
 [-53.5 -52.5 -68.  -78.5 -47.5 -11.   -9.5 -58.  -72. ]

Game stackelberg  ::  Original: -2.888888888888885  --  Reconstructed: (-2.888888888888885, -52.611111111111114)   --  belief [0.5 0.5]  -- DR [0.9797979797979799, 0, 0, 0, 0, 0, 0.02020202020202011, 0, 0]
[array([-1.77777778, -4.        ]), array([-19.33333333, -85.88888889])]
payoffs :
Leader
 [  -2.  -46.  -46.  -46.  -15. -100.  -46. -100.  -15.] 
Follower
 [

In [7]:
# Display the results table (a DataFrame after the experiment cell has run).
database

Unnamed: 0,gametype,SOTA,horizon,num_iterations,average_time,number_of_beliefs,ave_leader_value_b0,ave_follower_value_b0
0,stackelberg,Stackelberg,1,2,0.072933,3,-2.888889,-52.611111
1,stackelberg,Stackelberg,2,2,0.236112,3,8.93696,-103.450839
2,stackelberg,Stackelberg,3,2,0.38881,3,12.846646,-219.682645
3,stackelberg,Stackelberg,4,2,0.624644,3,37.540506,-492.386338
4,stackelberg,Stackelberg,5,2,0.841611,3,77.373495,-972.66988
5,stackelberg,Stackelberg,6,2,0.961995,3,165.467181,-1954.337601
6,stackelberg,Stackelberg,7,2,1.184859,3,331.601845,-3833.792816
7,stackelberg,Stackelberg,8,2,1.309391,3,631.795179,-7724.137431
8,stackelberg,Stackelberg,9,2,1.66561,3,1325.901585,-15491.204097
9,stackelberg,Stackelberg,10,2,1.745307,3,2626.256046,-31019.700303


In [8]:
# file_name was rebound to the results CSV name by the experiment cell, so
# this prints the CSV file name rather than the original problem name.
print(f"RESULTS FOR {file_name}:\n")

RESULTS FOR dectiger_cooperative_10_experiment_results.csv:

