In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from decpomdp import DecPOMDP
import Classes
from constant import Constants
import time
import os



In [2]:
# Experiment configuration read by the cells below.

# Largest horizon to solve; also passed to DecPOMDP when the problem is loaded.
planning_horizon = 3
# Forwarded to the PBVI solver as `gametype`.
game_type = "zerosum"
# Problem identifier handed to DecPOMDP; also the prefix of the results CSV name.
file_name = "recycling"
# Only reported in the setup printout; the experiment loop rebinds this name.
sota_ = False
# NOTE(review): not referenced by any visible cell — confirm whether it is still needed.
num_iterations = 1


In [3]:
# Load the problem named in the configuration cell and register it with Classes.
# NOTE(review): the meaning of the positional argument `1` is not visible here — confirm against DecPOMDP.
problem = DecPOMDP(file_name, 1, horizon=planning_horizon)
# constant = Constants(problem)
# utilities = Utilities(constant)
Classes.set_problem(problem)

# Report the run configuration and the size of the loaded game. The lines are
# emitted by a single print call joined with newlines, so the text is unchanged.
print(
    f"game of type {game_type} initiated with SOTA set to = {sota_} with horizon {planning_horizon}",
    f"game size :\n\t|S| = {len(problem.states)}",
    f"\t|Z| = {problem.num_joint_observations}\n\t|U| = {problem.num_joint_actions} with |U_i| = {problem.num_actions[0]}",
    f"intiial_belief : {problem.b0}",
    # NOTE(review): both fields read num_actions[0]; the second was possibly
    # meant to be num_actions[1] — confirm before changing.
    f"{problem.num_actions[0]},{problem.num_actions[0]}",
    sep="\n",
)

problem set to recycling
game of type zerosum initiated with SOTA set to = False with horizon 3
game size :
	|S| = 4
	|Z| = 4
	|U| = 9 with |U_i| = 3
intiial_belief : [1. 0. 0. 0.]
3,3


In [4]:
# solve
def SOLVE(game):
    """Run ``game.solve`` once and time it.

    Args:
        game: Solver object exposing ``solve(...)`` and, once solved, a
            ``value_function`` attribute.

    Returns:
        A ``(policy, solve_time, value_fn)`` tuple: the value returned by
        ``game.solve``, the elapsed time of that call in seconds, and
        ``game.value_function`` as read after the call.
    """
    # perf_counter is a monotonic, high-resolution clock, so the measured
    # duration cannot be distorted by system clock adjustments during the run.
    started = time.perf_counter()
    # NOTE(review): 3 and 0.9 are hard-coded solver arguments whose meaning is
    # not visible here — confirm against the solver's `solve` signature.
    policy = game.solve(3, 0.9)
    solve_time = time.perf_counter() - started
    # Read the value function only after solving.
    value_fn = game.value_function
    return policy, solve_time, value_fn

In [5]:
def initialize_database():
    """Create an empty results table.

    Returns:
        dict: one entry per result column, each mapped to its own new empty
        list, in the column order used for the exported table.
    """
    column_names = (
        "gametype",
        "SOTA",
        "horizon",
        "num_iterations",
        "average_time",
        "number_of_beliefs",
        "leader_value_b0",
        "follower_value_b0",
        # "density" and "gap" columns are currently disabled.
    )
    # The comprehension builds a separate list per column (no shared list object).
    return {column: [] for column in column_names}

def add_to_database(database,horizon,game_type,num_iterations,average_time,num_beliefs,V0_B0,V1_B0,SOTA):
    """Append one experiment result to ``database`` in place.

    Args:
        database: dict of column name -> list; it is mutated.
        horizon: value appended to the "horizon" column.
        game_type: value appended to the "gametype" column.
        num_iterations: value appended to the "num_iterations" column.
        average_time: value appended to the "average_time" column.
        num_beliefs: value appended to the "number_of_beliefs" column.
        V0_B0: value appended to the "leader_value_b0" column.
        V1_B0: value appended to the "follower_value_b0" column.
        SOTA: value appended to the "SOTA" column.

    Returns:
        None.
    """
    # (column, value) pairs, listed in the order the columns are appended to.
    row = (
        ("gametype", game_type),
        ("horizon", horizon),
        ("SOTA", SOTA),
        ("num_iterations", num_iterations),
        ("average_time", average_time),
        ("number_of_beliefs", num_beliefs),
        ("leader_value_b0", V0_B0),
        ("follower_value_b0", V1_B0),
        # "gap" (abs(V0_B0-V1_B0)) and "density" are currently disabled.
    )
    for column, value in row:
        database[column].append(value)


In [6]:
# Solve the game for both SOTA settings and for every horizon from 1 up to
# planning_horizon, recording one result row per (SOTA, horizon) pair.
database = initialize_database()
# Note: this loop rebinds the notebook-level name `sota_`.
for sota_ in [True,False]:
    for horizon in range(1,planning_horizon+1):
        print(f"\n===== GAME WITH HORIZON {horizon} , SOTA {sota_} =====")
        game = Classes.PBVI(problem=problem,horizon=horizon,density=0.1,gametype=game_type,sota=sota_)
        policy, time_ , value_fn = SOLVE(game)
        num_beliefs = game.belief_space.belief_size()
        value0,value1= value_fn.get_values_initial_belief()
        # NOTE(review): the recorded num_iterations is a hard-coded 2, while
        # SOLVE calls game.solve(3, 0.9) and runs once per configuration, so
        # the "average_time" column holds a single run's elapsed time.
        # Confirm which quantity these columns are meant to report.
        add_to_database(database,horizon,game_type,2,time_,num_beliefs,value0,value1,sota_)

database = pd.DataFrame(database)

# Build the output name from planning_horizon rather than the leaked loop
# variable `horizon`, which is undefined (or stale) if the loop runs zero times.
results_suffix = f"_{game_type}_{planning_horizon}_experiment_results.csv"
# `file_name` is rebound to the CSV path as before; the guard keeps re-running
# this cell from stacking the suffix onto an already-suffixed name.
if not file_name.endswith(results_suffix):
    file_name = f"{file_name}{results_suffix}"
database.to_csv(file_name, index=False)




===== GAME WITH HORIZON 1 , SOTA True =====
belief space initialized for 1 timesteps with initial belief =  [array([1., 0., 0., 0.])]
	belief expansion done
iteration : 0


	backup at timestep 1 done
	backup at timestep 0 done
	backward induction done
iteration : 1
	backup at timestep 1 done
	backup at timestep 0 done
	backward induction done
iteration : 2
	backup at timestep 1 done
	backup at timestep 0 done
	backward induction done

===== GAME WITH HORIZON 2 , SOTA True =====
belief space initialized for 2 timesteps with initial belief =  [array([1., 0., 0., 0.])]
	belief expansion done
iteration : 0
	backup at timestep 2 done
	backup at timestep 1 done
	backup at timestep 0 done
	backward induction done
iteration : 1
	backup at timestep 2 done


In [None]:
# Header for the results section. The experiment cell above rebinds `file_name`
# to the results CSV path, so this prints that path rather than the bare
# problem name from the configuration cell.
print(f"RESULTS FOR {file_name}:\n")

Unnamed: 0,gametype,SOTA,horizon,num_iterations,average_time,number_of_beliefs,leader_value_b0,follower_value_b0
0,stackelberg,True,1,2,0.105332,1,"[-2.888888888888885, -2.888888888888885, -2.88...","[-52.611111111111114, -52.611111111111114, -52..."
1,stackelberg,True,2,2,0.789244,4,"[-5.77777777777777, -5.77777777777777, -5.7777...","[-52.611111111111114, -52.611111111111114, -52..."
2,stackelberg,True,3,2,2.827996,7,"[-8.666666666666655, -8.666666666666655, -8.66...","[-52.611111111111114, -52.611111111111114, -52..."
3,stackelberg,False,1,2,0.058213,1,"[-2.888888888888885, -2.888888888888885, -2.88...","[-52.611111111111114, -52.611111111111114, -52..."
4,stackelberg,False,2,2,0.918278,4,"[-5.777777777777775, -5.777777777777775, -5.77...","[-105.22222222222224, -105.22222222222224, -10..."
5,stackelberg,False,3,2,3.327659,7,"[-8.666666666666645, -8.666666666666645, -8.66...","[-157.83333333333337, -157.83333333333337, -15..."
