In [1]:
import numpy as np
from random import Random
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from seeds import known_seeds
import evaluation
import utils
from action import ActionSpace

In [2]:
space = spaces.Box(low=0, high=1000000, shape=(6, 7), dtype=np.int32)
space.sample()

array([[ 49461, 217663, 243236, 165296,  25488, 296050, 880074],
       [840641, 429168,  93942, 932152, 547166, 335660, 346238],
       [419396, 665582, 973270, 673749, 491487, 811349,  65740],
       [312398, 372723,  31519, 419444, 693365, 476445, 441507],
       [195527, 121764, 598273, 316268, 456260, 296196, 868524],
       [495931, 578763, 166407, 148068, 688970, 100063, 732816]],
      dtype=int32)

In [3]:
    #old fleet function
    def init_fleet(self):
        fleet = {"DC1":{},"DC2":{},"DC3":{},"DC4":{}}
        for i in fleet.keys():
            for j in self.server_generations:
                fleet[i].update(j, {})
                fleet[i][j].update("servers", [])
                fleet[i][j].update("timestep_bought", [])
                fleet[i][j].update("total_owned", 0)
        return fleet

In [4]:

action_space = spaces.Box(low=0, high=3, shape=(4,7), dtype=np.int32)
action_space.sample()

array([[1, 1, 2, 2, 1, 3, 1],
       [2, 2, 2, 3, 0, 3, 3],
       [2, 1, 1, 0, 1, 3, 3],
       [1, 1, 2, 1, 1, 2, 2]], dtype=int32)

In [91]:
from gymnasium.wrappers import FlattenObservation # type: ignore
class CustomEnv(gym.Env):
    def __init__(self):
        #super(self).__init__()

        """
        define self.actionspace and self.observation_space below using 
        variables available in "gym.spaces"
        """

        #agent action space (actions it can make)
        #datacenter, servergen, action, where the action corresponds to the chosen number e.g. 0 = buy, 1=move etc
        self.action_space = spaces.Box(low=0, high=3, shape=(4,7), dtype=np.int32)
        
        self.default_demand, self.datacenters, self.servers, self.selling_prices = utils.load_problem_data()

        self.fleet_columns = ['datacenter_id', 'server_generation', 'server_id', 'action',
       'server_type', 'release_time', 'purchase_price', 'slots_size', 'energy_consumption', 
       'capacity', 'life_expectancy', 'cost_of_moving','average_maintenance_fee', 'cost_of_energy',
       'latency_sensitivity', 'slots_capacity', 'selling_price', 'lifespan', 'moved']

        #agent observation space (what the agent can "see"/information that is fed to agent)
        #a 3d array of latency, server_gen, demand, concatenated with 
        #a 3d array of latency, server_gen, supply
        self.observation_space = spaces.Box(low=0, high=1000000, shape=(6, 7), dtype=np.int32)

        self.seeds_array = known_seeds("training")
        self.seed_counter = 0

        self.server_generations = ['CPU.S1', 'CPU.S2', 'CPU.S3', 'CPU.S4', 'GPU.S1', 'GPU.S2', 'GPU.S3']
        self.latencies = ['low', 'medium', 'high']
        self.data_centers = ['DC1', 'DC2', 'DC3', 'DC4']

    #might need func below to convert agent action into a relevant action
    #def conv_agent_action_to_move(self, action):
    
    #returns mask for the action space based on possible plays
    #def valid_action_mask(self):
    
    #initiallise/reset all of the base variables at the end of the "game"
    #has to return a base/initial observation

    def convert_demand_to_observation(self, demand):
        demand_observation = np.zeros((3,7), np.int32)
        for i in range(len(self.server_generations)):
            servergen_demand = demand[demand["server_generation"] == self.server_generations[i]]
            for j in range(len(self.latencies)):
                latency_demand = servergen_demand[self.latencies[j]]
                demand_observation[j][i] = latency_demand.sum()
        return demand_observation

    def convert_fleet_to_observation(self, fleet):
        observed_fleet = np.zeros((3,7), np.int32)
        for i in range(len(self.data_centers[0:3])):
            for j in range(len(self.server_generations)):
                #filter for the datacenter
                filtered_dc = fleet[fleet["datacenter_id"] == self.data_centers[i]]
                #get sum of the server generation
                gen_total = filtered_dc[filtered_dc["server_generation"] == self.server_generations[j]].shape[0]
                if(self.server_generations[i] == "DC3"):
                    #get dc4 and add onto dc3 total
                    filtered_dc = fleet[fleet["datacenter_id"] == self.datacenters[i+1]]
                    gen_total += filtered_dc[filtered_dc["server_generation"] == self.server_generations[j]].shape[0]
                observed_fleet[i][j] = gen_total
        return observed_fleet

    def init_fleet(self):
        self.fleet = pd.DataFrame(columns=self.fleet_columns)

    def reset(self, seed=None, options=None):
        self.timestep = 1
        self.seed_counter += 1
        self.seed_counter %= 10
        np.random.seed(self.seeds_array[self.seed_counter])

        self.actionspace = ActionSpace()

        self.init_fleet()
        self.demand = evaluation.get_actual_demand(self.default_demand)
        self.timestep_demand = self.demand[self.demand["time_step"] == self.timestep]
        observation_demand = self.convert_demand_to_observation(self.timestep_demand)
        observation_fleet = self.convert_fleet_to_observation(self.fleet)
        observation = np.concatenate((observation_demand, observation_fleet))
        self.done = False
        return observation, {}

    # def buy_demand(self, datacenter, server_gen, timestep_demand):
    #     self.fleet
    
    #buys "number" amount of servers at datacenter
    #NO VALIDITY CHECKS RIGHT NOW
    def buy(self, datacenter, server_gen, number=10):
        ts_fleet = pd.DataFrame(columns=self.fleet_columns[0:3])
        fleet_array = []
        datacenter_array =[]
        server_gen_array=[]
        buy_array=[]
        server_id_array=[]
        for i in range(number):
            server_id = self.actionspace.generate_server_id(server_gen)
            datacenter_array.append(datacenter)
            server_gen_array.append(server_gen)
            server_id_array.append(server_id)
            buy_array.append("buy")
            fleet_array.append([datacenter, server_gen, server_id, "buy"])
        temp = pd.DataFrame({"datacenter_id":datacenter_array, "server_generation":server_gen_array,
         "server_id": server_id_array, "action": buy_array})


        ts_fleet = pd.concat([ts_fleet, temp])
        ts_fleet = ts_fleet.merge(self.servers, on='server_generation', how='left')
        ts_fleet = ts_fleet.merge(self.datacenters, on='datacenter_id', how='left')
        ts_fleet = ts_fleet.merge(self.selling_prices, 
                            on=['server_generation', 'latency_sensitivity'], 
                            how='left')
        ts_fleet.fillna(0)
        self.fleet = pd.concat([self.fleet, ts_fleet])

    #called in a loop where each time it is called the agent chooses an action and change
    #state of game appropriately according to agent action
    def step(self, action):
        """
        after agent move, do it, calc the new observation state
        and the reward from that move it made, if "end" of game set self.done to True

        """

        actions = self.actionspace.convert_actionspace_to_actionV2(action)

        for i in actions:
            if(i[0] == "buy"):
                self.buy(datacenter = i[3], server_gen = i[1])

        
        Zf = evaluation.get_capacity_by_server_generation_latency_sensitivity(self.fleet)
        D = evaluation.get_time_step_demand(self.demand, self.timestep)
        U = evaluation.get_utilization(D, Zf)
        #check if fleet is empty
        if self.fleet.shape[0] > 0:
            # get the server capacity at timestep
            Zf = evaluation.get_capacity_by_server_generation_latency_sensitivity(self.fleet)

            # evaluate objective function at current timestep
            U = evaluation.get_utilization(D, Zf)
    
            L = evaluation.get_normalized_lifespan(self.fleet)
    
            P = evaluation.get_profit(D, 
                           Zf, 
                           self.selling_prices,
                           self.fleet)
                           
            o = U * L * P

            OBJECTIVE += o
            reward = U + L + P + o + (OBJECTIVE/self.time_step)
        else:
            reward = -5

        #reached final timestep
        if(self.timestep == 168):
            self.done = True
        self.timestep += 1

        #extra info on the game if wanted for yourself
        info = {}
        truncated = False
        
        self.timestep_demand = self.demand.query("time_step == {self.timestep}")
        
        observation_demand = self.convert_demand_to_observation(self.timestep_demand)
        observation_fleet = self.convert_fleet_to_observation(self.fleet)
        observation = np.concatenate((observation_demand, observation_fleet))

        return (observation, reward, self.done, truncated, info)


In [92]:
from stable_baselines3.common.env_checker import check_env
env = CustomEnv()
check_env(env)

  self.fleet = pd.concat([self.fleet, ts_fleet])


KeyError: 'high'