## Gymnasium

In [2]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from torch.distributions.normal import Normal
import gymnasium as gym
from gymnasium.wrappers import FlattenObservation
from skrl.models.torch.deterministic import DeterministicMixin
from skrl.models.torch import Model
from skrl.memories.torch import RandomMemory



### Blackjack

In [30]:
# Blackjack environment with both optional rule switches turned off.
env = gym.make(
    "Blackjack-v1",
    natural=False,
    sab=False,
)

In [31]:
class DeterministicActor(DeterministicMixin, Model):
    """Deterministic policy network: a small MLP over a flat observation vector.

    The model expects an already-flattened observation space (for example the
    result of ``gym.spaces.flatten_space``), so ``inputs["states"]`` can be fed
    to the first linear layer directly without any per-sub-space handling.
    """

    def __init__(self, observation_space, action_space, device="cpu", clip_acitons=False):
        """Build the three-layer network.

        Args:
            observation_space: Flattened observation space; its size determines
                the input width of the first layer.
            action_space: Action space; its size determines the output width.
            device: Device passed through to ``Model.__init__``.
            clip_acitons: Flag forwarded to ``DeterministicMixin.__init__``.
                The parameter name is misspelled ("acitons") but is kept as-is
                so existing keyword callers keep working.
        """
        Model.__init__(self, observation_space, action_space, device)
        DeterministicMixin.__init__(self, clip_acitons)

        # num_observations / num_actions are expected to be populated by
        # Model.__init__ from the two spaces.
        self.linear_layer_1 = nn.Linear(self.num_observations, 32)
        self.linear_layer_2 = nn.Linear(32, 16)
        self.action_layer = nn.Linear(16, self.num_actions)

    def compute(self, inputs, role):
        """Map a batch of flat states to actions squashed into (0, 1).

        Args:
            inputs: Dict of model inputs; only ``inputs["states"]`` is read.
                Assumed to be a float tensor of shape
                ``(batch, num_observations)`` - TODO confirm against the
                environment wrapper in use.
            role: Role tag supplied by the caller; unused here.

        Returns:
            Tuple of the sigmoid-activated action tensor and an empty dict.
        """
        states = inputs["states"]

        # torch.relu is used instead of F.relu because torch.nn.functional is
        # never imported in this notebook.
        hidden = torch.relu(self.linear_layer_1(states))
        hidden = torch.relu(self.linear_layer_2(hidden))
        return torch.sigmoid(self.action_layer(hidden)), {}

In [32]:
class DeterministicCritic(DeterministicMixin, Model):
    """Deterministic Q-value network: an MLP over concatenated state and action."""

    def __init__(self, observation_space, action_space, device="cpu", clip_acitons=False):
        """Build the three-layer network.

        Args:
            observation_space: Flattened observation space; contributes
                ``num_observations`` input features.
            action_space: Action space; contributes ``num_actions`` input
                features.
            device: Device passed through to ``Model.__init__``. Defaults to
                ``"cpu"`` to match ``DeterministicActor``.
            clip_acitons: Flag forwarded to ``DeterministicMixin.__init__``.
                The parameter name is misspelled ("acitons") but is kept as-is
                so existing keyword callers keep working.
        """
        Model.__init__(self, observation_space, action_space, device)
        DeterministicMixin.__init__(self, clip_acitons)

        # The critic scores (state, action) pairs, so the input width is the
        # sum of both sizes.
        self.linear_layer_1 = nn.Linear(self.num_observations + self.num_actions, 32)
        self.linear_layer_2 = nn.Linear(32, 16)
        self.linear_layer_3 = nn.Linear(16, 1)

    def compute(self, inputs, role):
        """Return one scalar value per (state, action) pair in the batch.

        Args:
            inputs: Dict of model inputs; reads ``inputs["states"]`` and
                ``inputs["taken_actions"]`` and concatenates them along
                dim 1. Both are assumed to be 2-D float tensors with the same
                batch size - TODO confirm against the agent in use.
            role: Role tag supplied by the caller; unused here.

        Returns:
            Tuple of a ``(batch, 1)`` value tensor and an empty dict.
        """
        # skrl passes the executed actions under the plural key "taken_actions".
        state_action = torch.cat([inputs["states"], inputs["taken_actions"]], dim=1)

        # torch.relu is used instead of F.relu because torch.nn.functional is
        # never imported in this notebook.
        hidden = torch.relu(self.linear_layer_1(state_action))
        hidden = torch.relu(self.linear_layer_2(hidden))
        return self.linear_layer_3(hidden), {}

In [33]:
# Replay memory sized for 1000 entries; every other option keeps its default.
memory = RandomMemory(
    memory_size=1000,
)

In [34]:
# Model registry keyed by role. The policy is built on the flattened
# observation space so it receives a single flat vector per state.
models = {
    "policy": DeterministicActor(
        gym.spaces.flatten_space(env.observation_space),
        env.action_space,
    ),
}


In [35]:
# Inspect which space class the environment uses for observations.
type(env.observation_space)

gymnasium.spaces.tuple.Tuple

gymnasium.spaces.discrete.Discrete

In [21]:
# NOTE(review): obsv_space is not assigned in any visible cell; presumably it
# was bound to the environment's observation sub-spaces in an earlier run -
# confirm before re-executing this cell.
obsv_space

(Discrete(32), Discrete(11), Discrete(2))

In [26]:
# Show the flattened form of the observation space; this is the space the
# policy model is constructed with.
gym.spaces.flatten_space(env.observation_space)

Box(0, 1, (45,), int64)