In [95]:
import numpy as np
import torch
import torch.optim as optim

from ppo_utils import (
    a_gae,
    tc_loss_function,
    ratio,
    r_gamma,
    v_loss,
    get_action_from_probs,
    make_model_value_function
)
from card_representation import CardRepresentation
from action_representation import ActionRepresentation
from siamese_net import PseudoSiameseNet, logits_to_probs, clone_model_weights

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [96]:
# This is a hardcoded example
def build_card_rep_for_state(state: str) -> CardRepresentation:
    """
    Given a 'state' like 'Preflop', 'Flop', 'Turn', 'River', or 'Showdown',
    build an example CardRepresentation. This is a toy demonstration:
      - Preflop: sets hole cards only
      - Flop: sets hole + flop
      - Turn: sets hole + flop + turn
      - River or Showdown: sets hole + flop + turn + river
    """
    cr = CardRepresentation()

    # For demonstration, we always use the same hole cards: As, Ac
    cr.set_preflop([(12,3), (12,2)])  # rank=12 => Ace, suits=3 => clubs, 2 => diamonds

    if state in ['Flop', 'Turn', 'River', 'Showdown']:
        flop_cards = [(7,1), (3,3), (9,2)]  # e.g. 9d, 5s, Jc
        cr.set_flop(flop_cards)

    if state in ['Turn', 'River', 'Showdown']:
        turn_card = (5,0)  # 7h
        cr.set_turn(turn_card)

    if state in ['River', 'Showdown']:
        river_card = (11,3) # Ks
        cr.set_river(river_card)

    return cr


def get_action(state: str, player: int):
    """
    Given a 'state' like 'Preflop', 'Flop', 'Turn', 'River', or 'Showdown',
    and a 'player' (0 or 1), return an action for that player.
    This is a toy demonstration.

    Returns a list of 4 elements:
        - round_id: 0-3
        - action_index_in_round: 0-5
        - player_id: 0, 1
        - action_idx: 0-8, depending on the action_type
    """
    if state == 'Preflop':
        if player == 0:
            return [0, 0, 0, 6] # hero (player_id=0) bets pot
        else:
            return [0, 1, 1, 2] # villain (player_id=1) calls
    elif state == 'Flop':
        if player == 0:
            return [1, 0, 0, 3] # hero (player_id=0) bets small
        else:
            return [1, 1, 1, 2] # villain (player_id=1) calls
    elif state == 'Turn':
        if player == 0:
            return [2, 0, 0, 1] # hero (player_id=0) checks
        else:
            return [2, 1, 1, 1] # villain (player_id=1) checks
    elif state == 'River':
        if player == 0:
            return [3, 0, 0, 8] # hero (player_id=0) shoves
        else:
            return [3, 1, 1, 2] # villain (player_id=1) calls

def build_action_rep_for_state(state: str) -> ActionRepresentation:
    """
    Builds an ActionRepresentation for a given 'state'.
    We'll fill it with a minimal set of actions so far.
    """
    ar = ActionRepresentation(nb=9, max_actions_per_round=6, rounds=4)

    # Suppose on Preflop we have 2 actions that happened:
    #  - hero (player_id=0) bet pot => action_idx=6
    #  - villain (player_id=1) calls => action_idx=1
    # We'll fill those for *all* states up to Flop, 
    # then add more as we get further into the hand.
    # ar.add_action(0, 0, 0, 6, legal_actions=range(9))  # channel=0
    # ar.add_action(0, 1, 1, 2, legal_actions=range(9))  # channel=1

    # if state in ['Flop','Turn','River','Showdown']:
    #     # Let's say on the Flop there's 1 action: hero bets small => action_idx=2
    #     ar.add_action(1, 0, 0, 3, legal_actions=range(9))  # channel=6 (round_id=1, index=0)

    # if state in ['Turn','River','Showdown']:
    #     # On Turn, hero checks => action_idx=1
    #     ar.add_action(2, 0, 0, 1, legal_actions=range(9))  # channel=12

    # if state in ['River','Showdown']:
    #     # On River, hero shoves => action_idx=8, for example
    #     ar.add_action(3, 0, 0, 8, legal_actions=range(9))  # channel=18

    states = ['Preflop', 'Flop', 'Turn', 'River', 'Showdown']
    for prev_state in states:
        if prev_state == state: # This is inside the loop because the Hero is player 0. This way on the last iteration we don't add the villain's action
            break
        for player in [0, 1]:
            round_id, action_index_in_round, player_id, action_idx = get_action(prev_state, player)
            ar.add_action(round_id, action_index_in_round, player_id, action_idx)

    return ar

def build_reward_for_state(state: str) -> float:
    """
    Builds a reward for a given 'state'.
    We'll use a simple reward scheme:
      - Preflop: -20
      - Flop: -20
      - Turn: -80
      - River: 0
      - Showdown: 240
    """
    rewards = {'Preflop': -20, 'Flop': -20, 'Turn': -80, 'River': 0, 'Showdown': 240} #Showdown reward gets added to the River reward, so there are equal actions taken and results and states
    return rewards[state]


def get_deltas(state):
    """
    Return (delta1, delta2, delta3) for the given street
    in the trinal-clip PPO approach.
    """
    delta1 = 3
    if state == 'Preflop':
        delta2, delta3 = 20, 10
    elif state == 'Flop':
        delta2, delta3 = 40, 20
    elif state == 'Turn':
        delta2, delta3 = 120, 80
    elif state == 'River':
        delta2, delta3 = 120, 120
    elif state == 'Showdown':
        delta2, delta3 = 120, 120
    return (delta1, delta2, delta3)


In [97]:


def to_torch_input(card_rep: np.array, action_rep: np.array):
    """
    Convert the card_rep/card_tensor and action_rep/action_tensor
    to torch tensors of shape (1,...) for the siamese model.
    """
    card_np = card_rep[np.newaxis, ...]      # (1,6,4,13)
    action_np = action_rep[np.newaxis, ...]# (1,24,4,9)
    card_t = torch.from_numpy(card_np).float()
    action_t = torch.from_numpy(action_np).float()
    return action_t, card_t

# def run_one_iteration_old(iter_idx: int, old_policy_net: PseudoSiameseNet, new_policy_net: PseudoSiameseNet,
#                       optimizer: optim.Optimizer):
#     """
#     Demonstrates a single iteration (episode) with states=[Preflop,Flop,Turn,River,Showdown]
#     and rewards=[-20, -20, -80, 0, 240].
#     We'll do partial PPO logic: 
#       - compute advantage,
#       - old/new policy ratio,
#       - trinal-clip policy loss,
#       - a toy value loss, 
#       - gradient update => new_policy_net changes.
#     """
#     print(f"\n=== Iteration {iter_idx} ===")
#     states = ['Preflop','Flop','Turn','River', 'Showdown']
#     rewards = [-20, -20, -80, 0, 240]

#     # We'll track some metrics
#     total_pol_loss = 0
#     total_val_loss = 0
#     steps_count = 0

#     # Build a model-based value function using the *new* net 
#     # (In typical PPO, the value function is updated simultaneously with the new policy.)
#     model_value_func = make_model_value_function(new_policy_net, build_card_rep_for_state, build_action_rep_for_state)

#     for i, st in enumerate(states[:-1]):  # skip Showdown itself
#         # 1) Compute advantage
#         future_rewards = rewards[i:]
#         future_states  = states[i:]
#         advantage = a_gae(future_rewards, future_states, model_value_func, gamma=0.999, lambda_=0.99)

#         # 2) Build card/action reps for this state
#         card_rep = build_card_rep_for_state(st)
#         action_rep = build_action_rep_for_state(st)
#         action_t, card_t = to_torch_input(card_rep, action_rep)

#         # 3) old policy => old_probs
#         with torch.no_grad():
#             old_logits, _ = old_policy_net(action_t, card_t)
#             old_probs = logits_to_probs(old_logits)[0].cpu().numpy()

#         # 4) new policy => new_probs + new_value
#         new_logits, new_value = new_policy_net(action_t, card_t)
#         new_probs_t = logits_to_probs(new_logits)[0]
#         new_probs = new_probs_t.detach().cpu().numpy()

#         # 5) sample an action from the *new* policy
#         action_idx = np.random.choice(len(new_probs), p=new_probs)

#         # 6) ratio = new_probs[action_idx]/old_probs[action_idx]
#         ratio_val = ratio(old_probs, new_probs, action_idx)

#         # 7) policy loss
#         deltas = get_deltas(st)
#         pol_loss_val = tc_loss_function(ratio_val, advantage, epsilon=0.2, deltas=deltas)

#         # 8) value loss
#         #    compute r_gamma from future rewards
#         r_g = r_gamma(np.array(future_rewards), gamma=0.999)
#         val_loss_val = v_loss(r_g, st, deltas, value_function_fn=model_value_func)

#         # 9) build a toy combined loss => do a gradient update
#         #   - incorporate pol_loss_val and val_loss_val into the PyTorch graph 
#         #   - We'll do a negative log(prob_of_action) scaled by pol_loss_val,
#         #     plus MSE( new_value, val_loss_val ).
#         chosen_log_prob = torch.log(new_probs_t[action_idx] + 1e-8)
#         pol_loss_t = torch.tensor(pol_loss_val, dtype=torch.float32)
#         val_loss_t = torch.tensor(val_loss_val, dtype=torch.float32)

#         combined_loss = - pol_loss_t * chosen_log_prob + (new_value[0] - val_loss_t)**2

#         optimizer.zero_grad()
#         combined_loss.backward()
#         optimizer.step()

#         total_pol_loss += pol_loss_val
#         total_val_loss += val_loss_val
#         steps_count += 1

#         print(f"  State={st}, ratio={ratio_val:.3f}, advantage={advantage:.2f}, action_idx={action_idx}")
#         print(f"    pol_loss={pol_loss_val:.3f}, val_loss={val_loss_val:.3f}")

#     if steps_count > 0:
#         print(f"=> iteration {iter_idx} done. avg pol_loss={total_pol_loss/steps_count:.3f}, avg val_loss={total_val_loss/steps_count:.3f}")


In [98]:
class HandResult:
    def __init__(self, card_reps: list, action_reps: list, actions_taken:list, rewards: list, deltas: list):
        if len(card_reps) != len(action_reps) or len(card_reps) != len(card_reps): # We need to make sure that the length of the card_reps, action_reps and rewards are the same. Action before showdown should be a None value or smth like that
            print(len(card_reps), len(action_reps), len(rewards))
            raise ValueError("card_reps, rewards and action_reps must have the same length") 
        self.states = list((card_rep.card_tensor, action_reps.action_tensor) for card_rep, action_reps in zip(card_reps, action_reps))
        self.rewards = rewards
        self.rounds = [{
            'state': self.states[i],
            'action_taken': actions_taken[i],
            'reward': rewards[i],
            'deltas': deltas[i]
        } for i in range(len(self.states))]

    def new_state(self, card_rep: CardRepresentation, action_rep: ActionRepresentation, action_taken: int, reward: int, deltas: list):
        self.states.append((card_rep, action_rep))
        self.rewards.append(reward)
        self.rounds.append({
            'state': (card_rep.card_tensor, action_rep.action_tensor),
            'action_taken': action_taken,
            'reward': reward,
            'deltas': deltas
        })


def get_hand_result(method1: bool) -> HandResult:
    """
    Given a list of 'states' like ['Preflop','Flop','Turn','River','Showdown'],
    build a numpy array of shape (5,1) for the siamese model.

    I am sure both methods output the same result.
    """
    states = ['Preflop','Flop','Turn','River']
    # empty np.array with length 5
    if (method1):
        card_reps = [build_card_rep_for_state(state) for state in states]
        action_reps = [build_action_rep_for_state(state) for state in states]
        actions_taken = [get_action(state, 0)[3] for state in states]
        rewards = [build_reward_for_state(state) for state in states]
        deltas = [get_deltas(state) for state in states]
        return HandResult(card_reps, action_reps, actions_taken, rewards, deltas)
    else:
        for state in states:
            card_rep = build_card_rep_for_state(state)
            action_rep = build_action_rep_for_state(state)
            action_taken = get_action(state, 0)[3]
            reward = build_reward_for_state(state)
            deltas = get_deltas(state)
            if state == 'Preflop':
                hand_result = HandResult([card_rep], [action_rep], [action_taken], [reward], [deltas])
            else:
                hand_result.new_state(card_rep, action_rep, action_taken, reward, deltas)
        return hand_result

In [107]:
def run_one_iteration(iter_idx: int, rounds_array, old_policy_net: PseudoSiameseNet, new_policy_net: PseudoSiameseNet,
                      optimizer: optim.Optimizer):
    """
    Demonstrates a single iteration (episode) with states=[Preflop,Flop,Turn,River,Showdown]
    and rewards=[-20, -20, -80, 0, 240].
    We'll do partial PPO logic: 
      - compute advantage,
      - old/new policy ratio,
      - trinal-clip policy loss,
      - a toy value loss, 
      - gradient update => new_policy_net changes.
    """
    print(f"\n=== Iteration {iter_idx} ===")
    # states = ['Preflop','Flop','Turn','River','Showdown']
    # rewards = [-20, -20, -80, 0, 240]

    # We'll track some metrics
    total_pol_loss = 0
    total_val_loss = 0
    steps_count = 0

    # Build a model-based value function using the *new* net 
    # (In typical PPO, the value function is updated simultaneously with the new policy.)
    model_value_func = make_model_value_function(new_policy_net)
    states, rewards = zip(*[(round["state"], round["reward"]) for round in rounds_array])

    for i, round in enumerate(rounds_array):  # skip Showdown itself

        deltas = round['deltas']
        action_taken = round['action_taken']
        state = round['state']

        # 1) Compute advantage
        if i < len(rounds_array) - 1:
            future_rewards = rewards[i:]
            future_states  = states[i:]
            advantage_t = a_gae(future_states, future_rewards, model_value_func, gamma=0.999, lambda_=0.99)

        # 2) Build card/action reps for this state
        card_tensor = state[0]
        action_tensor = state[1]
        action_t, card_t = to_torch_input(card_tensor, action_tensor)

        # 3) old policy => old_probs
        old_logits, _ = old_policy_net.forward(action_t, card_t) # I think dont want to use no_grad here
        old_probs = logits_to_probs(old_logits)[0].detach().cpu().numpy()

        # 4) new policy => new_probs + new_value
        new_logits, new_value = new_policy_net.forward(action_t, card_t)
        new_probs_t = logits_to_probs(new_logits)[0]
        new_probs = new_probs_t.detach().cpu().numpy()

        # 5) sample an action from the *new* policy. THIS IS THE ACTION_TAKEN
        

        # 6) ratio = new_probs[action_idx]/old_probs[action_idx]
        ratio_t = ratio(old_probs, new_probs, action_taken)

        # 7) policy loss
        deltas = round['deltas']
        pol_loss_t = tc_loss_function(ratio_t, advantage_t, epsilon=0.2, deltas=deltas)
        pol_loss_val = pol_loss_t.item()

        # 8) value loss
        #    compute r_gamma from future rewards
        r_g = r_gamma(np.array(future_rewards), gamma=0.999)
        val_loss_t = v_loss(r_g, state, deltas, new_value)
        val_loss_val = val_loss_t.item()

        # 9) build a toy combined loss => do a gradient update
        #   - incorporate pol_loss_val and val_loss_val into the PyTorch graph 
        #   - We'll do a negative log(prob_of_action) scaled by pol_loss_val,
        #     plus MSE( new_value, val_loss_val ).
        chosen_log_prob = torch.log(new_probs_t[action_taken] + 1e-8)
        pol_loss_t = torch.tensor(pol_loss_val, dtype=torch.float32)
        val_loss_t = torch.tensor(val_loss_val, dtype=torch.float32)

        combined_loss = - pol_loss_t * chosen_log_prob + (new_value[0] - val_loss_t)**2

        optimizer.zero_grad()
        combined_loss.backward()
        optimizer.step()

        total_pol_loss += pol_loss_val
        total_val_loss += val_loss_val
        steps_count += 1
        print(f"  State {i}, ratio={ratio_t.item():.3f}, advantage={advantage_t.item():.2f}, action_idx={action_taken}")
        print(f"    pol_loss={pol_loss_val:.3f}, val_loss={val_loss_val:.3f}")
        print(f"   old_probs={old_probs}, new_probs={new_probs}")
    if steps_count > 0:
        print(f"=> iteration {iter_idx} done. avg pol_loss={total_pol_loss/steps_count:.3f}, avg val_loss={total_val_loss/steps_count:.3f}")


def train_model(model, hand_results):
    old_policy_net = PseudoSiameseNet()
    new_policy_net = PseudoSiameseNet()
    clone_model_weights(model, old_policy_net)
    clone_model_weights(model, new_policy_net)
    optimizer = optim.Adam(new_policy_net.parameters(), lr=0.001)
    for i, hand_result in enumerate(hand_results):
        run_one_iteration(i, hand_result.rounds, old_policy_net, new_policy_net, optimizer)
        # Optionally, if you want to update old_policy_net to match new_policy_net after each iteration:
        clone_model_weights(new_policy_net, old_policy_net)
    # Return the updated new_policy_net if needed.
    return new_policy_net

In [100]:
# First the model plays and generates all the states representations. You get for each round:
# - the card representations
# - the action representations
# - the rewards array

# Then we run the PPO algorithm to update the policy and value function

In [78]:
hand_result1 = get_hand_result(True)
hand_result2 = get_hand_result(False)

In [79]:
# Here I checked both methods were working equally
key = 'state'
for index in range(len(hand_result1.rounds)):
    print((hand_result1.rounds[index][key][1] == hand_result2.rounds[index][key][1]).all())

True
True
True
True


In [80]:
rounds_array = hand_result2.rounds
rounds_array[0]["reward"]

-20

# Hand toy example

In [101]:
hand_result = get_hand_result(True)
hand_results = [hand_result for _ in range(10)]

In [20]:
# Here I wanted to manually check all the actions where being added correctly

id = 3

print(hand_result.rounds[id]["state"][1], hand_result.rounds[id]["reward"])


[[[0. 0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 1. 0. 0.]
  [1. 1. 1. 1. 1. 1. 1. 1. 1.]]

 [[0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 1. 0. 0.]
  [1. 0. 1. 0. 1. 1. 1. 1. 1.]]

 [[0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 1. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0.]
  [1. 1. 1. 1. 1. 1. 1. 1. 1.]]

 [[0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 0. 0. 0.]
  [0. 0. 1. 1. 0. 0. 0. 0. 0.]
  [1. 0. 1. 1. 1. 1. 1. 1

In [108]:
old_policy = PseudoSiameseNet()
new_policy = PseudoSiameseNet()
run_one_iteration(0, hand_result.rounds, old_policy, new_policy, optim.Adam(new_policy.parameters(), lr=0.001))

# THERES SOMETHING WRONG WITH THE SIAMESE ARCHITECTURE


=== Iteration 0 ===
  State 0, ratio=0.966, advantage=-1.77, action_idx=6
    pol_loss=-1.709, val_loss=400.569
   old_probs=[0.12034971 0.11289378 0.10960522 0.10757452 0.11895547 0.1075887
 0.11078171 0.10497434 0.10727652], new_probs=[0.10752725 0.10844526 0.1204056  0.11732838 0.10775412 0.11255424
 0.10696795 0.10967582 0.10934138]
  State 1, ratio=1.094, advantage=-1.19, action_idx=3
    pol_loss=-1.300, val_loss=1604.177
   old_probs=[0.12046541 0.11281162 0.10955175 0.10730614 0.11871742 0.10787983
 0.11108684 0.10496014 0.10722081], new_probs=[0.10722628 0.10717817 0.12024306 0.11734135 0.10883825 0.11348834
 0.106752   0.10990152 0.10903112]
  State 2, ratio=0.941, advantage=-0.80, action_idx=1
    pol_loss=-0.753, val_loss=6414.206
   old_probs=[0.12034764 0.11287798 0.10949554 0.10733338 0.11878126 0.10798245
 0.11097647 0.10499205 0.10721324], new_probs=[0.10691146 0.10623211 0.12010626 0.11681299 0.11013546 0.11435253
 0.10639744 0.11017168 0.10888004]
  State 3, ratio=1

In [109]:
bot = PseudoSiameseNet()
train_model(bot, hand_results)



=== Iteration 0 ===
  State 0, ratio=1.000, advantage=-1.77, action_idx=6
    pol_loss=-1.770, val_loss=400.604
   old_probs=[0.10840059 0.10487235 0.10628538 0.11452141 0.10376357 0.11191905
 0.11618525 0.1191173  0.11493514], new_probs=[0.10840059 0.10487235 0.10628538 0.11452141 0.10376357 0.11191905
 0.11618525 0.1191173  0.11493514]
  State 1, ratio=1.009, advantage=-1.19, action_idx=3
    pol_loss=-1.200, val_loss=1604.547
   old_probs=[0.10862861 0.10486101 0.10629661 0.11436199 0.10392162 0.11185011
 0.11631168 0.11906023 0.11470824], new_probs=[0.10800943 0.10427322 0.10619068 0.11536682 0.10440952 0.11197127
 0.11573382 0.11955587 0.11448931]
  State 2, ratio=0.990, advantage=-0.80, action_idx=1
    pol_loss=-0.792, val_loss=6416.100
   old_probs=[0.10871647 0.10480455 0.10622469 0.11424129 0.10398738 0.11173145
 0.11635831 0.11929496 0.11464089], new_probs=[0.10747356 0.10374237 0.10582772 0.11520442 0.10539279 0.11239198
 0.11530242 0.1205129  0.11415179]
  State 3, ratio=

PseudoSiameseNet(
  (action_conv): Sequential(
    (0): Conv2d(24, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (card_conv): Sequential(
    (0): Conv2d(6, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (action_fc): Sequential(
    (0): Linear(in_features=128, out_features=128, bias=True)
    (1): ReLU()
  )
  (card_fc): Sequential(
    (0): Linear(in_features=192, out_features=128, bias=True)
    (1): ReLU()
  )
  (

In [29]:
hand_result.rounds[0]["state"][1].shape

(24, 4, 9)