In [1]:
import torch

In [4]:
# Small example tensor mixing zero and non-zero entries.
a = torch.tensor([0, 0.2, 0, 0.5])

In [6]:
# Float32 indicator: 1.0 where `a` is non-zero, 0.0 elsewhere.
a.ne(0).float()

tensor([0., 1., 0., 1.])

In [1]:
from agent.QCnnRnn.Embedder import Embedder
from agent.QCnnRnn.QCnnRnn import QCnnRnn
import torch.nn as nn
from einops.layers.torch import Rearrange
from torchsummary import summary
import torch
import random

import torch.nn.functional as F

In [2]:


class ReplayBuffer:
    """
    Episode-level replay buffer for QCnnRnn.

    Every stored sample is one whole (padded) episode, kept as a tuple of:
    - env_feats: torch.Tensor [max_steps, 10]
    - item_feats: torch.Tensor [max_steps, max_items, 23]
    - masks: torch.Tensor [max_steps, max_items]
    - actions: torch.Tensor [max_steps]
    - rewards: torch.Tensor [max_steps]
    - seq_len: int (number of real steps, padding excluded)

    The buffer does not validate these shapes itself; `sample` only requires
    that each field has the same shape in every sampled episode so that it can
    be stacked along a new batch dimension.
    """
    def __init__(self, capacity=1000):
        """
        Create an empty buffer.

        Args:
            capacity: maximum number of episodes kept; must be positive.

        Raises:
            ValueError: if `capacity` is not positive (such a buffer could
                never hold an episode and would fail on the first `push`).
        """
        if capacity <= 0:
            raise ValueError(f"capacity must be positive, got {capacity}")
        self.buffer = []
        self.capacity = capacity

    def push(self, env_feats, item_feats, masks, actions, rewards, seq_len):
        """
        Append one episode, evicting the oldest one when the buffer is full.

        Args:
            env_feats: torch.Tensor [max_steps, 10]
            item_feats: torch.Tensor [max_steps, max_items, 23]
            masks: torch.Tensor [max_steps, max_items]
            actions: torch.Tensor [max_steps]
            rewards: torch.Tensor [max_steps]
            seq_len: int
        """
        # FIFO eviction: index 0 always holds the oldest stored episode.
        if len(self.buffer) >= self.capacity:
            self.buffer.pop(0)
        self.buffer.append((env_feats, item_feats, masks, actions, rewards, seq_len))

    def __len__(self):
        """Return the number of episodes currently stored."""
        return len(self.buffer)

    def sample(self, batch_size):
        """
        Draw `batch_size` distinct episodes uniformly at random and batch them.

        Args:
            batch_size: number of episodes to draw, between 1 and `len(self)`.

        Returns:
            dict with keys 'env_feats', 'item_feats', 'masks', 'actions',
            'rewards' (each the per-episode tensors stacked along a new leading
            batch dimension) and 'seq_lens' (a `torch.int` tensor of shape [B]).

        Raises:
            ValueError: if `batch_size` is not in `1..len(self)`.
        """
        stored = len(self.buffer)
        if batch_size <= 0 or batch_size > stored:
            raise ValueError(
                f"batch_size must be between 1 and the number of stored "
                f"episodes ({stored}), got {batch_size}"
            )

        episodes = random.sample(self.buffer, batch_size)

        # Transpose the list of episode tuples into one tuple per field.
        env_feats_list, item_feats_list, masks_list, actions_list, rewards_list, seq_lens_list = zip(*episodes)

        env_feats = torch.stack(env_feats_list)          # [B, max_steps, 10]
        item_feats = torch.stack(item_feats_list)        # [B, max_steps, max_items, 23]
        masks = torch.stack(masks_list)                  # [B, max_steps, max_items]
        actions = torch.stack(actions_list)              # [B, max_steps]
        rewards = torch.stack(rewards_list)              # [B, max_steps]
        # NOTE(review): torch.int is int32; confirm that is what downstream
        # consumers of the lengths expect.
        seq_lens = torch.tensor(seq_lens_list, dtype=torch.int)            # [B]

        return {
            'env_feats': env_feats,
            'item_feats': item_feats,
            'masks': masks,
            'actions': actions,
            'rewards': rewards,
            'seq_lens': seq_lens
        }

In [3]:
import json

# Read the saved game state from file
with open('state.json', 'r', encoding='utf-8') as f:
    state = json.load(f)

# Access the individual fields
print("=== GLOBAL STATE ===")
print(f"Score: {state['global_state']['score']}")
print(f"Goal: {state['global_state']['goal']}")
print(f"Level: {state['global_state']['level']}")
print(f"Time left: {state['global_state']['time_left']}s")
print(f"Dynamite: {state['global_state']['dynamite_count']}")

print("\n=== ROPE STATE ===")
print(f"Direction: {state['rope_state']['direction']}°")
print(f"State: {state['rope_state']['state']}")
print(f"Length: {state['rope_state']['length']}")
print(f"Has item: {state['rope_state']['has_item']}")

print("\n=== ITEMS ===")
print(f"Total items: {len(state['items'])}")
for i, item in enumerate(state['items'][:3]):  # Show only the first 3 items
    print(f"\nItem {i}:")
    print(f"  Type: {item['type']}")
    print(f"  Position: ({item['position']['x']}, {item['position']['y']})")
    print(f"  Size: {item['size']}")
    print(f"  Point: {item['point']}")

=== GLOBAL STATE ===
Score: 1000000
Goal: 3125
Level: 4
Time left: 53s
Dynamite: 0

=== ROPE STATE ===
Direction: 108.50000000000006°
State: swinging
Length: 50
Has item: False

=== ITEMS ===
Total items: 22

Item 0:
  Type: Mole
  Position: (693.6640000000024, 392)
  Size: 30
  Point: 602

Item 1:
  Type: Mole
  Position: (156.8880000000007, 626)
  Size: 30
  Point: 602

Item 2:
  Type: Mole
  Position: (785.896000000003, 490)
  Size: 30
  Point: 602


In [4]:
# Load warmup_buffer_rnn.pkl into `warmup_data`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open files that come from a trusted source.
import pickle
with open('warmup_buffer_rnn.pkl', 'rb') as f:
    warmup_data = pickle.load(f)

In [5]:
# Summarise the first warm-up episode: tensors by shape, anything else by value.
for item, value in warmup_data[0].items():
    print(f'{item}: {value.shape if isinstance(value, torch.Tensor) else value}')

env_feats: torch.Size([15, 10])
item_feats: torch.Size([15, 30, 23])
masks: torch.Size([15, 30])
actions: torch.Size([15])
rewards: torch.Size([15])
seq_len: 11


In [6]:
# Copy every warm-up episode into a fresh replay buffer, passing the fields by name.
buffer = ReplayBuffer(capacity=1000)
for epi in warmup_data:
    buffer.push(**{
        field: epi[field]
        for field in ('env_feats', 'item_feats', 'masks', 'actions', 'rewards', 'seq_len')
    })

In [7]:
# Number of episodes now held in the buffer.
len(buffer)

500

In [8]:
# Draw a small batch of episodes and unpack every field of the returned dict.
sam = buffer.sample(batch_size=4)
env_feats, item_feats, masks, actions, rewards, seq_lens = (
    sam[key]
    for key in ('env_feats', 'item_feats', 'masks', 'actions', 'rewards', 'seq_lens')
)
# Last expression of the cell: the shape of each batched field.
tuple(t.shape for t in (env_feats, item_feats, masks, actions, rewards, seq_lens))

(torch.Size([4, 15, 10]),
 torch.Size([4, 15, 30, 23]),
 torch.Size([4, 15, 30]),
 torch.Size([4, 15]),
 torch.Size([4, 15]),
 torch.Size([4]))

In [9]:
# Build the agent with its default constructor arguments.
agent = QCnnRnn()

In [10]:
# Turn the sampled batch into the agent's GRU input and inspect its shape
# (the recorded run shows [4, 15, 24]).
x = agent.get_gru_input(env_feats, item_feats, masks, actions)
x.shape

torch.Size([4, 15, 24])

In [12]:
# Run the agent's `gru` module on the input; it returns two values.
# Presumably per-step outputs and the final hidden state, as with torch.nn.GRU — TODO confirm.
o, h = agent.gru(x)
o.shape, h.shape

(torch.Size([4, 15, 24]), torch.Size([1, 4, 24]))

In [15]:
# Apply the agent's predictor to the GRU outputs and inspect the result's shape.
agent.predictor(o).shape

torch.Size([4, 15, 50])

In [13]:
# Source - https://stackoverflow.com/a
# Posted by Thong Nguyen, modified by community. See post 'Timeline' for change history
# Retrieved 2025-11-30, License - CC BY-SA 4.0

from prettytable import PrettyTable

def count_parameters(model):
    """
    Print a table of the model's trainable parameters and return their total.

    Args:
        model: object exposing `named_parameters()` that yields
            (name, parameter) pairs, where each parameter has
            `requires_grad` and `numel()`.

    Returns:
        Total number of elements across parameters with `requires_grad` set.
    """
    # Keep only trainable parameters, remembering each one's element count.
    trainable = [
        (name, parameter.numel())
        for name, parameter in model.named_parameters()
        if parameter.requires_grad
    ]

    table = PrettyTable(["Modules", "Parameters"])
    for name, size in trainable:
        table.add_row([name, size])

    total_params = sum(size for _, size in trainable)
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params
    
# Print the parameter table for the agent; the returned total is the cell's output.
count_parameters(agent)


+-----------------------------------+------------+
|              Modules              | Parameters |
+-----------------------------------+------------+
|  backbone.item_extractor.0.weight |    552     |
|   backbone.item_extractor.0.bias  |     24     |
|  backbone.item_extractor.3.weight |    576     |
|   backbone.item_extractor.3.bias  |     24     |
|  backbone.item_extractor.6.weight |     72     |
|   backbone.item_extractor.6.bias  |     24     |
|  backbone.item_extractor.7.weight |     24     |
|   backbone.item_extractor.7.bias  |     24     |
|  backbone.item_extractor.9.weight |     72     |
|   backbone.item_extractor.9.bias  |     24     |
| backbone.item_extractor.10.weight |     24     |
|  backbone.item_extractor.10.bias  |     24     |
|  backbone.env_extractor.0.weight  |    240     |
|   backbone.env_extractor.0.bias   |     24     |
|  backbone.env_extractor.3.weight  |    576     |
|   backbone.env_extractor.3.bias   |     24     |
|      action_embedding.weight 

9026

In [1]:
import torch
# Load a saved training checkpoint into `pot`.
# NOTE(review): hard-coded absolute Windows path, so this cell only runs on a machine
# with this exact directory layout; consider a configurable checkpoint directory.
# NOTE(review): torch.load relies on pickle-based deserialization, so only load
# checkpoint files from a trusted source.
pot = torch.load(r"C:\Users\User\Documents\code\rl-training-gold-miner\checkpoints\final_model_raw.pt")

In [2]:
# List the top-level entries stored in the checkpoint.
pot.keys()

dict_keys(['agent_state_dict', 'target_agent_state_dict', 'optimizer_state_dict', 'epsilon', 'total_steps', 'episode_rewards', 'episode_lengths'])

In [10]:
# NOTE(review): this writes to the exact path the checkpoint was loaded from, so the
# original file is overwritten. Execution counts jump from 2 to 10, so any cells that
# changed `pot` in between are not visible here — confirm before re-running.
torch.save(pot, r"C:\Users\User\Documents\code\rl-training-gold-miner\checkpoints\final_model_raw.pt")