In [1]:
import os
import random
from dataclasses import dataclass
import copy
import h5py
from datasets import Dataset, Image
import numpy as np
import torch
from datasets import load_dataset, concatenate_datasets, load_from_disk
from transformers import ViTFeatureExtractor, ViTModel
from transformers import DecisionTransformerConfig, DecisionTransformerModel, Trainer, TrainingArguments
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm 
import torch.nn.functional as F
import torch.nn as nn

os.environ["WANDB_DISABLED"] = "true"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
dataset = load_from_disk("f:\hdf5_dm_test_full_hoang.hf")

In [9]:
# Initialize the feature extractor and model
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
vit_model = ViTModel.from_pretrained('google/vit-base-patch16-224').to(device)

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
@dataclass
class DecisionTransformerGymDataCollator:
    return_tensors: str = "pt"
    max_len: int = 20 #subsets of the episode we use for training
    state_dim: int = 61803  # size of state space
    act_dim: int = 51  # size of action space
    max_ep_len: int = 1000 # max episode length in the dataset
    scale: float = 1000.0  # normalization of rewards/returns
    state_mean: np.array = None  # to store state means
    state_std: np.array = None  # to store state stds
    p_sample: np.array = None  # a distribution to take account trajectory lengths
    n_traj: int = 0 # to store the number of trajectories in the dataset

    def __init__(self, dataset) -> None:
        self.act_dim = len(dataset[0]["actions"][0])
        self.state_dim = len(dataset[0]["observations"][0])
        
        
        self.dataset = dataset
        # calculate dataset stats for normalization of states
        states = []
        traj_lens = []
        for obs in dataset["observations"]:
            states.extend(obs)
            traj_lens.append(len(obs))
        self.n_traj = len(traj_lens)
        states = np.vstack(states)
        self.state_mean, self.state_std = np.mean(states, axis=0), np.std(states, axis=0) + 1e-6

        traj_lens = np.array(traj_lens)
        self.p_sample = traj_lens / sum(traj_lens)

    def _discount_cumsum(self, x, gamma):
        discount_cumsum = np.zeros_like(x)
        discount_cumsum[-1] = x[-1]
        for t in reversed(range(x.shape[0] - 1)):
            discount_cumsum[t] = x[t] + gamma * discount_cumsum[t + 1]
        return discount_cumsum

    def __call__(self, features):
        batch_size = len(features)
        # this is a bit of a hack to be able to sample of a non-uniform distribution
        batch_inds = np.random.choice(
            np.arange(self.n_traj),
            size=batch_size,
            replace=True,
            p=self.p_sample,  # reweights so we sample according to timesteps
        )
        # a batch of dataset features
        s, a, r, rtg, timesteps, mask, image_features = [], [], [], [], [], [], []

        for ind in batch_inds:
            # for feature in features:
            feature = self.dataset[int(ind)]
            si = random.randint(0, len(feature["rewards"]) - 1) #random starting index for each selected trajectory.

            # get sequences from dataset
            s.append(np.array(feature["observations"][si : si + self.max_len]).reshape(1, -1, self.state_dim)) # take max_len time frames from starting index(si)
            a.append(np.array(feature["actions"][si : si + self.max_len]).reshape(1, -1, self.act_dim))
            r.append(np.array(feature["rewards"][si : si + self.max_len]).reshape(1, -1, 1))
            
            # Extract image from the observations key then reshape to 150,412- then append to a list like all others data, hope so
            images = [np.array(img[3:]).reshape(150,412) for img in feature["observations"][si : si + self.max_len]]
            images = [np.repeat(img[:, :, np.newaxis], 3, axis=2) for img in images]  # Need to do this because ViT expects 3 channels but the images are 2D grayscale
            images = [(img / 255.0).astype(np.float32) for img in images]  # Normalize the images to [0, 1] to avoide errors ValueError: The image to be converted to a PIL image contains values outside the range [0, 1], got [9.666666666666666, 227.33333333333334] which cannot be converted to uint8."}
            with torch.no_grad():
                inputs = feature_extractor(images, return_tensors="pt", do_rescale=False).to(device)
                vit_outputs = vit_model(**inputs)
                image_features.append(vit_outputs.last_hidden_state.mean(dim=1).cpu().numpy().reshape(1, -1, vit_model.config.hidden_size)) # need to get these thing to cpu to convert back to numpy, fitting other data
                
            # d.append(np.array(feature["dones"][si : si + self.max_len]).reshape(1, -1))
            timesteps.append(np.arange(si, si + s[-1].shape[1]).reshape(1, -1))
            timesteps[-1][timesteps[-1] >= self.max_ep_len] = self.max_ep_len - 1  # padding cutoff
            rtg.append(
                self._discount_cumsum(np.array(feature["rewards"][si:]), gamma=1.0)[
                    : s[-1].shape[1]   # TODO check the +1 removed here
                ].reshape(1, -1, 1)
            )
            if rtg[-1].shape[1] < s[-1].shape[1]:
                print("if true")
                rtg[-1] = np.concatenate([rtg[-1], np.zeros((1, 1, 1))], axis=1)

            # padding and state + reward normalization
            tlen = s[-1].shape[1]
            s[-1] = np.concatenate([np.zeros((1, self.max_len - tlen, self.state_dim)), s[-1]], axis=1)
            s[-1] = (s[-1] - self.state_mean) / self.state_std
            a[-1] = np.concatenate(
                [np.ones((1, self.max_len - tlen, self.act_dim)) * -10.0, a[-1]],
                axis=1,
            )
            r[-1] = np.concatenate([np.zeros((1, self.max_len - tlen, 1)), r[-1]], axis=1)
            rtg[-1] = np.concatenate([np.zeros((1, self.max_len - tlen, 1)), rtg[-1]], axis=1) / self.scale
            timesteps[-1] = np.concatenate([np.zeros((1, self.max_len - tlen)), timesteps[-1]], axis=1)
            mask.append(np.concatenate([np.zeros((1, self.max_len - tlen)), np.ones((1, tlen))], axis=1))

        # For padding like above, avoid error ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 10 and the array at index 1 has size 20
        max_size = max(arr.shape[1] for arr in image_features)
        # padded_image_features = [np.pad(arr, ((0, 0), (0, max_size - arr.shape[1])), mode='constant') for arr in image_features]
        padded_image_features = []
        for arr in image_features:
            # Ensure `arr` has the correct number of dimensions, typically (1, max_len, hidden_size)
            if arr.shape[1] < max_size:
                padding_shape = ((0, 0), (0, max_size - arr.shape[1]), (0, 0))
                padded_arr = np.pad(arr, padding_shape, mode='constant')
            else:
                padded_arr = arr
            padded_image_features.append(padded_arr)

        s = torch.from_numpy(np.concatenate(s, axis=0)).float()
        a = torch.from_numpy(np.concatenate(a, axis=0)).float()
        r = torch.from_numpy(np.concatenate(r, axis=0)).float()
        rtg = torch.from_numpy(np.concatenate(rtg, axis=0)).float()
        timesteps = torch.from_numpy(np.concatenate(timesteps, axis=0)).long()
        mask = torch.from_numpy(np.concatenate(mask, axis=0)).float()
        images = torch.from_numpy(np.concatenate(padded_image_features, axis=0)).float()
        


        return {
            "states": s,
            "actions": a,
            "rewards": r,
            "returns_to_go": rtg,
            "timesteps": timesteps,
            "attention_mask": mask,
            "image_features": images,
        }

In [11]:
class TrainableDT(DecisionTransformerModel):
    def __init__(self, config, image_feature_dim=768, gamma=0.99):
        super().__init__(config)
        self.gamma = gamma
        self.image_feature_dim = image_feature_dim
        self.state_projector = nn.Linear(config.state_dim + image_feature_dim, config.hidden_size)
        self._states_projected = None

    def embed_state(self, states):
        # Use the projected states instead of embedding again
        if self._states_projected is not None:
            return self._states_projected
        # Fallback to the default embed_state behavior if needed
        return super().embed_state(states)

    def forward(self, **kwargs): # kwarg need image features
        # Reshape for custom loss
        n_keys = 11
        n_clicks = 2
        mouse_x_possibles = [-1000.0,-500.0, -300.0, -200.0, -100.0, -60.0, -30.0, -20.0, -10.0, -4.0, -2.0, -0.0, 2.0, 4.0, 10.0, 20.0, 30.0, 60.0, 100.0, 200.0, 300.0, 500.0,1000.0]
        mouse_y_possibles = [-200.0, -100.0, -50.0, -20.0, -10.0, -4.0, -2.0, -0.0, 2.0, 4.0, 10.0, 20.0, 50.0, 100.0, 200.0]
        n_mouse_x = len(mouse_x_possibles)
        n_mouse_y = len(mouse_y_possibles)
        
        # For image and state features
        states = kwargs.pop("states")
        image_features = kwargs.pop("image_features")
        states_with_images = torch.cat([states, image_features], dim=-1)
        # states_projected = self.state_projector(states_with_images)
        self._states_projected = self.state_projector(states_with_images)
        kwargs["states"] = self._states_projected
        output = super().forward(**kwargs)
        self._states_projected = None
        
        action_preds = output[1]
        action_targets = kwargs["actions"]
        attention_mask = kwargs["attention_mask"]
        act_dim = action_preds.shape[2]
        action_preds = action_preds.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0]
        action_targets = action_targets.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0]


        # Action_pred n_keys: 11 keys for wasd, space, reload, 1,2,3
        action_preds[:, :n_keys] = torch.sigmoid(action_preds[:, :n_keys])
        # Action_pred n_clicks: left click, right click
        action_preds[:, n_keys:n_keys+n_clicks] = torch.sigmoid(action_preds[:, n_keys:n_keys+n_clicks])
        # # Action_pred n_mouse_x: to the x mouse poll
        # action_preds[:, n_keys + n_clicks:n_keys + n_clicks + n_mouse_x] = F.softmax(action_preds[:, n_keys + n_clicks:n_keys + n_clicks + n_mouse_x], dim=-1)
        # # Action_pred n_mouse_y: to the y mouse poll
        # action_preds[:, n_keys + n_clicks + n_mouse_x:n_keys + n_clicks + n_mouse_x + n_mouse_y] = F.softmax(action_preds[:, n_keys + n_clicks + n_mouse_x:n_keys + n_clicks + n_mouse_x + n_mouse_y], dim=-1)

        # Loss calculations based on each category
        loss_wasd = F.binary_cross_entropy(action_preds[:, :4], action_targets[:, :4])
        loss_left_click = F.binary_cross_entropy(action_preds[:, n_keys:n_keys+1], action_targets[:, n_keys:n_keys+1])
        loss_mouse_move_x = F.cross_entropy(action_preds[:, n_keys+n_clicks:n_keys+n_clicks+n_mouse_x], action_targets[:, n_keys+n_clicks:n_keys+n_clicks+n_mouse_x])
        loss_mouse_move_y = F.cross_entropy(action_preds[:, n_keys+n_clicks+n_mouse_x:n_keys+n_clicks+n_mouse_x+n_mouse_y], action_targets[:, n_keys+n_clicks+n_mouse_x:n_keys+n_clicks+n_mouse_x+n_mouse_y])

        # loss_crit = 10 * F.mse_loss(reward_t[:, :-1] + self.gamma * v_t_next, v_t[:, :-1])
        # Total loss
        # total_loss = sum([3*loss_wasd,0.25*loss_space, 0.25*loss_reload,0.25*loss_weapon_switch, 2*loss_left_click, 0.25*loss_right_click, 2*loss_mouse_move_x,  2*loss_mouse_move_y])
        total_loss = sum([loss_wasd, loss_left_click, loss_mouse_move_x, loss_mouse_move_y])
        print("|"*100)
        print("Loss WASD",loss_wasd)
        print("Loss lclick",loss_left_click)
        print("Loss mx",loss_mouse_move_x)
        print("Loss my",loss_mouse_move_y)
        print("loss", total_loss)
        print("|"*100)
        print("loss", total_loss)
        return {"loss": total_loss}

    def original_forward(self, **kwargs):
        return super().forward(**kwargs)

In [12]:
collator = DecisionTransformerGymDataCollator(dataset)
config = DecisionTransformerConfig(state_dim=collator.state_dim,   act_dim=collator.act_dim, hidden_size=1024, n_layer=6, n_head=8, dropout=0.0)

In [17]:
model = TrainableDT(config)
model.from_pretrained("F:\!Theis\Final_prj\concat_thing")
model.to(device)

TrainableDT(
  (encoder): DecisionTransformerGPT2Model(
    (wte): Embedding(1, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0, inplace=False)
    (h): ModuleList(
      (0-5): 6 x DecisionTransformerGPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): DecisionTransformerGPT2Attention(
          (c_attn): Conv1D(nf=3072, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=1024)
          (attn_dropout): Dropout(p=0, inplace=False)
          (resid_dropout): Dropout(p=0, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): DecisionTransformerGPT2MLP(
          (c_fc): Conv1D(nf=4096, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=4096)
          (act): ReLU()
          (dropout): Dropout(p=0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (embed_timestep): Embedding(4096, 1024)
  (embed_return): Linear(in_fea

In [18]:
training_args = TrainingArguments(
    output_dir="/trained_models/",
    remove_unused_columns=False,
    num_train_epochs=50,
    per_device_train_batch_size=32,
    learning_rate=5e-4,
    weight_decay=1e-4,
    warmup_ratio=0.3,
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    # max_grad_norm=0.5,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=collator,
)



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [19]:
trainer.train()

  0%|          | 0/50 [00:00<?, ?it/s]

||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Loss WASD tensor(0.7391, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward0>)
Loss lclick tensor(0.7631, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward0>)
Loss mx tensor(3.2941, device='cuda:0', grad_fn=<DivBackward1>)
Loss my tensor(2.8449, device='cuda:0', grad_fn=<DivBackward1>)
loss tensor(7.6413, device='cuda:0', grad_fn=<AddBackward0>)
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
loss tensor(7.6413, device='cuda:0', grad_fn=<AddBackward0>)


KeyboardInterrupt: 

In [16]:
trainer.save_model(f"concat_thing")

In [None]:
# # Custom generator for streaming datasets
# def load_dataset_generator():
#     for folder in hf_folders:
#         yield load_from_disk(folder)


# main_data_path = "D:\\newcs2_data"
# # Get all .hf folder paths
# hf_folders = [os.path.join(main_data_path, folder) for folder in os.listdir(main_data_path) if folder.endswith(".hf")]

# # Load dataset generator
# dataset_stream = load_dataset_generator()
# # for data in dataset_stream:
# #     print(data)

In [None]:
# # Training loop
# training_args = TrainingArguments(
#     output_dir="/trained_models/",
#     remove_unused_columns=False,
#     num_train_epochs=20,
#     per_device_train_batch_size=64,
#     learning_rate=1e-5,
#     weight_decay=1e-5,
#     warmup_ratio=0.3,
#     optim="adamw_torch",
#     max_grad_norm=0.25,
# )

# config = DecisionTransformerConfig(state_dim=61803, act_dim=51, hidden_size=512, n_layer=6, n_head=8)
# model = TrainableDT(config)
# model.to(device)

# for i, dataset in enumerate(dataset_stream):
#     # Initialize collator with the current dataset
#     collator = DecisionTransformerGymDataCollator(dataset) # longlongtime

#     # Set up the trainer with the current dataset and collator
#     trainer = Trainer(
#         model=model,
#         args=training_args,
#         train_dataset=dataset,
#         data_collator=collator
#     )

#     # Train on the current dataset
#     trainer.train()
#     save_path = f"./trained_models/model_after_dataset_{i+1}"
#     trainer.save_model(save_path)
#     print(f"Model saved to {save_path}")


In [None]:
# # Define training arguments
# training_args = TrainingArguments(
#     output_dir="/trained_models/",
#     remove_unused_columns=False,
#     num_train_epochs=100,
#     per_device_train_batch_size=32,
#     learning_rate=1e-3,
#     weight_decay=1e-3,
#     warmup_ratio=0.5,
#     optim="adamw_torch",
#     max_grad_norm=0.25,
# )

# # Initialize model and config once, outside the loop
# # Load the first dataset to get dimensions for the initial model configuration
# config = DecisionTransformerConfig(state_dim=61803, act_dim=51, hidden_size=1024, n_layer=20, n_head=16)
# first_dataset = next(dataset_stream)
# collator = DecisionTransformerGymDataCollator(first_dataset)
# model = TrainableDT(config)

# # First training session
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=first_dataset,
#     data_collator=collator
# )
# trainer.train()

# # Save the initial model checkpoint
# save_path = "./trained_models/model_after_dataset_1"
# trainer.save_model(save_path)
# print(f"Model saved to {save_path}")

# # Continue training with the remaining datasets
# for i, dataset in enumerate(dataset_stream, start=1):  # start=1 to continue from dataset 1 (meand second)
#     # Update collator with the new dataset
#     collator = DecisionTransformerGymDataCollator(dataset)

#     # Update the trainer with the current dataset and collator, but reuse the same model
#     print(f"Using ++ model_after_dataset_{i} ++ for training")
#     trainer = Trainer(
#         model=model.from_pretrained(f"trained_models\model_after_dataset_{i}"),  # Use the model with weights updated from the previous training
#         args=training_args,
#         train_dataset=dataset,
#         data_collator=collator
#     )

#     # Train on the current dataset
#     trainer.train()

#     # Save the model after each training session
#     save_path = f"./trained_models/model_after_dataset_{i+1}"
#     trainer.save_model(save_path)
#     print(f"Model saved to {save_path}")
