### Introduction

This notebook trains a Decision Transformer on offline data that was collected by running a pre-trained DQN agent in the CarRacing-v2 environment.

### Install initial environment in Google Colab

In [1]:
import sys
import os

if 'google.colab' in sys.modules:
  if not os.path.exists('/content/.already_installed'):
    !git clone https://github.com/YakivGalkin/cnn_decision_transformer
    !apt-get install -y swig
    !pip install -r cnn_decision_transformer/requirements.txt
    with open('/content/.already_installed', 'w') as f:
        f.write('done')
  %cd /content/cnn_decision_transformer

Cloning into 'cnn_decision_transformer'...
remote: Enumerating objects: 82, done.[K
remote: Counting objects: 100% (82/82), done.[K
remote: Compressing objects: 100% (63/63), done.[K
remote: Total 82 (delta 40), reused 53 (delta 17), pack-reused 0[K
Receiving objects: 100% (82/82), 1.31 MiB | 1.74 MiB/s, done.
Resolving deltas: 100% (40/40), done.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  swig4.0
Suggested packages:
  swig-doc swig-examples swig4.0-examples swig4.0-doc
The following NEW packages will be installed:
  swig swig4.0
0 upgraded, 2 newly installed, 0 to remove and 19 not upgraded.
Need to get 1,116 kB of archives.
After this operation, 5,542 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig4.0 amd64 4.0.2-1ubuntu1 [1,110 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig all 4.0.2-1ubuntu1

### Load Dataset

In [2]:
# Candidate dataset names (the second one is the one loaded below):
#   car_racing_15_100
#   offline_car_racing_150_1000
import utils.storage as storage

dataset_name = 'offline_car_racing_150_1000'
features = storage.load_dataset(dataset_name)

# Sanity check: size of the "observations" entry (the recorded run printed 150).
print(len(features["observations"]))

Downloading file from https://storage.googleapis.com/yakiv-dt-public/datasets/offline_car_racing_150_1000.hdf5 to ./downloaded_datasets/offline_car_racing_150_1000.hdf5
Download complete.
150


In [3]:
import gymnasium as gym

# Discrete-action CarRacing environment. Rendering is left at its default;
# add render_mode='human' to the call to open a window while stepping.
env = gym.make('CarRacing-v2', continuous=False)

In [4]:
from dataclasses import asdict, dataclass


@dataclass
class TrainConfig:
    """Hyper-parameters for the Decision Transformer training run.

    A single instance (`trainConfig`) is created below; it is logged to
    Weights & Biases via `asdict(...)` and its fields are forwarded to the
    data collator, the model config and `TrainingArguments`.
    """

    # --- sequence / episode shape -------------------------------------
    num_train_epochs: int = 5_000   # forwarded to TrainingArguments.num_train_epochs
    max_ep_len: int = 1_000         # forwarded to the collator and model config as max_ep_len
    max_length: int = 10            # collator `max_len` / model config `max_length`
    rtg_gamma: float = 1.0          # not read elsewhere in this notebook; presumably the
                                    # return-to-go discount - TODO confirm

    # --- logging / checkpointing --------------------------------------
    prefix: str = 'DT'              # not read elsewhere in this notebook - TODO confirm usage
    log_interval: int = 50          # forwarded to TrainingArguments.logging_steps
    save_steps: int = 1_000         # forwarded to TrainingArguments.save_steps

    # --- optimisation -------------------------------------------------
    per_device_train_batch_size: int = 32
    learning_rate: float = 1e-4
    weight_decay: float = 1e-4
    warmup_ratio: float = 0.1
    max_grad_norm: float = 0.25


# Default configuration shared by the cells that follow.
trainConfig = TrainConfig()


In [5]:

import wandb

# Weights & Biases settings picked up by the Hugging Face Trainer integration:
# keep reporting enabled, name the notebook for the run page, and upload
# saved checkpoints as artifacts.
os.environ["WANDB_DISABLED"] = "false"
os.environ['WANDB_NOTEBOOK_NAME'] = 'DT_train.ipynb'
os.environ["WANDB_LOG_MODEL"] = "checkpoint"

# SECURITY: never paste an API key into the notebook, not even in a comment;
# any key that was committed here before must be treated as leaked and rotated.
# Provide the key through the WANDB_API_KEY environment variable (or a
# .env file / Colab secret) and, if an explicit login is needed, use:
# wandb.login(key=os.environ["WANDB_API_KEY"])

# Start the run and record the full training configuration with it.
wandb.init(
    name="vdt_001",
    mode="online",
    entity="yakiv",
    project="VDT",
    # resume="allow",
    config=asdict(trainConfig),
)




<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


### Train

In [6]:

from visual_decision_transformer.visual_decision_transformer_trainable import VisualDecisionTransformerGymDataCollator, TrainableVisualDecisionTransformer
from visual_decision_transformer.configuration import DecisionTransformerConfig
from utils.dataset_wrappers import DummyDataset
from utils.dataset_wrappers import CarRacingFeatureDataset
from transformers import Trainer, TrainingArguments

# Wrap the loaded features and build the collator that assembles training
# batches; the collator also exposes the state/action dimensions used to
# size the model.
feature_dataset = CarRacingFeatureDataset(src=features)
collator = VisualDecisionTransformerGymDataCollator(
    feature_dataset,
    max_len=trainConfig.max_length,
    max_ep_len=trainConfig.max_ep_len,
)

# Model configuration mirrors the collator's dimensions and the sequence
# limits from trainConfig.
dt_config = DecisionTransformerConfig(
    state_dim=collator.state_dim,
    act_dim=collator.act_dim,
    max_length=trainConfig.max_length,
    max_ep_len=trainConfig.max_ep_len,
)
model = TrainableVisualDecisionTransformer(dt_config)

training_args = TrainingArguments(
    # where things go
    output_dir="output/",
    report_to="wandb",
    logging_steps=trainConfig.log_interval,
    save_steps=trainConfig.save_steps,
    # batches are produced by the custom collator, so keep all columns
    remove_unused_columns=False,
    # optimisation
    optim="adamw_torch",
    num_train_epochs=trainConfig.num_train_epochs,
    per_device_train_batch_size=trainConfig.per_device_train_batch_size,
    learning_rate=trainConfig.learning_rate,
    weight_decay=trainConfig.weight_decay,
    warmup_ratio=trainConfig.warmup_ratio,
    max_grad_norm=trainConfig.max_grad_norm,
)

# The 'hack': the Trainer only receives a placeholder dataset of the right
# length. The collator was constructed with `feature_dataset` above, so
# presumably it draws the real samples itself - verify in the collator.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=DummyDataset(len(feature_dataset)),
    data_collator=collator,
)

trainer.train()


  from tensorflow.tsl.python.lib.core import pywrap_ml_dtypes


Downloading file from https://storage.googleapis.com/yakiv-dt-public/models/nature_cnn_dql_pretrained.pt to ./downloaded_models/nature_cnn_dql_pretrained.pt
Download complete.


Step,Training Loss
50,1.6382
100,1.5435
150,1.409
200,1.3027
250,1.2061
300,1.1295
350,1.074
400,1.0201
450,0.9648
500,0.936


[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-1000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-2000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-3000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-4000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-5000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-6000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-7000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-8000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-9000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-10000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./output/checkpoint-11000)... Done. 0.1s
[34m[1

TrainOutput(global_step=25000, training_loss=0.15604460973203182, metrics={'train_runtime': 1022.7397, 'train_samples_per_second': 733.324, 'train_steps_per_second': 24.444, 'total_flos': 7.950956820028232e+18, 'train_loss': 0.15604460973203182, 'epoch': 5000.0})

In [8]:
import torch

# Serialise the whole trained module object (not just a state_dict) to the
# working directory; loading it back requires the model class to be importable.
checkpoint_path = 'model_tr.pth'
torch.save(model, checkpoint_path)


  and should_run_async(code)


In [9]:
# Publish the trained model to Google Cloud Storage.
# `gcloud auth login` is interactive: it prints a URL and waits for an
# authorization code, so this cell cannot run unattended. Clear the cell
# output before sharing the notebook, since it records that exchange.
!gcloud auth login
# Select the GCP project the following commands run against.
!gcloud config set project clever-393810
# Optional diagnostics (left disabled):
#!gcloud auth list
#!gcloud storage buckets list
# Copy the file written by the previous cell into the bucket. Note that the
# object is stored under the `datasets/` prefix even though it is a model.
!gsutil cp ./model_tr.pth gs://yakiv-dt-public/datasets/model_tr.pth

  and should_run_async(code)


Go to the following link in your browser:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=32555940559.apps.googleusercontent.com&redirect_uri=https%3A%2F%2Fsdk.cloud.google.com%2Fauthcode.html&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fappengine.admin+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcompute+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Faccounts.reauth&state=tlgYcz8xQMcs5GgALY9WE7wCvvUvuJ&prompt=consent&access_type=offline&code_challenge=-rYRMTPXM4jCfN5R1gTg1m8cWpNHnvm3FjMPpE1j1GM&code_challenge_method=S256

Enter authorization code: <redacted>

You are now logged in as [yakiv@flutterbase.com].
Your current project is [None].  You can change this setting by running:
  $ gcloud config set project PROJECT_ID
Upd