### Introduction

This notebook generates a CarRacing-v2 dataset for offline reinforcement learning by using a pretrained DQN for environment exploration, with configurable episode count and length, and stores it in HDF5 on Google Cloud Storage.

### Install initial environment in Google Colab

In [1]:
import sys
import os

if 'google.colab' in sys.modules:
  if not os.path.exists('/content/.already_installed'):
    !git clone https://github.com/FlutterbaseDotCom/hdt
    !apt-get install -y swig
    !pip install -r hdt/requirements.txt
    with open('/content/.already_installed', 'w') as f:
        f.write('done')
  %cd /content/hdt

### Imports

In [2]:
import os
from dataclasses import dataclass
import gymnasium as gym
import numpy as np
import torch
from stable_baselines3 import DQN
from stable_baselines3.common.torch_layers import NatureCNN
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage


  from .autonotebook import tqdm as notebook_tqdm


### Generate Data

Load pretrained DQN Model

In [None]:
# Load the pretrained DQN policy that drives the rollouts in the data
# generation cell (it is queried there via `loaded_model.predict`).
# NOTE(review): `get_pretrained_model` is a project helper not shown here;
# presumably it locates/downloads 'dql_car_racing.bin' and calls DQN.load —
# confirm in utils/storage.py.
import utils.storage as storage
loaded_model = storage.get_pretrained_model(DQN,  'dql_car_racing.bin')


Interact with the environment and build trajectories

In [3]:
from cnn_decision_transformer.cnn_feature_extractor import prepare_env_observation_array


# CarRacing with the discrete action space (continuous=False).
env = gym.make('CarRacing-v2', continuous=False)

# --- Rollout settings -------------------------------------------------------
# Cap on the number of steps per episode (checked in the rollout loop).
MAX_EPISODE_STEPS = 1000
# Number of episodes (trajectories) to record.
NUM_EPISODES = 150
# Discount factor for the return-to-go ("rtg") stored with the dataset;
# 1.0 means plain undiscounted sums of future rewards.
RTG_GAMMA = 1.0
# An episode is cut short once none of the most recent CUT_IF_NO_REWARD
# rewards is positive.
CUT_IF_NO_REWARD = 50

# Per-episode trajectories are appended to these lists, one entry per episode.
features = {
    name: []
    for name in ("observations", "actions", "rewards", "dones", "rtg")
}

# Roll out the pretrained policy for NUM_EPISODES episodes and append one
# trajectory per episode to `features`. The environment is closed even if the
# loop is interrupted or raises.
try:
    for episode in range(NUM_EPISODES):
        print(f"Episode: {episode} of {NUM_EPISODES}:" )
        obs, _ = env.reset()

        # Per-episode buffers: observations, actions, rewards, terminal flags.
        o, a, r, d = [], [], [], []
        total_reward = 0
        step_index = 0
        while True:
            step_index += 1

            action, _states = loaded_model.predict(obs, deterministic=True)
            # Gymnasium's step() returns (obs, reward, terminated, truncated, info).
            new_obs, reward, terminated, truncated, _info = env.step(action)
            total_reward = total_reward + reward
            # Store the observation the action was chosen from (pre-step).
            oarr = prepare_env_observation_array(obs)
            o.append(oarr.flatten())
            a.append(action.item())
            r.append(reward)
            d.append(terminated)
            obs = new_obs
            print(".", end="")

            # Stop as soon as the environment ends the episode; stepping a
            # terminated or truncated environment again is undefined.
            if terminated or truncated:
                break
            # Hard cap: at most MAX_EPISODE_STEPS steps per episode.
            if step_index >= MAX_EPISODE_STEPS:
                break
            # No positive reward within the last CUT_IF_NO_REWARD steps.
            if len(r) > CUT_IF_NO_REWARD and max(r[-CUT_IF_NO_REWARD:]) <= 0:
                break

        # Drop the unproductive tail: keep everything up to and including the
        # last step with a positive reward. If no step has a positive reward,
        # keep the whole episode.
        fin_positive = len(r) - 1
        for idx in range(len(r) - 1, -1, -1):
            if r[idx] > 0:
                fin_positive = idx
                break
        keep = fin_positive + 1

        features["observations"].append(o[:keep])
        features["actions"].append(a[:keep])
        # NOTE(review): after trimming, the last stored flag is usually False
        # (the true end of the episode was cut off) — confirm that downstream
        # consumers do not rely on a trailing True in "dones".
        features["dones"].append(d[:keep])

        r = r[:keep]
        features["rewards"].append(r)

        # Return-to-go: rtg[k] = r[k] + RTG_GAMMA * rtg[k + 1], with an implicit
        # zero after the last step. Explicit float dtype so the result never
        # depends on the Python types that happen to be in `r`.
        rtg = np.zeros(len(r), dtype=np.float64)
        running = 0.0
        for k in reversed(range(len(r))):
            running = r[k] + RTG_GAMMA * running
            rtg[k] = running
        features["rtg"].append(rtg)

        # total_reward and len(o) refer to the full rollout, before trimming.
        print(f"\nTotal reward: {total_reward} steps: {len(o)}")
finally:
    env.close()

Exception: Can't get attribute '_make_function' on <module 'cloudpickle.cloudpickle' from '/Users/jacob/.pyenv/versions/3.10.12/lib/python3.10/site-packages/cloudpickle/cloudpickle.py'>
Exception: Can't get attribute '_make_function' on <module 'cloudpickle.cloudpickle' from '/Users/jacob/.pyenv/versions/3.10.12/lib/python3.10/site-packages/cloudpickle/cloudpickle.py'>


Episode: 0 of 20:
....................................................................................................
Total reward: 22.25806451612903 episodes steps: 100
Episode: 1 of 20:
....................................................................................................
Total reward: 16.49006622516558 episodes steps: 100
Episode: 2 of 20:
....................................................................................................
Total reward: 6.835016835016857 episodes steps: 100
Episode: 3 of 20:
....................................................................................................
Total reward: 9.455252918287968 episodes steps: 100
Episode: 4 of 20:
....................................................................................................
Total reward: 23.003300330033 episodes steps: 100
Episode: 5 of 20:
....................................................................................................
Total reward: 14.73498233215

In [12]:
import utils.storage as storage

# The file name encodes the rollout settings used to generate the data;
# `dataset_file` is reused by the upload cell.
dataset_file = f'car_racing_{NUM_EPISODES}_{MAX_EPISODE_STEPS}.hdf5'

# Write the collected trajectories to the current working directory.
local_dataset_path = f"./{dataset_file}"
storage.save_to_hdf5(features, local_dataset_path)

In [8]:
# Upload the generated HDF5 file to Google Cloud Storage.
# Requires an authenticated gcloud session; the commented-out commands below
# are manual helpers for logging in and inspecting the account/buckets.
#gcloud auth login
# Select the GCP project used for the upload.
!gcloud config set project clever-393810
#!gcloud auth list
#!gcloud storage buckets list
# `{dataset_file}` is interpolated by IPython from the Python variable set in
# the save cell, so that cell must have been run first.
!gsutil cp ./{dataset_file} gs://yakiv-dt-public/datasets/{dataset_file}