# **Homework 2**

## First Iteration ##

In [None]:
import tensorflow as tf

# Report whether TensorFlow can see at least one GPU device
gpu_devices = tf.config.list_physical_devices('GPU')
print('GPU is', 'available' if gpu_devices else 'NOT AVAILABLE')

In [None]:
# Cell 1: Import Libraries
import numpy as np
import sys
import gymnasium as gym
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import (
    Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
)
from tensorflow.keras import optimizers, callbacks
from sklearn.metrics import classification_report, confusion_matrix
from gymnasium.wrappers import RecordVideo

In [None]:
# Cell 2: Data Preparation
# Root folders of the training and validation images (one sub-folder per class)
trainingset = 'train/'
validationset = 'test/'

batch_size = 64
target_size = (96, 96)  # Adjust based on your dataset

# Options shared by the training and validation directory iterators
_flow_options = dict(
    target_size=target_size,
    color_mode="rgb",
    batch_size=batch_size,
    class_mode="categorical",
)

# Training images are rescaled to [0, 1] and randomly augmented
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest',
)

# Validation images are only rescaled, never augmented
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Training batches are shuffled; validation order is kept fixed so that
# predictions can later be aligned with validation_generator.classes
train_generator = train_datagen.flow_from_directory(
    directory=trainingset, shuffle=True, **_flow_options
)
validation_generator = validation_datagen.flow_from_directory(
    directory=validationset, shuffle=False, **_flow_options
)

# Dataset facts reused by the model-building and reporting cells
input_shape = train_generator.image_shape
num_classes = train_generator.num_classes
num_samples = train_generator.n
classnames = list(train_generator.class_indices.keys())
img_h, img_w, img_channels = input_shape

print(f"Image height = {img_h}, Image Width = {img_w}, Channels = {img_channels}")
print(f"Image input shape: {input_shape}")
print(f"Classes: {classnames}")
print(f"Loaded {num_samples} training samples from {num_classes} classes.")
print(f"Loaded {validation_generator.n} validation samples from {validation_generator.num_classes} classes.")

In [None]:
# Cell X: Analyze Class Distribution
import matplotlib.pyplot as plt

# Integer label of every training sample, and the class names in index order
class_counts = train_generator.classes
class_names = list(train_generator.class_indices.keys())

# Number of training samples carrying each label index
samples_per_class = np.bincount(class_counts, minlength=len(class_names))[:len(class_names)]

fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(class_names, samples_per_class, color='skyblue')
ax.set_xlabel('Classes')
ax.set_ylabel('Number of Samples')
ax.set_title('Class Distribution in Training Set')
plt.show()

In [None]:
# Cell 12: Create Separate Generators for Minority Classes
import os

# Same image size and batch size as the main generators
target_size = (96, 96)
batch_size = 64

# Stronger augmentation than the main training generator
minority_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Class folder names (inside train_dir) that get an extra generator
minority_classes = ['0', '4']
train_dir = 'train/'

augmented_generators = []
for cls in minority_classes:
    if os.path.isdir(os.path.join(train_dir, cls)):
        # Restrict the iterator to this one class folder
        augmented_generators.append(
            minority_datagen.flow_from_directory(
                directory=train_dir,
                target_size=target_size,
                color_mode="rgb",
                batch_size=batch_size,
                class_mode="categorical",
                classes=[cls],
                shuffle=True,
            )
        )
    else:
        print(f"Directory for class {cls} not found. Skipping augmentation.")

print(f"Number of augmented generators: {len(augmented_generators)}")

In [None]:
# Cell Y: Compute Class Weights
from sklearn.utils import class_weight
import numpy as np

# 'balanced' weights are inversely proportional to each class's frequency
train_labels = train_generator.classes
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)

# Keras expects a {class_index: weight} mapping
class_weights_dict = dict(enumerate(class_weights))
print("Class Weights:", class_weights_dict)

In [None]:
# Cell 13: Combine Original and Augmented Generators
import itertools


def relabel_single_class(generator, class_index, n_classes):
    """Yield batches from a one-class generator with full-width one-hot labels.

    A directory iterator restricted to a single class reports only that one
    class, so its categorical labels do not match the width of the model's
    softmax output. This wrapper discards those labels and rebuilds them.

    Args:
        generator: iterable of ``(images, labels)`` batches from one class.
        class_index: index of that class in the full training label space.
        n_classes: total number of classes the model predicts.

    Yields:
        ``(images, labels)`` with ``labels`` of shape ``(len(images), n_classes)``.
    """
    for images, _ in generator:
        labels = np.zeros((len(images), n_classes), dtype=np.float32)
        labels[:, class_index] = 1.0
        yield images, labels


# Define a generator that yields data from the combined generator
def combined_generator(combined):
    """Yield every item produced by ``combined`` (plain pass-through)."""
    for data in combined:
        yield data


# Map each minority generator onto the label space of the main generator.
relabelled_generators = [
    relabel_single_class(
        gen,
        train_generator.class_indices[next(iter(gen.class_indices))],
        train_generator.num_classes,
    )
    for gen in augmented_generators
]

# The Keras iterators never raise StopIteration, so chaining them one after
# another would only ever draw from the first one. Interleave instead: each
# cycle yields one regular batch followed by one batch per minority class.
# NOTE(review): minority batches are single-class, and steps_per_epoch in the
# training cell is still derived from train_generator.n only, so an epoch now
# covers fewer original samples. Revisit the ratio together with class_weight.
train_combined = combined_generator(
    itertools.chain.from_iterable(zip(train_generator, *relabelled_generators))
)

In [None]:
# Cell 5: Deeper CNN with Different Activation
def DeepCNN(input_shape, num_classes):
    """Build and compile a three-block ReLU CNN classifier.

    Each block is two 3x3 'same' convolutions, a 2x2 max-pool and dropout.
    The blocks are followed by a 512-unit dense layer and a softmax output.

    Args:
        input_shape: shape of one input image, passed to the first Conv2D.
        num_classes: number of units in the softmax output layer.

    Returns:
        The compiled Sequential model (RMSprop, categorical cross-entropy).
    """
    model = Sequential(name="DeepCNN")

    # (filters, dropout rate) of the three convolutional blocks, in order
    conv_blocks = ((32, 0.3), (64, 0.3), (128, 0.4))
    for block_no, (n_filters, drop_rate) in enumerate(conv_blocks):
        # Only the very first layer declares the input shape
        first_layer_kwargs = {'input_shape': input_shape} if block_no == 0 else {}
        model.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same', **first_layer_kwargs))
        model.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(drop_rate))

    # Classifier head
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(
        optimizer=optimizers.RMSprop(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Instantiate and summarize the model
model_deep = DeepCNN(input_shape, num_classes)
model_deep.summary()

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.layers import LeakyReLU

def ImprovedDeepCNN(input_shape, num_classes):
    """Build and compile the batch-normalised LeakyReLU variant of the CNN.

    Each of the three blocks is two (Conv2D -> BatchNormalization -> LeakyReLU)
    stages, then a 2x2 max-pool and dropout. The head is global average
    pooling, an L2-regularised 512-unit dense layer with LeakyReLU, dropout
    and a softmax output.

    Args:
        input_shape: shape of one input image, passed to the first Conv2D.
        num_classes: number of units in the softmax output layer.

    Returns:
        The compiled Sequential model (Adam, categorical cross-entropy).
    """
    model = models.Sequential(name="ImprovedDeepCNN")

    # (filters, dropout rate) of the three convolutional blocks, in order
    conv_blocks = ((32, 0.3), (64, 0.3), (128, 0.4))
    for block_no, (n_filters, drop_rate) in enumerate(conv_blocks):
        for conv_no in range(2):
            # Only the very first layer declares the input shape
            is_first_layer = block_no == 0 and conv_no == 0
            extra = {'input_shape': input_shape} if is_first_layer else {}
            model.add(layers.Conv2D(n_filters, (3, 3), padding='same', **extra))
            model.add(layers.BatchNormalization())
            model.add(LeakyReLU())
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Dropout(drop_rate))

    # Classifier head
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(512, kernel_regularizer=regularizers.l2(0.001)))
    model.add(LeakyReLU())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Instantiate and summarize the improved model
model_improved = ImprovedDeepCNN(input_shape, num_classes)
model_improved.summary()

In [None]:
# Cell 7: Select Model
# `model` is the network trained by the training cell; that cell also picks the
# output filename depending on whether `model` is `model_deep` or not.
model = model_improved  # Set to model_deep to train the plain ReLU CNN instead

In [None]:
# Cell 8: Define Callbacks
# Stop once val_loss has not improved for 10 epochs and roll back to the best weights
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.2 after 3 stagnant epochs, never going below 1e-6
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.2,
    min_lr=1e-6,
    verbose=1,
)

# Keep on disk only the epoch with the highest validation accuracy
checkpoint = callbacks.ModelCheckpoint(
    'models/best_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

In [None]:
# Cell 9: Train the Model
import os

# Batches needed to cover each dataset once (the last batch may be partial)
steps_per_epoch = int(np.ceil(train_generator.n / batch_size))
val_steps = int(np.ceil(validation_generator.n / batch_size))

# model.save() below runs even when training is interrupted before any
# checkpoint has been written, so make sure the output folder exists.
os.makedirs('models', exist_ok=True)

try:
    history = model.fit(
        train_combined,
        epochs=100,
        steps_per_epoch=steps_per_epoch,
        validation_data=validation_generator,
        validation_steps=val_steps,
        callbacks=[early_stopping, reduce_lr, checkpoint],
        class_weight=class_weights_dict,
        verbose=1
    )
except KeyboardInterrupt:
    # Deliberate: a manual stop still falls through to saving the current weights
    print("Training interrupted.")

# Save under a name that records which architecture was trained.
# Identity test: we want to know whether `model` IS the deep model object.
if model is model_deep:
    model.save('models/final_model_deep.keras')
else:
    model.save('models/final_model_improved.keras')

In [None]:
from tensorflow.keras.layers import Input, Average
from tensorflow.keras.models import Model
from tensorflow.keras import optimizers
from tensorflow.keras.models import load_model

# Build an averaging ensemble from the two trained networks.
# Both files must already exist: the training cell saves only one of them per
# run (depending on the selected `model`), so each architecture has to have
# been trained and saved once before this cell can run.
model_deep = load_model('models/final_model_deep.keras')
model_improved = load_model('models/final_model_improved.keras')

input_shape_deep = model_deep.input_shape[1:]  # Exclude batch size
input_shape_improved = model_improved.input_shape[1:]

# The ensemble feeds one shared input to both networks
assert input_shape_deep == input_shape_improved, "Input shapes of both models must be the same."

# Create an input layer that matches the input shape
input_layer = Input(shape=input_shape_deep)

# Get predictions from both models
preds_deep = model_deep(input_layer)
preds_improved = model_improved(input_layer)

# Average the outputs
averaged_preds = Average()([preds_deep, preds_improved])

# Create the combined model
combined_model = Model(inputs=input_layer, outputs=averaged_preds)

# Compile the combined model
# (no fit() is called on it in this cell; compiling supplies the loss and
# metric that evaluate() reports below)
combined_model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=0.0001),
    metrics=['accuracy']
)

# Save the combined model
combined_model.save('models/combined_model.keras')

# Cell 11: Evaluate the Model
# Load the combined model
# (reloading from disk checks that the saved ensemble can be restored)
combined_model = load_model('models/combined_model.keras')

# Evaluate the model on the validation set
val_loss, val_acc = combined_model.evaluate(validation_generator, verbose=1)
print(f"Validation Accuracy: {val_acc:.4f}")

In [None]:
# Cell 10: Evaluate the Model
# `best_model` is the saved averaging ensemble; later cells reuse it
best_model = load_model('models/combined_model.keras')

# One full pass over the validation set (ceiling division covers a partial batch)
val_steps = -(-validation_generator.n // batch_size)
loss, acc = best_model.evaluate(validation_generator, steps=val_steps, verbose=1)
print(f'Loss: {loss:.4f}\nAccuracy: {acc:.4f}')

In [None]:
# Cell 11: Classification Report
# Class probabilities for every validation image
preds = best_model.predict(validation_generator, steps=val_steps, verbose=0)

# Ground truth in generator order (requires shuffle=False on validation_generator)
Ytest = validation_generator.classes
# Predicted label = index of the highest probability
Ypred = preds.argmax(axis=1)

report = classification_report(Ytest, Ypred, target_names=classnames, digits=3)
print(report)

In [None]:
# Cell 12: Confusion Matrix
# Rows are true classes, columns are predicted classes
cm = confusion_matrix(Ytest, Ypred)

import matplotlib.pyplot as plt
import seaborn as sns

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=classnames,
    yticklabels=classnames,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# Cell 13: Text-Based Confusion Matrix
# Collect every off-diagonal cell (true class != predicted class) that has
# at least one sample, as [true_index, predicted_index, count].
conf = [
    [i, j, cm[i][j]]
    for i in range(cm.shape[0])
    for j in range(cm.shape[1])
    if i != j and cm[i][j] > 0
]

# reshape(-1, 3) keeps the array two-dimensional even when the classifier made
# no mistakes; without it the column indexing below fails on an empty list.
conf = np.array(conf, dtype=int).reshape(-1, 3)
conf = conf[np.argsort(-conf[:, 2])]  # Sort by descending error count

print(f'{"True":<16} {"Predicted":<16} {"Errors":<10} {"Error %":<10}')
print('-' * 60)
if conf.size == 0:
    print('No misclassifications.')
for k in conf:
    true_class = classnames[int(k[0])]
    pred_class = classnames[int(k[1])]
    errors = int(k[2])
    # Percentage is relative to the whole validation set, not to the true class
    error_pct = (errors / validation_generator.n) * 100
    print(f'{true_class:<16} -> {pred_class:<16} {errors:<10} {error_pct:.2f}%')

In [None]:
# Cell 9: Model Deployment with Gymnasium (Final Revised for Continuous Actions)
import numpy as np
from gymnasium.wrappers import RecordVideo

def play(env, model, predefined_actions):
    """Drive one episode of ``env`` with the image classifier ``model``.

    The environment is reset with a fixed seed and stepped 50 times with the
    no-op action. After that, every observation is preprocessed and
    classified, and the predicted class index is translated to an action
    through ``predefined_actions``. The loop ends when the environment
    reports termination or truncation. The environment is always closed,
    even if a step or a prediction raises, so wrappers can release resources.

    Args:
        env: Gymnasium environment (already wrapped as desired).
        model: Keras model mapping a batch of images to class probabilities.
        predefined_actions: dict from class index to action array; key 0 is
            used both as the no-op action and as the fallback.
    """
    seed = 2000
    try:
        obs, _ = env.reset(seed=seed)

        # Drop initial frames with no action
        no_action = predefined_actions[0]  # [0.0, 0.0, 0.0]
        for _ in range(50):
            obs, _, _, _, _ = env.step(no_action)

        done = False
        while not done:
            # Preprocess the observation (target_size comes from the data cell)
            img = preprocess_observation(obs, target_size)
            # verbose=0: one progress bar per frame would flood the output
            p = model.predict(np.expand_dims(img, axis=0), verbose=0)
            predicted_class = int(np.argmax(p))

            # Map the predicted class to a predefined action (fallback: no-op)
            action = predefined_actions.get(predicted_class, predefined_actions[0])

            # Ensure the action is a float32 NumPy array
            action = np.array(action, dtype=np.float32)

            # Step the environment with the action
            obs, _, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
    finally:
        env.close()

def preprocess_observation(obs, target_size):
    """Convert an environment frame into the classifier's input format.

    Args:
        obs: image array of shape (height, width, channels).
            NOTE(review): assumed to hold 0-255 pixel values - confirm.
        target_size: ``(height, width)`` expected by the model, the same
            convention used for the data generators.

    Returns:
        Float array of shape ``(height, width, channels)`` scaled by 1/255.
    """
    from tensorflow.keras.preprocessing.image import img_to_array, array_to_img

    # scale=False keeps the raw pixel values. The default would stretch each
    # frame to span the full 0-255 range, which the training generators
    # (plain 1/255 rescale) never did.
    img = array_to_img(obs, scale=False)
    # PIL expects (width, height) while target_size is (height, width)
    img = img.resize((target_size[1], target_size[0]))
    # Convert to array and normalize
    img = img_to_array(img) / 255.0
    return img

# Define predefined actions (Continuous)
# The list position is the class index predicted by the classifier
_action_vectors = [
    [0.0, 0.0, 0.0],   # No Action
    [-1.0, 0.0, 0.0],  # Steer Left
    [1.0, 0.0, 0.0],   # Steer Right
    [0.0, 1.0, 0.0],   # Accelerate (Gas)
    [0.0, 0.0, 1.0],   # Brake
    # Add more actions as needed
]
predefined_actions = {
    class_index: np.array(vector, dtype=np.float32)
    for class_index, vector in enumerate(_action_vectors)
}

# Initialize the environment without 'continuous' parameter
env_name = 'CarRacing-v3'
env_arguments = {
    'domain_randomize': False,
    'render_mode': 'rgb_array'
}
env = gym.make(env_name, **env_arguments)

# Wrap the environment to record videos into video_dir
video_dir = 'video_recordings'
env = RecordVideo(env, video_dir)

print("Environment:", env_name)
print("Action space:", env.action_space)
print("Observation space:", env.observation_space)

# Play the game using the trained model
play(env, best_model, predefined_actions)
#play(env, best_model_regression, predefined_actions)

# (Optional Approach) Reinforcement Learning Model

In [None]:
import os
import json
import random
import cv2
import numpy as np
import gymnasium as gym
import random
import datetime
import matplotlib.pyplot as plt
from collections import deque

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, losses

# Set channels_first to match the PyTorch-like input (batch, channels, height, width)
# Usually TensorFlow defaults to channels_last, but we can specify data_format per layer.
# Another approach is to rearrange inputs to channels_last before passing to the model.
# The DQN layers below also pass data_format='channels_first' explicitly.
# NOTE(review): this call changes the Keras default for the rest of the session, so
# layers created afterwards without an explicit data_format (e.g. the Flatten layer in
# DQN, or the CNN builders of the first part if their cells are re-run) pick it up too.
tf.keras.backend.set_image_data_format('channels_first')


############################################
# UTIL FUNCTIONS
############################################

def write_json_to_file(data, file_path):
    """Serialise ``data`` as indented JSON into ``file_path``.

    Failures (unwritable path, non-serialisable data, ...) are reported on
    stdout instead of being raised; the function always returns None.
    """
    try:
        with open(file_path, 'w') as json_file:
            json.dump(data, json_file, indent=4)
    except Exception as e:
        print(f"Error writing JSON data to {file_path}: {e}")
    else:
        print(f"JSON data successfully written to {file_path}")


def read_json_from_file(file_path):
    """Load and return the JSON document stored at ``file_path``.

    Returns None, after printing the error, when the file cannot be opened
    or parsed.
    """
    try:
        with open(file_path, 'r') as json_file:
            data = json.load(json_file)
    except Exception as e:
        print(f"Error reading JSON data from {file_path}: {e}")
        return None
    print(f"JSON data successfully read from {file_path}")
    return data


def make_all_paths(is_dynamic_root=True, dir_name="rl_class"):
    """Create the output folders of one run and return their common root.

    Args:
        is_dynamic_root: when True, a ``_YYYYmmdd_HHMMSS`` timestamp is
            appended to ``dir_name`` so each run gets its own folder.
        dir_name: base name of the run folder under ``reinforcement_learning``.

    Returns:
        The run root as a string ending with "/"; it contains the
        sub-folders ``models``, ``plots`` and ``videos``.
    """
    ROOT = "reinforcement_learning"
    if is_dynamic_root:
        stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        dir_name = f"{dir_name}_{stamp}"

    path_root = f"{ROOT}/{dir_name}/"
    for sub_dir in ("models", "plots", "videos"):
        path = path_root + sub_dir
        if not os.path.exists(path):
            os.makedirs(path)
        # Printed for every sub-folder, whether or not it already existed
        print(">> Created dir", path)
    return path_root


def plot_state_car(data, title=None):
    """Show the frames of a stacked state side by side in grayscale.

    Args:
        data: 3-D array indexed as ``data[frame]``; each ``data[frame]`` is
            drawn as one image.
        title: optional title shown above the whole figure.
    """
    assert len(data.shape) == 3, "Can only handle 3D mats."
    n_frames = data.shape[0]
    # squeeze=False always returns a 2-D array of axes, so a single-frame
    # state works too (otherwise a bare Axes is returned and indexing fails).
    fig, axs = plt.subplots(1, n_frames, figsize=(10, 4), squeeze=False)
    for ax, frame in zip(axs[0], data):
        ax.imshow(frame, cmap='gray')
        ax.axis('off')
    if title is not None:
        # Figure-level title; plt.title would label only the last panel
        fig.suptitle(title)
    plt.show()


def plot_frame_car(data, title=None):
    """Display a single frame in grayscale, without axes, under ``title``."""
    ax = plt.gca()
    ax.imshow(data, cmap="gray")
    ax.axis('off')
    ax.set_title(title)
    plt.show()


def preprocess_frame_car(frame):
    """Turn an RGB environment frame into a 96x96 grayscale image.

    Args:
        frame: RGB image array as returned by the environment.

    Returns:
        2-D grayscale array resized to 96x96.
    """
    # The frame is RGB, so use the RGB conversion; the BGR one would apply the
    # red and blue luminance weights to the wrong channels.
    # NOTE(review): checkpoints trained with the previous BGR conversion saw
    # slightly different gray levels - retrain or confirm before reusing them.
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    frame = cv2.resize(frame, (96, 96))
    return frame


def seed_everything(seed=42):
    """Seed Python's, NumPy's and TensorFlow's random generators with ``seed``."""
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)


############################################
# CONFIG
############################################

class Config:
    # Hyper-parameters of the DQN car-racing experiment.
    # train_car_racing() dumps every attribute of this class to config.json.

    # Drawn once, when the class body is executed (i.e. before seed_everything
    # is called), so it differs between kernel sessions; the value used for a
    # run is recorded in that run's config.json.
    SEED = random.randint(0, 1000)

    # Training episodes run from STARTING to ENDING inclusive
    STARTING_EPISODE_TRAIN = 0
    ENDING_EPISODE_TRAIN = STARTING_EPISODE_TRAIN + 30

    # Test episode range; not referenced by the code visible in this notebook
    STARTING_EPISODE_TEST = ENDING_EPISODE_TRAIN + 1
    ENDING_EPISODE_TEST = STARTING_EPISODE_TEST + 30

    # Number of consecutive environment steps that repeat the chosen action
    SKIP_FRAMES = 2
    # Transitions sampled from the replay memory per learning step
    TRAINING_BATCH_SIZE = 32
    # Every this many episodes: sync the target network, save a checkpoint,
    # write the reward plot and record a video
    UPDATE_TARGET_MODEL_FREQUENCY = 5
    # Grayscale frames stacked to form one state
    N_FRAMES = 3
    # Units of the hidden fully connected layer of the DQN
    HIDDEN_DIMENSION_FC = 150

    # Reward multiplier applied when the action has gas == 1 and brake == 0
    GAS_WEIGHT = 1.3

    # Discrete action set; each tuple is (steering, gas, brake)
    ACTION_SPACE = [
        (-1, 1, 0.2), (0, 1, 0.2), (1, 1, 0.2),
        (-1, 1, 0), (0, 1, 0), (1, 1, 0),
        (-1, 0, 0.2), (0, 0, 0.2), (1, 0, 0.2),
        (-1, 0, 0), (0, 0, 0), (1, 0, 0)
    ]


############################################
# DQN MODEL IN TENSORFLOW
############################################

class DQN(tf.keras.Model):
    """Small convolutional Q-network.

    Two (Conv2D -> MaxPooling2D) stages working on channels-first input,
    followed by a hidden dense layer and a linear layer with one output per
    action. The agent feeds it tensors shaped (batch, n_frames, 96, 96).
    """

    def __init__(self, n_frames, n_actions, h_dimension):
        """Create the layers.

        Args:
            n_frames: number of stacked frames per state; not used here, since
                the layers infer their input shape on the first call.
            n_actions: size of the output layer (one Q-value per action).
            h_dimension: units of the hidden dense layer.
        """
        super().__init__()
        channels_first = {'data_format': 'channels_first'}

        self.conv1 = layers.Conv2D(6, kernel_size=(7, 7), strides=(3, 3), activation='relu', **channels_first)
        self.pool1 = layers.MaxPooling2D(pool_size=(2, 2), **channels_first)
        self.conv2 = layers.Conv2D(12, kernel_size=(4, 4), activation='relu', **channels_first)
        self.pool2 = layers.MaxPooling2D(pool_size=(2, 2), **channels_first)
        self.flatten = layers.Flatten()
        self.fc1 = layers.Dense(h_dimension, activation='relu')
        self.fc2 = layers.Dense(n_actions)

    def call(self, x):
        """Return the Q-values, shape (batch, n_actions), for a batch of states."""
        pipeline = (
            self.conv1, self.pool1,
            self.conv2, self.pool2,
            self.flatten, self.fc1, self.fc2,
        )
        for stage in pipeline:
            x = stage(x)
        return x  # (BS, ACTIONS)


############################################
# DQN AGENT
############################################

class DQNAgent:
    """Epsilon-greedy DQN agent with a replay memory and a target network."""

    def __init__(self,
                 action_space,
                 epsilon=1.0,
                 gamma=0.95,
                 epsilon_min=0.1,
                 epsilon_decay=0.9999,
                 lr=1e-3,
                 memory_len=5000,
                 frames=3,
                 hidden_dimension=None):
        """Create the online/target networks, the optimizer and the memory.

        Args:
            action_space: list of discrete actions; the networks output one
                Q-value per entry.
            epsilon: initial exploration probability.
            gamma: discount factor of the Bellman target.
            epsilon_min: epsilon stops decaying once it is at or below this.
            epsilon_decay: factor applied to epsilon after each replay step.
            lr: Adam learning rate.
            memory_len: maximum number of stored transitions.
            frames: number of stacked frames per state.
            hidden_dimension: units of the networks' hidden dense layer.
        """
        self.epsilon = epsilon
        self.gamma = gamma
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.memory_len = memory_len
        self.lr = lr
        self.memory = deque(maxlen=self.memory_len)
        self.action_space = action_space

        self.n_actions = len(self.action_space)
        self.target_model = DQN(frames, self.n_actions, hidden_dimension)
        self.model = DQN(frames, self.n_actions, hidden_dimension)

        # Initialize by calling model once with dummy input
        dummy_state = tf.random.uniform((1, frames, 96, 96), dtype=tf.float32)
        self.model(dummy_state)
        self.target_model(dummy_state)
        # Both networks were initialised independently; start them from the
        # same weights so the first targets are consistent with the online net.
        self.update_target_model()

        self.optimizer = optimizers.Adam(learning_rate=self.lr)
        self.loss_fn = losses.MeanSquaredError()

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def is_explore(self):
        """Return True with probability ``epsilon``."""
        return np.random.rand() <= self.epsilon

    def act(self, state, is_only_random=False, is_only_exploit=False):
        """Choose an action for ``state``.

        Args:
            state: tensor of shape (1, frames, 96, 96).
            is_only_random: always pick a uniformly random action.
            is_only_exploit: never explore; take the greedy action.

        Returns:
            The selected entry of ``action_space``.
        """
        if (not is_only_exploit and self.is_explore()) or is_only_random:
            action_index = np.random.randint(self.n_actions)
        else:
            # NOTE(review): the greedy action is taken from the target network,
            # which only changes on update_target_model(); DQN usually acts
            # with the online network - confirm this is intended.
            q_values = self.target_model(state, training=False)[0]
            action_index = tf.argmax(q_values).numpy()
        return self.action_space[action_index]

    def memorize(self, state, action, reward, next_state, done):
        """Store one transition; the action is saved as its index in ``action_space``."""
        action_index = self.action_space.index(action)
        self.memory.append((state, action_index, reward, next_state, done))

    def replay(self, batch_size):
        """Run one gradient step on a random minibatch and decay epsilon.

        Does nothing when fewer than ``batch_size`` transitions are stored.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)

        # Stored states are (1, frames, h, w); [0] drops the leading axis
        states = np.stack([state[0].numpy() for state, _, _, _, _ in minibatch])
        next_states = np.stack([n_s[0].numpy() for _, _, _, n_s, _ in minibatch])
        actions = [a_idx for _, a_idx, _, _, _ in minibatch]
        rewards = [r for _, _, r, _, _ in minibatch]
        dones = [d for _, _, _, _, d in minibatch]

        states = tf.convert_to_tensor(states, dtype=tf.float32)
        next_states = tf.convert_to_tensor(next_states, dtype=tf.float32)
        states = tf.reshape(states, (batch_size, -1, 96, 96))
        next_states = tf.reshape(next_states, (batch_size, -1, 96, 96))
        actions = tf.convert_to_tensor(actions, dtype=tf.int32)
        rewards = tf.convert_to_tensor(rewards, dtype=tf.float32)
        dones = tf.convert_to_tensor(dones, dtype=tf.float32)

        # Bellman target from the target network:
        #   done     -> reward
        #   not done -> reward + gamma * max_a' Q_target(s', a')
        q_next = self.target_model(next_states, training=False)
        max_q_next = tf.reduce_max(q_next, axis=1)
        target_values = rewards + (1.0 - dones) * self.gamma * max_q_next

        # Only the Q-value of the action actually taken enters the loss
        action_one_hot = tf.one_hot(actions, self.n_actions, dtype=tf.float32)
        with tf.GradientTape() as tape:
            q_values_pred = self.model(states, training=True)
            q_selected_pred = tf.reduce_sum(q_values_pred * action_one_hot, axis=1)
            loss = self.loss_fn(target_values, q_selected_pred)
        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load_model(self, name):
        """Replace both networks with the model stored at ``name``."""
        self.target_model = tf.keras.models.load_model(name)
        self.model = tf.keras.models.load_model(name)

    def save_model(self, name):
        """Save the target network to ``name`` in TensorFlow SavedModel format."""
        self.target_model.save(name, save_format="tf")


############################################
# MAIN TRAINING LOOP
############################################

def train_car_racing():
    """Train the DQN agent on CarRacing-v3.

    Creates a fresh run folder and writes the configuration there, then runs
    the training episodes configured in ``Config``. Every
    ``Config.UPDATE_TARGET_MODEL_FREQUENCY`` episodes it syncs the target
    network and writes a reward plot, a checkpoint and the reward history.
    The environment is closed even if training is interrupted.
    """
    seed_everything(seed=Config.SEED)
    PATH_ROOT = make_all_paths(is_dynamic_root=True)
    util_config = {k: v for k, v in Config.__dict__.items() if not k.startswith('__')}
    write_json_to_file(util_config, file_path=PATH_ROOT + "config.json")

    device = "GPU" if len(tf.config.list_physical_devices('GPU')) > 0 else "CPU"
    print('>> Using device:', device)

    agent = DQNAgent(frames=Config.N_FRAMES, action_space=Config.ACTION_SPACE,
                     hidden_dimension=Config.HIDDEN_DIMENSION_FC)

    env = gym.make('CarRacing-v3', render_mode="rgb_array")
    from gymnasium.wrappers import RecordVideo
    env = RecordVideo(env, os.path.join(PATH_ROOT, 'videos'),
                      episode_trigger=lambda x: x % Config.UPDATE_TARGET_MODEL_FREQUENCY == 0)

    epi_total_rewards = []
    try:
        for e in range(Config.STARTING_EPISODE_TRAIN, Config.ENDING_EPISODE_TRAIN + 1):
            env.episode_id = e
            epi_total_reward = 0
            epi_negative_reward_counter = 0
            epi_time_frame_counter = 1

            init_state = env.reset(seed=e)[0]
            init_state = preprocess_frame_car(init_state)

            # State queue [S0, S0, S0] initially
            state_queue = deque([init_state] * Config.N_FRAMES, maxlen=Config.N_FRAMES)

            while True:
                state_array = np.array(state_queue)  # shape (frames, 96, 96)
                state_tensor = tf.convert_to_tensor(state_array, dtype=tf.float32)[tf.newaxis, ...]

                action = agent.act(state_tensor)
                # The environment gets a float32 array; the tuple is kept for
                # the replay memory, which looks it up in the action space.
                env_action = np.asarray(action, dtype=np.float32)

                # Repeat the action for SKIP_FRAMES steps, summing the rewards
                reward = 0
                terminated = truncated = False
                for _ in range(Config.SKIP_FRAMES):
                    next_state, r, terminated, truncated, _ = env.step(env_action)
                    reward += r
                    if terminated or truncated:
                        break
                episode_over = terminated or truncated

                # After a 100-step grace period, count consecutive negative-reward steps
                if epi_time_frame_counter > 100 and reward < 0:
                    epi_negative_reward_counter += 1
                else:
                    epi_negative_reward_counter = 0

                # Extra reward for accelerating without braking
                if action[1] == 1 and action[2] == 0:
                    reward *= Config.GAS_WEIGHT

                epi_total_reward += reward

                next_state = preprocess_frame_car(next_state)
                next_state_queue = deque([frame for frame in state_queue], maxlen=Config.N_FRAMES)
                next_state_queue.append(next_state)

                next_state_array = np.array(next_state_queue)
                next_state_tensor = tf.convert_to_tensor(next_state_array, dtype=tf.float32)[tf.newaxis, ...]

                # Memorize. Only a true termination zeroes the bootstrap term;
                # a time-limit truncation is not a terminal state.
                agent.memorize(state_tensor, action, reward, next_state_tensor, terminated)

                state_queue = next_state_queue

                # Stop when the environment finished the episode, or early when
                # the car keeps losing reward or the total reward is negative.
                if episode_over or epi_negative_reward_counter >= 25 or epi_total_reward < 0:
                    break

                if len(agent.memory) > Config.TRAINING_BATCH_SIZE:
                    agent.replay(Config.TRAINING_BATCH_SIZE)

                epi_time_frame_counter += 1

            epi_total_rewards.append(epi_total_reward)
            stats_string = 'Episode: {}/{}, Scores(Time Frames): {}, Total Rewards: {:.2f}, Epsilon: {:.2f}'
            print(stats_string.format(
                e,
                Config.ENDING_EPISODE_TRAIN,
                epi_time_frame_counter,
                float(epi_total_reward),
                float(agent.epsilon))
            )

            if e % Config.UPDATE_TARGET_MODEL_FREQUENCY == 0:
                plt.figure()
                plt.plot(epi_total_rewards, label="cum rew", color="blue")
                plt.title("Rewards during training episodes")
                plt.savefig(os.path.join(PATH_ROOT, 'plots', f'reward_{e}.pdf'))
                plt.close()

                # Sync first: save_model() stores the target network, so this
                # makes the checkpoint hold the weights learned so far.
                agent.update_target_model()
                # Save the model in SavedModel format (no .h5 extension)
                agent.save_model(os.path.join(PATH_ROOT, "models", f"model_{e}"))
                write_json_to_file({"CUM_REW": epi_total_rewards}, os.path.join(PATH_ROOT, "stats.json"))
    finally:
        # Also runs on KeyboardInterrupt
        env.close()


############################################
# TEST FUNCTION
############################################

def test_car_racing(model_to_load):
    """Run the saved agent greedily in a rendered CarRacing-v3 window.

    Args:
        model_to_load: path of a checkpoint written by ``DQNAgent.save_model``.

    An episode stops when the environment terminates or truncates it, or
    after 100 consecutive decision steps without positive reward.
    """
    device = "GPU" if len(tf.config.list_physical_devices('GPU')) > 0 else "CPU"
    print('>> Using device:', device)

    # Same environment version as train_car_racing()
    env = gym.make('CarRacing-v3', render_mode="human")
    seed_everything(seed=Config.SEED)

    agent = DQNAgent(frames=Config.N_FRAMES, action_space=Config.ACTION_SPACE,
                     hidden_dimension=Config.HIDDEN_DIMENSION_FC)
    agent.load_model(model_to_load)

    PICKED_EPISODES = [1]
    try:
        for e in PICKED_EPISODES:
            init_state = env.reset(seed=e)[0]
            init_state = preprocess_frame_car(init_state)

            state_queue = deque([init_state] * Config.N_FRAMES, maxlen=Config.N_FRAMES)
            epi_n_neg_rew = 0

            while True:
                state_array = np.array(state_queue)
                state_tensor = tf.convert_to_tensor(state_array, dtype=tf.float32)[tf.newaxis, ...]
                action = agent.act(state_tensor, is_only_exploit=True)
                env_action = np.asarray(action, dtype=np.float32)

                # Repeat the action for SKIP_FRAMES steps, summing the rewards
                reward = 0
                epi_done = False
                for _ in range(Config.SKIP_FRAMES):
                    next_state, r, terminated, truncated, _ = env.step(env_action)
                    reward += r
                    epi_done = terminated or truncated
                    if epi_done:
                        break

                epi_n_neg_rew = epi_n_neg_rew + 1 if reward <= 0 else 0
                # Do not keep stepping an environment that has finished
                if epi_done or epi_n_neg_rew >= 100:
                    break

                next_state = preprocess_frame_car(next_state)
                next_state_queue = deque([frame for frame in state_queue], maxlen=Config.N_FRAMES)
                next_state_queue.append(next_state)
                state_queue = next_state_queue
    finally:
        env.close()

print("Everything loaded!")

Everything loaded!


In [5]:
############################################
# MAIN EXECUTION
############################################

# To train (creates a new timestamped run folder under reinforcement_learning/):
train_car_racing()
# To test, pass the path of a checkpoint written during training, e.g.:
# test_car_racing(model_to_load="reinforcement_learning/<run_dir>/models/model_<episode>")

>> Created dir reinforcement_learning/rl_class_20241210_114018/models
>> Created dir reinforcement_learning/rl_class_20241210_114018/plots
>> Created dir reinforcement_learning/rl_class_20241210_114018/videos
JSON data successfully written to reinforcement_learning/rl_class_20241210_114018/config.json
>> Using device: GPU


  logger.warn(


Episode: 0/100, Scores(Time Frames): 133, Total Rewards: 12.44, Epsilon: 0.99




INFO:tensorflow:Assets written to: reinforcement_learning/rl_class_20241210_114018/models\model_0\assets


INFO:tensorflow:Assets written to: reinforcement_learning/rl_class_20241210_114018/models\model_0\assets


JSON data successfully written to reinforcement_learning/rl_class_20241210_114018/stats.json
Episode: 1/100, Scores(Time Frames): 184, Total Rewards: 18.99, Epsilon: 0.97
Episode: 2/100, Scores(Time Frames): 100, Total Rewards: -0.01, Epsilon: 0.96


KeyboardInterrupt: 