In [4]:
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CheckpointCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.policies import BaseFeaturesExtractor
from stable_baselines3.common.callbacks import CallbackList

import torchvision.models as models
from torchvision import transforms

import numpy as np
import torch
import cv2
import time
import os
import torch.nn as nn
import matplotlib.pyplot as plt

import mss
import pygetwindow as gw
import easyocr as ocr

import pydirectinput
import vgamepad as vg

import psutil

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
class Reader:
    def __init__(self):
        self.reader = ocr.Reader(['en'], model_storage_directory='./ocr_model')

    def read(self, img):
        speed_text = self.reader.readtext(img, allowlist='0123456789')
        return speed_text

reader = Reader()

  net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
  model.load_state_dict(torch.load(model_path, map_location=device))


In [2]:
# dimensions of window to get x, y, w, h 
speed_rect = [1, 12, 45, 30]
done_rect = [130, 40, 50, 60]

# get pixel values of window
def get_screen():
    window = gw.getWindowsWithTitle('Trackmania')[0]
    border_pixels = [40, 12, 40, 10] # top, left, right, bottom
    # get picture of screen
    with mss.mss() as sct:
        monitor = {"top": window.top + border_pixels[0], 
                   "left": window.left + border_pixels[1], 
                   "width": window.width - border_pixels[1] - border_pixels[2], 
                   "height": window.height - border_pixels[0] - border_pixels[3]}
        img = np.array(sct.grab(monitor))
    return img

# get the current speed and check region of screen for finish
def get_speed_done(img): 
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    speed = img[speed_rect[1]:speed_rect[1]+speed_rect[3], speed_rect[0]:speed_rect[0]+speed_rect[2]]
    speed_text = reader.read(speed)
    done = img[done_rect[1]:done_rect[1]+done_rect[3], done_rect[0]:done_rect[0]+done_rect[2]]
    done_text = reader.read(done)
    if speed_text:
        speed = speed_text[0][1]
    else:
        speed = 0
    done = bool(done_text)
    return speed, done      # int?, bool

# convert to grayscale, cut off extra not need for driving


# 84x84x1 for customcnn
#224x244 for resnet 18
def process_screen_pov(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height, width = img.shape
    img = img[height//2: height-50, :]
    img = cv2.GaussianBlur(img, (5, 5), 0)    
    img = cv2.Canny(img, 100, 150)
    # canny edge detection
    img = cv2.resize(img, (84, 84))
    img = img / 255.0               # [0, 1]
    return img.astype(np.float32) # (1, 84, 84)

def process_screen_3rd(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height, width = img.shape
    img = img[height//3: height-70, :]
    #img = cv2.GaussianBlur(img, (5, 5), 0)    
    #img = cv2.Canny(img, 100, 150)
    img = cv2.resize(img, (128, 128))     # (84, 84) for CNN, 
    img = img / 255.0                   # normalise to [0, 1]

    return img                      # (1, 84, 84)


# further preprocessing
# normalise to [0, 1]
# resize to 84x84



In [5]:
# test screen processing

while True:
    img = get_screen()
    img = process_screen_3rd(img)
    cv2.imshow('screen', img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

: 

In [16]:
class CNN(BaseFeaturesExtractor):
    def __init__(self, observation_space: spaces.Box, features_dim: int = 512):
        super(CNN, self).__init__(observation_space, features_dim)

        # add channels dimension
        n_input_channels = observation_space.shape[0]

        self.conv1 = nn.Conv2d(n_input_channels, 32, kernel_size=5, stride=4)
        self.batch_norm1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2)
        self.batch_norm2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2)
        self.batch_norm3 = nn.BatchNorm2d(128)
        self.relu = nn.LeakyReLU()
        self.flatten = nn.Flatten()
        self.linear = nn.Sequential(
            nn.Linear(self._get_conv_output_dim(observation_space), features_dim),
            nn.ReLU()
        )

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        x = self.relu(self.conv1(observations))
        self.feature_map1 = x  # Store feature map after conv1
        x = self.relu(self.conv2(x))
        self.feature_map2 = x  # Store feature map after conv2
        x = self.relu(self.conv3(x))
        self.feature_map3 = x  # Store feature map after conv3
        x = self.flatten(x)
        x = self.linear(x)
        return x

    def _get_conv_output_dim(self, observation_space):
        with torch.no_grad():
            sample_input = torch.zeros(1, *observation_space.shape)
            x = self.relu(self.conv1(sample_input))
            x = self.relu(self.conv2(x))
            x = self.relu(self.conv3(x))
            output_dim = x.numel()
        return output_dim


def visualise_feature_maps(model):
    cnn_feature_extractor = model.policy.actor.features_extractor
    cnn_feature_extractor.to(device)

    image = get_screen()
    image = process_screen_3rd(image)
    image = np.expand_dims(image, axis=0)
    image = torch.tensor(image, dtype=torch.float32).unsqueeze(0).to(device)

    with torch.no_grad():
        _ = cnn_feature_extractor(image)
    
    feature_maps = [
        cnn_feature_extractor.feature_map1,
        cnn_feature_extractor.feature_map2,
        cnn_feature_extractor.feature_map3
    ]
    
    for idx, feature_map in enumerate(feature_maps, start=1):
        feature_map = feature_map.squeeze(0)  # Remove batch dimension
        num_features = feature_map.shape[0]
        plt.figure(figsize=(15, 15))
        for i in range(min(num_features, 64)):  # Limit to 64 feature maps for display
            plt.subplot(8, 8, i+1)
            plt.imshow(feature_map[i].cpu().numpy(), cmap='gray')
            plt.axis('off')
        plt.suptitle(f'Feature Map {idx}')
        plt.show()


#Grad-CAM -> show useful regions of the image



In [17]:
# -----------------------------------controller
gamepad = vg.VX360Gamepad()

# 1,3 tensor
def send_action(action):
    steering, brake, throttle = action
    gamepad.left_joystick_float(x_value_float=steering, y_value_float=0.0) # left/right
    gamepad.left_trigger_float(value_float=brake) # brake
    gamepad.right_trigger_float(value_float=throttle)
    gamepad.update()

def reset_key():
    pydirectinput.press('enter')



# create environment
class TrackmaniaEnv(gym.Env):
    def __init__(self):
        super(TrackmaniaEnv, self).__init__()
        self.observation_space = spaces.Box(low=0, high=1, shape=(1, 128, 128), dtype=np.float32)
        # obs need to be in channel first format
        self.action_space = spaces.Box(
            low=np.array([-1.0, 0.0, 0.0]), # Steering angle from -1 (left) to 1 (right) throttle from 0 to 1, braking 0 to 1
            high=np.array([1.0, 1.0, 1.0]), 
            dtype=np.float32
        )

        self.done = False           # ep done (crash or finish)
        self.crash = False          # crash -> done
        self.reward = 0             
        
        self.start_time = None
        self.max_episode_length = 10000000
        self.view = '3rd'

        # Episode tracking variables
        self.total_reward = 0
        self.episode_length = 0

        # bring to front
        #window = gw.getWindowsWithTitle('Trackmania')[0]
        #window.activate()
        time.sleep(2)

    def reset(self, seed=None):
        super().reset(seed=seed)
        self.done = False
        self.reward = 0
        self.total_reward = 0  # Reset total reward
        self.episode_length = 0  # Reset episode length
        reset_key()
        time.sleep(1.5)  # Wait for green light
        self.start_time = time.time()
        obs, _, _ = self._get_observation()
        info = {}
        return obs, info

    def step(self, action):
        # send action
        send_action(action)

        # get new obs
        obs, speed, done_check = self._get_observation()
        self.reward = self._calculate_reward(speed, done_check) # Calculate reward
        self.total_reward += self.reward  # Update total reward
        self.episode_length += 1  # Update episode length
        self.done = self._check_done(speed, done_check)
        self.truncated = self._check_truncated(speed)
        
        # Include episode information when done
        info = {"truncated": self.truncated}
        if self.done:
            info['episode'] = {'r': self.total_reward, 'l': self.episode_length}
            print(f"Episode ended | Total Reward: {self.total_reward:.2f} | Length: {self.episode_length}")
        return obs, self.reward, self.done, self.truncated, info
    
    def _get_observation(self):
        img = get_screen()
        speed, done = get_speed_done(img)
        speed = int(speed)
        if self.view == 'pov':
            img = process_screen_pov(img)
        else:
            img = process_screen_3rd(img)
        img = np.expand_dims(img, axis=0) # add channel dim, should be 1, 128, 128
        return img, speed, done
    
    def _calculate_reward(self, speed, done):
        speed_factor = 0.01  # scale speed
        min_speed_threshold = 110  # after 2 seconds
        stall_penalty = -0.1  # Penalty for stalling or not moving
        tt = time.time() - self.start_time #* 0.1
        reward = 0
        reward = speed * speed_factor #* tt
        if speed < min_speed_threshold:
            reward += stall_penalty

        reward = np.round(reward, 4)
        print(reward)
        return reward
        
    def _check_done(self, speed, done):
        if speed < 20 and time.time() - self.start_time > 5:
            self.done = True
        #if done:                # if ocr detects finish = done
            #self.done = True
        return self.done
    
    def _check_truncated(self, speed): # crash
        return False
    
def make_env():
    def _init():
        env = TrackmaniaEnv()
        env = Monitor(env)
        return env
    return _init

In [18]:
model_dir = 'rl/trackmania_gym_SB3/models'

learning_rate=3e-4
buffer_size = 800_000 # 1,000,000
batch_size = 512
tau = 0.005
gamma = 0.99
train_freq= 1
gradient_steps=1

# create env
vec_env = DummyVecEnv([make_env()])
eval_env = DummyVecEnv([make_env()])

# define model arguments
policy_kwargs = dict(
    features_extractor_class=CNN,       # or CNN
    features_extractor_kwargs=dict(features_dim=512),
    net_arch=dict(pi=[512, 256, 256], qf=[512, 256, 256]),    # actor and critic layers
    normalize_images=False,  # already done in proprocess_screen()
)

# train 
def train():

    # load or create new model
    load = False
    if load:
        model_path = 'models/SAC_trackmania82500.zip'
        if os.path.exists(model_path):
            model = SAC.load(model_path, env=vec_env, tensorboard_log='logs/') 
            print("Loaded model from", model_path)         
    else:
        model = SAC(
            'CnnPolicy',
            vec_env,
            verbose=1,
            policy_kwargs=policy_kwargs,
            tensorboard_log='logs/',
            learning_rate=learning_rate,
            buffer_size=buffer_size,  
            batch_size=batch_size,     
            tau=tau,
            gamma=gamma,
            train_freq=train_freq,
            gradient_steps=gradient_steps,
            ent_coef='auto',
            optimize_memory_usage=True, 
            replay_buffer_kwargs=dict(handle_timeout_termination=False)
        )
        print("Created new model")

    # define evals
    eval_callback = EvalCallback(
        eval_env, 
        eval_freq=1000,  # how often to perform evaluation i.e. every 1000 timesteps.
        best_model_save_path="models/best",
        log_path="logs/",
        verbose=1
        
    )
    
    checkpoint_callback = CheckpointCallback(save_freq=10000, 
                                             save_path='./models/', 
                                             name_prefix='SAC_model_{time}'
                                             )

    callback = CallbackList([eval_callback, checkpoint_callback])

    # training loop
    TIMESTEPS = 2500
    iters = 0
    while True:
        iters += 1
        model.learn(
                    total_timesteps=TIMESTEPS, 
                    reset_num_timesteps=False, 
                    tb_log_name=f"SAC_6", 
                    callback=callback
        )
        model.save(f"models/SAC_trackmania{TIMESTEPS*iters}")

        # Retrieve and print statistics from the model's ep_info_buffer
        if len(model.ep_info_buffer) > 0:
            ep_rewards = [ep_info['r'] for ep_info in model.ep_info_buffer]
            ep_lengths = [ep_info['l'] for ep_info in model.ep_info_buffer]
            avg_reward = np.mean(ep_rewards)
            avg_length = np.mean(ep_lengths)
            print(f"Iteration: {iters} | Avg Reward: {avg_reward:.2f} | Avg Length: {avg_length:.2f}")
        else:
            print(f"Iteration: {iters} | No completed episodes in this iteration.")


        # Visualize feature maps every 10 iterations
        if iters % 2 == 0:
            visualise_feature_maps(model)
                                


In [19]:
train()

Using cuda device


MemoryError: Unable to allocate 48.8 GiB for an array with shape (800000, 1, 1, 128, 128) and data type float32

In [24]:
def print_available_memory():
    mem = psutil.virtual_memory()
    print(f"Available memory: {mem.available / (1024 ** 3):.2f} GiB")

print_available_memory()

Available memory: 23.44 GiB
