In [1]:
import gettelemetry as client
import gamepad as gp
import window as gwd
import wandb
import os

import torch
import torch.nn.functional as F
import mss
import cv2
import time
import numpy as np

import pywinctl as gw
import vgamepad as vg
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CheckpointCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.policies import BaseFeaturesExtractor
from stable_baselines3.common.callbacks import CallbackList

import torchvision.models as models
from torchvision import transforms

In [2]:
steps = 5000

In [3]:
wandb.login()
wandb.init(project="trackmania_sac")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mauschra3[0m ([33mauschra3-massachusetts-institute-of-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
class WindowCap():
    def __init__(self, window_name):
        self.window_name = window_name
        self.window = gw.getWindowsWithTitle(window_name)
        if not self.window:
            raise Exception(f"Window with name '{window_name}' not found.")
        self.window = self.window[0]
        self.top = self.window.top
        self.left = self.window.left
        self.width = self.window.width
        self.height = self.window.height
        self.monitor = {"top": self.top, "left": self.left, "width": self.width, "height": self.height}
        self.sct = mss.mss()
        self.resize = 128

    def capture(self):
        img = np.array(self.sct.grab(self.monitor))
        img = cv2.resize(img, (self.resize, self.resize))       # resize
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)        # Convert to grayscale
        img = img / 255.0           # normalize
        return img

    def __del__(self):
        self.sct.close()
        

In [5]:
'''
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.fc1 = torch.nn.Linear(128*16*16, 512)
        self.fc2 = torch.nn.Linear(512, 3)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = x.view(-1, 128*16*16)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])


'''


'\nclass CNN(torch.nn.Module):\n    def __init__(self):\n        super(CNN, self).__init__()\n        self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)\n        self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)\n        self.conv3 = torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)\n        self.fc1 = torch.nn.Linear(128*16*16, 512)\n        self.fc2 = torch.nn.Linear(512, 3)\n\n    def forward(self, x):\n        x = F.relu(self.conv1(x))\n        x = F.max_pool2d(x, kernel_size=2, stride=2)\n        x = F.relu(self.conv2(x))\n        x = F.max_pool2d(x, kernel_size=2, stride=2)\n        x = F.relu(self.conv3(x))\n        x = F.max_pool2d(x, kernel_size=2, stride=2)\n        x = x.view(-1, 128*16*16)\n        x = F.relu(self.fc1(x))\n        x = self.fc2(x)\n        return x\n\n\ntransform = transforms.Compose([\n    transforms.Resize((128, 128)),\n    transforms.ToTensor(),\n    transforms.Normalize(mean, std),\n])\n\n\n'

In [6]:
# preprocess optimal positions file
# load optimal positons 
with open('models/optimal_positons_good.txt', 'r') as f:
    optimal_positions = f.readlines()

# write x and z to list as tuple of floats
# {'x': 495.9945068359375, 'y': 10.008896827697754, 'z': 631.3551025390625}
optimal_positions_list = []

# for each line in the file
for pos in optimal_positions:
    pos = pos.split(',')
    # remove /n and {}
    pos[0] = pos[0].replace('{', '')
    pos[2] = pos[2].replace('}', '')
    pos[0] = pos[0].replace('\'', '')
    pos[2] = pos[2].replace('\'', '')

    x = round(float(pos[0].split(':')[1]), 3)
    z = round(float(pos[2].split(':')[1]), 3)
    optimal_positions_list.append((x, z))

print(optimal_positions_list)


[(495.995, 631.355), (495.995, 631.82), (495.995, 632.137), (495.995, 632.461), (495.995, 632.791), (495.995, 633.128), (495.995, 633.471), (495.995, 633.82), (495.995, 634.176), (495.99, 634.538), (495.987, 634.906), (495.982, 635.28), (495.972, 635.66), (495.967, 635.852), (495.948, 636.437), (495.929, 636.834), (495.918, 637.034), (495.874, 637.644), (495.857, 637.85), (495.797, 638.477), (495.748, 638.902), (495.69, 639.332), (495.623, 639.768), (495.55, 640.208), (495.471, 640.654), (495.385, 641.106), (495.295, 641.563), (495.2, 642.026), (495.1, 642.494), (494.997, 642.968), (494.889, 643.448), (494.779, 643.933), (494.664, 644.424), (494.547, 644.921), (494.426, 645.424), (494.302, 645.932), (494.176, 646.446), (494.047, 646.966), (493.915, 647.492), (493.781, 648.023), (493.644, 648.561), (493.505, 649.105), (493.365, 649.653), (493.224, 650.202), (493.082, 650.751), (492.94, 651.299), (492.798, 651.848), (492.656, 652.396), (492.513, 652.944), (492.37, 653.492), (492.227, 654

In [7]:
class TrackmaniaEnv(gym.Env) :
    def __init__(self, window_name="Trackmania"):
        super(TrackmaniaEnv, self).__init__()
        self.window = WindowCap(window_name)
        self.client = client.TMClient()
        self.gamepad = gp.GamepadHandler()
        self.action_space = gym.spaces.Box(
            low=np.array([-1.0, 0.0, 0.0]),
            high=np.array([1.0, 1.0, 1.0]), 
            dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=0, high=255, shape=(1, 128, 128), dtype=np.uint8)
        self.reward_range = (-np.inf, np.inf)
        self.metadata = {'render.modes': ['human']}
        self.spec = None
        self.terminated = False
        self.truncated = False
        self.reward = 0
        self.prev_action = np.array([0, 0, 0])
        self.prev_obs = np.zeros((1, 128, 128), dtype=np.uint8)
        
        # set the window id and focus
        self.id = gwd.get_window_id("Trackmania")      
        self.focus = gwd.focus_window(self.id)
        self.speed_buffer = []

        self.steps = 0
        self.episode_reward = 0

        self.telemetry = self.client.retrieve_data()
        self.optimal_positions = optimal_positions_list
        self.checkpoint_list = optimal_positions_list
        self.episode_steps = 0

        self.time_start = 0
        self.checkpoint = 0
        self.checkpoint_splits = []

    def reset(self, seed=None, options=None):
        self.gamepad.reset()
        self.speed_buffer = []
        self.checkpoint_list = optimal_positions_list
        time.sleep(1.5)
        
        #self.focus = gwd.focus_window(self.id)
        if seed is not None:
            self.seed(seed)
        self.terminated = False
        self.episode_steps = 0
        self.reward = 0
        self.prev_action = np.array([0, 0, 0])
        obs = self.window.capture()
        obs = np.expand_dims(obs, axis=0)  # add channel dim for gym
        obs = obs.astype(np.float32)
        self.time_start = time.time()
        
        # Rreturn reset as per gym format
        return obs, {}
    
    def seed(self, seed=None):
        np.random.seed(seed)

    def step(self, action):
        # update new step
        self.steps += 1
        self.episode_steps += 1
        self.gamepad.send_action(action)
        self.prev_action = action
            
        # capture img obs and retrieve telemetry data
        obs = self.window.capture()
        self.telemetry = self.client.retrieve_data() 

        # telemetry data
        checkpoint = self.telemetry['checkpoint']
        lap = self.telemetry['lap']
        speed = self.telemetry['speed']
        position = self.telemetry['position']
        steer = self.telemetry['steer']
        gas = self.telemetry['gas']
        brake = self.telemetry['brake']
        finished = self.telemetry['finished']
        acceleration = self.telemetry['acceleration']
        jerk = self.telemetry['jerk']
        aim_yaw = self.telemetry['aim_yaw']
        aim_pitch = self.telemetry['aim_pitch']
        fl_steer_angle = self.telemetry['fl_steer_angle']
        fr_steer_angle = self.telemetry['fr_steer_angle']
        fl_slip = self.telemetry['fl_slip']
        fr_slip = self.telemetry['fr_slip']
        gear = self.telemetry['gear']

        # call reward function
        reward = self.get_reward(self.telemetry)

        # determine if crashed by checking window of speed
        # TODO -> change to check if positions are not chaning much
        self.speed_buffer.append(speed)
        if len(self.speed_buffer) > 50:
            self.speed_buffer.pop(0)
        speed_av = sum(self.speed_buffer) / len(self.speed_buffer)
        if speed_av < 2 and acceleration <0.1 and time.time() - self.time_start > 5:
            self.terminated = True

        # check for complete track and reset if so
        if finished:
            self.gamepad.press_a()
            self.terminated = True


        # log data
        if self.steps % 100 == 0:
            print(f"step: {self.steps} / {steps}")
        self.reward = reward
        truncated = False
        terminated = self.terminated
        info = {
            'speed': speed,
            'position': position,
            'checkpoint': checkpoint,
            'lap': lap,
        }
        return obs, reward, terminated, truncated, info

    def get_reward(self, telemetry):
        # get telemetry data
        speed = telemetry['speed']
        finished = telemetry['finished']
        acceleration = telemetry['acceleration']
        jerk = telemetry['jerk']
        position = telemetry['position']
        checkpoint  = telemetry['checkpoint']

        reward = 0

        # position reward based on distance from optimal positions
        if self.checkpoint_list[0][0] - 5 < round(position['x'], 3) < self.checkpoint_list[0][0] + 5 and self.checkpoint_list[0][1] - 5 < round(position['z'], 3) < self.checkpoint_list[0][1] + 5 and self.episode_steps > 50:
            checkpoint_reward = 1
            self.checkpoint_list.pop(0)
        else:
            checkpoint_reward = 0

        # reward for speed > 100
        # riding the wall gives 100 speed 
        # > 100 speed gives positive reward
        s_speed = speed * 0.01  # scale speed 100 -> 1
        speed_reward = round(np.exp(s_speed) - 1, 4)  

        # penalty for hitting wall
        if round(jerk, 2) > 1:
            #print(f"jerk: {round(jerk, 2)}")
            crash_reward = -1
        else:   
            crash_reward = 0

        # penalty for slow speed
        if speed < 20:
            slow_reward = -1
        else:
            slow_reward = 0

        # reward for finishing 
        if finished:
            finish_reward = 100
        else:
            finish_reward = 0

        # add rewards
        reward = checkpoint_reward + speed_reward + crash_reward + slow_reward + finish_reward
        print(f"checkpoint: {checkpoint_reward}, speed: {speed_reward}, crash: {crash_reward}, slow: {slow_reward}, finish: {finish_reward}")
        return reward
        
    
    def make_env():
        def _init():
            env = TrackmaniaEnv()
            return env
        return _init

    def close(self):
        del self.window
        self.client.close()  # Close the TMClient connection

In [8]:
class CustomWandbCallback(BaseCallback):
    def __init__(self, verbose=0):
        super().__init__(verbose)
        
    def _on_step(self) -> bool:
        # Log only available metrics
        wandb.log({
            'reward': self.locals['rewards'],
            'timesteps': self.num_timesteps
        })
        
        # Log episode info if available
        info = self.locals.get('infos', [{}])[0]
        if info:
            wandb.log({
                'speed': info.get('speed', 0),
                'checkpoint': info.get('checkpoint', 0),
                'lap': info.get('lap', 0),
                'episode_duration': info.get('episode_duration', 0)
            })
        return True

In [None]:
steps = 100000

# creat env
env = TrackmaniaEnv()
env.reset()
env = DummyVecEnv([lambda: env])


load = False

# load model
if load:
    model = SAC.load("models/trackmania_sac3.zip")
    model.set_env(env)
else:
    # if not loading, new model
    model = SAC('CnnPolicy', 
                env, 
                verbose=1,
                buffer_size=500_000)

# train
model.learn(total_timesteps=steps, callback=CustomWandbCallback())
model.save("models/trackmania_sac5")

# cleanup
env.close()

Attempting to connect to localhost:9000 (Attempt 1/5)...
Connected successfully!
Gamepad initialized
Using cuda device
checkpoint: 0, speed: 0.0, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: -0.0, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0008, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: -0.0021, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: -0.0026, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0001, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0008, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0006, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0004, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0002, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0, crash: 0, slow: -1, finish: 0
checkpoint: 0, speed: 0.0005, crash: 0, slow: -1, finish

In [None]:
optimal_positions = {}

# map points
class PlotPoints() :
    def __init__(self):
        super(PlotPoints, self).__init__()
        self.client = client.TMClient()
        self.gamepad = gp.GamepadHandler()
        self.time_start = time.time()
        self.steps = 0

    def reset(self):
        self.gamepad.reset()
        print("reset")
        time.sleep(1.5)
        self.time_start = time.time()

    def step(self):
        self.steps += 1
        self.telemetry = self.client.retrieve_data()  # Retrieve the telemetry data
        optimal_positions[self.steps] = self.telemetry['position']
        # Process telemetry data to compute the reward and determine if the episode is done
        
        position = self.telemetry['position']
        finished = self.telemetry['finished']
        time_step = self.time_start - time.time()
        
        return position, time_step, finished

    def close(self):
        del self.window
        self.client.close()  # Close the TMClient connection

# create new text file and add optimal positions coordinates to the file
# save each line as x, y, z float only
env = PlotPoints()
env.reset()

while True:
    env.step()
    if env.telemetry['finished']:
        break

def save_optimal_positions(optimal_positions):
    with open('models/optimal_positons.txt', 'w') as f:
        for key in optimal_positions:
            f.write(f"{optimal_positions[key]}\n")

save_optimal_positions(optimal_positions)


In [None]:
# create pseudo checkpoints
# drive around the track, save position every second
# save the positions to a file
# when driving count number of psedu checkpoints passed
# by creating a bounding box around the checkpoint
# only count checkpoint once
# reward will then be checkpoints passed / time taken
