In [1]:
import gettelemetry as client
import gamepad as gp
from gamepad import GamepadHandler
import window as gwd
import wandb
import os

import torch
import torch.nn.functional as F
import mss
import cv2
import time
import numpy as np
import math

import pywinctl as gw
import vgamepad as vg
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CheckpointCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.policies import BaseFeaturesExtractor
from stable_baselines3.common.callbacks import CallbackList

import torchvision.models as models
from torchvision import transforms

import logging
from stable_baselines3.common.monitor import Monitor

In [2]:
steps = 1000

In [3]:
wandb.login()
wandb.init(project="trackmania_sac")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mauschra3[0m ([33mauschra3-massachusetts-institute-of-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
class WindowCap():
    '''find the trackmania window and 
    return gray normalised scaled pixel values'''
    def __init__(self, window_name):
        self.window_name = window_name
        self.window = gw.getWindowsWithTitle(window_name)
        if not self.window:
            raise Exception(f"Window with name '{window_name}' not found.")
        self.window = self.window[0]
        self.top = self.window.top
        self.left = self.window.left
        self.width = self.window.width
        self.height = self.window.height
        self.monitor = {"top": self.top, "left": self.left, "width": self.width, "height": self.height}
        self.sct = mss.mss()
        self.resize = 128

    def capture(self):
        img = np.array(self.sct.grab(self.monitor))
        img = cv2.resize(img, (self.resize, self.resize))       # resize
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)        # Convert to grayscale
        img = img / 255.0           # normalize
        return img

    def __del__(self):
        self.sct.close()
        

In [5]:
# demo positions for position comparison
with open('models/optimal_positions_2.txt', 'r') as f:
    optimal_positions = f.readlines()

# write x and z to list as tuple of floats
# {'x': 495.9945068359375, 'y': 10.008896827697754, 'z': 631.3551025390625}
optimal_positions_list = []

# for each line in the file
for pos in optimal_positions:
    pos = pos.split(',')
    # remove /n and {}
    pos[0] = pos[0].replace('{', '')
    pos[2] = pos[2].replace('}', '')
    pos[0] = pos[0].replace('\'', '')
    pos[2] = pos[2].replace('\'', '')

    x = round(float(pos[0].split(':')[1]), 6)
    z = round(float(pos[2].split(':')[1]), 6)
    optimal_positions_list.append((x, z))

# turn into array
optimal_positions_array = np.array(optimal_positions_list)      # array vs list
print(optimal_positions_array.shape)
print(optimal_positions_array)


(1947, 2)
[[496.       624.004211]
 [496.       624.014954]
 [496.       624.032104]
 ...
 [385.598175 556.279663]
 [384.094574 556.857178]
 [382.578369 557.407227]]


In [6]:
class TrackmaniaEnv(gym.Env) :
    def __init__(self, window_name="Trackmania"):
        super(TrackmaniaEnv, self).__init__()
        self.window = WindowCap(window_name)        # capture window
        self.client = client.TMClient()             # start client connection with openplanet server
        self.gamepad = GamepadHandler()           # init gamepad
        if not self.gamepad:
            print("Failed to initialize gamepad")

        self.action_space = spaces.Box(
        low=np.array([-1.0, 0.0, 0.0]),
        high=np.array([1.0, 1.0, 1.0]),
        dtype=np.float32
        )
    
        # obs space
        self.observation_space = spaces.Box(
            low=0,
            high=255,  
            shape=(1, 128, 128),
            dtype=np.uint8 
        )

        self.id = gwd.get_window_id("Trackmania")           
        self.focus = gwd.focus_window(self.id)                      # focus on window
        self.reward_range = (-np.inf, np.inf)           
        self.metadata = {'render.modes': ['human']}
        self.terminated = False                                     # check if race ended
        self.truncated = False                                      # dont know what the difference is
        self.reward = 0                                             # reset reward
        self.prev_action = np.array([0, 0, 0])                      # init action 
        self.prev_obs = np.zeros((1, 128, 128), dtype=np.uint8)     # init obs
        
        self.telemetry = self.client.retrieve_data()                # get the telemetry data from the server

        self.datapoints = optimal_positions_list.copy()            # get human driven data
        self.prev_position = None
        self.previous_closest_index = -1  

        self.speed_buffer = []                                      # keep 'x' frames speed buffer
        self.steps = 0                                              # count training steps
        self.episode_reward = 0                                     # get cumulative reward for the episode
        self.episode_steps = 0
        self.checkpoint = 0                                         
        self.episode_start_time = None
        self.step_start_time = None


    def reset(self, seed = None):
        self.gamepad.reset()                                                                                    
        time.sleep(1.5)                                             # wait for lights to go green
        self.focus = gwd.focus_window(self.id)                      # unsure if necessary

        # reset counters for episode 
        self.episode_steps = 0    
        self.episode_reward = 0                                  
        self.reward = 0
        self.episode_start_time = time.time()                      
        self.step_start_time = time.time()                          
        self.speed_buffer = []                                      
        self.datapoints = optimal_positions_array.copy()                  # renew the checkpoint list
        self.terminated = False
        self.prev_action = np.array([0, 0, 0])
        self.previous_closest_index = -1  
        
        obs = self.window.capture()
        obs = np.expand_dims(obs, axis=0)                           # add channel dim for gym

        self.episode_start_time = time.time()
        self.step_start_time = time.time()

        if seed is not None:
            self.seed(seed)
        
        return obs, {}
    
    def step(self, action):
        # update new step
        self.steps += 1
        self.episode_steps += 1

        self.gamepad.send_action(action)                    # send action to gamepad and update prev action
        self.prev_action = action


        self.step_time = time.time() - self.step_start_time         # count step time
        self.step_start_time = time.time()
            
        obs = self.window.capture()                         # capture new window

        self.telemetry = self.client.retrieve_data()        # get telemetry data# 
        truncated = self._check_truncated(self.telemetry)   # check if car has crashed
        if truncated:
            print("truncated")
        terminated = self._check_finished(self.telemetry)   # check if car has finished, if so press A
        if terminated:
            self.gamepad.press_a()
            self.terminated = True
            print("terminated")

        reward = self.get_reward(self.telemetry)            # get reward
        self.episode_reward += reward
        self.prev_position = self.telemetry['position']

        if self.steps % 10000 == 0:                         # log data
            print(f"step: {self.steps} / {steps}")

        info = {                
            'speed': self.telemetry['speed'],
            'position': self.telemetry['position'],
            'checkpoint': self.telemetry['checkpoint'],
            'lap': self.telemetry['lap'],
            'episode reward': self.episode_reward,}         # log telemetry data

        #print(obs.shape, reward, terminated, info)
        return obs, reward, terminated, truncated, info
    
    def seed(self, seed=None):
        np.random.seed(seed)

    def _check_truncated(self, telemetry):
        '''check if car has crashed into a wall or off the track'''
        speed = telemetry['speed']
        acceleration = telemetry['acceleration']
        self.speed_buffer.append(speed)

        if len(self.speed_buffer) > 50:
            self.speed_buffer.pop(0)
        speed_av = sum(self.speed_buffer) / len(self.speed_buffer)
        if speed_av < 2 and acceleration <0.1 and time.time() - self.episode_start_time > 3:
            return True
        return False
    
    def _check_finished(self, telemetry):  
        '''check if car has finished the track'''
        finished = telemetry['finished']
        return finished

    # get the closest demo point to current position 
    def _closest_point(self, car_position):
        car_position = np.array([self.telemetry['position']['x'], self.telemetry['position']['z']]) # extract x and z
        distances = np.linalg.norm(self.datapoints - car_position, axis=1)      # compare all points to current position, probably inefficient
        closest_index = np.argmin(distances)        # get index of closest point.                    
        return closest_index, self.datapoints[closest_index]        # (index, (x, z))
    
    # update the progress along the track
    def _update_progress(self, car_position):
        print(f"update progress {self.steps}")
        car_position = np.array([self.telemetry['position']['x'], self.telemetry['position']['z']]) # extract x and z
        '''main reward mechanism: calculate distance from last position to trajectory of '''
        progress_reward = 0
        # get closest point to current position


        # get distance along trajectory from last checkpoint to next checkpoint
        x, z = round(float(car_position['x']), 7), round(float(car_position['z']), 7)
        x1, z1 = self.datapoints[0][0], self.datapoints[0][1]
        x2, z2 = self.datapoints[1][0], self.datapoints[1][1]

        dx = x2 - x1
        dz = z2 - z1
        vx = x - x1
        vz = z - z1

        # get distance^2 x1, z1 to x2, z2
        dist_sq = dx**2 + dz**2
        dot_prod = dx * vx + dz * vz

        # projection
        t = dot_prod / dist_sq
        
        # if -ve, position behind this vector, so wait until passed x1, z1
        if t > 0 and dist_sq > 0.01 :
            self.datapoints.pop(0)
            progress_reward = min(t, 10)
            #print(f"dist {dist_sq}")
            #print(f"dot {dot_prod}")
            #print(checkpoint_reward)
        
        return progress_reward

    def get_reward(self, telemetry):
        print(f"reward, {self.steps}")
        # get telemetry data
        speed = telemetry['speed']
        finished = telemetry['finished']
        jerk = telemetry['jerk']
        steer = telemetry['steer']
        position = telemetry['position']
        reward = 0

        speed_reward = speed / 3000 # max speed
        progress_reward = 0 #self.update_progress(position)    # get progress reward
        jerk_reward = -jerk * 0.01  # penalize jerk

        print(f"speed: {speed_reward}, progress_reward {progress_reward}, jerk_reward {jerk_reward}")

        reward = speed_reward + progress_reward + jerk_reward
        print(f"reward: {reward}")
        return reward
        
    
    def make_env():
        def _init():
            env = TrackmaniaEnv()
            return env
        return _init

    def close(self):
        del self.window
        self.client.close()

In [7]:
class CustomWandbCallback(BaseCallback):
    def __init__(self, verbose=0):
        super().__init__(verbose)
        
    def _on_step(self) -> bool:
        # Log only available metrics
        wandb.log({
            'reward': self.locals['rewards'],
            'timesteps': self.num_timesteps
        })
        
        # Log episode info if available
        info = self.locals.get('infos', [{}])[0]
        if info:
            wandb.log({
                'speed': info.get('speed', 0),
                'checkpoint': info.get('checkpoint', 0),
                'lap': info.get('lap', 0),
                'episode_duration': info.get('episode_duration', 0)
            })
        return True

In [8]:
steps = 1_000

# creat env
env = TrackmaniaEnv()
env.reset()

env = DummyVecEnv([lambda: env])

load = False

# load model
if load:
    model = SAC.load("models/trackmania_sac6")
    model.set_env(env)
else:
    # if not loading, new model
    model = SAC('MlpPolicy', 
                env, 
                verbose=1,
                gradient_steps=1,
                ent_coef='auto',
                target_entropy='auto',
                gamma=0.99,
                tau=0.005,
        # reset counters for e
                learning_starts=4000,
                buffer_size=600_000,
                policy_kwargs=dict(
                    net_arch=[512, 512],
                    optimizer_kwargs=dict(weight_decay=1e-5)))

# train
model.learn(total_timesteps=steps, callback=CustomWandbCallback())
model.save("models/trackmania_sac7")

# cleanup
env.close()

Attempting to connect to localhost:9000 (Attempt 1)...
Connected successfully!
Gamepad initialized
Using cuda device
reward, 1
speed: 0.0, progress_reward 0, jerk_reward -6.052394863218069e-05
reward: -6.052394863218069e-05
reward, 2
speed: 6.626667082309723e-05, progress_reward 0, jerk_reward -0.0010184941440820694
reward: -0.0009522274732589722
reward, 3
speed: 0.00017266666889190674, progress_reward 0, jerk_reward -0.0019961756467819216
reward: -0.0018235089778900148
reward, 4
speed: 0.0002679666678110758, progress_reward 0, jerk_reward 7.382065057754517e-05
reward: 0.000341787318388621
reward, 5
speed: 0.00027510001262029014, progress_reward 0, jerk_reward 0.001679118573665619
reward: 0.0019542185862859093
reward, 6
speed: 0.00025950000683466594, progress_reward 0, jerk_reward 0.001467638611793518
reward: 0.0017271386186281839
reward, 7
speed: 0.00035266669591267906, progress_reward 0, jerk_reward -0.0014387267827987672
reward: -0.0010860600868860882
reward, 8
speed: 0.000340566635

KeyboardInterrupt: 

In [49]:
'''

optimal_positions = {}

# map points
class PlotPoints() :
    def __init__(self):
        super(PlotPoints, self).__init__()
        self.client = client.TMClient()
        self.gamepad = GamepadHandler()
        self.time_start = time.time()
        self.steps = 0

    def reset(self):
        self.gamepad.reset()
        print("reset")
        time.sleep(1.5)
        self.time_start = time.time()

    def step(self):
        self.steps += 1
        self.telemetry = self.client.retrieve_data()  # Retrieve the telemetry data
        optimal_positions[self.steps] = self.telemetry['position']
        # Process telemetry data to compute the reward and determine if the episode is done
        
        position = self.telemetry['position']

        finished = self.telemetry['finished']
        time_step = self.time_start - time.time()
        
        return position, time_step, finished

    def close(self):
        del self.window
        self.client.close()  # Close the TMClient connection

# create new text file and add optimal positions coordinates to the file
# save each line as x, y, z float only
env = PlotPoints()

env.reset()


while True:
    env.step()
    print('start')
    if env.telemetry['finished']:
        break

def save_optimal_positions(optimal_positions):
    with open('models/optimal_positons_2.txt', 'w') as f:
        for key in optimal_positions:
            f.write(f"{optimal_positions[key]}\n")

save_optimal_positions(optimal_positions)
'''

'\n\noptimal_positions = {}\n\n# map points\nclass PlotPoints() :\n    def __init__(self):\n        super(PlotPoints, self).__init__()\n        self.client = client.TMClient()\n        self.gamepad = GamepadHandler()\n        self.time_start = time.time()\n        self.steps = 0\n\n    def reset(self):\n        self.gamepad.reset()\n        print("reset")\n        time.sleep(1.5)\n        self.time_start = time.time()\n\n    def step(self):\n        self.steps += 1\n        self.telemetry = self.client.retrieve_data()  # Retrieve the telemetry data\n        optimal_positions[self.steps] = self.telemetry[\'position\']\n        # Process telemetry data to compute the reward and determine if the episode is done\n        \n        position = self.telemetry[\'position\']\n\n        finished = self.telemetry[\'finished\']\n        time_step = self.time_start - time.time()\n        \n        return position, time_step, finished\n\n    def close(self):\n        del self.window\n        self.cl

In [50]:
# create pseudo checkpoints
# drive around the track, save position every second
# save the positions to a file
# when driving count number of psedu checkpoints passed
# by creating a bounding box around the checkpoint
# only count checkpoint once
# reward will then be checkpoints passed / time taken
