In [1]:
import gettelemetry as client
import gamepad as gp
import window as gwd
import wandb
import os

import torch
import torch.nn.functional as F
import mss
import cv2
import time
import numpy as np

import pywinctl as gw
import vgamepad as vg
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CheckpointCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.policies import BaseFeaturesExtractor
from stable_baselines3.common.callbacks import CallbackList

import torchvision.models as models
from torchvision import transforms

In [2]:
steps = 5000

In [3]:
wandb.login()
wandb.init(project="trackmania_sac")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mauschra3[0m ([33mauschra3-massachusetts-institute-of-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
class WindowCap():
    def __init__(self, window_name):
        self.window_name = window_name
        self.window = gw.getWindowsWithTitle(window_name)
        if not self.window:
            raise Exception(f"Window with name '{window_name}' not found.")
        self.window = self.window[0]
        self.top = self.window.top
        self.left = self.window.left
        self.width = self.window.width
        self.height = self.window.height
        self.monitor = {"top": self.top, "left": self.left, "width": self.width, "height": self.height}
        self.sct = mss.mss()
        self.resize = 128

    def capture(self):
        img = np.array(self.sct.grab(self.monitor))
        img = cv2.resize(img, (self.resize, self.resize))       # resize
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)        # Convert to grayscale
        img = img / 255.0           # normalize
        return img

    def __del__(self):
        self.sct.close()
        

In [None]:
# load optimal positons 
with open('models/optimal_positons.txt', 'r') as f:
    optimal_positions = f.readlines()

In [None]:
class TrackmaniaEnv(gym.Env) :
    def __init__(self, window_name="Trackmania"):
        super(TrackmaniaEnv, self).__init__()
        self.window = WindowCap(window_name)
        self.client = client.TMClient()
        self.gamepad = gp.GamepadHandler()
        self.action_space = gym.spaces.Box(
            low=np.array([-1.0, 0.0, 0.0]),
            high=np.array([1.0, 1.0, 1.0]), 
            dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=0, high=255, shape=(1, 128, 128), dtype=np.uint8)
        self.reward_range = (-np.inf, np.inf)
        self.metadata = {'render.modes': ['human']}
        self.spec = None
        self.terminated = False
        self.truncated = False
        self.reward = 0
        self.prev_action = np.array([0, 0, 0])
        self.prev_obs = np.zeros((1, 128, 128), dtype=np.uint8)
        
        # set the window id and focus
        self.id = gwd.get_window_id("Trackmania")      
        self.focus = gwd.focus_window(self.id)
        self.speed_buffer = []

        self.steps = 0
        self.episode_reward = 0
        self.time_start = 0
        self.telemetry = self.client.retrieve_data()
        self.optimal_positions = optimal_positions

    def reset(self, seed=None, options=None):
        self.gamepad.reset()
        self.speed_buffer = []
        time.sleep(1.5)
        #self.focus = gwd.focus_window(self.id)
        if seed is not None:
            self.seed(seed)
        self.terminated = False
        self.reward = 0
        self.prev_action = np.array([0, 0, 0])
        obs = self.window.capture()
        obs = np.expand_dims(obs, axis=0)  # add channel dim for gym
        obs = obs.astype(np.float32)
        self.time_start = time.time()
        
        # Rreturn reset as per gym format
        return obs, {}
    
    def seed(self, seed=None):
        np.random.seed(seed)

    def step(self, action):
        self.steps += 1
        # send action and set the new previous actions
        self.gamepad.send_action(action)
        self.prev_action = action
            
        # capture img obs and retrieve telemetry data
        obs = self.window.capture()
        self.telemetry = self.client.retrieve_data() 

        # telemetry data
        checkpoint = self.telemetry['checkpoint']
        lap = self.telemetry['lap']
        speed = self.telemetry['speed']
        position = self.telemetry['position']
        steer = self.telemetry['steer']
        gas = self.telemetry['gas']
        brake = self.telemetry['brake']
        finished = self.telemetry['finished']
        acceleration = self.telemetry['acceleration']
        jerk = self.telemetry['jerk']
        aim_yaw = self.telemetry['aim_yaw']
        aim_pitch = self.telemetry['aim_pitch']
        fl_steer_angle = self.telemetry['fl_steer_angle']
        fr_steer_angle = self.telemetry['fr_steer_angle']
        fl_slip = self.telemetry['fl_slip']
        fr_slip = self.telemetry['fr_slip']
        gear = self.telemetry['gear']

        # call reward function
        reward = self.get_reward(self.telemetry)

        # determine if crashed by checking window of speed
        # TODO -> change to check if positions are not chaning much
        self.speed_buffer.append(speed)
        if len(self.speed_buffer) > 50:
            self.speed_buffer.pop(0)
        speed_av = sum(self.speed_buffer) / len(self.speed_buffer)

        if speed_av < 2 and acceleration <0.1 and time.time() - self.time_start > 5:
            self.terminated = True

        # check for complete track and reset if so
        if finished:
            self.gamepad.press_a()
            self.terminated = True
        
        # log data
        if self.steps % 100 == 0:
            print(f"step: {self.steps} / {steps}")
        self.reward = reward
        truncated = False
        terminated = self.terminated
        info = {
            'speed': speed,
            'position': position,
            'checkpoint': checkpoint,
            'lap': lap,
        }
        return obs, reward, terminated, truncated, info

    def get_reward(self, telemetry, optimal_positions):
        # get telemetry data
        speed = telemetry['speed']
        finished = telemetry['finished']
        acceleration = telemetry['acceleration']
        jerk = telemetry['jerk']
        postion = telemetry['position']

        reward = 0

        # reward for speed > 100
        # riding the wall gives 100 speed 
        # > 100 speed gives positive reward
        s_speed = speed * 0.01  # scale speed 100 -> 1
        reward += np.exp(s_speed) - 1  

        # penalty for hitting wall
        if round(jerk, 2) > 1:
            print(f"jerk: {round(jerk, 2)}")
            reward -= 1

        # penalty for slow speed
        if speed < 20:
            reward -= 0.1

        # reward for finishing 
        if finished:
            reward += 100

        # position based reward
        # compare each step with the optimal positions
        # fit a curved line to the optimal positions
        # get the distance from the line to the current position
        # reward based on the distance
        

        return reward
        
    
    def make_env():
        def _init():
            env = TrackmaniaEnv()
            return env
        return _init

    def close(self):
        del self.window
        self.client.close()  # Close the TMClient connection

In [5]:
optimal_positions = {}

# map points
class PlotPoints() :
    def __init__(self):
        super(PlotPoints, self).__init__()
        self.client = client.TMClient()
        self.gamepad = gp.GamepadHandler()
        self.time_start = time.time()
        self.steps = 0

    def reset(self):
        self.gamepad.reset()
        print("reset")
        time.sleep(1.5)
        self.time_start = time.time()

    def step(self):
        self.steps += 1
        self.telemetry = self.client.retrieve_data()  # Retrieve the telemetry data
        optimal_positions[self.steps] = self.telemetry['position']
        # Process telemetry data to compute the reward and determine if the episode is done
        
        position = self.telemetry['position']
        finished = self.telemetry['finished']
        time_step = self.time_start - time.time()
        
        return position, time_step, finished



    def close(self):
        del self.window
        self.client.close()  # Close the TMClient connection


In [11]:
# save positons to file
def save_positions():
    with open("models/positions.txt", "w") as f:
        for key in optimal_positions:
            f.write(f"{key}: {optimal_positions[key]}\n")

In [6]:
env = PlotPoints()
env.reset()

while True:
    env.step()
    if env.telemetry['finished']:
        break

Attempting to connect to localhost:9000 (Attempt 1/5)...
Connected successfully!
Gamepad initialized
reset


In [6]:
class CustomWandbCallback(BaseCallback):
    def __init__(self, verbose=0):
        super().__init__(verbose)
        
    def _on_step(self) -> bool:
        # Log only available metrics
        wandb.log({
            'reward': self.locals['rewards'],
            'timesteps': self.num_timesteps
        })
        
        # Log episode info if available
        info = self.locals.get('infos', [{}])[0]
        if info:
            wandb.log({
                'speed': info.get('speed', 0),
                'checkpoint': info.get('checkpoint', 0),
                'lap': info.get('lap', 0),
                'episode_duration': info.get('episode_duration', 0)
            })
        return True

In [7]:
steps = 1_000_000
load = True

# creat env
env = TrackmaniaEnv()
env.reset()
env = DummyVecEnv([lambda: env])

# load model
if load:
    model = SAC.load("models/trackmania_sac3.zip")
    model.set_env(env)
else:
    # if not loading, new model
    model = SAC('CnnPolicy', 
                env, 
                verbose=1,
                buffer_size=500_000)

# train
model.learn(total_timesteps=steps, callback=CustomWandbCallback())
model.save("models/trackmania_sac4")

# cleanup
env.close()

Attempting to connect to localhost:9000 (Attempt 1/5)...
Connected successfully!
Gamepad initialized
step: 100 / 1000000
step: 200 / 1000000
step: 300 / 1000000
step: 400 / 1000000
step: 500 / 1000000
step: 600 / 1000000
step: 700 / 1000000
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 19       |
|    time_elapsed    | 39       |
|    total_timesteps | 763      |
| train/             |          |
|    actor_loss      | -10.3    |
|    critic_loss     | 0.343    |
|    ent_coef        | 0.011    |
|    ent_coef_loss   | -20.5    |
|    learning_rate   | 0.0003   |
|    n_updates       | 15262    |
---------------------------------
step: 800 / 1000000
step: 900 / 1000000
step: 1000 / 1000000
jerk: 1.68
step: 1100 / 1000000
step: 1200 / 1000000
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 18       |
|    time_elapsed    | 66       |

KeyboardInterrupt: 