In [2]:
import gettelemetry as client
import wandb
import os

import torch
import torch.nn.functional as F
import mss
import cv2
import time
import numpy as np

import pywinctl as gw
import vgamepad as vg
import gymnasium as gym
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CheckpointCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.policies import BaseFeaturesExtractor
from stable_baselines3.common.callbacks import CallbackList

import torchvision.models as models
from torchvision import transforms

In [None]:

# Enable debug logging
os.environ["WANDB_DEBUG"] = "true"

# Prompt for the WANDB_API_KEY
wandb_api_key = input("Please enter your WANDB API key: ")

# Set the WANDB_API_KEY environment variable
os.environ["WANDB_API_KEY"] = wandb_api_key

# Login to wandb
try:
    wandb.login(key=wandb_api_key)
except wandb.errors.CommError as e:
    print(f"Failed to login to wandb: {e}")
    raise

# Initialize wandb project
try:
    wandb.init(project="trackmania_sac", entity="auschra3")
except wandb.errors.CommError as e:
    print(f"Failed to initialize wandb project: {e}")
    raise


In [3]:
class WindowCap():
    def __init__(self, window_name):
        self.window_name = window_name
        self.window = gw.getWindowsWithTitle(window_name)
        if not self.window:
            raise Exception(f"Window with name '{window_name}' not found.")
        self.window = self.window[0]
        self.top = self.window.top
        self.left = self.window.left
        self.width = self.window.width
        self.height = self.window.height
        self.monitor = {"top": self.top, "left": self.left, "width": self.width, "height": self.height}
        self.sct = mss.mss()
        self.resize = 128

    def capture(self):
        img = np.array(self.sct.grab(self.monitor))
        img = cv2.resize(img, (self.resize, self.resize))       # resize
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)        # Convert to grayscale
        img = img / 255.0           # normalize
        return img

    def __del__(self):
        self.sct.close()


    def show_window(self):
        with mss.mss() as sct:
            monitor = {"top": self.top, "left": self.left, "width": self.width, "height": self.height}
            frame_count = 0
            fps = 0
            start_time = time.time()

            while True:
                # Capture screen
                img = np.array(sct.grab(monitor))
                # resize image
                img = cv2.resize(img, (124, 124))

                # Convert to grayscale (direct from BGRA)
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)

                # Display the image
                #cv2.imshow("Screen Capture", img)

                # Calculate FPS every 10 frames
                frame_count += 1
                if frame_count % 10 == 0:
                    end_time = time.time()
                    fps = frame_count / (end_time - start_time)
                    print(f"FPS: {fps:.2f}")
                    frame_count = 0
                    start_time = time.time()

                # Exit loop on 'q' key press
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

                # Release resources
        cv2.destroyAllWindows()

In [5]:
# setup gamepad
gamepad = vg.VX360Gamepad()

# 1,3 tensor
def send_action(action):
    steering, brake, throttle = action
    gamepad.left_joystick_float(x_value_float=steering, y_value_float=0.0) # left/right
    gamepad.left_trigger_float(value_float=brake) # brake
    gamepad.right_trigger_float(value_float=throttle)
    gamepad.update()


In [None]:
class TrackmaniaEnv(gym.Env) :
    def __init__(self, window_name="Trackmania"):
        super(TrackmaniaEnv, self).__init__()
        self.window = WindowCap(window_name)
        self.client = client.TMClient()
        self.action_space = gym.spaces.Box(
            low=np.array([-1.0, 0.0, 0.0]), # Steering angle from -1 (left) to 1 (right) throttle from 0 to 1, braking 0 to 1
            high=np.array([1.0, 1.0, 1.0]), 
            dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=0, high=255, shape=(1, 128, 128), dtype=np.uint8)
        self.reward_range = (-np.inf, np.inf)
        self.metadata = {'render.modes': ['human']}
        self.spec = None
        self.done = False
        self.reward = 0
        self.prev_action = np.array([0, 0, 0])
        self.prev_obs = np.zeros((1, 128, 128), dtype=np.uint8)

    def reset(self, seed=None): 
        if seed is not None:
            self.seed=seed
        self.done = False
        self.reward = 0
        self.prev_action = np.array([0, 0, 0])
        self.prev_obs = np.zeros((1, 128, 128), dtype=np.uint8)
        return self.window.capture()
    
    def seed(self, seed=None):
        np.random.seed(seed)

    def step(self, action):
        # Send the action to the game
        print(action)
        send_action(action)
        self.prev_action = action
        obs = self.window.capture()  # Capture the current image
        telemetry = self.client.retrieve_data()  # Retrieve the telemetry data

        # Process telemetry data to compute the reward and determine if the episode is done
        checkpoint = telemetry['checkpoint']
        lap = telemetry['lap']
        speed = telemetry['speed']
        position = telemetry['position']
        steer = telemetry['steer']
        gas = telemetry['gas']
        brake = telemetry['brake']
        finished = telemetry['finished']
        acceleration = telemetry['acceleration']
        jerk = telemetry['jerk']
        aim_yaw = telemetry['aim_yaw']
        aim_pitch = telemetry['aim_pitch']
        fl_steer_angle = telemetry['fl_steer_angle']
        fr_steer_angle = telemetry['fr_steer_angle']
        fl_slip = telemetry['fl_slip']
        fr_slip = telemetry['fr_slip']
        gear = telemetry['gear']
    
        reward = speed * 0.1  # Reward is proportional to speed
        if finished:
            reward += 100  # Bonus reward for finishing the race
            self.done = True

        self.reward = reward
        done = self.done
        info = {
            'speed': speed,
            'position': position,
            'checkpoint': checkpoint,
            'lap': lap
        }
        return obs, reward, done, info
    
    def make_env():
        def _init():
            env = TrackmaniaEnv()
            return env
        return _init

    def close(self):
        del self.window
        self.client.close()  # Close the TMClient connection

In [None]:
# log wandb
# login 


'''
wandb.login(key = ''
class WandbCallback(BaseCallback):
    def __init__(self, verbose=0):
        super(WandbCallback, self).__init__(verbose)

    def _on_step(self) -> bool:
        # Log metrics to wandb
        wandb.log({
            'reward': self.locals['rewards'],
            'episode_length': self.locals['episode_lengths'],
            'episode_reward': self.locals['episode_rewards'],
        })
        return True
'''

SyntaxError: '(' was never closed (938139376.py, line 3)

In [7]:
# create environment
env = TrackmaniaEnv()

# Create the SAC model
model = SAC('CnnPolicy', env, verbose=1)

# Train the model with the WandbCallback
model.learn(total_timesteps=10000)

# Close the environment
env.close()

Expecting 76 bytes per message (19 floats)
Attempting to connect to localhost:9000 (Attempt 1/5)...
Connected successfully!

Waiting for 76 bytes...
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #1
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting for 76 bytes...
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #2
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting for 76 bytes...
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #3
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting for 76 bytes...
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #4
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting for 76 bytes...
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #5
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting fo

TypeError: TrackmaniaEnv.reset() got an unexpected keyword argument 'seed'

Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #7
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting for 76 bytes...
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #8
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting for 76 bytes...
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #9
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting for 76 bytes...
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #10
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting for 76 bytes...
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #11
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting for 76 bytes...
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing message #12
Message size: 76 bytes
Remaining buffer: 0 bytes

Waiting for 76 bytes...
Received chunk of 76 bytes
Current buffer size: 76 bytes

Processing mess