In [1]:
import gym
from gym import spaces
import numpy as np

class RaceEnv(gym.Env):
    """Single-car race-strategy environment where every step is one lap.

    Observation (float32 vector of length 5):
        ``[lap_number, tyre_age, tyre_compound, last_lap_time, position]``

    Actions (``Discrete(3)``):
        0: Maintain, 1: Push, 2: Pit

    The environment follows the classic Gym call convention: ``reset``
    returns only the observation and ``step`` returns the 4-tuple
    ``(observation, reward, done, info)``.
    """

    TOTAL_LAPS = 70  # Race length in laps
    BASE_LAP_TIME = 90.0  # Example base lap time in seconds
    PIT_TIME_LOSS = 20.0  # Time lost by making a pit stop, in seconds
    PUSH_EXTRA_WEAR = 2  # Extra tyre age added by a push lap (on top of the normal +1)
    # Lap-time loss per unit of tyre age, keyed by the integer compound code.
    DEGRADATION_RATE = {
        0: 0.15,  # Soft
        1: 0.10,  # Medium
        2: 0.07,  # Hard
    }

    def __init__(self, data):
        """Create the environment.

        Args:
            data: Stored on ``self.data``; not read by the lap model in this class.
        """
        super().__init__()

        self.data = data
        self.lap_number = 0
        self.tyre_age = 0
        self.tyre_compound = 0  # Encode tyre compound as integer (Soft = 0, Medium = 1, Hard = 2)
        self.position = 1  # Start in P1 for simplicity; never updated by step()

        # Define action space: {0: Maintain, 1: Push, 2: Pit}
        self.action_space = spaces.Discrete(3)

        # Worst case for tyre age is pushing on every lap without ever
        # pitting, so the bound has to cover (1 + PUSH_EXTRA_WEAR) per lap.
        max_tyre_age = self.TOTAL_LAPS * (1 + self.PUSH_EXTRA_WEAR)

        # Define state space. The bounds are built as float32 so they match
        # the declared dtype of the Box.
        self.observation_space = spaces.Box(
            low=np.zeros(5, dtype=np.float32),
            high=np.array(
                [self.TOTAL_LAPS, max_tyre_age, 3, 1000, 20],
                dtype=np.float32,
            ),
            dtype=np.float32,
        )

    def reset(self):
        """Restart the race and return the initial observation.

        Returns:
            A float32 array ``[0, 0, 0, 0, 1]``; the lap-time slot is 0
            because no lap has been run yet.
        """
        self.lap_number = 0
        self.tyre_age = 0
        self.tyre_compound = 0
        self.position = 1

        return self._get_obs(0)

    def step(self, action):
        """Run one lap with the chosen action.

        Args:
            action: 0 (maintain), 1 (push) or 2 (pit).

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is the
            negative time spent on this lap and ``done`` becomes True once
            ``TOTAL_LAPS`` laps have been completed.

        Raises:
            ValueError: If ``action`` is not 0, 1 or 2.
        """
        if action == 0:  # Maintain
            lap_time = self._simulate_lap(self.tyre_compound, self.tyre_age, push=False)

        elif action == 1:  # Push
            lap_time = self._simulate_lap(self.tyre_compound, self.tyre_age, push=True)
            self.tyre_age += self.PUSH_EXTRA_WEAR  # Pushing wears the tyres faster

        elif action == 2:  # Pit
            self.tyre_age = 0
            # Cycle through tyre types
            self.tyre_compound = (self.tyre_compound + 1) % len(self.DEGRADATION_RATE)
            # The lap is still driven (on the fresh tyres) and the stop costs
            # PIT_TIME_LOSS on top. Charging only the pit loss would make a
            # pit lap cheaper than any normal lap and reward pitting every lap.
            lap_time = (
                self._simulate_lap(self.tyre_compound, self.tyre_age, push=False)
                + self.PIT_TIME_LOSS
            )

        else:
            raise ValueError(
                f"Invalid action {action!r}; expected 0 (maintain), 1 (push) or 2 (pit)"
            )

        reward = -lap_time

        self.tyre_age += 1
        self.lap_number += 1

        # End race after TOTAL_LAPS laps
        done = self.lap_number >= self.TOTAL_LAPS

        return self._get_obs(lap_time), reward, done, {}

    def _get_obs(self, lap_time):
        """Pack the current state and the given lap time into an observation."""
        return np.array([
            self.lap_number,
            self.tyre_age,
            self.tyre_compound,
            lap_time,
            self.position
        ], dtype=np.float32)

    def _simulate_lap(self, compound, tyre_age, push):
        """Return the lap time in seconds from a linear degradation model.

        Args:
            compound: Integer compound code (a key of ``DEGRADATION_RATE``).
            tyre_age: Current tyre age.
            push: Whether the driver is pushing on this lap.

        Returns:
            ``BASE_LAP_TIME + rate * tyre_age`` (plus 0.1 when pushing).

        Raises:
            KeyError: If ``compound`` is not a known compound code.
        """
        time_loss = self.DEGRADATION_RATE[compound] * tyre_age

        if push:
            # NOTE(review): this makes a push lap 0.1 s *slower*, while the
            # intent elsewhere is "push = faster lap but higher degradation".
            # Kept as-is to preserve the existing numbers - confirm the
            # intended sign and magnitude.
            time_loss += 0.1

        return self.BASE_LAP_TIME + time_loss
