# Analyzing Motorsports Data from the Ergast API

## Loading the data with the fastf1 library


In [None]:
import fastf1

# Select the race session of the 2023 Montreal event.
session = fastf1.get_session(2023, "Montreal", "Race")

# Load the session data, explicitly requesting telemetry and lap data.
session.load(telemetry=True, laps=True)

# Lap table of the loaded session; the cells below iterate over it lap by lap.
laps = session.laps

We want to analyze the data on a per-lap basis. The fastf1 library provides an API that does the necessary slicing and interpolation. We use this API to extract the sequences for Speed, RPM etc. per lap.

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm


def extract_telemetry(laps, columns):
    """Collect the raw telemetry channels of every lap into one DataFrame.

    Args:
        laps: Object exposing ``iterlaps()`` (yielding ``(index, lap)`` pairs)
            and ``shape``. Every lap must provide ``get_telemetry()`` returning
            a table with a ``"Distance"`` column and the requested channels.
        columns: Names of the telemetry channels to extract.

    Returns:
        A DataFrame indexed by lap index with one column per channel. Each
        cell holds ``[distance_values, channel_values]`` as plain Python lists.
        Without laps, an empty DataFrame with the requested columns is returned.
    """
    # Keyed by lap index so that a repeated index overwrites the earlier row,
    # exactly like a repeated ``df.loc[index] = ...`` assignment would.
    rows = {}

    for index, lap in tqdm(laps.iterlaps(), total=laps.shape[0]):
        telemetry = lap.get_telemetry()
        distance = telemetry["Distance"]
        # A fresh distance list per cell, so cells never alias each other.
        rows[index] = {
            column: [distance.tolist(), telemetry[column].tolist()]
            for column in columns
        }

    if not rows:
        return pd.DataFrame(columns=columns)

    # Build the frame once: enlarging it with ``.loc`` for every lap
    # re-allocates the rows stored so far on each iteration.
    # dtype=object keeps the list-valued cells untouched.
    return pd.DataFrame(
        list(rows.values()), index=list(rows.keys()), columns=columns, dtype=object
    )


# Telemetry channels to extract for every lap.
columns = [
    "DistanceToDriverAhead",
    "RPM",
    "Speed",
    "nGear",
    "Throttle",
    "Brake",
    "DRS",
    "X",
    "Y",
    "Z",
]
# One row per lap; each cell holds [distance values, channel values].
df_telemetry = extract_telemetry(laps, columns)

> We save the telemetry data as a Python list of lists. This format is compatible with PyArrow, so we can save the dataset as .parquet or convert it to a Hugging Face dataset. A 2D NumPy array is not supported by PyArrow.


## Extracting normalized vectors and track visualizations

We resample the telemetry data onto a fixed track-distance grid, so that every lap yields vectors of the same length. In this way, we can use them as feature vectors for the dimensionality reduction in Spotlight.

In [None]:
import numpy as np

# Common distance grid onto which every lap is resampled: -10 to 4395 in steps
# of 5 (presumably metres, matching the telemetry "Distance" column - confirm).
dist_index = np.arange(-10, 4400, 5)


def extract_embeddings(laps, columns, distances=None):
    """Resample the telemetry channels of every lap onto a fixed distance grid.

    Args:
        laps: Object exposing ``iterlaps()`` (yielding ``(index, lap)`` pairs)
            and ``shape``. Every lap must provide ``get_telemetry()`` returning
            a table with a ``"Distance"`` column and the requested channels.
        columns: Names of the telemetry channels to resample.
        distances: Distance values at which every channel is evaluated.
            Defaults to the module-level ``dist_index`` grid.

    Returns:
        A DataFrame indexed by lap index with one ``<channel>_emb`` column per
        channel. Each cell is a list with one interpolated value per grid
        point. Without laps, an empty DataFrame with those columns is returned.
    """
    if distances is None:
        distances = dist_index

    column_names = [column + "_emb" for column in columns]

    # Keyed by lap index so that a repeated index overwrites the earlier row,
    # exactly like a repeated ``df.loc[index] = ...`` assignment would.
    rows = {}

    for index, lap in tqdm(laps.iterlaps(), total=laps.shape[0]):
        telemetry = lap.get_telemetry()
        lap_distance = telemetry["Distance"].to_numpy()
        # np.interp holds the first/last sample for grid points that lie
        # outside the distance range covered by the lap.
        rows[index] = {
            column + "_emb": np.interp(
                x=distances,
                xp=lap_distance,
                fp=telemetry[column].to_numpy(),
            ).tolist()
            for column in columns
        }

    if not rows:
        return pd.DataFrame(columns=column_names)

    # Build the frame once: enlarging it with ``.loc`` for every lap
    # re-allocates the rows stored so far on each iteration.
    # dtype=object keeps the list-valued cells untouched.
    return pd.DataFrame(
        list(rows.values()),
        index=list(rows.keys()),
        columns=column_names,
        dtype=object,
    )


# Channels to resample into fixed-length vectors. Compared with the raw
# extraction above, "DistanceToDriverAhead" and "DRS" are not included.
columns = ["RPM", "Speed", "nGear", "Throttle", "Brake", "X", "Y", "Z"]

# One row per lap; each "<channel>_emb" cell is a list with one value per grid point.
df_embedding = extract_embeddings(laps, columns)

Next we build some visualizations for the speed and the gear shift and save them as images.

In [None]:
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib import cm
import numpy as np
import matplotlib as mpl

from os import path


# Functions that render the speed map and the gear map of a lap and save them as PNG files


def create_speed_image(lapnumber, tel):
    """Render the track map of one lap coloured by speed and save it as a PNG.

    An image that already exists on disk is reused, so repeated runs only
    render the laps that are still missing.

    Args:
        lapnumber: Identifier used in the output file name.
        tel: Telemetry table providing the "X", "Y" and "Speed" columns.

    Returns:
        Path of the image file, ``imgs/speed/speed_vis_<lapnumber>.png``.
    """
    import os  # local import: only needed to create the output folder

    filename = "imgs/speed/speed_vis_" + str(lapnumber) + ".png"

    if path.isfile(filename):
        return filename

    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs(path.dirname(filename), exist_ok=True)

    colormap = mpl.cm.plasma
    x = tel["X"].to_numpy()
    y = tel["Y"].to_numpy()
    color = tel["Speed"]  # value to base the color gradient on

    # One segment per pair of consecutive points, so each can get its own color.
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, ax = plt.subplots(sharex=True, sharey=True, figsize=(12, 6.75))
    try:
        # Adjust margins and turn off the axis.
        fig.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.12)
        ax.axis("off")

        # Thick black line as the track background.
        ax.plot(x, y, color="black", linestyle="-", linewidth=16, zorder=0)

        # Continuous norm that maps the speed values to colors.
        norm = plt.Normalize(color.min(), color.max())
        lc = LineCollection(
            segments, cmap=colormap, norm=norm, linestyle="-", linewidth=5
        )
        lc.set_array(color)
        ax.add_collection(lc)

        # Horizontal color bar as legend; it shares the norm and colormap of
        # the line collection.
        cbaxes = fig.add_axes([0.25, 0.05, 0.5, 0.05])
        fig.colorbar(lc, cax=cbaxes, orientation="horizontal")

        fig.savefig(filename, format="png")
    finally:
        # Release only this figure, even when rendering or saving fails.
        plt.close(fig)

    return filename


def create_gear_image(lapnumber, tel):
    """Render the track map of one lap coloured by gear and save it as a PNG.

    An image that already exists on disk is reused, so repeated runs only
    render the laps that are still missing.

    Args:
        lapnumber: Identifier used in the output file name.
        tel: Telemetry table providing the "X", "Y" and "nGear" columns.

    Returns:
        Path of the image file, ``imgs/gears/gear_shift_vis_<lapnumber>.png``.
    """
    import os  # local import: only needed to create the output folder

    filename = "imgs/gears/gear_shift_vis_" + str(lapnumber) + ".png"

    if path.isfile(filename):
        return filename

    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs(path.dirname(filename), exist_ok=True)

    x = tel["X"].to_numpy()
    y = tel["Y"].to_numpy()

    # One segment per pair of consecutive points, so each can get its own color.
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    gear = tel["nGear"].to_numpy().astype(float)

    # plt.get_cmap replaces matplotlib.cm.get_cmap, which newer Matplotlib
    # releases no longer provide.
    cmap = plt.get_cmap("Paired")
    lc_comp = LineCollection(segments, norm=plt.Normalize(1, cmap.N + 1), cmap=cmap)
    lc_comp.set_array(gear)
    lc_comp.set_linewidth(4)

    fig, ax = plt.subplots()
    try:
        ax.add_collection(lc_comp)
        ax.axis("equal")
        ax.tick_params(labelleft=False, left=False, labelbottom=False, bottom=False)

        # Discrete color bar: bin edges at 1..9, labels 1..8 centred in the bins.
        cbar = fig.colorbar(
            mappable=lc_comp, ax=ax, label="Gear", boundaries=np.arange(1, 10)
        )
        cbar.set_ticks(np.arange(1.5, 9.5))
        cbar.set_ticklabels(np.arange(1, 9))

        fig.savefig(filename, format="png")
    finally:
        # Release only this figure, even when rendering or saving fails.
        plt.close(fig)

    return filename


def extract_images(laps):
    """Create the gear and speed images of every lap and collect their paths.

    Args:
        laps: Object exposing ``iterlaps()`` (yielding ``(index, lap)`` pairs)
            and ``shape``. Every lap must provide ``get_telemetry()``.

    Returns:
        A DataFrame indexed by lap index with the columns ``gear_vis`` and
        ``speed_vis`` holding the paths returned by ``create_gear_image`` and
        ``create_speed_image``. Without laps, an empty DataFrame with those
        columns is returned.
    """
    image_columns = ["gear_vis", "speed_vis"]

    # Keyed by lap index so that a repeated index overwrites the earlier row,
    # exactly like a repeated ``df.loc[index] = ...`` assignment would.
    rows = {}

    for index, lap in tqdm(laps.iterlaps(), total=laps.shape[0]):
        telemetry = lap.get_telemetry()
        rows[index] = {
            "gear_vis": create_gear_image(index, telemetry),
            "speed_vis": create_speed_image(index, telemetry),
        }

    if not rows:
        return pd.DataFrame(columns=image_columns)

    # Build the frame once: enlarging it with ``.loc`` for every lap
    # re-allocates the rows stored so far on each iteration.
    return pd.DataFrame(
        list(rows.values()),
        index=list(rows.keys()),
        columns=image_columns,
        dtype=object,
    )


df_images = extract_images(laps)

## Visualize with Spotlight

In [None]:
# Combine the lap metadata with the telemetry, image and embedding tables.
# axis=1 places the tables side by side, aligning their rows on the index.

df_metadata = pd.DataFrame(laps)
df = pd.concat([df_metadata, df_telemetry, df_images, df_embedding], axis=1)

In [None]:
from renumics import spotlight
from renumics.spotlight import dtypes

# Open the combined table in Spotlight without an explicit dtype mapping.
# The last cell of the notebook passes a Sequence1D mapping for the raw telemetry columns.

spotlight.show(df)

## Save as Hugging Face dataset

In [None]:
import datasets

# Convert the combined pandas table into a Hugging Face dataset.
ds = datasets.Dataset.from_pandas(df)

# Write the dataset to the local "telemetry_test" directory.
ds.save_to_disk("telemetry_test")

In [None]:
# Raw telemetry columns that Spotlight should display as 1D sequences.
sequence_columns = (
    "DistanceToDriverAhead",
    "RPM",
    "Speed",
    "nGear",
    "Throttle",
    "Brake",
    "DRS",
    "X",
    "Y",
    "Z",
)

# NOTE(review): this rebinds ``dtypes``, which an earlier cell imports from
# renumics.spotlight.
dtypes = dict.fromkeys(sequence_columns, spotlight.Sequence1D)

spotlight.show(df, dtype=dtypes)