# Similitud de vectores + LLM

In [None]:
# Google Drive mount (currently disabled). Later cells read CSV files from
# /content/drive/MyDrive, so uncomment these two lines when running on Colab.
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


# Paso 1:  Caracterización física de circuitos
Cada circuito se describe mediante características estructurales como número de curvas, tipos de curvas, longitud total de rectas, etc. A partir de ellas generamos un vector representativo para comparar con otros circuitos.


In [2]:
import pandas as pd

# Hand-written structural features, keyed by circuit name.
# ⚠️ Every circuit must define exactly the same feature keys so that the
# resulting columns are homogeneous across circuits.
circuit_features = {
    "barcelona": {
        "num_curves": 16,
        "num_straights": 4,
        "slow_corners": 6,
        "medium_corners": 7,
        "fast_corners": 3,
        "longest_straight_m": 1047,
        "total_length_km": 4.657,
    },
    "bahrein": {
        "num_curves": 15,
        "num_straights": 4,
        "slow_corners": 6,
        "medium_corners": 5,
        "fast_corners": 4,
        "longest_straight_m": 1090,
        "total_length_km": 5.412,
    },
    "monaco": {
        "num_curves": 19,
        "num_straights": 2,
        "slow_corners": 11,
        "medium_corners": 7,
        "fast_corners": 1,
        "longest_straight_m": 510,
        "total_length_km": 3.337,
    },
    "monza": {
        "num_curves": 11,
        "num_straights": 6,
        "slow_corners": 2,
        "medium_corners": 3,
        "fast_corners": 6,
        "longest_straight_m": 1120,
        "total_length_km": 5.793,
    },
}

# One row per circuit, one column per feature; every column is stored as float.
circuit_vectors = pd.DataFrame.from_dict(circuit_features, orient="index").astype(float)

circuit_vectors

Unnamed: 0,num_curves,num_straights,slow_corners,medium_corners,fast_corners,longest_straight_m,total_length_km
barcelona,16.0,4.0,6.0,7.0,3.0,1047.0,4.657
bahrein,15.0,4.0,6.0,5.0,4.0,1090.0,5.412
monaco,19.0,2.0,11.0,7.0,1.0,510.0,3.337
monza,11.0,6.0,2.0,3.0,6.0,1120.0,5.793


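Estandarizamos cada característica (media 0 y desviación típica 1) para que magnitudes en escalas muy distintas (metros, kilómetros, número de curvas) sean comparables entre sí.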

In [3]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column with StandardScaler (zero mean, unit
# variance per column), then re-attach the circuit names and feature labels
# that the scaler's plain array output does not carry.
scaler = StandardScaler()
scaled_matrix = scaler.fit_transform(circuit_vectors)
vectors_scaled = pd.DataFrame(
    data=scaled_matrix,
    index=circuit_vectors.index,
    columns=circuit_vectors.columns,
)

vectors_scaled

Unnamed: 0,num_curves,num_straights,slow_corners,medium_corners,fast_corners,longest_straight_m,total_length_km
barcelona,0.262111,0.0,-0.078326,0.904534,-0.27735,0.419963,-0.152142
bahrein,-0.08737,0.0,-0.078326,-0.301511,0.27735,0.591539,0.65253
monaco,1.310556,-1.414214,1.488195,0.904534,-1.38675,-1.722744,-1.558984
monza,-1.485297,1.414214,-1.331543,-1.507557,1.38675,0.711243,1.058596


Comparamos el circuito de Monza con el resto (lo elegimos porque no comparte muchas características con ninguno de los otros tres circuitos).

In [4]:
from sklearn.metrics.pairwise import cosine_similarity

# Split the scaled vectors into the Monza row (kept as a one-row DataFrame)
# and the remaining circuits.
monza_vec = vectors_scaled.loc[["monza"]]
others_vecs = vectors_scaled.drop(index="monza")

# Cosine similarity between Monza and every other circuit; the result has a
# single row (Monza), which is the one we keep.
similarity_matrix = cosine_similarity(monza_vec, others_vecs)
similarities = similarity_matrix[0]

# Similarity table, most similar circuit first
similarity_df = (
    pd.DataFrame({"circuit": others_vecs.index, "cosine_similarity": similarities})
    .sort_values("cosine_similarity", ascending=False)
)

similarity_df

Unnamed: 0,circuit,cosine_similarity
1,bahrein,0.649982
0,barcelona,-0.510264
2,monaco,-0.938214


Cargaremos los reglajes del circuito que tenga mayor similitud.

In [5]:
# The similarity table is sorted in descending order, so its first row holds
# the circuit most similar to Monza.
most_similar_circuit = similarity_df["circuit"].iloc[0]
print(f"El circuito más similar a Monza es: {most_similar_circuit}")

# Paths of the centroid CSV files for that circuit
kmeans_centroid_path = f"/content/drive/MyDrive/Telemetria-F1-24/{most_similar_circuit}_kmeans_centroids.csv"
dbscan_centroid_path = f"/content/drive/MyDrive/Telemetria-F1-24/{most_similar_circuit}_dbscan_centroids.csv"

# Load both centroid tables
ref_kmeans_centroids, ref_dbscan_centroids = (
    pd.read_csv(kmeans_centroid_path),
    pd.read_csv(dbscan_centroid_path),
)

El circuito más similar a Monza es: bahrein


In [6]:
# Preview the first rows of the K-Means centroid table loaded from CSV
ref_kmeans_centroids.head()

Unnamed: 0,cluster,m_frontWing,m_rearWing,m_onThrottle,m_offThrottle,m_frontCamber,m_rearCamber,m_frontToe,m_rearToe,m_frontSuspension,...,m_frontSuspensionHeight,m_rearSuspensionHeight,m_brakePressure,m_brakeBias,m_engineBraking,m_rearLeftTyrePressure,m_rearRightTyrePressure,m_frontLeftTyrePressure,m_frontRightTyrePressure,lapTime
0,0,34.0,30.0,40.0,50.0,-3.2,-1.8,0.09,0.08,23.0,...,25.0,55.0,97.0,54.0,30.0,21.0,21.0,23.4,23.4,89225.307692
1,5,38.0,25.0,55.0,60.0,-3.5,-2.2,0.25,0.25,41.0,...,20.0,55.0,100.0,55.0,100.0,20.5,20.5,22.5,22.5,89383.625
2,6,35.263158,32.105263,67.894737,65.0,-3.5,-2.2,0.25,0.25,39.421053,...,20.0,52.631579,96.052632,53.947368,100.0,26.5,26.5,29.5,29.5,89417.842105
3,2,42.514286,37.228571,64.857143,49.428571,-3.5,-2.2,0.022857,0.097143,38.8,...,23.485714,56.971429,100.0,54.914286,74.285714,26.5,26.5,28.471429,28.357143,89578.514286
4,1,39.636364,38.818182,63.636364,70.0,-3.377273,-2.2,0.25,0.25,41.0,...,20.0,55.0,100.0,55.0,100.0,23.0,23.0,26.0,26.0,89691.272727


In [7]:
# Preview the first rows of the DBSCAN centroid table loaded from CSV
ref_dbscan_centroids.head()

Unnamed: 0,cluster,m_frontWing,m_rearWing,m_onThrottle,m_offThrottle,m_frontCamber,m_rearCamber,m_frontToe,m_rearToe,m_frontSuspension,...,m_frontSuspensionHeight,m_rearSuspensionHeight,m_brakePressure,m_brakeBias,m_engineBraking,m_rearLeftTyrePressure,m_rearRightTyrePressure,m_frontLeftTyrePressure,m_frontRightTyrePressure,lapTime
0,0,41.3,36.8,69.5,54.0,-3.47,-2.2,0.135,0.16,40.0,...,21.1,56.1,99.2,54.2,92.0,24.16,24.16,26.59,26.54,88966.1


Nos quedamos únicamente con los centroides más rápidos: aquellos cuyo tiempo de vuelta es inferior a la media de todas las vueltas válidas del circuito.

In [8]:
# Columns considered irrelevant for this analysis
columns_to_drop = ['lapNumber', 'circuit', 'm_ballast', 'm_fuelLoad']

# Load the dataset of the most similar circuit and clean it in one pipeline
df_similar_circuit = (
    pd.read_csv(f"/content/drive/MyDrive/Telemetria-F1-24/{most_similar_circuit}_sanitized.csv")
    # keep only laps with a positive lap time
    .loc[lambda laps: laps["lapTimeInMS"] > 0]
    # drop the irrelevant columns; names missing from the file are ignored
    .drop(columns=columns_to_drop, errors='ignore')
    # drop columns that are entirely empty
    .dropna(axis=1, how='all')
    # drop rows that still contain any null value
    .dropna()
    # drop fully duplicated rows
    .drop_duplicates()
)

# Lap-time column of the cleaned dataset
lap_times = df_similar_circuit["lapTimeInMS"]

In [9]:
# A centroid counts as "fast" when its lap time is strictly below the mean
# lap time of the cleaned dataset.
threshold = lap_times.mean()
kmeans_is_fast = ref_kmeans_centroids["lapTime"].lt(threshold)
dbscan_is_fast = ref_dbscan_centroids["lapTime"].lt(threshold)

fast_kmeans_centroids = ref_kmeans_centroids.loc[kmeans_is_fast].reset_index(drop=True)
fast_dbscan_centroids = ref_dbscan_centroids.loc[dbscan_is_fast].reset_index(drop=True)

print(f"Usando {len(fast_kmeans_centroids)} centroides rápidos como referencia para K-Means.")
fast_kmeans_centroids

Usando 4 centroides rápidos como referencia para K-Means.


Unnamed: 0,cluster,m_frontWing,m_rearWing,m_onThrottle,m_offThrottle,m_frontCamber,m_rearCamber,m_frontToe,m_rearToe,m_frontSuspension,...,m_frontSuspensionHeight,m_rearSuspensionHeight,m_brakePressure,m_brakeBias,m_engineBraking,m_rearLeftTyrePressure,m_rearRightTyrePressure,m_frontLeftTyrePressure,m_frontRightTyrePressure,lapTime
0,0,34.0,30.0,40.0,50.0,-3.2,-1.8,0.09,0.08,23.0,...,25.0,55.0,97.0,54.0,30.0,21.0,21.0,23.4,23.4,89225.307692
1,5,38.0,25.0,55.0,60.0,-3.5,-2.2,0.25,0.25,41.0,...,20.0,55.0,100.0,55.0,100.0,20.5,20.5,22.5,22.5,89383.625
2,6,35.263158,32.105263,67.894737,65.0,-3.5,-2.2,0.25,0.25,39.421053,...,20.0,52.631579,96.052632,53.947368,100.0,26.5,26.5,29.5,29.5,89417.842105
3,2,42.514286,37.228571,64.857143,49.428571,-3.5,-2.2,0.022857,0.097143,38.8,...,23.485714,56.971429,100.0,54.914286,74.285714,26.5,26.5,28.471429,28.357143,89578.514286


In [10]:
# Report how many DBSCAN centroids passed the lap-time filter, then show them
print(f"Usando {len(fast_dbscan_centroids)} centroides rápidos como referencia para DBSCAN.")
fast_dbscan_centroids

Usando 1 centroides rápidos como referencia para DBSCAN.


Unnamed: 0,cluster,m_frontWing,m_rearWing,m_onThrottle,m_offThrottle,m_frontCamber,m_rearCamber,m_frontToe,m_rearToe,m_frontSuspension,...,m_frontSuspensionHeight,m_rearSuspensionHeight,m_brakePressure,m_brakeBias,m_engineBraking,m_rearLeftTyrePressure,m_rearRightTyrePressure,m_frontLeftTyrePressure,m_frontRightTyrePressure,lapTime
0,0,41.3,36.8,69.5,54.0,-3.47,-2.2,0.135,0.16,40.0,...,21.1,56.1,99.2,54.2,92.0,24.16,24.16,26.59,26.54,88966.1


Redondeamos los setups de los clusters al valor permitido más cercano (hacia arriba a partir de 0.5) cuando sus valores no coinciden con ninguno de los permitidos por el juego.

In [14]:
import numpy as np
import pandas as pd

# Table with the simulator's official setup rules.
# Each entry maps a setup column name to the range it may take:
#   "min"  - lowest allowed value
#   "max"  - highest allowed value
#   "step" - spacing between consecutive allowed values, counted from "min"
# enforce_setup_rules() passes these three values to _quantize() for every
# rule whose column is present in the DataFrame.
SETUP_RULES = {
    "m_frontWing":            {"min": 0,    "max": 50,   "step": 1},
    "m_rearWing":             {"min": 0,    "max": 50,   "step": 1},
    "m_onThrottle":           {"min": 10,   "max": 100,  "step": 5},
    "m_offThrottle":          {"min": 10,   "max": 100,  "step": 5},
    "m_frontCamber":          {"min": -3.5, "max": -2.5, "step": 0.1},
    "m_rearCamber":           {"min": -2.2, "max": -0.7, "step": 0.1},
    "m_frontToe":             {"min": 0.00, "max": 0.50, "step": 0.01},
    "m_rearToe":              {"min": 0.00, "max": 0.50, "step": 0.01},
    "m_frontSuspension":      {"min": 1,    "max": 41,   "step": 1},
    "m_rearSuspension":       {"min": 1,    "max": 41,   "step": 1},
    "m_frontAntiRollBar":     {"min": 1,    "max": 21,   "step": 1},
    "m_rearAntiRollBar":      {"min": 1,    "max": 21,   "step": 1},
    "m_frontSuspensionHeight":{"min": 10,   "max": 40,   "step": 1},
    "m_rearSuspensionHeight": {"min": 40,   "max": 100,  "step": 1},
    "m_brakePressure":        {"min": 80,   "max": 100,  "step": 1},
    "m_brakeBias":            {"min": 50,   "max": 70,   "step": 1},
    "m_engineBraking":        {"min": 0,    "max": 100,  "step": 10},
    "m_rearLeftTyrePressure": {"min": 20.5, "max": 26.5, "step": 0.1},
    "m_rearRightTyrePressure":{"min": 20.5, "max": 26.5, "step": 0.1},
    "m_frontLeftTyrePressure":{"min": 22.5, "max": 29.5, "step": 0.1},
    "m_frontRightTyrePressure":{"min": 22.5,"max": 29.5, "step": 0.1},
}

def _quantize(value, low, high, step):
    """Recorta y redondea un valor al múltiplo de `step` más cercano dentro [low, high]."""
    value = np.clip(value, low, high)
    q = round((value - low) / step) * step + low
    # Ajuste de precisión: nº de decimales según el paso
    decimals = len(str(step).split(".")[1]) if isinstance(step, float) and "." in str(step) else 0
    return round(q, decimals)

def enforce_setup_rules(df: pd.DataFrame, rules=SETUP_RULES) -> pd.DataFrame:
    """Return a copy of ``df`` whose rule-covered columns hold only valid values.

    Every column of ``df`` that has an entry in ``rules`` is passed, value by
    value, through ``_quantize`` with that entry's ``"min"``, ``"max"`` and
    ``"step"``. Columns without a rule are copied unchanged, and rules whose
    column is missing from ``df`` are skipped. The input frame is not modified.
    """
    quantized = df.copy()
    constrained = [name for name in rules if name in quantized.columns]
    for name in constrained:
        limits = rules[name]
        bounds = (limits["min"], limits["max"], limits["step"])
        # Series.apply forwards `args` to the function after each element
        quantized[name] = quantized[name].apply(_quantize, args=bounds)
    return quantized

# Snap both sets of fast centroids onto the values the rules allow
fast_valid_kmeans_centroids = enforce_setup_rules(fast_kmeans_centroids)
fast_valid_dbscan_centroids = enforce_setup_rules(fast_dbscan_centroids)

# Show the first rows of each table, K-Means first and DBSCAN second
display(
    fast_valid_kmeans_centroids.head(),
    fast_valid_dbscan_centroids.head(),
)

Unnamed: 0,cluster,m_frontWing,m_rearWing,m_onThrottle,m_offThrottle,m_frontCamber,m_rearCamber,m_frontToe,m_rearToe,m_frontSuspension,...,m_frontSuspensionHeight,m_rearSuspensionHeight,m_brakePressure,m_brakeBias,m_engineBraking,m_rearLeftTyrePressure,m_rearRightTyrePressure,m_frontLeftTyrePressure,m_frontRightTyrePressure,lapTime
0,0,34,30,40,50,-3.2,-1.8,0.09,0.08,23,...,25,55,97,54,30,21.0,21.0,23.4,23.4,89225.307692
1,5,38,25,55,60,-3.5,-2.2,0.25,0.25,41,...,20,55,100,55,100,20.5,20.5,22.5,22.5,89383.625
2,6,35,32,70,65,-3.5,-2.2,0.25,0.25,39,...,20,53,96,54,100,26.5,26.5,29.5,29.5,89417.842105
3,2,43,37,65,50,-3.5,-2.2,0.02,0.1,39,...,23,57,100,55,70,26.5,26.5,28.5,28.4,89578.514286


Unnamed: 0,cluster,m_frontWing,m_rearWing,m_onThrottle,m_offThrottle,m_frontCamber,m_rearCamber,m_frontToe,m_rearToe,m_frontSuspension,...,m_frontSuspensionHeight,m_rearSuspensionHeight,m_brakePressure,m_brakeBias,m_engineBraking,m_rearLeftTyrePressure,m_rearRightTyrePressure,m_frontLeftTyrePressure,m_frontRightTyrePressure,lapTime
0,0,41,37,70,55,-3.5,-2.2,0.14,0.16,40,...,21,56,99,54,90,24.2,24.2,26.6,26.5,88966.1


Elegimos el cluster más rápido, tanto con K-Means como con DBSCAN.

In [16]:
# K-Means: take the row with the lowest lap time and keep only the setup
# parameters (the cluster id and the lap time itself are dropped).
kmeans_best_row = fast_valid_kmeans_centroids["lapTime"].idxmin()
fastest_kmeans_centroid = (
    fast_valid_kmeans_centroids.loc[kmeans_best_row].drop(labels=["cluster", "lapTime"])
)
print("Cluster más rápido con K-Means:")
print(fastest_kmeans_centroid)

# DBSCAN: same selection on the DBSCAN centroids
dbscan_best_row = fast_valid_dbscan_centroids["lapTime"].idxmin()
fastest_dbscan_centroid = (
    fast_valid_dbscan_centroids.loc[dbscan_best_row].drop(labels=["cluster", "lapTime"])
)
print("\nCluster más rápido con DBSCAN:")
print(fastest_dbscan_centroid)

Cluster más rápido con K-Means:
m_frontWing                 34.00
m_rearWing                  30.00
m_onThrottle                40.00
m_offThrottle               50.00
m_frontCamber               -3.20
m_rearCamber                -1.80
m_frontToe                   0.09
m_rearToe                    0.08
m_frontSuspension           23.00
m_rearSuspension            17.00
m_frontAntiRollBar           7.00
m_rearAntiRollBar            5.00
m_frontSuspensionHeight     25.00
m_rearSuspensionHeight      55.00
m_brakePressure             97.00
m_brakeBias                 54.00
m_engineBraking             30.00
m_rearLeftTyrePressure      21.00
m_rearRightTyrePressure     21.00
m_frontLeftTyrePressure     23.40
m_frontRightTyrePressure    23.40
Name: 0, dtype: float64

Cluster más rápido con DBSCAN:
m_frontWing                 41.00
m_rearWing                  37.00
m_onThrottle                70.00
m_offThrottle               55.00
m_frontCamber               -3.50
m_rearCamber                