**# SETUP**

In [None]:
!pip install -q category_encoders
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor
from lightgbm import log_evaluation
from category_encoders import TargetEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

**# LOAD DATA**

In [None]:
df = pd.read_csv("/kaggle/input/dataset/train.csv")
test_df = pd.read_csv("/kaggle/input/dataset/test.csv")

**# OUTLIER REMOVAL**

In [None]:
q1 = df['Lap_Time_Seconds'].quantile(0.005)
q3 = df['Lap_Time_Seconds'].quantile(0.995)
df = df[df['Lap_Time_Seconds'].between(q1, q3)]

**# TARGET VARIABLE**

In [None]:
y = np.log1p(df['Lap_Time_Seconds'])
X = df.drop(['Lap_Time_Seconds', 'Unique ID'], axis=1, errors='ignore')
X_test = test_df.drop(['Unique ID'], axis=1, errors='ignore')

**# FEATURE ENGINEERING**

In [None]:
def feature_engineer(df):
    if 'Avg_Speed_kmh' in df and 'Circuit_Length_km' in df:
        df['Speed_per_km'] = df['Avg_Speed_kmh'] / (df['Circuit_Length_km'] + 1e-5)
    if 'Track_Temperature_Celsius' in df and 'Ambient_Temperature_Celsius' in df:
        df['Temp_Diff'] = df['Track_Temperature_Celsius'] - df['Ambient_Temperature_Celsius']
    if 'Tire_Degradation_Factor_per_Lap' in df and 'Laps' in df:
        df['Degradation_Impact'] = df['Tire_Degradation_Factor_per_Lap'] * df['Laps']
    if 'Ambient_Temperature_Celsius' in df:
        df['Air_Density'] = 101325 / (287.05 * (df['Ambient_Temperature_Celsius'] + 273.15))
    if 'Pitstop_Count' in df and 'Laps' in df:
        df['Pitstop_Per_Lap'] = df['Pitstop_Count'] / (df['Laps'] + 1e-3)
    if 'Circuit_Length_km' in df and 'Laps' in df:
        df['Total_Distance'] = df['Circuit_Length_km'] * df['Laps']
    return df

X = feature_engineer(X)
X_test = feature_engineer(X_test)

**# CATEGORICAL ENCODING**

In [None]:
cat_cols = X.select_dtypes(include='object').columns
encoder = TargetEncoder()
X[cat_cols] = encoder.fit_transform(X[cat_cols], y)
X_test[cat_cols] = encoder.transform(X_test[cat_cols])

**# CONVERT TO float32 FOR PERFORMANCE**

In [None]:
X = X.astype(np.float32)
X_test = X_test.astype(np.float32)

**# TRAIN/VALIDATION SPLIT**

In [None]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

**# MODEL**

In [None]:
model = LGBMRegressor(
    objective='regression',
    boosting_type='gbdt',
    learning_rate=0.01,
    num_leaves=256,
    max_depth=12,
    feature_fraction=0.85,
    bagging_fraction=0.9,
    bagging_freq=3,
    min_child_samples=20,
    reg_alpha=2.0,
    reg_lambda=2.0,
    n_estimators=20000,
    subsample_for_bin=50000,
    random_state=42,
    n_jobs=-1
)

**# TRAIN, EVALUATE,  PREDICT and EXPORT**

In [2]:
model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    eval_metric='rmse',
    callbacks=[log_evaluation(period=100)]
)

val_preds = np.expm1(model.predict(X_val))
rmse = np.sqrt(mean_squared_error(np.expm1(y_val), val_preds))
print(f"\n✅ Final Validation RMSE: {rmse:.5f}")

test_preds = np.expm1(model.predict(X_test))

submission = pd.DataFrame({
    'Unique ID': test_df['Unique ID'],
    'Lap_Time_Seconds': test_preds
})
submission.to_csv("submission_final.csv", index=False)
print("✅ submission_final.csv saved!")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.540006 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5485
[LightGBM] [Info] Number of data points in the train set: 1705653, number of used features: 48
[LightGBM] [Info] Start training from score 4.502922
[100]	valid_0's rmse: 0.120205	valid_0's l2: 0.0144492
[200]	valid_0's rmse: 0.116396	valid_0's l2: 0.0135481
[300]	valid_0's rmse: 0.113444	valid_0's l2: 0.0128697
[400]	valid_0's rmse: 0.110907	valid_0's l2: 0.0123003
[500]	valid_0's rmse: 0.108663	valid_0's l2: 0.0118077
[600]	valid_0's rmse: 0.106444	valid_0's l2: 0.0113303
[700]	valid_0's rmse: 0.104085	valid_0's l2: 0.0108337
[800]	valid_0's rmse: 0.101817	valid_0's l2: 0.0103667
[900]	valid_0's rmse: 0.0997027	valid_0's l2: 0.00994062
[1000]	valid_0's rmse: 0.0975326	valid_0's l2: 0.00951261
[1100]	valid_0's rmse: 0.0953965	valid_0's l2: 0.00910048
[1200]	valid_0's rmse: 0.0933131	valid_0's 