In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/burnout-datathon-ieeecsmuj/sample_submission.csv
/kaggle/input/burnout-datathon-ieeecsmuj/val.csv
/kaggle/input/burnout-datathon-ieeecsmuj/train.csv
/kaggle/input/burnout-datathon-ieeecsmuj/test.csv


In [2]:
pip install --upgrade pip

Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.0.1
    Uninstalling pip-23.0.1:
      Successfully uninstalled pip-23.0.1
Successfully installed pip-25.1.1
[0mNote: you may need to restart the kernel to use updated packages.


In [3]:
%pip install pandas numpy scikit-learn xgboost

Collecting xgboost
  Downloading xgboost-3.0.2-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.2-py3-none-manylinux_2_28_x86_64.whl (253.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.9/253.9 MB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: xgboost
Successfully installed xgboost-3.0.2
[0mNote: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error

# Load data
train = pd.read_csv('/kaggle/input/burnout-datathon-ieeecsmuj/train.csv')
test = pd.read_csv('/kaggle/input/burnout-datathon-ieeecsmuj/test.csv')
sample = pd.read_csv('/kaggle/input/burnout-datathon-ieeecsmuj/sample_submission.csv')

print("Train Shape :", train.shape)
print("Test Shape :", test.shape)

TARGET = 'Lap_Time_Seconds'

DROP_COLS = [
    'Unique ID', 'rider_name', 'team_name', 'bike_name',
    'circuit_name', 'points', 'position'
]

def add_features(df):
    """Add engineered features to the dataframe"""
    df['LapTime_Estimate'] = df['Circuit_Length_km'] / df['Avg_Speed_kmh'] * 3600
    df['Points_per_Year'] = df['Championship_Points'] / (df['years_active'] + 1)
    df['Finish_Rate'] = df['finishes'] / (df['starts'] + 1)
    df['Podium_Rate'] = df['podiums'] / (df['starts'] + 1)
    df['Win_Rate'] = df['wins'] / (df['starts'] + 1)
    df['Avg_Temp'] = (df['Ambient_Temperature_Celsius'] + df['Track_Temperature_Celsius']) / 2
    
    # Advanced features
    df['Experience_Factor'] = df['years_active'] * df['Championship_Points']
    df['Performance_Ratio'] = df['wins'] / (df['Championship_Points'] + 1)
    df['Speed_Efficiency'] = df['Avg_Speed_kmh'] / df['Circuit_Length_km']
    df['Tire_Speed_Interaction'] = df['Tire_Degradation_Factor_per_Lap'] * df['Avg_Speed_kmh']
    df['Temp_Differential'] = df['Track_Temperature_Celsius'] - df['Ambient_Temperature_Celsius']
    df['Circuit_Difficulty'] = df['Corners_per_Lap'] * df['Circuit_Length_km']
    df['Rider_Consistency'] = df['finishes'] / (df['starts'] + 1) * df['years_active']
    df['Power_to_Weight'] = df['Avg_Speed_kmh'] / (df['Circuit_Length_km'] + 1)
    
    return df

# Add initial features
train = add_features(train)
test = add_features(test)

# Store test IDs before processing
test_ids = test['Unique ID'].copy()

# Prepare features and target
X = train.drop(DROP_COLS + [TARGET], axis=1)
X_test = test.drop(DROP_COLS, axis=1)
y = train[TARGET]

# Handle missing values and categorical encoding
all_data = pd.concat([X, X_test], axis=0)
all_data.fillna(-1, inplace=True)

# Encode categorical variables
cat_cols = all_data.select_dtypes(include='object').columns
for col in cat_cols:
    all_data[col] = all_data[col].astype('category').cat.codes

# Split back to train and test
X = all_data.iloc[:len(train)].copy()
X_test = all_data.iloc[len(train):].copy()

# Train-validation split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Add more engineered features
print("Adding additional engineered features...")

# Speed degradation interaction
X_train['Speed_Degradation'] = X_train['Avg_Speed_kmh'] * X_train['Tire_Degradation_Factor_per_Lap']
X_val['Speed_Degradation'] = X_val['Avg_Speed_kmh'] * X_val['Tire_Degradation_Factor_per_Lap']
X_test['Speed_Degradation'] = X_test['Avg_Speed_kmh'] * X_test['Tire_Degradation_Factor_per_Lap']

# Temperature-condition interaction
X_train['Temp_Condition'] = X_train['Track_Temperature_Celsius'] * X_train['Track_Condition']
X_val['Temp_Condition'] = X_val['Track_Temperature_Celsius'] * X_val['Track_Condition']
X_test['Temp_Condition'] = X_test['Track_Temperature_Celsius'] * X_test['Track_Condition']

# Circuit complexity ratio
X_train['Corners_per_Km'] = X_train['Corners_per_Lap'] / (X_train['Circuit_Length_km'] + 1e-3)
X_val['Corners_per_Km'] = X_val['Corners_per_Lap'] / (X_val['Circuit_Length_km'] + 1e-3)
X_test['Corners_per_Km'] = X_test['Corners_per_Lap'] / (X_test['Circuit_Length_km'] + 1e-3)

# More advanced interaction features
X_train['Speed_Temp_Interaction'] = X_train['Avg_Speed_kmh'] * X_train['Avg_Temp']
X_val['Speed_Temp_Interaction'] = X_val['Avg_Speed_kmh'] * X_val['Avg_Temp']
X_test['Speed_Temp_Interaction'] = X_test['Avg_Speed_kmh'] * X_test['Avg_Temp']

X_train['Experience_Speed'] = X_train['years_active'] * X_train['Avg_Speed_kmh']
X_val['Experience_Speed'] = X_val['years_active'] * X_val['Avg_Speed_kmh']
X_test['Experience_Speed'] = X_test['years_active'] * X_test['Avg_Speed_kmh']

# Enhanced XGBoost model with optimized hyperparameters for lower RMSE
print("\nTraining XGBoost model with optimized parameters...")
xgb_model = XGBRegressor(
    n_estimators=7000,
    learning_rate=0.006,
    max_depth=20,
    subsample=0.8,
    colsample_bytree=0.85,
    colsample_bylevel=0.85,
    colsample_bynode=0.85,
    reg_alpha=4.0,
    reg_lambda=5.0,
    min_child_weight=1,
    gamma=0.1,
    max_delta_step=0,
    tree_method='hist',
    grow_policy='depthwise',
    max_leaves=0,
    random_state=42,
    verbosity=0,
    n_jobs=-1  
)

print("Training XGBoost model...")
xgb_model.fit(X_train, y_train)

# Evaluate model performance
xgb_val_preds = xgb_model.predict(X_val)
xgb_rmse = np.sqrt(mean_squared_error(y_val, xgb_val_preds))
xgb_accuracy = max(0, (1 - xgb_rmse) * 100)

print(f"\nMODEL PERFORMANCE:")
print(f"XGBoost RMSE Score: {xgb_rmse:.8f}")
print(f"XGBoost Accuracy: {xgb_accuracy:.6f}%")

# Generate test predictions
print("\nGenerating test predictions...")
xgb_test_preds = xgb_model.predict(X_test)

# Create Kaggle submission file
submission = pd.DataFrame({
    'Unique ID': test_ids,
    'Lap_Time_Seconds': xgb_test_preds
})

# Save the submission file for Kaggle
submission.to_csv('submission.csv', index=False)

# Performance summary
print("\n" + "="*60)
print("FINAL MODEL PERFORMANCE SUMMARY")
print("="*60)
print(f"MODEL: XGBoost (Optimized)")
print(f"RMSE SCORE: {xgb_rmse:.8f}")
print(f"ACCURACY: {xgb_accuracy:.6f}%")
print("="*60)
print(f"✅ KAGGLE SUBMISSION FILE 'submission.csv' CREATED!")
print(f"Total predictions: {len(submission)}")
print(f"Prediction range: {xgb_test_preds.min():.6f} - {xgb_test_preds.max():.6f}")
print("="*60)

Train Shape : (1914056, 45)
Test Shape : (546874, 44)
Adding additional engineered features...

Training XGBoost model with optimized parameters...
Training XGBoost model...

MODEL PERFORMANCE:
XGBoost RMSE Score: 0.13247135
XGBoost Accuracy: 86.752865%

Generating test predictions...

FINAL MODEL PERFORMANCE SUMMARY
MODEL: XGBoost (Optimized)
RMSE SCORE: 0.13247135
ACCURACY: 86.752865%
✅ KAGGLE SUBMISSION FILE 'submission.csv' CREATED!
Total predictions: 546874
Prediction range: 69.915802 - 110.033325
