In [None]:
from xgboost import XGBRegressor
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
import time

# Wall-clock reference for the whole run; it is read again at the end of the
# script to report total execution time.
start_total_time = time.time()

print("Starting XGBRegressor Process with GPU...")

# Read the training and test CSVs from the Kaggle input directory and time
# the two reads together.
print("1. Loading datasets...")
start_load_time = time.time()
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/datasetnew/train.csv',
        '/kaggle/input/datasetnew/test.csv',
    )
)
print(f"   Datasets loaded. Train shape: {train.shape}, Test shape: {test.shape}")
load_elapsed = time.time() - start_load_time
print(f"   Loading time: {load_elapsed:.2f} seconds")

# Split the training frame into target and features. The test frame is used
# as-is for prediction (X_test refers to the same object as `test`).
print("\n2. Preparing features and target...")
y_train = train['price_doc']
X_train = train.drop(columns=['price_doc'])
X_test = test

# Only int64/float64 feature columns are handed to the preprocessing step.
wanted_dtypes = ['int64', 'float64']
numeric_cols = X_train.select_dtypes(include=wanted_dtypes).columns
numeric_names = list(numeric_cols)
print(f"   Numeric columns: {numeric_names}")

# Standardize every selected numeric column.
# NOTE(review): ColumnTransformer's `remainder` is left at its default, which
# per the scikit-learn docs drops columns that are not listed -- confirm that
# excluding non-numeric features from the model is intended.
print("\n3. Creating preprocessing pipeline...")
numeric_step = ('num', StandardScaler(), numeric_cols)
preprocessor = ColumnTransformer(transformers=[numeric_step])

# Assemble the model pipeline: the numeric preprocessing step followed by an
# XGBoost regressor configured with the GPU tree method and GPU predictor.
print("\n4. Configuring GPU-accelerated XGBRegressor pipeline...")

# Regressor hyperparameters, collected in one place.
# NOTE(review): `tree_method='gpu_hist'` and `predictor='gpu_predictor'` are
# the older XGBoost GPU switches; newer releases document `device='cuda'`
# with `tree_method='hist'` instead -- confirm against the installed version.
xgb_params = dict(
    n_estimators=1200,
    learning_rate=0.0041,
    max_depth=18,
    subsample=0.77,
    colsample_bytree=0.77,
    reg_alpha=0.1,
    reg_lambda=1.0,
    gamma=0,
    eval_metric='rmse',
    tree_method='gpu_hist',  # GPU acceleration
    predictor='gpu_predictor',  # Use GPU for predictions
)

xgb_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', XGBRegressor(**xgb_params)),
])

# Fit the whole pipeline (preprocessor + regressor) on the training data and
# report how long the fit took.
print("\n5. Training XGBRegressor model...")
start_train_time = time.time()
xgb_pipeline.fit(X_train, y_train)
train_elapsed = time.time() - start_train_time
print(f"   Model training completed. Training time: {train_elapsed:.2f} seconds")

# Run the fitted pipeline on the test frame, time the call, and show the
# first five predictions as a quick sanity check.
print("\n6. Making predictions...")
start_predict_time = time.time()
y_pred = xgb_pipeline.predict(X_test)
predict_elapsed = time.time() - start_predict_time
print(f"   Predictions completed. Prediction time: {predict_elapsed:.2f} seconds")
preview = y_pred[:5]
print(f"   Sample predictions: {preview}")

# Write the test-set predictions to a CSV in the Kaggle working directory,
# pairing each prediction with the 'row ID' of its test row.
print("\n7. Creating submission file...")
submission = pd.DataFrame({
    'row ID': test['row ID'],
    'price_doc': y_pred
})
# Keep the file name in one place so the confirmation message below always
# names the file that was actually written (the message previously reported
# 'xgbregressor_submission.csv' while the file written was
# 'xgbregressor_submission11.csv').
submission_filename = 'xgbregressor_submission11.csv'
submission.to_csv(f'/kaggle/working/{submission_filename}', index=False)
print(f"   Submission file '{submission_filename}' created successfully")

# Report in-sample error: the pipeline is applied to the same rows it was
# fitted on, so this RMSE describes training fit, not held-out performance.
print("\n8. Calculating Training RMSE...")
train_pred = xgb_pipeline.predict(X_train)
train_mse = mean_squared_error(y_train, train_pred)
rmse = np.sqrt(train_mse)
print(f"   Training RMSE: {rmse:.10f}")

# Elapsed wall-clock time since the start-of-script timestamp.
total_time = time.time() - start_total_time
print(f"\nTotal Execution Time: {total_time:.2f} seconds")
