# Model Training and Optimization
## Objectives
* Train a Gradient Boosting Regressor to predict car selling prices.
* Perform hyperparameter optimization.
* Evaluate model performance with R² score and Actual vs Predicted plots.

## Outputs
* Trained model saved as 'gradient_boosting.pkl'.
* Performance metrics (train and test R² scores) and an Actual vs Predicted plot saved as 'actual_vs_predicted.png'.

## Additional Comments
* Use GridSearchCV for hyperparameter tuning with at least 6 parameters.

In [None]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Read the processed car dataset (path is relative to the working directory)
df = pd.read_csv('../../datasets/processed_car_data.csv')

# The selling price column is the regression target; every other column
# except 'Car_Name' is used as a feature
y = df['Selling_Price(lacs)']
X = df.drop(columns=['Selling_Price(lacs)', 'Car_Name'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Base estimator; the fixed seed keeps training repeatable
model = GradientBoostingRegressor(random_state=42)

# Search space: six hyperparameters with three candidate values each,
# i.e. 3**6 = 729 combinations to evaluate
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    subsample=[0.8, 0.9, 1.0],
)

# Exhaustive grid search with 5-fold cross-validation, scored by R²;
# n_jobs=-1 lets scikit-learn run the fits in parallel
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Keep the estimator built from the best-scoring parameter combination
best_model = grid_search.best_estimator_

# Training split: predict, score, report.
# A train R² far above the test R² below points to overfitting.
y_train_pred = best_model.predict(X_train)
train_r2 = r2_score(y_train, y_train_pred)
print(f'Train R²: {train_r2:.2f}')

# Held-out test split: predict, score, report
y_test_pred = best_model.predict(X_test)
test_r2 = r2_score(y_test, y_test_pred)
print(f'Test R²: {test_r2:.2f}')

# Plot Actual vs Predicted
# Create the output directory up front: savefig does not create missing
# parent directories and would otherwise fail with FileNotFoundError after
# the (expensive) grid search has already completed.
os.makedirs('../../outputs', exist_ok=True)

plt.figure(figsize=(8, 6))
# Train and test points share one set of axes so the two splits can be compared
plt.scatter(y_train, y_train_pred, label='Train', alpha=0.5)
plt.scatter(y_test, y_test_pred, label='Test', alpha=0.5)
# Dashed red diagonal (predicted == actual) spanning the full target range
plt.plot([y.min(), y.max()], [y.min(), y.max()], 'r--')
plt.xlabel('Actual Selling Price (lacs)')
plt.ylabel('Predicted Selling Price (lacs)')
plt.title('Actual vs Predicted Selling Price')
plt.legend()
plt.savefig('../../outputs/actual_vs_predicted.png')
# Close the figure so it is not kept open after being written to disk
plt.close()

# Save model
# Ensure the output directory exists first: joblib.dump opens the target
# path for writing and raises FileNotFoundError if the directory is missing,
# which would discard the model produced by the grid search.
os.makedirs('../../outputs', exist_ok=True)
joblib.dump(best_model, '../../outputs/gradient_boosting.pkl')