## Model Training

##### The model is built using keras.Sequential() with multiple dense layers, L2 weight regularization, and dropout to reduce overfitting. The output layer uses a linear activation function for regression. The model is compiled with the Adam optimizer (learning rate of 0.001), MSE as the loss function, and RMSE as the evaluation metric. For callbacks, EarlyStopping halts training if the validation loss doesn't improve for 28 epochs and restores the best weights seen, while ReduceLROnPlateau multiplies the learning rate by a factor of 0.1 (down to a minimum of 1e-6) if the validation loss stagnates for 24 epochs.

In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# Data preparation: load the processed table, pick the model inputs, hold out
# a test set, and standardize the features.
# ---------------------------------------------------------------------------

# Read the preprocessed training table from disk.
train_data = pd.read_csv('../data/processed/train_data_processed.csv').copy()

# Columns used as model inputs (one per line for easier diffs).
selected_features = [
    "milage",
    "age",
    "hp",
    "engine_size",
    "cylinders",
    "speed",
    "has_auto_shift",
    "accident_mapped",
    "int_col_mapped",
    "ext_col_mapped",
    "luxury_category",
]

# Feature matrix and regression target.
X = train_data[selected_features]
y = train_data['price']

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training rows only and
# the same fitted statistics are then applied to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ---------------------------------------------------------------------------
# Network definition: a stack of ReLU dense layers that narrows from 250 to 16
# units, with L2 weight penalties and dropout on the first four hidden layers,
# followed by a single linear output unit for regression.
# ---------------------------------------------------------------------------
model = keras.Sequential()

# Hidden layer 1: widest layer, strongest L2 penalty and dropout rate.
model.add(layers.Dense(250, activation='relu', kernel_regularizer=keras.regularizers.l2(0.02)))
model.add(layers.Dropout(0.4))

# Hidden layer 2.
model.add(layers.Dense(128, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(layers.Dropout(0.3))

# Hidden layer 3.
model.add(layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(layers.Dropout(0.2))

# Hidden layer 4: lightest L2 penalty and dropout rate.
model.add(layers.Dense(32, activation='relu', kernel_regularizer=keras.regularizers.l2(0.005)))
model.add(layers.Dropout(0.1))

# Hidden layer 5: no regularizer and no dropout.
model.add(layers.Dense(16, activation='relu'))

# Output: one unit with linear activation, i.e. an unbounded real-valued prediction.
model.add(layers.Dense(1, activation='linear'))

# Training configuration: Adam at a 0.001 learning rate, optimizing MSE and
# reporting RMSE alongside the loss.
rmse_metric = tf.keras.metrics.RootMeanSquaredError(name="root_mean_squared_error")
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_squared_error',
    metrics=[rmse_metric],
)

# Callbacks, both driven by the validation loss:
#   - EarlyStopping stops training after 28 epochs without improvement and
#     restores the weights from the best epoch.
#   - ReduceLROnPlateau multiplies the learning rate by 0.1 after 24 stagnant
#     epochs, never going below 1e-6.
es = EarlyStopping(monitor='val_loss', patience=28, restore_best_weights=True)
lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=24, min_lr=1e-6)

# Train the model.
# The validation data is carved out of the training set (validation_split)
# instead of reusing the held-out test set. val_loss decides when training
# stops, which weights are restored, and when the learning rate drops, so
# monitoring the test set would tune the model on it and make any later test
# score optimistically biased.
# Keras takes the validation samples from the end of the arrays before any
# per-epoch shuffling; the rows were already shuffled by train_test_split.
# The target is passed as a plain NumPy array so both inputs are sliced
# positionally in the same way.
history = model.fit(
    X_train_scaled,
    y_train.to_numpy(),
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[es, lr],  # Early stopping and learning rate reduction
)

# Score the trained network on the held-out test partition. evaluate() returns
# the loss (MSE) followed by the compiled metric (RMSE), in that order.
test_loss, test_rmse = model.evaluate(
    X_test_scaled,
    y_test,
)
print(f"\nTest RMSE: {test_rmse:.2f}")

# Persist the trained model in HDF5 format.
# NOTE(review): the fitted StandardScaler is not saved here; inference code
# must apply the same scaling -- confirm it is persisted elsewhere.
model.save(
    "../models/car_price_prediction_model.h5"
)
print("Model successfully saved!")