Car Evaluation dataset

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error
import numpy as np
import matplotlib.pyplot as plt

Step 1 - Load the Car Evaluation dataset 

In [None]:
# Column names for the Car Evaluation data; the last one ('class') is the target.
col_names_car=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']

# Load dataset. The file is read with the default comma separator and has no
# header row, so the column names are supplied directly via `names`.
# (The deprecated `delim_whitespace` keyword is intentionally not passed: it
# emits a FutureWarning on recent pandas even when set to False.)
df_car = pd.read_csv('car.data', names=col_names_car)

# Display the first few rows of the dataset
print(df_car.head())

Step 2 - Encode categorical features to numerical values

In [None]:
# Fit a separate encoder for every column and keep each one, so the integer
# codes can be mapped back to the original categories later with
# encoders[col].inverse_transform(...) or inspected via encoders[col].classes_.
# NOTE: LabelEncoder numbers categories by sorted label order, which is not
# necessarily their semantic order.
encoders = {}

for col in df_car.columns:
    label_encoder = LabelEncoder()
    df_car[col] = label_encoder.fit_transform(df_car[col])
    encoders[col] = label_encoder

df_car.head()

Step 3 - Split the Data into a training (80%) and a test set (20%)

In [None]:
# Every column except the last is a feature; the last column is the target.
X, y = df_car.iloc[:, :-1], df_car.iloc[:, -1]

# Hold out 20% of the rows as the test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report how many rows ended up in each partition.
print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

Step 4 - Train a multilayer perceptron. Experiment with different MLP configurations and record the results

In [None]:
# Candidate hidden-layer architectures to compare
configs=[(32,),(64,),(32, 16),(64, 32),(64, 32, 16)]

# Carve a validation set out of the training data. The architecture is chosen
# on this validation set so that the test set is never used for model
# selection (which would make the reported test accuracy optimistically biased).
X_fit, X_val, y_fit, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# One record per configuration: {'config', 'val_accuracy', 'accuracy'}
results=[]

for config in configs:
    mlp=MLPClassifier(hidden_layer_sizes=config, max_iter=500, random_state=42)
    mlp.fit(X_fit, y_fit)

    # Validation accuracy drives the selection below
    val_accuracy=accuracy_score(y_val, mlp.predict(X_val))

    # Test accuracy is recorded for reporting only
    y_pred=mlp.predict(X_test)
    accuracy=accuracy_score(y_test, y_pred)

    results.append({'config': config, 'val_accuracy': val_accuracy, 'accuracy': accuracy})
    print(f"Config: {config}, Validation Accuracy: {val_accuracy}, Accuracy: {accuracy}")

# Find the best configuration according to validation accuracy
best_config=max(results, key=lambda x: x['val_accuracy'])
print(f"Best Configuration: {best_config['config']}, Validation Accuracy: {best_config['val_accuracy']}, Accuracy: {best_config['accuracy']}")

Step 5 - For the best-performing MLP configuration, experiment with different training-set sizes (10%-70% of the training split) and evaluate the results

In [None]:
# Fractions of the training split that the best architecture is trained on
training_sizes=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
training_results=[]

for size in training_sizes:
    # Keep only the requested fraction of the training data; the rest is discarded
    X_train_small, _, y_train_small, _ = train_test_split(
        X_train, y_train, train_size=size, random_state=42
    )

    # Same architecture and seed every time; fit() returns the fitted estimator
    mlp = MLPClassifier(
        hidden_layer_sizes=best_config['config'],
        max_iter=500,
        random_state=42,
    ).fit(X_train_small, y_train_small)

    # Always score against the same held-out test set
    y_pred = mlp.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    training_results.append(dict(size=size, accuracy=accuracy))
    print(f"Training Size: {size}, Accuracy: {accuracy}")

Step 6 - Plot the training loss curve for the different training set sizes

In [None]:
# Plot one training-loss curve per training-set size on a shared set of axes.
# Each model is refit here so that its loss_curve_ is available for plotting.

fig, ax = plt.subplots(figsize=(10, 6))

for size in training_sizes:
    X_train_small, _, y_train_small, _ = train_test_split(
        X_train, y_train, train_size=size, random_state=42
    )
    mlp = MLPClassifier(
        hidden_layer_sizes=best_config['config'],
        max_iter=500,
        random_state=42,
    ).fit(X_train_small, y_train_small)
    ax.plot(mlp.loss_curve_, label=f'Train Size: {int(size * 100)}%')

ax.set_title('Training Loss Curve')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

Auto MPG Dataset

Step 1 - Load and Preprocess the Auto MPG Dataset 

In [None]:
# Column names for the Auto MPG data; 'mpg' is the regression target.
col_names_auto=['mpg', 'cylinders', 'displacement', 'horsepower','weight', 'acceleration', 'model_year', 'origin', 'car_name']

# Load the dataset (whitespace-separated, no header row).
# sep=r'\s+' is the supported replacement for delim_whitespace=True, which is
# deprecated in recent pandas releases.
df_auto=pd.read_csv('auto-mpg.data', sep=r'\s+', names=col_names_auto)

# Display the first few rows
print(df_auto.head())

# Handle missing values: horsepower entries that cannot be parsed as numbers
# become NaN, and every row containing a NaN is then dropped.
df_auto['horsepower']=pd.to_numeric(df_auto['horsepower'], errors='coerce')
df_auto=df_auto.dropna()

# Drop the car_name column as it is not relevant
df_auto=df_auto.drop(columns=['car_name'])

# Display a summary of the cleaned dataset
print("\n")
print("Total")
# info() prints its report itself and returns None, so it is not wrapped in
# print() (which would emit a stray "None" line).
df_auto.info()

Step 2 - Split Data into a training (70%) and a test set (30%)

In [None]:
# 'mpg' is the value to predict; all remaining columns are used as features.
y = df_auto['mpg']
X = df_auto.loc[:, df_auto.columns != 'mpg']

# Hold out 30% of the rows as the test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Report how many rows ended up in each partition.
print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

Step 3 - Standardize the feature values to have zero mean and unit variance

In [None]:
# Learn the per-feature mean and standard deviation from the training data only,
# so no information from the test set leaks into the scaling parameters.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both partitions.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Step 4 - Build and Train the MLP Regressor. Use hidden layers with sizes (64, 32, 16)


In [None]:
# Three hidden layers (64 -> 32 -> 16) with ReLU activations, trained with Adam.
mlp = MLPRegressor(
    hidden_layer_sizes=(64, 32, 16),
    activation='relu',
    solver='adam',
    max_iter=500,
    random_state=42,
)

# Fit on the standardised training features.
mlp.fit(X_train_scaled, y_train)

# Predictions for the standardised test features.
y_pred = mlp.predict(X_test_scaled)

print("Training complete.")

Step 5 - Evaluate the model's performance using Mean Squared Error (MSE) and Mean Absolute Error (MAE) metrics

In [None]:
# Score the test-set predictions with both error metrics in one pass.
mse, mae = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error)
)

print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")

Step 6 - Show Predictions vs. Actual Values 



In [None]:
# Side-by-side table of true and predicted values, indexed like y_test.
results_df = y_test.to_frame('Actual').assign(Predicted=y_pred)

print(results_df.head())

Step 7 - Plot the training loss curve to visualize model convergence

In [None]:
# Training loss recorded by the fitted regressor, one value per iteration.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(mlp.loss_curve_, label='Training Loss')
ax.set_title('Training Loss Curve')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()