In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

# Load the dataset
file_path = 'C:\\Users\\William\\My Drive\\College\\MEng in Connected & Autonomous Vehicles\\Semester_3\\Machine Learning (COMP09012)\\Assignment_1\\peugeot_207_01.csv'
raw_dataset = pd.read_csv(file_path, delimiter=';')

# Data cleaning
imputer = SimpleImputer(strategy="mean")
numeric_cols = raw_dataset.select_dtypes(include=["float64", "int64"]).columns
raw_dataset[numeric_cols] = imputer.fit_transform(raw_dataset[numeric_cols])

# Encode categorical variables
encoder = LabelEncoder()
categorical_cols = ['roadSurface', 'traffic', 'drivingStyle']
for col in categorical_cols:
    raw_dataset[col] = encoder.fit_transform(raw_dataset[col])

# Feature selection
features = raw_dataset.loc[:, raw_dataset.columns.difference(['drivingStyle', 'Unnamed: 0'])]  # Exclude target and identifier
target = raw_dataset['drivingStyle']  # Target variable

# Scale the features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, random_state=42)

# Hyperparameter tuning with GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf'],
    'class_weight': ['balanced']
}

# Suppress verbose output from GridSearchCV
grid_search = GridSearchCV(SVC(), param_grid, scoring='f1_macro', cv=5, verbose=0, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best parameters from GridSearchCV
best_params = grid_search.best_params_

# Train the SVM model with the best parameters
best_svm = grid_search.best_estimator_
best_svm.fit(X_train, y_train)

# Make predictions on the test set
y_pred = best_svm.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print results in the desired format
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)


Accuracy: 0.9603658536585366
Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.76      0.73       115
           1       0.98      0.98      0.98      1525

    accuracy                           0.96      1640
   macro avg       0.84      0.87      0.85      1640
weighted avg       0.96      0.96      0.96      1640

