# CEN352 Assignment 2: Supervised Learning


## Wine Dataset
This notebook uses the **Wine** dataset from the UCI Machine Learning Repository (available via scikit-learn). The data come from chemical analyses of wines grown in the same region of Italy but derived from three different cultivars. Each sample includes measurements of 13 chemical constituents, and the objective is to classify each sample according to its cultivar.

### Load and inspect the dataset

In [1]:

import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')

# Fetch the Wine dataset through scikit-learn and unpack its parts
wine = load_wine()
X, y = wine.data, wine.target
feature_names = wine.feature_names

# Tabular view for inspection: one column per feature plus the class label
X_df = pd.DataFrame(X, columns=feature_names).assign(target=y)
# Preview the first rows of the table
X_df.head()


Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


### Split data and standardize features

In [2]:

# Hold out 30% of the samples for testing; stratifying on y keeps the class
# proportions the same in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply them
# to both splits (standardization is important for SVM and MLP)
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


### Train models

In [3]:

# 1. Random Forest (decision-tree ensemble) -- fitted on the unscaled features
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_clf.predict(X_test)

# 2. Support Vector Machine with an RBF kernel -- fitted on the standardized features
svm_clf = SVC(C=1.0, kernel='rbf', gamma='scale', random_state=42).fit(X_train_scaled, y_train)
y_pred_svm = svm_clf.predict(X_test_scaled)

# 3. Multi-Layer Perceptron with one hidden layer of 50 units -- standardized features
mlp_clf = MLPClassifier(hidden_layer_sizes=(50,), max_iter=500, random_state=42).fit(
    X_train_scaled, y_train
)
y_pred_mlp = mlp_clf.predict(X_test_scaled)


### Evaluate models

In [4]:

# Report helper shared by all classifiers
def evaluate_model(name, y_true, y_pred):
    """Print accuracy, weighted F1 score and the confusion matrix for one model.

    Parameters
    ----------
    name : label printed in the report header.
    y_true : ground-truth class labels.
    y_pred : predicted class labels, compared against ``y_true``.

    Returns
    -------
    None; the report is written to standard output.
    """
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    matrix = confusion_matrix(y_true, y_pred)
    report_lines = [
        "",  # leading blank line separates consecutive reports
        f"{name} Results:",
        f"Accuracy: {acc:.4f}",
        f"Weighted F1 Score: {f1:.4f}",
        "Confusion Matrix:",
        str(matrix),
    ]
    print("\n".join(report_lines))

# Report every classifier against the same held-out labels
for model_name, predictions in (
    ('Random Forest', y_pred_rf),
    ('Support Vector Machine', y_pred_svm),
    ('MLP', y_pred_mlp),
):
    evaluate_model(model_name, y_test, predictions)



Random Forest Results:
Accuracy: 1.0000
Weighted F1 Score: 1.0000
Confusion Matrix:
[[18  0  0]
 [ 0 21  0]
 [ 0  0 15]]

Support Vector Machine Results:
Accuracy: 0.9815
Weighted F1 Score: 0.9814
Confusion Matrix:
[[18  0  0]
 [ 0 21  0]
 [ 0  1 14]]

MLP Results:
Accuracy: 0.9815
Weighted F1 Score: 0.9815
Confusion Matrix:
[[18  0  0]
 [ 1 20  0]
 [ 0  0 15]]
