In [None]:
# Support Vector Machine
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix

In [None]:
# Load training data
# Read from a path relative to the working directory. A later cell drops the
# 'class' column from this frame to obtain the features and uses it as the label.
Data_train = pd.read_csv('mushroom_train.csv')

In [None]:
# Load test data
# Read from a path relative to the working directory. A later cell drops the
# 'class' column and passes the rest through the transformer fitted on the
# training frame, so this file needs the same feature columns.
Data_test = pd.read_csv('mushroom_test.csv')

In [None]:
# Separate training data features and labels
X = Data_train.drop(columns='class')
y = Data_train['class']

# Define column transformer
# Columns not listed below are dropped (ColumnTransformer's default remainder='drop').
numerical_features = ['cap-diameter', 'stem-height', 'stem-width']
categorical_features = ['cap-shape', 'cap-surface', 'cap-color', 'does-bruise-or-bleed',
                        'gill-attachment', 'gill-spacing', 'gill-color', 'stem-color', 'has-ring',
                        'ring-type', 'habitat', 'season']

# handle_unknown='ignore': a category level that only appears in the test file is
# encoded as an all-zero one-hot group instead of raising at transform() time.
# Output is unchanged whenever every level was seen during fit.
column_transformer = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

# Create the pipeline
# Single-step pipeline; `transformer` is reused later to transform the test data.
transformer = Pipeline(steps=[('transformer', column_transformer)])

# Fit and transform training data
# NOTE(review): the transformer is fitted on all training rows *before* the
# train/validation split below, so scaling statistics and the category vocabulary
# include the validation rows. Validation scores are therefore slightly optimistic.
X_data = transformer.fit_transform(X)

# Encode the string/class labels as integers; LabelEncoder assigns codes in sorted
# label order, and the same fitted encoder is reused for the test labels.
le = LabelEncoder()
y_data = le.fit_transform(y)

# Split data into training data and validation data
# 80/20 split with a fixed seed so the split is reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(X_data, y_data, test_size=0.2, random_state=42)

In [None]:
# Pull the label column out of the test frame; everything else is a feature
ytest = Data_test['class']
Xtest = Data_test.drop(columns=['class'])

# Apply the preprocessing fitted on the training data (transform only, no refit)
X_test = transformer.transform(Xtest)

# Encode the test labels with the encoder fitted on the training labels
y_test = le.transform(ytest)

In [None]:
# SVM with linear kernel
# Sweep over C. For every value: fit on the training split, report train and
# validation metrics, then refit on train + validation and report test metrics.
# NOTE(review): test scores are printed for every candidate C; choose C from the
# validation numbers only, otherwise the test set leaks into model selection.
print("SVM with linear kernel")
C_values = [0.1, 1, 10, 100]
for C in C_values:
    print("C value = ", C)
    svm_classifier = SVC(kernel='linear', C=C)
    svm_classifier.fit(X_train, y_train)

    train_accuracy = svm_classifier.score(X_train, y_train)

    # Predict the validation split once and derive both metrics from it.
    # accuracy_score on these predictions equals svm_classifier.score(X_val, y_val)
    # without paying for a second SVC prediction pass.
    y_pred_val = svm_classifier.predict(X_val)
    accuracy_val = accuracy_score(y_val, y_pred_val)
    # pos_label=1 is the second label in le.classes_ (sorted order) --
    # presumably the poisonous class; confirm against the data.
    f1_val = f1_score(y_val, y_pred_val, average='binary', pos_label=1)

    print("Train accuracy:", train_accuracy)
    print("Validation accuracy:", accuracy_val)
    print("Validation F1-score:", f1_val)

    # Refit on the full training data (train + validation) before test scoring
    svm_classifier.fit(X_data, y_data)

    y_pred_test = svm_classifier.predict(X_test)
    accuracy_test = accuracy_score(y_test, y_pred_test)
    f1_test = f1_score(y_test, y_pred_test, average='binary', pos_label=1)

    # Same value the original obtained via a second score() call on the test set;
    # the name is kept in case another cell reads it.
    test_accuracy = accuracy_test

    print()
    print("Test accuracy:", accuracy_test)
    print("Test F1-score:", f1_test)
    print()

In [None]:
# SVM with linear kernel (best C value)
# C=0.1 is hard-coded here; presumably chosen from the sweep in the previous
# cell -- update it if that sweep is re-run with different results.

svm_classifier = SVC(kernel='linear', C=0.1)
svm_classifier.fit(X_train, y_train)

train_accuracy = svm_classifier.score(X_train, y_train)

# Predict the validation split once; accuracy_score on these predictions equals
# svm_classifier.score(X_val, y_val) without a second prediction pass.
y_pred_val = svm_classifier.predict(X_val)
accuracy_val = accuracy_score(y_val, y_pred_val)
f1_val = f1_score(y_val, y_pred_val, average='binary', pos_label=1)

print("Train accuracy:", train_accuracy)
print("Validation accuracy:", accuracy_val)
print("Validation F1-score:", f1_val)

# Refit on the full training data (train + validation) before test scoring
svm_classifier.fit(X_data, y_data)

y_pred_test = svm_classifier.predict(X_test)
accuracy_test = accuracy_score(y_test, y_pred_test)
f1_test = f1_score(y_test, y_pred_test, average='binary', pos_label=1)

# Same value as a second score() call on the test set, without re-predicting
test_accuracy = accuracy_test

print()
print("Test accuracy:", accuracy_test)
print("Test F1-score:", f1_test)
print()

# Generate the confusion matrix (rows = true labels, columns = predicted labels)
cm = confusion_matrix(y_test, y_pred_test)

# Create a heatmap using the Seaborn library
# Tick labels assume encoded class 0 is edible and 1 is poisonous -- confirm
# against le.classes_.
class_names = ['Edible', 'Poisonous']
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, cmap="Blues", fmt='g',
            xticklabels=class_names, yticklabels=class_names, ax=ax)

ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Test Confusion Matrix')

plt.show()

In [None]:
# SVM with RBF kernel - grid search
# Exhaustive sweep over every (C, gamma) pair. Each model is fitted on the
# training split only and scored on the train, validation and test data.
# NOTE(review): unlike the linear-kernel cells, the model is NOT refitted on
# train + validation before test scoring -- confirm which protocol is intended.
# NOTE(review): test scores are printed for every candidate; choose C and gamma
# from the validation numbers only.
print("SVM with rbf kernel")
C_values = [0.1, 1, 10, 100]
gamma_values = [0.1, 1, 10]
for C in C_values:
    for gamma in gamma_values:
        print("C value = ", C,"Gamma = ", gamma)
        # Initialize SVM classifier
        svm_classifier = SVC(kernel='rbf', C=C, gamma=gamma)

        # Train SVM model on the training split (validation rows are held out)
        svm_classifier.fit(X_train, y_train)

        # Evaluate accuracy and F1-score on validation set.
        # The validation split is predicted once; accuracy_score on those
        # predictions equals svm_classifier.score(X_val, y_val).
        train_accuracy = svm_classifier.score(X_train, y_train)
        y_pred_val = svm_classifier.predict(X_val)
        accuracy_val = accuracy_score(y_val, y_pred_val)
        f1_val = f1_score(y_val, y_pred_val, average='binary', pos_label=1)

        print("Train accuracy:", train_accuracy)
        print("Validation accuracy:", accuracy_val)
        print("Validation F1-score:", f1_val)

        # Evaluate accuracy and F1-score on test set
        y_pred_test = svm_classifier.predict(X_test)
        accuracy_test = accuracy_score(y_test, y_pred_test)
        f1_test = f1_score(y_test, y_pred_test, average='binary', pos_label=1)
        # Same value as a second score() call on the test set, without re-predicting
        test_accuracy = accuracy_test

        print("Test accuracy:", accuracy_test)
        print("Test F1-score:", f1_test)
        print()

In [None]:
# SVM with rbf kernel (best C and gamma value)
# C=10 and gamma=1 are hard-coded; presumably chosen from the grid search in
# the previous cell -- update them if that search is re-run.
svm_classifier = SVC(kernel='rbf', C=10, gamma=1)

# Train SVM model on the training split (validation rows are held out)
svm_classifier.fit(X_train, y_train)

# Evaluate accuracy and F1-score on validation set.
# The validation split is predicted once; accuracy_score on those predictions
# equals svm_classifier.score(X_val, y_val) without a second prediction pass.
train_accuracy = svm_classifier.score(X_train, y_train)
y_pred_val = svm_classifier.predict(X_val)
accuracy_val = accuracy_score(y_val, y_pred_val)
f1_val = f1_score(y_val, y_pred_val, average='binary', pos_label=1)

print("Train accuracy:", train_accuracy)
print("Validation accuracy:", accuracy_val)
print("Validation F1-score:", f1_val)

# Evaluate accuracy and F1-score on test set
# NOTE(review): unlike the linear-kernel cells, the model is not refitted on
# train + validation before test scoring -- confirm which protocol is intended.
y_pred_test = svm_classifier.predict(X_test)
accuracy_test = accuracy_score(y_test, y_pred_test)
f1_test = f1_score(y_test, y_pred_test, average='binary', pos_label=1)
# Same value as a second score() call on the test set, without re-predicting
test_accuracy = accuracy_test

print("Test accuracy:", accuracy_test)
print("Test F1-score:", f1_test)
print()

# Generate the confusion matrix (rows = true labels, columns = predicted labels)
cm = confusion_matrix(y_test, y_pred_test)

# Create a heatmap using the Seaborn library
# Tick labels assume encoded class 0 is edible and 1 is poisonous -- confirm
# against le.classes_.
class_names = ['Edible', 'Poisonous']
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, cmap="Blues", fmt='g',
            xticklabels=class_names, yticklabels=class_names, ax=ax)

ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Test Confusion Matrix')

plt.show()