In [None]:
# Perceptron
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold

In [None]:
# Load training data
# The path is relative, so the CSV is looked up in the notebook's working directory.
Data_train = pd.read_csv('mushroom_train.csv')

In [None]:
# Load test data
# The path is relative, so the CSV is looked up in the notebook's working directory.
Data_test = pd.read_csv('mushroom_test.csv')

In [None]:
# Separate training data features and labels
X = Data_train.drop(columns='class')
y = Data_train['class']

# Columns that are standardised vs. columns that are one-hot encoded.
# Any column of X that appears in neither list is dropped by ColumnTransformer
# (its default is remainder='drop').
numerical_features = ['cap-diameter', 'stem-height', 'stem-width']
categorical_features = ['cap-shape', 'cap-surface', 'cap-color', 'does-bruise-or-bleed',
                        'gill-attachment', 'gill-spacing', 'gill-color', 'stem-color', 'has-ring',
                        'ring-type', 'habitat', 'season']

# handle_unknown='ignore': a category that was not present when the encoder was
# fitted is encoded as an all-zero row instead of making transform() raise.
# Without it, transforming any later data with an unseen category fails.
column_transformer = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

# Create the pipeline (a single step wrapping the column transformer)
transformer = Pipeline(steps=[('transformer', column_transformer)])

# Fit and transform training data
# NOTE(review): the transformer is fitted on every row of Data_train *before*
# the train/validation split below, so validation rows contribute to the
# scaling statistics and to the set of known categories.
X_data = transformer.fit_transform(X)

# Encode the string class labels as integers
le = LabelEncoder()
y_data = le.fit_transform(y)

# Split data into training data and validation data (80% / 20%, fixed seed)
X_train, X_val, y_train, y_val = train_test_split(X_data, y_data, test_size=0.2, random_state=42)

In [None]:
# Pull the target column out of the test frame; every other column is a feature
ytest = Data_test['class']
Xtest = Data_test.drop(columns=['class'])

# Apply the existing preprocessing pipeline and label encoder to the test data.
# Only transform() is called here, so nothing is refitted on the test set.
X_test = transformer.transform(Xtest)
y_test = le.transform(ytest)

In [None]:
def perceptron(X, y, n_epochs, learning_rate, batch_size, l2_reg, random_state=None):
    """Train a linear perceptron with mini-batch updates and L2 weight decay.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix. Objects exposing ``toarray()`` (e.g. SciPy sparse
        matrices) are densified first.
    y : array-like of shape (n_samples,)
        Targets encoded as -1 / +1; the in-loop predictions use that encoding,
        so ``y - prediction`` is 0 for a correct sample and +/-2 otherwise.
    n_epochs : int
        Number of passes over the (reshuffled) data.
    learning_rate : float
        Step size applied to every weight and bias update.
    batch_size : int
        Number of samples per update; must be at least 1.
    l2_reg : float
        Weight-decay coefficient; the bias is not regularised.
    random_state : int or None, default None
        Seed for the per-epoch shuffle. ``None`` keeps drawing from NumPy's
        global random state, which is what this function always did.

    Returns
    -------
    weights : numpy.ndarray of shape (n_features,)
    bias : float

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, ``y`` does not have one entry per row of ``X``,
        or ``batch_size`` is smaller than 1.
    """
    if hasattr(X, "toarray"):
        X = X.toarray()
    X = np.asarray(X, dtype=float)
    y = np.asarray(y)

    if X.ndim != 2:
        raise ValueError("X must be 2-dimensional (n_samples, n_features)")
    if y.shape != (X.shape[0],):
        raise ValueError("y must be 1-dimensional with one target per row of X")
    if batch_size < 1:
        # A negative step would make the batch loop empty and silently return
        # an untrained model.
        raise ValueError("batch_size must be a positive integer")

    n_samples, n_features = X.shape
    weights = np.zeros(n_features)
    bias = 0.0

    if random_state is None:
        shuffle = np.random.permutation
    else:
        shuffle = np.random.RandomState(random_state).permutation

    for _ in range(n_epochs):
        # Fancy indexing returns new arrays, so the caller's X / y are never
        # reordered in place.
        order = shuffle(n_samples)
        X = X[order]
        y = y[order]

        for start in range(0, n_samples, batch_size):
            batch_X = X[start:start + batch_size]
            batch_y = y[start:start + batch_size]

            scores = np.dot(batch_X, weights) + bias
            batch_pred = np.where(scores > 0, 1, -1)
            errors = batch_y - batch_pred

            # Summed (not averaged) perceptron update over the batch, minus
            # the L2 decay term on the weights.
            weights += learning_rate * (np.dot(batch_X.T, errors) - l2_reg * weights)
            bias += learning_rate * np.sum(errors)

    return weights, bias

In [None]:
def predict(X, weights, bias):
    """Return 0/1 class predictions of a linear model.

    Each row of ``X`` gets the score ``dot(row, weights) + bias``; a strictly
    positive score maps to 1 and anything else (including exactly 0) maps to 0.
    """
    scores = np.dot(X, weights)
    scores = scores + bias
    is_positive = scores > 0
    return np.where(is_positive, 1, 0)

In [None]:
# Train the perceptron
n_epochs = 100
learning_rate = 0.1
batch_size = 32
l2_reg = 0.0001

# perceptron() shuffles the samples with NumPy's global random state; seed it
# so that re-running the notebook reproduces the same weights and scores.
np.random.seed(42)

# The training matrix is needed twice (fit + train-set evaluation): densify once.
X_train_dense = X_train.toarray()

# y_train * 2 - 1 maps the encoded labels 0/1 to the -1/+1 targets used in training
weights, bias = perceptron(X_train_dense, y_train * 2 - 1, n_epochs, learning_rate, batch_size, l2_reg)


def _evaluate_split(split_name, X_split, y_true, weights, bias):
    """Predict one data split, print its accuracy and binary F1, and return the results.

    Returns a tuple ``(y_pred, accuracy, f1)``; F1 treats label 1 as the
    positive class.
    """
    y_pred = predict(X_split, weights, bias)
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='binary', pos_label=1)
    print(f"{split_name} accuracy:", accuracy)
    print(f"{split_name} F1-score:", f1)
    return y_pred, accuracy, f1


# Predictions and metrics on the train set
y_pred_train, accuracy_train, f1_train = _evaluate_split("Train", X_train_dense, y_train, weights, bias)
print()

# Predictions and metrics on the validation set
y_pred_val, accuracy_val, f1_val = _evaluate_split("Validation", X_val.toarray(), y_val, weights, bias)
print()

# Predictions and metrics on the test set
y_pred_test, accuracy_test, f1_test = _evaluate_split("Test", X_test.toarray(), y_test, weights, bias)

# Generate the confusion matrix (rows: true labels, columns: predicted labels)
cm = confusion_matrix(y_test, y_pred_test)

# Draw the confusion matrix as a Seaborn heatmap
# NOTE(review): the tick labels assume encoded class 0 is edible and class 1 is
# poisonous - confirm against le.classes_.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, cmap="Blues", fmt='g',
            xticklabels=['Edible', 'Poisonous'], yticklabels=['Edible', 'Poisonous'], ax=ax)

ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Test Confusion Matrix')

plt.show()