In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
# Read the cleaned-up secondary mushroom CSV (path is relative to the
# notebook's working directory) into a DataFrame.
shroom = pd.read_csv(
    './cleaned_up_secondary_mushroom_dataset.csv'
)


In [None]:
# Columns excluded from modelling.
features_to_drop = [
    'gill-attachment', 'gill-spacing', 'cap-surface',
    'stem-root', 'stem-surface', 'veil-type',
    'veil-color', 'spore-print-color'
]
shroom = shroom.drop(columns=features_to_drop)

# Convert the target letters to numerical values: 'e' -> 0, 'p' -> 1.
shroom['class'] = shroom['class'].map({'e': 0, 'p': 1})
# Series.map turns every label missing from the dict into NaN; fail loudly
# here instead of passing NaN targets on to the split and the models.
if shroom['class'].isna().any():
    raise ValueError(
        "Column 'class' contains labels other than 'e'/'p' "
        "(or this cell was run on already-encoded data)."
    )

# Convert the remaining non-numeric features to integer codes.
# Codes are assigned in order of first appearance, so their numeric order
# carries no meaning.
for column in shroom.columns:
    # Checking "not numeric" (rather than dtype == 'object') also covers
    # pandas' dedicated string dtype; numeric and bool columns are left as-is.
    if column != 'class' and not pd.api.types.is_numeric_dtype(shroom[column]):
        # Create numerical mapping for each feature
        unique_values = shroom[column].unique()
        mapping = {value: idx for idx, value in enumerate(unique_values)}
        shroom[column] = shroom[column].map(mapping)

# Separate data and targets.
# Features are selected by name so the target can never end up in the feature
# matrix, regardless of where 'class' sits in the column order.
shroom_data = shroom.drop(columns='class').to_numpy()
shroom_targets = shroom['class'].to_numpy()

# 75/25 split, stratified on the target, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    shroom_data,
    shroom_targets,
    test_size=0.25,
    random_state=42,
    stratify=shroom_targets
)

# Sanity check of the resulting shapes.
print("Training features shape:", X_train.shape)
print("Test features shape:", X_test.shape)
print("Training targets shape:", y_train.shape)
print("Test targets shape:", y_test.shape)


In [None]:
# Support vector classifier with a linear kernel and C=1.0.
# The estimator is only constructed in this cell; it is not fitted here.
svm_model = SVC(
    C=1.0,
    kernel="linear",
)

In [None]:
# k-nearest-neighbours classifier using 5 neighbours.
# The estimator is only constructed in this cell; it is not fitted here.
knn_model = KNeighborsClassifier(
    n_neighbors=5,
)

In [None]:
# --- Logistic regression on standardized, PCA-reduced features -------------
pca = PCA(n_components=10)
scaler = StandardScaler()
# Pipeline keeps references to the step objects it is given (it does not
# clone them). The pipeline therefore gets its OWN LogisticRegression
# instance, so that fitting `logreg_model` on the raw features below does not
# overwrite the coefficients of the pipeline's final step.
pipe = Pipeline(
    steps=[
        ("scaler", scaler),
        ("pca", pca),
        ("logistic", LogisticRegression(max_iter=1000)),
    ]
)
pipe.fit(X_train, y_train)

y_pred_test_pca = pipe.predict(X_test)
y_pred_train_pca = pipe.predict(X_train)

accuracy_test_pca = round(accuracy_score(y_test, y_pred_test_pca), 4)
accuracy_train_pca = round(accuracy_score(y_train, y_pred_train_pca), 4)

# --- Logistic regression on the raw features (no scaling, no PCA) ----------
# NOTE(review): this baseline skips StandardScaler as well as PCA, so the
# comparison below reflects the combined effect of both steps — confirm that
# this is the intended comparison.
logreg_model = LogisticRegression(max_iter=1000)
logreg_model.fit(X_train, y_train)

y_pred_test_no_pca = logreg_model.predict(X_test)
y_pred_train_no_pca = logreg_model.predict(X_train)

accuracy_test_no_pca = round(accuracy_score(y_test, y_pred_test_no_pca), 4)
accuracy_train_no_pca = round(accuracy_score(y_train, y_pred_train_no_pca), 4)

# --- Report -----------------------------------------------------------------
print("Using PCA:")
print(f"-Test Set: {accuracy_test_pca}")
print(f"-Training Set: {accuracy_train_pca}")

print("\nNo PCA:")
print(f"-Test Set: {accuracy_test_no_pca}")
print(f"-Training Set: {accuracy_train_no_pca}")

In [None]:
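# TODO: `svm_predictions` is not defined in any of the cells visible above;
# fit `svm_model` and predict on X_test before enabling this report.
# svm_conf_matrix = confusion_matrix(y_test, svm_predictions)
# print("SVM Confusion Matrix:\n", svm_conf_matrix)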