In [2]:
import pandas as pd
import numpy as np
import plot_params
import seaborn as sns
import matplotlib.pyplot as plt
import graphviz
import scipy.stats as stats

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.inspection import PartialDependenceDisplay
from sklearn.tree import export_graphviz
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline

plot_params.apply_rcparams()

In [82]:
df = pd.read_csv("mushrooms.csv")
df = df.rename(columns={'class' : 'edibility'})
df = df.drop(df.columns[2:10],axis=1) # drop features too easy

In [83]:
df.head()

Unnamed: 0,edibility,cap-shape,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,e,e,s,s,w,w,p,w,o,p,k,s,u
1,e,x,e,c,s,s,w,w,p,w,o,p,n,n,g
2,e,b,e,c,s,s,w,w,p,w,o,p,n,n,m
3,p,x,e,e,s,s,w,w,p,w,o,p,k,s,u
4,e,x,t,e,s,s,w,w,p,w,o,e,n,a,g


In [84]:
label_encoder = preprocessing.LabelEncoder()
df_encoded_lab = df.apply(lambda col: label_encoder.fit_transform(col))
df_encoded_ohe = pd.get_dummies(df, columns=df.columns[1:], drop_first=True)
df_encoded_ohe['edibility'] = label_encoder.fit_transform(df['edibility'])

In [85]:
X_lab = df_encoded_lab.iloc[:, 1:]
y_lab = df_encoded_lab['edibility']
X_ohe = df_encoded_ohe.drop(columns=['edibility'])
y_ohe = df_encoded_ohe['edibility']

In [86]:
X_train_lab, X_test_lab, y_train_lab, y_test_lab = \
    train_test_split(X_lab, y_lab, test_size=0.2, random_state=12)
X_train_ohe, X_test_ohe, y_train_ohe, y_test_ohe = \
    train_test_split(X_ohe, y_ohe, test_size=0.2, random_state=12)

In [87]:
models = {
    'Random Forest': RandomForestClassifier(),
    'Logistic Regression': LogisticRegression(),
    'Support Vector Classifier': SVC()
}

In [88]:
for model_name, model in models.items():
    
    pipeline = Pipeline([
        ('scaler', StandardScaler()),  
        ('classifier', model)
    ])
    
    if model_name == 'Random Forest':
        pipeline.fit(X_train_lab, y_train_lab)
        y_pred = pipeline.predict(X_test_lab)
        print(f"Results for {model_name}:")
        print(classification_report(y_test_lab, y_pred))
        print(f"Training Accuracy: {accuracy_score(y_train_lab, model.predict(X_train_lab))}")
        print(f"Test Accuracy: {accuracy_score(y_test_lab, model.predict(X_test_lab))}")
        print("="*60)
    else:
        pipeline.fit(X_train_ohe, y_train_ohe)
        y_pred = pipeline.predict(X_test_ohe)
        print(f"Results for {model_name}:")
        print(classification_report(y_test_ohe, y_pred))
        print(f"Training Accuracy: {accuracy_score(y_train_ohe, model.predict(X_train_ohe))}")
        print(f"Test Accuracy: {accuracy_score(y_test_ohe, model.predict(X_test_ohe))}")
        print("="*60)
    

Results for Random Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       813
           1       1.00      1.00      1.00       812

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

Training Accuracy: 0.5896291737190337
Test Accuracy: 0.5858461538461538
Results for Logistic Regression:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       813
           1       1.00      1.00      1.00       812

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

Training Accuracy: 0.832589629173719
Test Accuracy: 0.8326153846153846




Results for Support Vector Classifier:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       813
           1       1.00      1.00      1.00       812

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625





Training Accuracy: 0.8410524696107093
Test Accuracy: 0.848


