Secondary Mushroom Classification Exploration Notebook

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [3]:
from ucimlrepo import fetch_ucirepo 
# fetch dataset 
secondary_mushroom = fetch_ucirepo(id=848) 

In [4]:

# data (as pandas dataframes) 
X = secondary_mushroom.data.features 
y = secondary_mushroom.data.targets 
  
df = pd.DataFrame(X)
df['target'] = y
df.head()

# encode categorical columns
categorical_columns = X.select_dtypes(include='object').columns
label_encoder = LabelEncoder()
for col in categorical_columns:
    X.loc[:, col] = label_encoder.fit_transform(X[col])


# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# train the model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train.values.ravel())

# make predictions
y_pred = knn.predict(X_test)

# calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')


Accuracy: 99.98%


In [5]:
# New mushroom data for inference (hypothetical example)
new_data = {
    'cap-diameter': [4.5],
    'cap-shape': ['bell'],
    'cap-surface': ['smooth'],
    'cap-color': ['red'],
    'does-bruise-or-bleed': ['no'],
    'gill-attachment': ['free'],
    'gill-spacing': ['close'],
    'gill-color': ['white'],
    'stem-height': [12.3],
    'stem-width': [2.3],
    'stem-root': ['bulbous'],
    'stem-surface': ['smooth'],
    'stem-color': ['white'],
    'veil-type': ['partial'],
    'veil-color': ['white'],
    'has-ring': ['yes'],
    'ring-type': ['pendant'],
    'spore-print-color': ['brown'],
    'habitat': ['forest'],
    'season': ['autumn']
}

# Convert to pandas DataFrame
X_new = pd.DataFrame(new_data)

categorical_columns = X_new.select_dtypes(include='object').columns

label_encoder = LabelEncoder()

# Apply the same LabelEncoder to categorical columns for new data
for col in categorical_columns:
    X_new[col] = label_encoder.fit_transform(X_new[col])  # Use the fitted encoder to transform new data

# Standardize the new data using the same scaler used for training data
X_new_scaled = scaler.transform(X_new)

# Make predictions using the trained model
y_new_pred = knn.predict(X_new_scaled)

print("Predicted labels for the new data:", y_new_pred)



Predicted labels for the new data: ['p']
