# Predict mushroom edibility

Predict whether a mushroom is **poisonous** or **edible**.
1. Import modules
2. Explore data
3. Clean data if necessary (NaN values etc.)
4. Split data into train and test sets
5. Choose and train ML model
6. Check model accuracy, precision, recall and F1 scores
7. Perform hyperparameter tuning if necessary and compare scores

In [20]:
# 1. Imports

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 2. Load the raw data set.
# (Call df.info() / df.head() here to inspect the columns interactively.)
df = pd.read_csv('mushrooms.csv')

# 3. Replace every column with the integer codes of its categories so the
#    estimator receives purely numeric input.
# NOTE(review): category codes impose an arbitrary ordering on what are
# presumably nominal features; OneHotEncoder is imported but never used --
# confirm whether one-hot encoding was intended.
for column in df.columns:
    df[column] = df[column].astype('category').cat.codes

# 4.1 Separate the target column from the feature columns.
y = df['class']
X = df.drop(columns='class')

# 4.2 Hold out 20% of the rows for testing; the fixed seed keeps the split
#     reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 5. Fit a support-vector classifier with its default hyperparameters.
clf = SVC().fit(X_train, y_train)

# 6. Score the held-out predictions: accuracy, precision, recall and F1.
# Precision/recall/F1 use scikit-learn's default positive label (1);
# presumably that is the poisonous class after encoding -- TODO confirm.
y_pred = clf.predict(X_test)

for caption, metric in (
    ("Model Accuracy Score: ", accuracy_score),
    ("Model Precision Score: ", precision_score),
    ("Model Recall Score: ", recall_score),
    ("Model F1 Score: ", f1_score),
):
    print(caption, metric(y_test, y_pred))

Model Accuracy Score:  0.9926153846153846
Model Precision Score:  0.9987046632124352
Model Recall Score:  0.9859335038363172
Model F1 Score:  0.9922779922779922
