In [21]:
# Import necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.metrics import RocCurveDisplay, PrecisionRecallDisplay, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC


In [24]:
# data = pd.read_csv("mushrooms.csv")

# # LabelEncoder maps each distinct categorical value
# # to an integer code, because scikit-learn
# # estimators require numeric input rather than
# # raw strings.

# labelencoder = LabelEncoder()
# labelencoder

In [27]:
# Load and preprocess data
def load_data(path="mushrooms.csv"):
    """Read a CSV file and label-encode every one of its columns.

    Parameters
    ----------
    path : str or path-like, default "mushrooms.csv"
        Location of the CSV file handed to ``pd.read_csv``. The default
        keeps the zero-argument call ``load_data()`` working as before.

    Returns
    -------
    pandas.DataFrame
        A frame with the same columns as the file, where each column has
        been replaced by the output of ``LabelEncoder.fit_transform``.
    """
    data = pd.read_csv(path)
    for col in data.columns:
        # A fresh encoder per column: no fitted state is carried over from
        # one column to the next.
        data[col] = LabelEncoder().fit_transform(data[col])
    return data

# Build the label-encoded frame that the later cells work from
data = load_data()

# Show the first rows of the encoded frame as the cell's output
data.head()

Unnamed: 0,type,cap_shape,cap_surface,cap_color,bruises,odor,gill_attachment,gill_spacing,gill_size,gill_color,...,stalk_surface_below_ring,stalk_color_above_ring,stalk_color_below_ring,veil_type,veil_color,ring_number,ring_type,spore_print_color,population,habitat
0,1,5,2,4,1,6,1,0,1,4,...,2,7,7,0,2,1,4,2,3,5
1,0,5,2,9,1,0,1,0,0,4,...,2,7,7,0,2,1,4,3,2,1
2,0,0,2,8,1,3,1,0,0,5,...,2,7,7,0,2,1,4,3,2,3
3,1,5,3,8,1,6,1,0,1,5,...,2,7,7,0,2,1,4,2,3,5
4,0,5,2,3,0,5,1,1,0,4,...,2,7,7,0,2,1,0,3,0,1


In [29]:
# Split data into training and testing sets
def split(df, target='type', test_size=0.3, random_state=0):
    """Separate the target column from the features and split both.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns and the target column.
    target : str, default 'type'
        Name of the column used as the label; every other column is a feature.
    test_size : float or int, default 0.3
        Forwarded to ``train_test_split``.
    random_state : int or None, default 0
        Forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` in that order.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``df``.
    """
    y = df[target]
    x = df.drop(columns=[target])
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )
    return x_train, x_test, y_train, y_test

# Hold out a test partition; these four module-level names are read by the later cells
x_train, x_test, y_train, y_test = split(data)

In [None]:
# Classifier selection
# For demonstration, let's use SVC (Support Vector Classifier)
C = 1.0  # Regularization parameter
kernel = 'rbf'  # Kernel type
gamma = 'scale'  # Kernel coefficient

model = SVC(C=C, kernel=kernel, gamma=gamma)
model.fit(x_train, y_train)

# Model evaluation
# Predict once and derive every metric from the same predictions.
# `model.score(x_test, y_test)` would return the same accuracy but runs its
# own prediction pass over the test set, doubling the inference cost.
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
# average='binary' reports the metric for the positive class only
# (scikit-learn's default pos_label is 1).
precision = precision_score(y_test, y_pred, average='binary')
recall = recall_score(y_test, y_pred, average='binary')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")

# Function to plot selected metrics
def plot_metrics():
    """Display the confusion matrix, ROC curve and precision-recall curve.

    Reads the module-level ``model``, ``x_test``, ``y_test`` and ``y_pred``
    and shows three figures one after another. Takes no arguments and
    returns nothing.
    """

    def draw_confusion_matrix():
        # Counts come from the predictions already stored in y_pred
        counts = confusion_matrix(y_test, y_pred)
        ConfusionMatrixDisplay(confusion_matrix=counts).plot(cmap='Blues')

    def draw_roc_curve():
        RocCurveDisplay.from_estimator(model, x_test, y_test)

    def draw_precision_recall_curve():
        PrecisionRecallDisplay.from_estimator(model, x_test, y_test)

    # Each figure is drawn, titled and shown before the next one starts
    panels = (
        ('Confusion Matrix', draw_confusion_matrix),
        ('ROC Curve', draw_roc_curve),
        ('Precision-Recall Curve', draw_precision_recall_curve),
    )
    for heading, draw in panels:
        draw()
        plt.title(heading)
        plt.show()

# Render the confusion matrix, ROC curve and precision-recall curve for the fitted model
plot_metrics()

| Feature | Old Code | New Excerpt |
|---------|----------|-------------|
| **Data Loading** | Uses `pd.read_csv("path\\to\\mushrooms.csv")`, a hard-coded Windows-style path with escaped backslashes. | Uses `pd.read_csv("mushrooms.csv")`, a bare filename that is resolved relative to the current working directory. |
| **Data Preprocessing** | Both versions use `LabelEncoder` for encoding categorical data. No difference in the approach. | No difference. |
| **Data Splitting** | Both versions split the data into training and testing sets using `train_test_split` with a test size of 0.3 and `random_state=0`. | No difference. |
| **Plotting Metrics** | Uses `plot_confusion_matrix`, `plot_roc_curve`, and `plot_precision_recall_curve` directly from `sklearn.metrics` for plotting. | Manually creates a confusion matrix plot using `ConfusionMatrixDisplay` from `sklearn.metrics`. The ROC Curve part is incomplete in the new excerpt. |
| **Classifier Selection and Hyperparameters** | Provides options for SVM, Logistic Regression, and Random Forest with customizable hyperparameters through the sidebar. | Not present in the new excerpt. |
| **Model Training and Evaluation** | Trains the selected model and evaluates it, displaying accuracy, precision, and recall. Also plots selected metrics based on user choice. | Not present in the new excerpt. |
| **Raw Data Display** | Includes an option to display the raw data if the user checks a sidebar checkbox. | Not present in the new excerpt. |
| **Web App Features** | Includes titles, markdowns, and sidebar options for a more interactive user experience. | Not present in the new excerpt. |
| **Data Path** | Specifies a more detailed path to the dataset with Windows path conventions. | Uses a simple filename, suggesting the file is in the same directory or a known path. |
| **Error Handling or Validation** | No explicit error handling or validation is shown in either version. | No difference. |
| **Comments or Documentation** | Lacks comments explaining the code blocks. | Also lacks comments, making it consistent with the old code in terms of documentation. |

The new excerpt focuses solely on data loading, preprocessing, splitting, and a part of the metrics plotting functionality, without the web app features, model training, evaluation, and user interaction components present in the old code.