# How to save a model and encoder and use it later: mushroom dataset

In [None]:
import pandas as pd

# 1. LOAD DATA
# Read the mushroom dataset from a CSV file located in the current working directory.
url = "data_mushrooms.csv"
# Remote alternative: pd.read_csv needs the *raw* file URL. A github.com/.../blob/... link
# serves an HTML page, not CSV, so reading it would fail.
# NOTE(review): raw URL derived from the original blob link -- confirm branch/path still exist.
# url = "https://raw.githubusercontent.com/ash322ash422/youtube/main/tutorial_machine_learning/6_model_save/data_mushrooms.csv"
df = pd.read_csv(url)

In [49]:
# Quick structural overview: (rows, columns) followed by the list of column names.
print(f"Dataset shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")

Dataset shape: (8124, 23)
Columns: ['class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor', 'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color', 'stalk-shape', 'stalk-root', 'stalk-surface-above-ring', 'stalk-surface-below-ring', 'stalk-color-above-ring', 'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number', 'ring-type', 'spore-print-color', 'population', 'habitat']


In [50]:
# Preview 10 randomly chosen rows. The sample is seeded so that re-running the
# notebook shows the same rows every time (an unseeded sample changes per run).
print(df.sample(10, random_state=42))

     class cap-shape cap-surface cap-color bruises odor gill-attachment  \
7768     p         k           y         e       f    s               f   
113      e         x           s         y       t    a               f   
1621     e         f           f         g       f    n               f   
673      e         x           f         y       t    l               f   
6829     p         x           s         n       f    y               f   
3497     e         f           f         e       t    n               f   
7207     p         k           y         n       f    f               f   
3712     e         f           y         n       t    n               f   
3418     p         x           s         g       f    c               f   
6040     e         x           s         n       f    n               a   

     gill-spacing gill-size gill-color  ... stalk-surface-below-ring  \
7768            c         n          b  ...                        k   
113             c         b   

In [None]:
# 2

In [42]:
# 2. SEPARATE FEATURES & TARGET
# Target first, then everything that is left over becomes the feature matrix.
y = df["class"]                  # edible/poisonous label
X = df.drop(columns=["class"])   # all remaining columns are features

In [51]:
# Class balance check: how many rows carry each target label?
class_counts = y.value_counts()
print(class_counts)

class
e    4208
p    3916
Name: count, dtype: int64


In [52]:
# 3. ENCODE CATEGORICAL FEATURES
from sklearn.preprocessing import OrdinalEncoder

# Fit the encoder on the feature frame first, then apply it as a separate step.
# The fitted encoder object is kept because new samples must go through the
# same fitted transformation before they can be passed to the model.
encoder = OrdinalEncoder().fit(X)
X_encoded = encoder.transform(X)

In [53]:
# Compare the first row before (raw letters) and after (numeric codes) encoding
print(X.iloc[:1])
print(X_encoded[0:1])

  cap-shape cap-surface cap-color bruises odor gill-attachment gill-spacing  \
0         x           s         n       t    p               f            c   

  gill-size gill-color stalk-shape  ... stalk-surface-below-ring  \
0         n          k           e  ...                        s   

  stalk-color-above-ring stalk-color-below-ring veil-type veil-color  \
0                      w                      w         p          w   

  ring-number ring-type spore-print-color population habitat  
0           o         p                 k          s       u  

[1 rows x 22 columns]
[[5. 2. 4. 1. 6. 1. 0. 1. 4. 0. 3. 2. 2. 7. 7. 0. 2. 1. 4. 2. 3. 5.]]


In [54]:
# 4. TRAIN-TEST SPLIT
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
split = train_test_split(X_encoded, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

In [55]:
# 5. TRAIN MODEL
from sklearn.ensemble import RandomForestClassifier

# Random forest with 200 trees; the seed makes repeated runs build the same forest.
model = RandomForestClassifier(n_estimators=200, random_state=42)

# Kept as the cell's last expression so the notebook still displays the fitted estimator.
model.fit(X_train, y_train)

In [56]:
# Example mushroom (raw categorical values): one letter code per feature,
# listed in the same order as the columns of X.
sample = ["x s n t p f c n k e e s s w w p w o p k s u".split()]

# Wrap the codes in a one-row frame labelled like the training features
sample_df = pd.DataFrame(data=sample, columns=X.columns)

# Encode with the fitted encoder, then classify with the trained model
sample_encoded = encoder.transform(sample_df)
prediction = model.predict(sample_encoded)
print("Prediction:", prediction)   # edible/poisonous

Prediction: ['p']


In [57]:
# 6. EVALUATE MODEL
from sklearn.metrics import accuracy_score

# Predict the held-out rows and measure the share of labels predicted correctly
y_pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {acc}")

Model Accuracy: 1.0


In [58]:
# 7. SAVE MODEL + ENCODER
from joblib import dump

# Both objects are written to disk: the model alone cannot score raw
# categorical input, it needs the encoder that was fitted alongside it.
for artifact, path in (
    (model, "mushroom_model.joblib"),
    (encoder, "mushroom_encoder.joblib"),
):
    dump(artifact, path)

print("Saved mushroom_model.joblib & mushroom_encoder.joblib")

Saved mushroom_model.joblib & mushroom_encoder.joblib


# How to Load and Use the Saved Model

In [59]:
# Self-contained inference cell: it needs only the two .joblib files written by
# the save step, not any variable left over from the training session.
import pandas as pd
from joblib import load

# NOTE: joblib files are pickle-based; only load artifacts you created or trust.
loaded_model   = load("mushroom_model.joblib")
loaded_encoder = load("mushroom_encoder.joblib")

# Example mushroom (raw categorical values)
sample = [["x", "s", "n", "t", "p", "f", "c", "n", "k", "e",
           "e", "s", "s", "w", "w", "p", "w", "o", "p", "k",
           "s", "u"]]

# Take the column names from the loaded encoder (recorded when it was fitted on
# a DataFrame) instead of the training-time `X`, so this cell also runs in a
# fresh kernel where `X` does not exist.
sample_df = pd.DataFrame(sample, columns=loaded_encoder.feature_names_in_)

# Encode the sample
sample_encoded = loaded_encoder.transform(sample_df)

# Predict
prediction = loaded_model.predict(sample_encoded)
print("Prediction:", prediction)   # edible/poisonous

Prediction: ['p']
