In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from scipy.stats import randint

# Load data
df = pd.read_csv("mushrooms.csv")

# Fail early, before any encoding work, if the expected target column is absent.
if "class" not in df.columns:
    raise KeyError("expected a 'class' target column in mushrooms.csv")

# Encode every column of the frame (features and target alike) as integer codes.
# A fresh LabelEncoder is fitted per column and stored in `encoders`, keyed by
# column name. Re-fitting one shared encoder would overwrite its learned classes
# on each iteration, leaving no way to decode predictions or to encode new raw
# rows with the same mapping the model was trained on.
# NOTE(review): this encodes *all* columns, so it presumes every column is
# categorical -- confirm against the dataset.
encoders = {}
for col in df.columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le
# After the loop `le` is still bound to the last column's encoder, as before.

# Split features and target
X = df.drop("class", axis=1)  # every column except the target
y = df["class"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base estimator handed to the search; seeded so runs are repeatable.
rf = RandomForestClassifier(random_state=42)

# Integer distributions sampled by the randomized search, one per hyperparameter.
param_dist = dict(
    n_estimators=randint(50, 300),  # number of trees
    max_depth=randint(3, 30),  # max depth of each tree
    min_samples_split=randint(2, 20),  # min samples to split a node
    min_samples_leaf=randint(1, 20),  # min samples in each leaf
)

# Randomized search: 50 sampled candidates, each scored with 5-fold
# cross-validation, using all available cores (n_jobs=-1).
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    n_jobs=-1,
    random_state=42,
)

# Run the search on the training split only; the test split stays untouched.
random_search.fit(X_train, y_train)

# Pull out the estimator the search selected.
best_rf = random_search.best_estimator_

# Score it on the held-out test split and show the per-class report.
y_pred = best_rf.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

# Record which hyperparameter combination produced this model.
best_params = random_search.best_params_
print("Best hyperparameters:", best_params)


In [None]:
# Save the best trained model and input features for future use
import pickle
import joblib

# Serialize the selected model to disk with the standard-library pickler.
with open("rf_model.pkl", "wb") as model_file:
    pickle.dump(best_rf, model_file)

# Store the column labels of X (every feature column, not a selected subset)
# so later code can line its inputs up with what the model was given.
X_columns = X.columns
joblib.dump(value=X_columns, filename="X_columns.pkl")