In [1]:
!pip install scikit-learn pandas
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Lasso
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score



In [2]:
from pathlib import Path

from google.colab import files

# Only prompt for a manual upload when the dataset is not already in the
# working directory, so that re-running the notebook does not block on the
# upload widget every time.
if Path("mushrooms.csv").exists():
    # Mirrors the {filename: file bytes} mapping documented for files.upload().
    uploaded = {"mushrooms.csv": Path("mushrooms.csv").read_bytes()}
else:
    uploaded = files.upload()

Saving mushrooms.csv to mushrooms.csv


In [3]:
# Load the dataset from the CSV file in the current working directory.
df = pd.read_csv(filepath_or_buffer="mushrooms.csv")

In [4]:
# Convert every column of the frame to pandas' categorical dtype.
df = df.astype(dtype="category")

In [5]:
# Integer-encode every column of df in place.
# A separate LabelEncoder is fitted for each column and stored in `encoders`
# (keyed by column name), so the integer codes of any column can later be
# mapped back to the original labels via encoders[col].inverse_transform(...).
# A single shared encoder would be re-fitted on every iteration and would only
# remember the mapping of the last column.
# NOTE(review): the integer codes impose an arbitrary ordering on the
# categories; confirm that is acceptable for the downstream models.
encoders = {}
for column in df.columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    encoders[column] = le

In [6]:
# Columns that are removed from the frame before modelling.
dropped_columns = [
    "veil-type",
    "bruises",
    "gill-spacing",
    "gill-size",
    "stalk-shape",
    "stalk-root",
    "stalk-surface-above-ring",
    "stalk-surface-below-ring",
    "stalk-color-above-ring",
    "stalk-color-below-ring",
    "ring-number",
    "ring-type",
    "spore-print-color",
    "veil-color",
]

# New frame without the columns listed above; df itself is left untouched.
X = df.drop(columns=dropped_columns)

In [7]:
# Target vector: the "class" column. It is read from df (which X was derived
# from by dropping columns only) rather than from X, so this cell still works
# when it is re-run after "class" has already been removed from X.
Y = df["class"]

# Feature matrix: X without the target column. errors="ignore" turns the drop
# into a no-op on a re-run instead of raising KeyError.
X = X.drop(columns=["class"], errors="ignore")

In [8]:
# Hold out 40% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.4,
    random_state=42,
)

In [9]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Hyper-parameter grid for the random forest (3 x 3 x 3 = 27 combinations).
rf_params = dict(
    n_estimators=[100, 200, 300],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
)

# Exhaustive grid search with 5-fold cross-validation, scored on accuracy and
# run in parallel (n_jobs=-1).
rf_model = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=rf_params,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)

# The forest is fitted on the unscaled training features.
rf_model.fit(X_train, y_train)

  _data = np.array(data, dtype=dtype, copy=copy,


In [11]:
# Report the best parameter combination found by the grid search and its score.
for label, value in (
    ("Random Forest Best Parameters: ", rf_model.best_params_),
    ("Random Forest Best Accuracy: ", rf_model.best_score_),
):
    print(label, value)


Random Forest Best Parameters:  {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 100}
Random Forest Best Accuracy:  0.9981530037382192


In [12]:
# Take the best estimator found by the grid search ...
rf_best = rf_model.best_estimator_

# ... predict on the held-out split and measure accuracy against the true labels.
rf_y_pred = rf_best.predict(X_test)
rf_accuracy = accuracy_score(y_true=y_test, y_pred=rf_y_pred)

In [13]:
# Each tuple is printed as one line: a heading, then the held-out accuracy.
rf_report_lines = (
    ("\nRandom Forest Results",),
    ("Test Accuracy: ", rf_accuracy),
)
for parts in rf_report_lines:
    print(*parts)


Random Forest Results
Test Accuracy:  0.9981538461538462


In [14]:
# Candidate regularisation strengths for the Lasso model.
lasso_params = dict(alpha=[0.001, 0.01, 0.1, 1, 10])

# Grid search with 5-fold cross-validation, scored on R^2 and run in parallel.
lasso_model = GridSearchCV(
    estimator=Lasso(random_state=42),
    param_grid=lasso_params,
    scoring="r2",
    cv=5,
    n_jobs=-1,
)

# Lasso is fitted on the standardised features.
# NOTE(review): y_train is taken from the "class" column, so this fits a
# linear regression to class labels - confirm that a regressor (rather than a
# classifier) is intended here.
lasso_model.fit(X_train_scaled, y_train)

In [15]:
# Report the best alpha found by the grid search and its cross-validated R^2.
for label, value in (
    ("\nLasso Regression Best Parameters: ", lasso_model.best_params_),
    ("Lasso Regression Best R2 Score: ", lasso_model.best_score_),
):
    print(label, value)


Lasso Regression Best Parameters:  {'alpha': 0.001}
Lasso Regression Best R2 Score:  0.4140194746814439


In [16]:
# Take the best estimator found by the grid search and predict on the
# standardised held-out features.
lasso_best = lasso_model.best_estimator_
lasso_y_pred = lasso_best.predict(X_test_scaled)

# Evaluate both regression metrics against the same true/predicted pair.
lasso_mse, lasso_r2 = (
    metric(y_test, lasso_y_pred) for metric in (mean_squared_error, r2_score)
)

In [17]:
# Each tuple is printed as one line: a heading, then the two held-out metrics.
lasso_report_lines = (
    ("\nLasso Regression Results",),
    ("Mean Squared Error: ", lasso_mse),
    ("R2 Score: ", lasso_r2),
)
for parts in lasso_report_lines:
    print(*parts)


Lasso Regression Results
Mean Squared Error:  0.1474818496602891
R2 Score:  0.40944438996624366
