<a href="https://colab.research.google.com/github/aquaxmiriel/worksheet0/blob/main/2408239_NikishaShrestha_DecisionTree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, mean_squared_error

In [None]:
# Load the wine dataset bundled with scikit-learn.
data = load_wine()
# Feature matrix and class labels, as exposed by the loaded dataset object.
X, y = data.data, data.target

In [None]:
# Train/test splits: 20% held out, same seed for both tasks.
# Classification uses the dataset's own labels as the target.
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Regression has no separate target here: it is the mean of X taken over axis 1,
# i.e. a value computed from the very features the regressors are trained on.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X, X.mean(axis=1), test_size=0.2, random_state=42
)

In [None]:
# 1. Classification models
# Decision tree: fit on the training split, then score the held-out split
# with the weighted F1.
dt_clf = DecisionTreeClassifier(random_state=42).fit(X_train_clf, y_train_clf)
dt_clf_f1_score = f1_score(
    y_true=y_test_clf,
    y_pred=dt_clf.predict(X_test_clf),
    average='weighted',
)

In [None]:
# Random forest: same training split and same weighted-F1 evaluation as the
# decision tree classifier.
rf_clf = RandomForestClassifier(random_state=42).fit(X_train_clf, y_train_clf)
rf_clf_f1_score = f1_score(
    y_true=y_test_clf,
    y_pred=rf_clf.predict(X_test_clf),
    average='weighted',
)

In [None]:
# Report the held-out weighted F1 of both baseline classifiers, one per line.
print(
    f"Decision Tree Classifier F1 Score: {dt_clf_f1_score}",
    f"Random Forest Classifier F1 Score: {rf_clf_f1_score}",
    sep="\n",
)

Decision Tree Classifier F1 Score: 0.9439974457215836
Random Forest Classifier F1 Score: 1.0


In [None]:
# 2. Hyperparameter tuning for the Random Forest Classifier
# Grid search over every combination below, scored by weighted F1 with 5-fold CV.
param_grid_clf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}
gs_clf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid_clf,
    scoring='f1_weighted',
    cv=5,
)
# Left as the cell's final expression so the fitted search is displayed.
gs_clf.fit(X_train_clf, y_train_clf)

In [None]:
# Score the best estimator found by the grid search on the held-out split,
# then report the winning parameters alongside that score.
gs_clf_f1 = f1_score(
    y_true=y_test_clf,
    y_pred=gs_clf.best_estimator_.predict(X_test_clf),
    average='weighted',
)
print(f"Best parameters for Random Forest Classifier: {gs_clf.best_params_}")
print(f"Optimized random Forest Classifier F1 Score: {gs_clf_f1}")

Best parameters for Random Forest Classifier: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Optimized random Forest Classifier F1 Score: 1.0


In [None]:
# 3. Regression models
# Decision tree regressor, evaluated by mean squared error on the held-out split.
# The result is stored as `df_reg_mse`; the reporting cell reads it under that name.
dt_reg = DecisionTreeRegressor(random_state=42).fit(X_train_reg, y_train_reg)
df_reg_mse = mean_squared_error(
    y_true=y_test_reg,
    y_pred=dt_reg.predict(X_test_reg),
)

In [None]:
# Random forest regressor: same training split and same MSE evaluation as the
# decision tree regressor.
rf_reg = RandomForestRegressor(random_state=42).fit(X_train_reg, y_train_reg)
rf_reg_mse = mean_squared_error(
    y_true=y_test_reg,
    y_pred=rf_reg.predict(X_test_reg),
)

In [None]:
# Report the held-out error of both baseline regressors. These values are
# produced by mean_squared_error, so they are labelled MSE (lower is better)
# rather than F1, which is a classification metric.
print(f"Decision Tree Regressor MSE: {df_reg_mse}")
print(f"Random Forest Regressor MSE: {rf_reg_mse}")

Decision Tree Regressor F1 Score: 11.83586473209729
Random Forest Regressor F1 Score: 10.833628618948362


In [None]:
# 4. Hyperparameter tuning for the Random Forest Regressor
# Randomized search: 10 sampled candidates from the lists below, 5-fold CV,
# scored by negated MSE; the search itself is seeded.
param_dist_reg = {
    'n_estimators': [50, 100, 200],
    'max_features': ['sqrt', 'log2', None],
    'min_samples_split': [2, 5, 10],
}
rs_reg = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_distributions=param_dist_reg,
    n_iter=10,
    scoring='neg_mean_squared_error',
    cv=5,
    random_state=42,
)
# Left as the cell's final expression so the fitted search is displayed.
rs_reg.fit(X_train_reg, y_train_reg)

In [None]:
# Report the parameters chosen by the randomized search, then the held-out
# mean squared error of its best estimator. The metric label previously read
# "MSE1 Score"; it is the plain MSE.
print(f"Best parameters for Random Forest Regressor: {rs_reg.best_params_}")
rs_reg_mse = mean_squared_error(
    y_true=y_test_reg,
    y_pred=rs_reg.best_estimator_.predict(X_test_reg),
)
print(f"Optimized random Forest Regressor MSE: {rs_reg_mse}")

Best parameters for Random Forest Regressor: {'n_estimators': 50, 'min_samples_split': 5, 'max_features': None}
Optimized random Forest Regressor MSE1 Score: 11.490428309871854
