In [None]:
# Importing necessary libraries
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

In [None]:
# Load the Wine dataset and pull out the feature matrix and target labels
data = load_wine()
X, y = data.data, data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Decision Tree Classifier: build with a fixed seed and fit on the training split
# (fit returns the estimator itself, so construction and training can be chained)
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Class predictions for the held-out test split
dt_pred = dt_classifier.predict(X_test)

In [None]:
# Random Forest Classifier: build with a fixed seed and fit on the training split
# (fit returns the estimator itself, so construction and training can be chained)
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Class predictions for the held-out test split
rf_pred = rf_classifier.predict(X_test)


In [None]:
# Compare the two classifiers on the held-out test set using the weighted F1 score
dt_f1 = f1_score(y_test, dt_pred, average='weighted')  # Decision Tree
rf_f1 = f1_score(y_test, rf_pred, average='weighted')  # Random Forest

print(f"F1 Score - Decision Tree: {dt_f1:.4f}")
print(f"F1 Score - Random Forest: {rf_f1:.4f}")

# Derive the verdict from the measured scores instead of hard-coding it, so the
# message stays truthful if the data, the split, or the seed changes.
if rf_f1 > dt_f1:
    print("The Random Forest Classifier outperformed the Decision Tree Classifier. ")
elif rf_f1 < dt_f1:
    print("The Decision Tree Classifier outperformed the Random Forest Classifier.")
else:
    print("Both classifiers achieved the same F1 score.")


F1 Score - Decision Tree: 0.9440
F1 Score - Random Forest: 1.0000


In [None]:
# GridSearchCV drives the exhaustive hyperparameter search below
from sklearn.model_selection import GridSearchCV

# Candidate values explored for each Random Forest hyperparameter
param_grid = {
    'n_estimators': [75, 100, 250],    # how many trees to grow
    'max_depth': [None, 10, 20],       # maximum depth allowed for each tree
    'min_samples_split': [2, 6, 10],   # samples a node needs before it may be split
}

# Evaluate every combination with 5-fold cross-validation, ranked by weighted F1
grid_search = GridSearchCV(
    estimator=rf_classifier,
    param_grid=param_grid,
    scoring='f1_weighted',
    cv=5,
)
grid_search.fit(X_train, y_train)

# Winning combination and the cross-validated score it achieved
best_params = grid_search.best_params_
best_f1 = grid_search.best_score_

print(f"Best Parameters: {best_params}")
print(f"Best F1 Score from GridSearchCV: {best_f1:.4f}")


Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 75}
Best F1 Score from GridSearchCV: 0.9783


In [None]:
# Importing libraries for regression tasks
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error

In [None]:
# Decision Tree Regressor, seeded for reproducibility.
# NOTE(review): y_train is the same label vector the classifiers were trained on,
# so this regresses on class labels — confirm that is the intended target.
dt_regressor = DecisionTreeRegressor(random_state=42)
dt_regressor.fit(X=X_train, y=y_train)

# Continuous predictions for the held-out test split
dt_reg_pred = dt_regressor.predict(X=X_test)

In [None]:
# Random Forest Regressor, seeded for reproducibility.
# NOTE(review): y_train is the same label vector the classifiers were trained on,
# so this regresses on class labels — confirm that is the intended target.
rf_regressor = RandomForestRegressor(random_state=42)
rf_regressor.fit(X=X_train, y=y_train)

# Continuous predictions for the held-out test split
rf_reg_pred = rf_regressor.predict(X=X_test)

In [None]:
# Score both regressors on the test split with Mean Squared Error (lower is better)
dt_mse = mean_squared_error(y_true=y_test, y_pred=dt_reg_pred)
rf_mse = mean_squared_error(y_true=y_test, y_pred=rf_reg_pred)

# Report the two errors side by side
print(f"MSE - Decision Tree Regressor: {dt_mse:.4f}")
print(f"MSE - Random Forest Regressor: {rf_mse:.4f}")

MSE - Decision Tree Regressor: 0.1667
MSE - Random Forest Regressor: 0.0648


In [None]:
# Search space sampled while tuning the Random Forest Regressor
# NOTE(review): the saved cell output reports n_estimators=200, which is not in
# this list — the output looks stale; re-run the cell to refresh it.
param_dist = {
    'n_estimators': [75, 100, 250],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Try 20 randomly drawn combinations, each scored by 5-fold cross-validated
# negative MSE; the seed makes the draw repeatable
random_search = RandomizedSearchCV(
    estimator=rf_regressor,
    param_distributions=param_dist,
    n_iter=20,
    scoring='neg_mean_squared_error',
    cv=5,
    random_state=42,
)
random_search.fit(X_train, y_train)

# The scorer is negated MSE, so flip the sign to report a positive error
best_reg_params = random_search.best_params_
best_reg_mse = -random_search.best_score_

print(f"Best Parameters for Random Forest Regressor: {best_reg_params}")
print(f"Best MSE from RandomizedSearchCV: {best_reg_mse:.4f}")

Best Parameters for Random Forest Regressor: {'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_depth': 10}
Best MSE from RandomizedSearchCV: 0.0468
