In [13]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

In [14]:
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.Series(iris.target)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [16]:
results = []

In [17]:
for depth in range(1, 6):
    Model= DecisionTreeClassifier(max_depth=depth, min_samples_split=5, min_samples_leaf=2, random_state=42).fit(X_train, y_train)

    y_pred = Model.predict(X_test)

    precision_macro = precision_score(y_test, y_pred, average='macro')
    recall_macro = recall_score(y_test, y_pred, average='macro')
    f1_macro = f1_score(y_test, y_pred, average='macro')

    precision_micro = precision_score(y_test, y_pred, average='micro')
    recall_micro = recall_score(y_test, y_pred, average='micro')
    f1_micro = f1_score(y_test, y_pred, average='micro')

    precision_weighted = precision_score(y_test, y_pred, average='weighted')
    recall_weighted = recall_score(y_test, y_pred, average='weighted')
    f1_weighted = f1_score(y_test, y_pred, average='weighted')

    results.append((depth, precision_macro, recall_macro, f1_macro,
                    precision_micro, recall_micro, f1_micro,
                    precision_weighted, recall_weighted, f1_weighted))

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [18]:
df_results = pd.DataFrame(results, columns=["Max Depth",
                                            "Precision (Macro)", "Recall (Macro)", "F1 Score (Macro)",
                                            "Precision (Micro)", "Recall (Micro)", "F1 Score (Micro)",
                                            "Precision (Weighted)", "Recall (Weighted)", "F1 Score (Weighted)"])

In [19]:
print("Macro Scores")
print(df_results[["Max Depth", "Precision (Macro)", "Recall (Macro)", "F1 Score (Macro)"]])

Macro Scores
   Max Depth  Precision (Macro)  Recall (Macro)  F1 Score (Macro)
0          1           0.500000        0.666667          0.555556
1          2           0.933333        0.933333          0.933333
2          3           0.969697        0.966667          0.966583
3          4           0.969697        0.966667          0.966583
4          5           0.969697        0.966667          0.966583


In [20]:
print("Micro Scores")
print(df_results[["Max Depth", "Precision (Micro)", "Recall (Micro)", "F1 Score (Micro)"]])

Micro Scores
   Max Depth  Precision (Micro)  Recall (Micro)  F1 Score (Micro)
0          1           0.666667        0.666667          0.666667
1          2           0.933333        0.933333          0.933333
2          3           0.966667        0.966667          0.966667
3          4           0.966667        0.966667          0.966667
4          5           0.966667        0.966667          0.966667


In [21]:
print("Weighted Scores")
print(df_results[["Max Depth", "Precision (Weighted)", "Recall (Weighted)", "F1 Score (Weighted)"]])

Weighted Scores
   Max Depth  Precision (Weighted)  Recall (Weighted)  F1 Score (Weighted)
0          1              0.500000           0.666667             0.555556
1          2              0.933333           0.933333             0.933333
2          3              0.969697           0.966667             0.966583
3          4              0.969697           0.966667             0.966583
4          5              0.969697           0.966667             0.966583


In [22]:
best_recall_depth = df_results.loc[df_results["Recall (Macro)"].idxmax(), "Max Depth"]
best_recall_value = df_results["Recall (Macro)"].max()

In [23]:
orst_precision_depth = df_results.loc[df_results["Precision (Macro)"].idxmin(), "Max Depth"]
worst_precision_value = df_results["Precision (Macro)"].min()


In [24]:
best_f1_depth = df_results.loc[df_results["F1 Score (Macro)"].idxmax(), "Max Depth"]
best_f1_value = df_results["F1 Score (Macro)"].max()

In [27]:
print(f"The highest recall occurs at max_depth = {best_recall_depth}, with a value of {best_recall_value:.4f}")
print(f"The wowest precision (Precision) occurs at max_depth = {worst_precision_depth}, with a value of {worst_precision_value:.4f}")
print(f"The best F1 score occurs at max_depth = {best_f1_depth}, with a value of {best_f1_value:.4f}\n")


The highest recall (Recall) occurs at max_depth = 3, with a value of 0.9667


NameError: name 'worst_precision_depth' is not defined

In [26]:
print("\nDifferences between micro average, macro average, and weighted average scoring methods:")
print("Micro Average: First calculate the total number of true positives(TP), false positives(FP), and false negatives(FN) across all categories, then compute the recall, precision, and F1 score based on these totals.")
print("Macro Average: Calculate the recall, precision, and F1 score for each category separately, and then take the average of these scores.")
print("Weighted Average: Calculate the recall, precision, and F1 score for each category separately, and then compute a weighted average of these scores based on the number of samples in each category.")



Differences between micro average, macro average, and weighted average scoring methods:
Micro Average: First calculate the total number of true positives(TP), false positives(FP), and false negatives(FN) across all categories, then compute the recall, precision, and F1 score based on these totals.
Macro Average: Calculate the recall, precision, and F1 score for each category separately, and then take the average of these scores.
Weighted Average: Calculate the recall, precision, and F1 score for each category separately, and then compute a weighted average of these scores based on the number of samples in each category.
