In [1]:
import pandas as pd

# Load the data from the CSV file into a DataFrame using the pandas library.
# The resulting module-level `data` is what prepare_data_sets() reads from.
data = pd.read_csv("mushrooms.csv")

In [2]:
from numpy import ndarray

def prepare_data_sets(number_of_used_attributes: int,
                      frame: "pd.DataFrame | None" = None) -> tuple[ndarray, ndarray]:
    """Builds the attribute matrix and the label vector used for testing.

    Column 0 of the table is taken as the label. Columns from index 1 up to
    (but not including) ``number_of_used_attributes`` form the attribute
    matrix, so the matrix holds ``number_of_used_attributes - 1`` columns
    (fewer when the table is narrower than that).

    Both returned arrays are independent copies. The array exposed by a
    DataFrame can be a view of its storage (or read-only under pandas
    Copy-on-Write), so handing out raw slices would let a later in-place
    edit either alter the source table or fail.

    Args:
        number_of_used_attributes (int): exclusive upper column index of the
            attribute slice.
        frame (pandas.DataFrame | None): table to read from. When omitted,
            the module-level ``data`` DataFrame is used.

    Returns:
        tuple[ndarray, ndarray]: ``(dataset, target)``. If anything fails the
        error is printed and ``None`` is returned instead.
    """
    try:
        table = data if frame is None else frame
        values = table.to_numpy()
        # .copy() detaches the results from the table's own buffer.
        dataset = values[:, 1:number_of_used_attributes].copy()
        target = values[:, 0].copy()
        return dataset, target
    except Exception as error:
        print(f"An unexpected error: {error} while preparing data sets.")
        return None

In [3]:
# Value handed to the preparation and encoding steps; 22 is the maximum
# value that we can test.
attributes: int = 22
dataset, target = prepare_data_sets(number_of_used_attributes=attributes)

In [4]:
from sklearn import preprocessing

def convert_labels_and_attributes_to_numeric_values(number_of_used_attributes: int) -> None:
    """Convert labels (edible/non-edible) and attributes to numeric values using LabelEncoder.

    Reads the module-level ``target`` and ``dataset`` and rebinds both to
    their encoded versions. The first ``number_of_used_attributes - 1``
    columns of ``dataset`` are encoded, clamped to the number of columns the
    array really has, so an oversized argument no longer indexes past the end.

    The encoding is written into a copy of ``dataset`` rather than into the
    array itself. That keeps the function working when ``dataset`` is a
    read-only view and prevents the write from reaching whatever array the
    view was taken from. The globals are replaced only after every column has
    been encoded, so a failure cannot leave them half-converted.

    Args:
        number_of_used_attributes (int): number of attributes from which the
            datasets were created; one less than this many columns are encoded.

    Returns:
        None. On failure the error is printed and the globals are left as
        they were.
    """
    global target, dataset
    try:
        label_encoder = preprocessing.LabelEncoder()
        encoded_target = label_encoder.fit_transform(target)
        encoded_dataset = dataset.copy()
        column_count = min(number_of_used_attributes - 1, encoded_dataset.shape[1])
        # The encoder is refitted for every column, so each column gets its
        # own value-to-integer mapping.
        for column in range(column_count):
            encoded_dataset[:, column] = label_encoder.fit_transform(encoded_dataset[:, column])
        target, dataset = encoded_target, encoded_dataset
    except Exception as error:
        print(f"An unexpected error: {error} while converting labels to numeric values.")

In [5]:
# Encode the module-level target and dataset in one step.
convert_labels_and_attributes_to_numeric_values(number_of_used_attributes=attributes)

In [6]:
from sklearn.model_selection import train_test_split

def splits_data_for_testing(_test_size: float, random_state=None) -> tuple:
    """Splits the module-level ``dataset`` and ``target`` into a training set and a test set.

    ``target`` is reshaped to a single column before splitting, so both
    returned target arrays have shape ``(n_samples, 1)``.

    Args:
        _test_size (float): proportion of the dataset to include in the
            test split (forwarded as ``test_size`` to ``train_test_split``).
        random_state (int | None): seed forwarded to ``train_test_split``.
            The default ``None`` keeps the split unseeded, exactly as before;
            pass an integer to get the same split on every run.

    Returns:
        tuple: ``(training_data, testing_data, training_target,
        testing_target)``. If anything fails the error is printed and
        ``None`` is returned instead.
    """
    try:
        training_data, testing_data, training_target, testing_target = train_test_split(
            dataset,
            target.reshape(-1, 1),
            test_size=_test_size,
            random_state=random_state,
        )
        return training_data, testing_data, training_target, testing_target
    except Exception as error:
        print(f"An unexpected error: {error} while splitting data for testing.")
        return None

In [7]:
# Split with a test proportion of 0.1; the four results are kept at module
# level for the training and reporting helpers.
(training_data, testing_data,
 training_target, testing_target) = splits_data_for_testing(_test_size=0.1)

In [8]:
from sklearn.tree import DecisionTreeClassifier

def train_decision_tree_model(_max_depth: int) -> ndarray:
    """Fits an entropy-criterion decision tree and predicts the test samples.

    The tree is trained on the module-level ``training_data`` and
    ``training_target`` and then asked to predict ``testing_data``.

    Args:
        _max_depth (int): max depth of the decision tree.

    Returns:
        ndarray: the predicted class for each sample in ``testing_data``.
        If anything fails the error is printed and ``None`` is returned.
    """
    try:
        classifier = DecisionTreeClassifier(max_depth=_max_depth, criterion="entropy")
        classifier.fit(training_data, training_target)
        predictions = classifier.predict(testing_data)
    except Exception as error:
        print(f"An unexpected error: {error} while training decision tree model.")
        return None
    return predictions

In [9]:
# Predictions of a depth-8 tree, stored at module level for the reporting helpers.
decision_predict_tree_array = train_decision_tree_model(_max_depth=8)

In [10]:
from sklearn.metrics import confusion_matrix

def print_confusion_matrix() -> None:
    """Prints the confusion matrix of the current predictions.

    Compares the module-level ``testing_target`` with
    ``decision_predict_tree_array``. Any error is printed instead of raised.
    """
    try:
        print("Confusion Matrix:")
        # The header goes out first, so it still appears if the call below fails.
        matrix = confusion_matrix(testing_target, decision_predict_tree_array)
        print(f"{matrix}\n")
    except Exception as error:
        print(f"An unexpected error: {error} while printing confusion matrix.")

In [11]:
from sklearn.metrics import accuracy_score

def print_accuracy_score() -> None:
    """Prints the accuracy of the current predictions.

    Compares the module-level ``testing_target`` with
    ``decision_predict_tree_array``. Any error is printed instead of raised.
    """
    try:
        print("Accuracy score:")
        # The header goes out first, so it still appears if the call below fails.
        score = accuracy_score(testing_target, decision_predict_tree_array)
        print(score)
    except Exception as error:
        print(f"An unexpected error: {error} while printing accuracy score.")

In [12]:
# Test every decision tree max depth from max_depth_to_test down to 1. The
# dataset and target do not need to be prepared again because they were
# created in the earlier cells.

try:
    max_depth_to_test: int = 8
    # A named loop variable is used rather than `_`, which IPython reserves
    # for the most recent cell output; the descending range yields the depth
    # directly instead of deriving it by subtraction.
    for current_depth in range(max_depth_to_test, 0, -1):
        print(f"\nActual tree max depth: {current_depth}: \n")
        decision_predict_tree_array = train_decision_tree_model(current_depth)
        print_confusion_matrix()
        print_accuracy_score()

except IndexError as index_error:
    print(f"IndexError: {index_error} while testing max depth.")
except Exception as error:
    print(f"An unexpected error: {error} while testing max depth.")


Actual tree max depth: 8: 

Confusion Matrix:
[[436   0]
 [  0 377]]

Accuracy score:
1.0

Actual tree max depth: 7: 

Confusion Matrix:
[[436   0]
 [  0 377]]

Accuracy score:
1.0

Actual tree max depth: 6: 

Confusion Matrix:
[[433   3]
 [  5 372]]

Accuracy score:
0.990159901599016

Actual tree max depth: 5: 

Confusion Matrix:
[[431   5]
 [ 13 364]]

Accuracy score:
0.977859778597786

Actual tree max depth: 4: 

Confusion Matrix:
[[436   0]
 [ 27 350]]

Accuracy score:
0.966789667896679

Actual tree max depth: 3: 

Confusion Matrix:
[[418  18]
 [  5 372]]

Accuracy score:
0.971709717097171

Actual tree max depth: 2: 

Confusion Matrix:
[[422  14]
 [ 54 323]]

Accuracy score:
0.9163591635916359

Actual tree max depth: 1: 

Confusion Matrix:
[[436   0]
 [207 170]]

Accuracy score:
0.7453874538745388
