In [15]:
import pandas as pd

# Load the data from the CSV file into a DataFrame using the pandas library.
data = pd.read_csv("mushrooms.csv")

In [16]:
from numpy import ndarray

def prepare_data_sets(number_of_used_attributes: int) -> tuple[ndarray, ndarray]:
    """Slice the module-level ``data`` frame into a feature matrix and a label vector.

    Column 0 of ``data`` is returned as the labels. The features are the columns
    from index 1 up to, but not including, ``number_of_used_attributes``: the
    argument is used as the *exclusive* end of the slice, so passing ``n``
    yields ``n - 1`` feature columns.

    Args:
        number_of_used_attributes (int): exclusive end index of the feature
            column slice (the slice starts at column 1).

    Returns:
        tuple[ndarray, ndarray]: ``(dataset, target)`` where ``dataset`` holds
        the selected feature columns and ``target`` holds column 0.

    Raises:
        Exception: any error hit while slicing is reported on stdout and then
            re-raised, so the caller never receives ``None`` in place of the
            expected pair.
    """
    try:
        values = data.values  # materialise the frame once and slice it twice
        dataset = values[:, 1:number_of_used_attributes]
        target = values[:, 0]
    except Exception as error:
        print(f"An unexpected error: {error} while preparing data sets.")
        # Propagate: returning None here would only fail later, at the caller's
        # tuple unpacking, with a message unrelated to the real cause.
        raise
    return dataset, target

In [17]:
# Passed to prepare_data_sets(), which uses it as the exclusive end of the
# feature-column slice (features start at column 1).
# NOTE(review): "maximum" is the original author's claim — confirm against the
# number of columns actually present in the CSV.
attributes: int = 22  # maximum value that we can test.
# Bind the feature matrix and label vector as module-level names; later cells read them.
dataset, target = prepare_data_sets(attributes)

In [18]:
from sklearn import preprocessing

def convert_labels_and_attributes_to_numeric_values(number_of_used_attributes: int) -> None:
    """Label-encode the module-level ``target`` and ``dataset`` arrays.

    ``target`` is rebound to the codes returned by ``LabelEncoder.fit_transform``.
    The first ``number_of_used_attributes - 1`` columns of ``dataset`` are
    overwritten in place, one column at a time.

    Args:
        number_of_used_attributes (int): one more than the number of leading
            ``dataset`` columns to encode.

    Raises:
        Exception: any error hit while encoding is reported on stdout and then
            re-raised. ``dataset`` may already be partially encoded at that
            point, so continuing silently would hand mixed data to later cells.
    """
    global target, dataset
    try:
        label_encoder = preprocessing.LabelEncoder()
        target = label_encoder.fit_transform(target)
        # The same encoder is refitted for every column, so the codes are only
        # meaningful within a single column.
        for column in range(number_of_used_attributes - 1):
            dataset[:, column] = label_encoder.fit_transform(dataset[:, column])
    except Exception as error:
        print(f"An unexpected error: {error} while converting labels to numeric values.")
        raise

In [19]:
# Encode the module-level `target` and `dataset` to numeric codes before splitting and training.
convert_labels_and_attributes_to_numeric_values(attributes)

In [20]:
from sklearn.model_selection import train_test_split

def splits_data_for_testing(_test_size: float, random_state=None) -> tuple:
    """Split the module-level ``dataset`` and ``target`` into training and test parts.

    ``target`` is reshaped to a single-column 2-D array before splitting, so
    both returned target parts are column vectors.

    Args:
        _test_size (float): forwarded to ``train_test_split`` as ``test_size``,
            i.e. the proportion of the data placed in the *test* split.
        random_state (int | None): forwarded to ``train_test_split``. Pass an
            integer to get the same shuffle on every run; the default ``None``
            keeps the previous unseeded behaviour.

    Returns:
        tuple: ``(training_data, testing_data, training_target, testing_target)``.

    Raises:
        Exception: any error hit while splitting is reported on stdout and then
            re-raised, so the caller never receives ``None`` in place of the
            expected four values.
    """
    try:
        training_data, testing_data, training_target, testing_target = train_test_split(
            dataset,
            target.reshape(-1, 1),
            test_size=_test_size,
            random_state=random_state,
        )
    except Exception as error:
        print(f"An unexpected error: {error} while splitting data for testing.")
        raise
    return training_data, testing_data, training_target, testing_target

In [21]:
# 0.1 is passed through as train_test_split's test_size: 10% of the rows are held out for testing.
training_data, testing_data, training_target, testing_target = \
    splits_data_for_testing(0.1)

In [22]:
from sklearn.tree import DecisionTreeClassifier

def train_decision_tree_model(_max_depth: int, random_state=None) -> ndarray:
    """Fit a decision tree on the training split and predict the test split.

    Reads the module-level ``training_data``, ``training_target`` and
    ``testing_data``. The fitted classifier itself is not returned; only its
    predictions are.

    Args:
        _max_depth (int): maximum depth of the decision tree.
        random_state (int | None): forwarded to ``DecisionTreeClassifier``.
            Pass an integer for a repeatable fit; the default ``None`` keeps
            the previous unseeded behaviour.

    Returns:
        ndarray: predicted class for each row of ``testing_data``.

    Raises:
        Exception: any error hit while fitting or predicting is reported on
            stdout and then re-raised, so the caller never silently receives
            ``None`` instead of predictions.
    """
    # No `global` statement: the module-level names are only read, never rebound.
    try:
        decision_tree = DecisionTreeClassifier(
            criterion='entropy',
            max_depth=_max_depth,
            random_state=random_state,
        )
        decision_tree.fit(training_data, training_target)
        return decision_tree.predict(testing_data)
    except Exception as error:
        print(f"An unexpected error: {error} while training decision tree model.")
        raise

In [23]:
# Train a tree with max_depth=8 and keep its test-set predictions for the reporting cells below.
decision_predict_tree_array = train_decision_tree_model(8)

In [24]:
from sklearn.metrics import confusion_matrix

def print_confusion_matrix() -> None:
    """Print a header followed by the confusion matrix of the stored predictions.

    Compares the module-level ``testing_target`` with
    ``decision_predict_tree_array``. Any failure is reported on stdout rather
    than raised.
    """
    try:
        print("Confusion Matrix:")
        # Header goes out first; the matrix is computed afterwards, so a failure
        # here still leaves the header in the output.
        matrix = confusion_matrix(testing_target, decision_predict_tree_array)
        print(matrix)
    except Exception as exc:
        print(f"An unexpected error: {exc} while printing confusion matrix.")

In [25]:
from sklearn.metrics import accuracy_score

def print_accuracy_score() -> None:
    """Print a header followed by the accuracy of the stored predictions.

    Compares the module-level ``testing_target`` with
    ``decision_predict_tree_array``. Any failure is reported on stdout rather
    than raised.
    """
    try:
        print("Accuracy score:")
        # Header goes out first; the score is computed afterwards, so a failure
        # here still leaves the header in the output.
        score = accuracy_score(testing_target, decision_predict_tree_array)
        print(score)
    except Exception as exc:
        print(f"An unexpected error: {exc} while printing accuracy score.")

In [26]:
# Report how the stored predictions compare with the held-out test labels.
print_confusion_matrix()
print_accuracy_score()

Confusion Matrix:
[[394   0]
 [  0 419]]
Accuracy score:
1.0
