In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelBinarizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import tree

## Load the data

In [None]:
# Read the raw mushroom table from the CSV file that sits next to the notebook.
data = pd.read_csv(filepath_or_buffer='mushrooms40.csv')

## Column encoding

In [None]:
# Encode every column as 0/1 integers.
#
# LabelBinarizer.fit_transform always returns a 2-D array: a single column
# when the input has at most two distinct values, and one indicator column
# per category otherwise. A multi-column result cannot be assigned to a single
# DataFrame column (pandas raises ValueError), so the two cases are handled
# explicitly and the encoded frame is rebuilt from scratch.
encoded_columns = {}
for col in data.columns:
    binarizer = LabelBinarizer()
    indicators = binarizer.fit_transform(data[col])
    if indicators.shape[1] == 1:
        # Binary (or constant) column: keep its name and flatten to 1-D.
        encoded_columns[col] = indicators.ravel()
    else:
        # Multi-category column: one indicator column per category, named
        # '<column>_<category>'.
        # NOTE(review): if the target column 'class' ever has more than two
        # categories it is expanded as well and a later data['class'] lookup
        # will fail -- confirm the target is binary.
        for position, category in enumerate(binarizer.classes_):
            encoded_columns[f'{col}_{category}'] = indicators[:, position]
data = pd.DataFrame(encoded_columns, index=data.index)

## Extract test/train data

In [None]:
# Separate the target ('class') from the predictors. The predictor frame is
# built once and reused for both the value matrix and the column names.
features = data.drop(columns='class')
y = data['class'].values
X = features.values
feature_names = features.columns

# Hold out 30% of the rows for testing. random_state is fixed so that the
# split -- and every metric computed from it -- is identical after
# Restart Kernel -> Run All.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=42)

## Build and train the decision tree

In [None]:
# Fit an unconstrained decision tree on the training split.
# random_state is fixed because scikit-learn randomly permutes the features
# at each split, so otherwise the fitted tree could differ between runs.
model = DecisionTreeClassifier(random_state=42)
model.fit(Xtrain, ytrain)

In [None]:
# Draw the fitted tree with one coloured, rounded box per node.
#
# plot_tree documents feature_names and class_names as array-likes of str;
# a dict (and, in some scikit-learn releases, a pandas Index) is rejected by
# its parameter validation, so both are passed as plain lists. class_names is
# positional: entry 0 labels encoded class 0, entry 1 labels encoded class 1.
#
# NOTE(review): class 0 is shown as the dizzy face and class 1 as the yummy
# face. LabelBinarizer assigns 0/1 in sorted order of the raw values, so check
# the raw 'class' values to confirm the two emojis are not swapped.
plt.figure(figsize=(10,8))
tree.plot_tree(model,
               fontsize=10,
               feature_names=list(feature_names),
               label='all',
               filled=True,
               rounded=True,
               impurity=True,
               class_names=['$\U0001F635$', '$\U0001F60B$']);

## Training performance

In [None]:
# Score the model on the rows it was trained on.
yhat = model.predict(Xtrain)
train_confusion = confusion_matrix(ytrain, yhat)
train_accuracy = accuracy_score(ytrain, yhat)

print(train_confusion)
print(f'Training accuracy: {train_accuracy:.2f}')

## Test performance

In [None]:
# Score the model on the held-out test rows. The label says "Test" so this
# output cannot be mistaken for the training score.
yhat = model.predict(Xtest)
print(confusion_matrix(ytest, yhat))
print(f'Test accuracy: {accuracy_score(ytest, yhat):.2f}')