In [2]:
#### Calculate Accuracy, Precision, Recall, and Specificity using a Confusion Matrix ####

# Importing necessary libraries
import pandas as pd
from sklearn import tree
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import confusion_matrix
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Import data
data_train = pd.read_csv("carseats_train.csv")
data_test = pd.read_csv("carseats_test.csv")

# Model piece
ct = ColumnTransformer(
  [
    ("dummify", OneHotEncoder(sparse_output = False), ["Shelf_Location", "Urban", "US"]),
  ],
  remainder = "passthrough"
)

X_train = data_train.drop(['Sales', 'Sales_cat'], axis=1)
y_train = data_train['Sales_cat'].astype('category').cat.codes
X_test = data_test.drop(['Sales', 'Sales_cat'], axis=1)
y_test = data_test['Sales_cat'].astype('category').cat.codes


## Classification Tree (Depth 1)
tree_pipeline = Pipeline(
  [("preprocessing", ct),
    ("tree", DecisionTreeClassifier(max_depth=1, min_samples_split=2, ccp_alpha=0, random_state=1234))]
).set_output(transform="pandas")

# Fit decision tree
tree_fitted = tree_pipeline.fit(X_train, y_train)

# Compute predictions
y_pred = tree_pipeline.predict(X_test)

# Print confusion matrix
print("Confusion matrix for test data:")
conf_mat = confusion_matrix(y_pred, y_test)
print(conf_mat)

print("Accuracy:")
print((conf_mat[0,0] + conf_mat[1,1])/len(y_test))

print("Precision:")
print(conf_mat[0,0]/(conf_mat[0,0] + conf_mat[0,1]))

print("Recall:")
print(conf_mat[0,0]/(conf_mat[0,0] + conf_mat[1,0]))

print("Specificity:")
print(conf_mat[1,1]/(conf_mat[1,1] + conf_mat[1,0]))

Confusion matrix for test data:
[[12  5]
 [21 43]]
Accuracy:
0.6790123456790124
Precision:
0.7058823529411765
Recall:
0.36363636363636365
Specificity:
0.671875
