# Import libraries 

In [14]:
import sys
# Make the project's helper scripts (one directory up, in scripts/) importable.
sys.path.append('../scripts/')
# NOTE(review): star imports hide where names come from. load_model, load_npy_data
# and evaluate_model used below presumably come from these three modules — confirm
# which module defines each and consider importing them explicitly.
from model_training_script import *
from data_loading_script import *
from model_evaluation_script import *
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
import pickle 
import warnings
# Suppresses every warning raised after this point for the rest of the session.
# NOTE(review): this also hides any library warnings emitted while unpickling
# models or computing metrics — consider a narrower filter.
warnings.filterwarnings("ignore")

## Load models and data

In [2]:
# Load the two pickled models via the project's load_model helper.
decision_tree = load_model("../models/decision_tree.pkl")
naive_bayes = load_model("../models/naive_bayes.pkl")

# Load the pickled TF-IDF vectorizer and label encoder stored next to the cleaned data.
# NOTE: pickle.load can execute arbitrary code embedded in the file — only load
# artifacts that were produced by this project's own pipeline.
with open('../data/cleaned_data/tfidf_vectorizer.pkl', 'rb') as f:
    tfidf_vectorizer = pickle.load(f)

with open('../data/cleaned_data/label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

In [3]:
# Read the 'X_test' and 'y_test' files (presumably .npy arrays — see load_npy_data)
# from the cleaned-data directory.
X_test, y_test = load_npy_data(
    path='../data/cleaned_data',
    files=['X_test', 'y_test'],
)

In [4]:
# Bare last expression: the notebook displays the loaded decision-tree model's repr.
decision_tree

In [5]:
# Bare last expression: the notebook displays the loaded naive-Bayes model's repr.
naive_bayes

# Visualizing a Decision Tree

In [6]:
# Node labels for the plot: feature names from the fitted vectorizer and
# class labels from the label encoder, both converted to plain Python lists.
feature_names = tfidf_vectorizer.get_feature_names_out().tolist()
class_names = label_encoder.classes_.tolist()

# Explicit figure/axes instead of the implicit pyplot "current figure".
# The 300x300-inch canvas is kept as-is; NOTE(review): at the default DPI this
# rasterises to a very large image — confirm it is intentional.
fig, ax = plt.subplots(figsize=(300, 300))
plot_tree(
    decision_tree,
    feature_names=feature_names,
    class_names=class_names,
    filled=True,
    fontsize=14,
    ax=ax,
)
# Persist the rendered tree next to the model files, then display it inline.
fig.savefig('../models/decision_tree.png')
plt.show()

# Evaluate model

In [15]:
 accuracy, classification_report, confusion_matrix, precision, recall, f1_score = evaluate_model(decision_tree, X_test, y_test)

## Accuracy

In [16]:
# Show the accuracy returned by evaluate_model, formatted to two decimals.
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.66


## Confusion Matrix

In [17]:
# Print the confusion matrix returned by evaluate_model.
# NOTE(review): presumably rows are true classes and columns are predictions
# (scikit-learn convention) — confirm in evaluate_model.
print(confusion_matrix)

[[19  5  0  2  0  0  0  4  0  0  1]
 [ 6 55  0  0  0  0  1  6  0  0  0]
 [ 3  2  5  0  0  0  1  3  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0]
 [ 2  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  2  0  1  0  0  0  0  0]
 [ 2  0  1  0  0  0  2  0  0  0  0]
 [ 7  8  1  0  0  0  0 29  1  0  1]
 [ 0  0  0  0  0  0  1  0  0  0  0]
 [ 2  0  0  0  0  0  0  1  0  9  1]
 [ 1  1  0  0  0  1  0  1  0  0 12]]


## Classification Report

In [18]:
# Print the per-class report returned by evaluate_model (a value here, not the
# scikit-learn function of the same name).
print(classification_report)

              precision    recall  f1-score   support

           1       0.45      0.61      0.52        31
           2       0.77      0.81      0.79        68
           8       0.71      0.36      0.48        14
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         2
          12       0.50      0.33      0.40         3
          14       0.40      0.40      0.40         5
          15       0.66      0.62      0.64        47
          16       0.00      0.00      0.00         1
          20       1.00      0.69      0.82        13
          24       0.80      0.75      0.77        16

    accuracy                           0.66       200
   macro avg       0.48      0.42      0.44       200
weighted avg       0.68      0.66      0.66       200



## Precision

In [19]:
# Show the overall precision returned by evaluate_model, formatted to two decimals.
# NOTE(review): the recorded value equals the accuracy, which is what
# micro-averaging produces — confirm the averaging mode used in evaluate_model.
print(f"Precision: {precision:.2f}")

Precision: 0.66


## Recall

In [20]:
# Show the overall recall returned by evaluate_model, formatted to two decimals.
print(f"Recall: {recall:.2f}")


Recall: 0.66


## F1-Score

In [21]:
# Show the overall F1 score returned by evaluate_model, formatted to two decimals.
print(f"F1-Score: {f1_score:.2f}")

F1-Score: 0.66
