In [None]:
from pathlib import Path

from joblib import load
from pandas import DataFrame, option_context, read_csv
from sklearn.metrics import classification_report, confusion_matrix

# Project-local modules (note: `metrics` here is the local module, not sklearn.metrics).
from classifier import DialogueActClassifierFactory
from metrics import ConfusionMatrixGenerator


# --- Models -------------------------------------------------------------------
# Program comprehension challenge classifier, deserialised with joblib.
pcc_clf = load('./models/program_comprehension_challenge_classifier.pickle')

# Dialogue act classifier obtained through the project factory. The factory
# object itself is kept because later cells query it for evaluation figures.
dac_factory = DialogueActClassifierFactory()
dac_clf = dac_factory.get_classifier(
    classifier_file=Path('./models/dialogue_act_classifier.pickle'),
    test_set_percentage=10,
)

# --- Data ---------------------------------------------------------------------
training_dataset = read_csv('../master-of-engineering/Assets/BigQuery/training_dataset.csv')
test_dataset = read_csv('../master-of-engineering/Assets/BigQuery/test_dataset.csv')

# Columns fed to the classifier, and the column holding the target.
FEATURES = ['body', 'dialogue_act_classification_ml', 'comment_is_by_author']
LABEL = 'program_comprehension_challenge'

# Feature frames and target series for the training and test splits.
X_train, y_train = training_dataset[FEATURES], training_dataset[LABEL]
X_test, y_true = test_dataset[FEATURES], test_dataset[LABEL]

# Dialogue Act Classifier Performance

In [None]:
# Precision and recall, each keyed by dialogue act label, as returned by the factory.
precisions, recalls = dac_factory.get_precision_and_recall()

# The first row (empty label) carries only the value of get_accuracy(); every
# other row carries one label's precision and recall. Label rows are one item
# short on purpose: pandas pads the missing 'Accuracy' cell when building the frame.
dac_report = {'': [None, None, dac_factory.get_accuracy()]}
dac_report.update(
    (label, [precisions[label], recalls[label]])
    for label in precisions.keys()
)

df = DataFrame.from_dict(
    dac_report,
    orient='index',
    columns=['Precision', 'Recall', 'Accuracy'],
)

df

In [None]:
# Confusion matrix of the dialogue act classifier, as produced by the factory.
dac_cm = dac_factory.get_confusion_matrix()

# NOTE(review): `_confusion` and `_values` are private attributes of the returned
# object — presumably the cell counts and the label list; confirm against its class.
ConfusionMatrixGenerator.print_confusion_matrix(
    dac_cm._confusion,
    dac_cm._values,
    number_formatting='.1f',
    font_size=8,
)

# Program Comprehension Challenge Classifier

In [None]:
# Fit the loaded classifier on the training split, then predict labels for the
# test split; `y_pred` is compared against `y_true` in the evaluation cell below.
# NOTE(review): the classifier comes from a pickle and is (re)fitted here —
# confirm the pickle is meant to hold an unfitted estimator.
pcc_clf.fit(X_train, y_train)
y_pred = pcc_clf.predict(X_test)

In [None]:
# Evaluate the program comprehension challenge classifier on the test split.
# `classification_report` and `confusion_matrix` come from the notebook's import
# cell, so this cell no longer binds the name `metrics` to sklearn.metrics
# alongside the project's own `metrics` module.
report = classification_report(y_true, y_pred, digits=8)
print(report)  # the report is a preformatted string, so print() is the right display

# Pin the label order so rows/columns come out as "Yes" then "No", matching the
# label list handed to the plotting cell below.
cm = confusion_matrix(y_true, y_pred, labels=["Yes", "No"])

In [None]:
# Render the 2x2 confusion matrix held in `cm`; the label list is in the same
# order as the `labels=` argument used when `cm` was computed.
ConfusionMatrixGenerator.print_confusion_matrix(
    cm,
    ["Yes", "No"],
    figsize=[4, 4],
    number_formatting='.2f',
    font_size=12,
)