# 1. Import Libraries & Data

In [1]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report

In [2]:
# Read the pickled generalisation dataset from disk.
# NOTE(review): absolute, machine-specific path — it only resolves on the original author's machine.
gen_data_path = '/home/maria/Desktop/Deception_project/0. Final_Scripts/Generalisation/kaggle_movaver_dataset.pkl'
gen_data = pd.read_pickle(gen_data_path)

# 2. Generalisation

In [3]:
# Evaluate the previously trained classifier on the new dataset held in `gen_data`
# and print accuracy, ROC AUC, the confusion matrix and a per-class report.

# load model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
clf = joblib.load('/home/maria/Desktop/Deception_project/0. Final_Scripts/Generalisation/xgb_final_model_v2.pkl')
print('Model loaded.')

# prepare features: drop the target and identifier columns, keep numeric columns only
# NOTE(review): assumes the remaining numeric columns are the same features, in the
# same order, that the model was trained on — TODO confirm against the training script.
X_new = gen_data.drop(columns=['label', 'face_id', 'video_id'])
X_new = X_new.select_dtypes(include=['number']).to_numpy().astype('float32')

# encode the string labels as integers
label_encoding = {'lie': 0, 'truth': 1}
y_encoded = gen_data['label'].map(label_encoding)

# Series.map() turns every value missing from the mapping into NaN; fail loudly here
# instead of letting the metric functions raise a less specific error further down.
unmapped_rows = y_encoded.isna()
if unmapped_rows.any():
    unexpected_labels = sorted(gen_data.loc[unmapped_rows, 'label'].astype(str).unique())
    raise ValueError(f"Unexpected label value(s) in gen_data['label']: {unexpected_labels}")

y_true = y_encoded.values

# predict class and probability
y_pred = clf.predict(X_new)
# column 1 of predict_proba is the score of the model's second class;
# presumably that is label 1 ('truth') — confirm against the training encoding.
y_proba = clf.predict_proba(X_new)[:, 1]

print("Predictions:", y_pred[:10])
print("Probabilities:", y_proba[:10])

# metrics
# Pin the class ids so the confusion matrix is always 2x2 and the report rows always
# line up with `class_names`, even if one class is absent from y_true or y_pred.
class_ids = [0, 1]
class_names = ['lie', 'truth']

acc = accuracy_score(y_true, y_pred)
auc = roc_auc_score(y_true, y_proba)
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
report = classification_report(y_true, y_pred, labels=class_ids, target_names=class_names)

print(f"Accuracy: {acc*100:.2f}%")
print(f"AUC: {auc:.4f}")
print("Confusion Matrix:")
print(cm)
print("Classification Report:")
print(report)


Model loaded.
Predictions: [1 1 1 1 1 1 1 1 0 0]
Probabilities: [0.6701525  0.67593944 0.7673479  0.66910136 0.72292376 0.5927751
 0.5344016  0.53627795 0.4324088  0.28229168]
Accuracy: 55.00%
AUC: 0.5293
Confusion Matrix:
[[37 24]
 [30 29]]
Classification Report:
              precision    recall  f1-score   support

         lie       0.55      0.61      0.58        61
       truth       0.55      0.49      0.52        59

    accuracy                           0.55       120
   macro avg       0.55      0.55      0.55       120
weighted avg       0.55      0.55      0.55       120



# Results:
 - accuracy of 55.00% is only slightly better than random (majority-class baseline: 61/120 ≈ 50.8%)
 - AUC: 0.5293
 - the model did not separate lie vs truth well on the kaggle dataset
 - many misclassifications → model does not capture patterns reliably
 - lie is predicted slightly better than truth (recall 0.61 vs 0.49)