# 1. Import Libraries & Data

In [1]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report

In [2]:
# Load the new dataset from its pickle file.
# NOTE: this is an absolute, machine-specific path; adjust it when running elsewhere.
dataset_path = '/home/maria/Desktop/Deception_project/0. Final_Scripts/Generalisation/kaggle_movaver_dataset.pkl'
gen_data = pd.read_pickle(dataset_path)

# 2. Generalisation

In [3]:
# Evaluate the saved classifier on the new dataset: load the model, build the
# feature matrix and label vector from `gen_data`, predict, and print metrics.

# load model
# NOTE: joblib.load unpickles arbitrary Python objects -- only load model files from a trusted source.
clf = joblib.load('/home/maria/Desktop/Deception_project/0. Final_Scripts/Generalisation/rf_final_model_v2.pkl')
print('Model loaded.')

# prepare features: drop the target and identifier columns, keep numeric columns only
feature_frame = (
    gen_data
    .drop(columns=['label', 'face_id', 'video_id'])
    .select_dtypes(include=['number'])
)

# The model receives a plain array without column names, so features are matched
# purely by position. If the model recorded the column names it was fitted with
# and all of them are available here, put the columns in that exact order first.
# Otherwise the frame is used as-is.
trained_columns = getattr(clf, 'feature_names_in_', None)
if trained_columns is not None and set(trained_columns) <= set(feature_frame.columns):
    feature_frame = feature_frame[list(trained_columns)]

X_new = feature_frame.to_numpy().astype('float32')

# encode labels; fail loudly on values outside the mapping instead of carrying NaN into the metrics
label_codes = gen_data['label'].map({'lie': 0, 'truth': 1})
if label_codes.isna().any():
    unexpected = sorted(gen_data.loc[label_codes.isna(), 'label'].astype(str).unique())
    raise ValueError(f"Unexpected label values (expected 'lie' or 'truth'): {unexpected}")
y_true = label_codes.to_numpy()

# predict class and probability
y_pred = clf.predict(X_new)

# predict_proba columns follow the order of clf.classes_, so look up the column
# belonging to class 1 ('truth') rather than assuming it is the second one
model_classes = list(clf.classes_)
if 1 not in model_classes:
    raise ValueError(f"Model has no class 1 ('truth'); classes are {model_classes}")
y_proba = clf.predict_proba(X_new)[:, model_classes.index(1)]

print("Predictions:", y_pred[:10])
print("Probabilities:", y_proba[:10])

# metrics
# labels=[0, 1] pins the row/column order of the confusion matrix and keeps the
# report aligned with target_names even if one class is absent from the data
acc = accuracy_score(y_true, y_pred)
auc = roc_auc_score(y_true, y_proba)
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
report = classification_report(y_true, y_pred, labels=[0, 1], target_names=['lie', 'truth'])

print(f"Accuracy: {acc*100:.2f}%")
print(f"AUC: {auc:.4f}")
print("Confusion Matrix:")
print(cm)
print("Classification Report:")
print(report)

Model loaded.
Predictions: [1 1 1 1 1 0 1 0 0 0]
Probabilities: [0.52510476 0.63833333 0.6143381  0.55176667 0.56303333 0.46433333
 0.53956667 0.46746667 0.45823333 0.47236667]
Accuracy: 47.50%
AUC: 0.4229
Confusion Matrix:
[[28 33]
 [30 29]]
Classification Report:
              precision    recall  f1-score   support

         lie       0.48      0.46      0.47        61
       truth       0.47      0.49      0.48        59

    accuracy                           0.47       120
   macro avg       0.48      0.48      0.47       120
weighted avg       0.48      0.47      0.47       120



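# Results:
 - Accuracy: 47.50%
 - AUC: 0.4229

On the 120 samples of the new dataset both metrics are slightly below chance level (50% accuracy, 0.5 AUC), so the model shows no generalisation to this data.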