# Imports

In [18]:
import pandas as pd

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from utils import load_data, predict_formality, confusion_matrix, calculate_metrics

# Loading the Data

Change the threshold value (anywhere in the range 0 to 3) to alter the difficulty of the dataset. A low threshold produces a more challenging classification task, while a high threshold produces a simpler one.

Note that the threshold value also directly controls the size of the dataset (consult the documentation for an explanation of this).

In [19]:
# Minimum absolute `avg_score` a row must exceed to be kept. Lower values keep
# more borderline rows (harder task, larger dataset); higher values keep only
# rows with a large score magnitude (easier task, smaller dataset).
threshold = 1.5
train_df, test_df = load_data()

# Pool both splits into a single frame for evaluation.
df = pd.concat([train_df, test_df])

# Keep only the rows whose score magnitude is strictly above the threshold.
# `.copy()` gives an independent frame so the column assignment below does not
# write into a slice of `df`.
binary_df = df[df['avg_score'].abs() > threshold].copy()

# Binary target: 1 when the average score is positive, 0 otherwise.
# Vectorized comparison instead of a per-row Python lambda.
binary_df['formal'] = (binary_df['avg_score'] > 0).astype('int64')

# Load Models and Generate Predictions

### XLM-RoBERTa Model

In [20]:
# Tokenizer and classifier are loaded from the same Hugging Face checkpoint,
# so the identifier is named once and reused for both calls.
xlmr_checkpoint = "s-nlp/xlmr_formality_classifier"
xlmr_tokenizer = AutoTokenizer.from_pretrained(xlmr_checkpoint)
xlmr_model = AutoModelForSequenceClassification.from_pretrained(xlmr_checkpoint)

In [21]:
# Run the XLM-RoBERTa classifier on the binary (extreme) dataset, then unpack
# the returned pair into predicted labels and predicted logits.
xlmr_predictions = predict_formality(xlmr_model, xlmr_tokenizer, binary_df)
xlmr_predicted_labels, xlmr_predicted_logits = xlmr_predictions

Using device: cuda


100%|██████████| 843/843 [01:34<00:00,  8.95it/s]


### DistilBERT Model

In [22]:
# One checkpoint identifier feeds both the tokenizer and the classifier.
distilbert_checkpoint = 's-nlp/mdistilbert-base-formality-ranker'
distilbert_tokenizer = AutoTokenizer.from_pretrained(distilbert_checkpoint)
distilbert_model = AutoModelForSequenceClassification.from_pretrained(distilbert_checkpoint)

In [23]:
# Run the DistilBERT classifier on the binary (extreme) dataset. This call
# explicitly turns off `return_token_type_ids`, unlike the XLM-RoBERTa call.
distilbert_predictions = predict_formality(
    distilbert_model,
    distilbert_tokenizer,
    binary_df,
    return_token_type_ids=False,
)
distilbert_predicted_labels, distilbert_predicted_logits = distilbert_predictions

Using device: cuda


100%|██████████| 843/843 [00:52<00:00, 16.07it/s]


### mDeBERTa Base Model

In [3]:
# One checkpoint identifier feeds both the tokenizer and the classifier.
mdeberta_checkpoint = 's-nlp/mdeberta-base-formality-ranker'
mdeberta_tokenizer = AutoTokenizer.from_pretrained(mdeberta_checkpoint)
mdeberta_model = AutoModelForSequenceClassification.from_pretrained(mdeberta_checkpoint)

In [12]:
# Generating predictions on the binary (extreme) dataset
mdeberta_predicted_labels, mdeberta_predicted_logits = predict_formality(
    mdeberta_model, mdeberta_tokenizer, binary_df, padding=True, truncate=False
)

# Fail fast if the predictions do not line up one-to-one with the current
# `binary_df`. The metrics cell compares these labels against
# binary_df['formal'], so a length mismatch (e.g. predictions left over from a
# run with a different `threshold`) would otherwise only surface there as an
# "inconsistent numbers of samples" error.
# NOTE(review): the saved run of this cell appears to predate the current
# `binary_df` (out-of-order execution count, far fewer batches than the other
# models) -- re-run after Restart & Run All.
if len(mdeberta_predicted_labels) != len(binary_df):
    raise ValueError(
        f"mDeBERTa produced {len(mdeberta_predicted_labels)} predictions for "
        f"{len(binary_df)} rows in binary_df; re-run this cell against the "
        f"current dataset."
    )

Using device: cuda


100%|██████████| 143/143 [00:05<00:00, 24.27it/s]


### DeBERTa Large Model

In [24]:
# Loading the model from Hugging Face
deberta_large_tokenizer = AutoTokenizer.from_pretrained('s-nlp/deberta-large-formality-ranker')
deberta_large_model = AutoModelForSequenceClassification.from_pretrained('s-nlp/deberta-large-formality-ranker')

In [25]:
# Run the DeBERTa-large classifier on the binary (extreme) dataset. This is
# the only model in the notebook invoked with an explicit batch_size (1).
deberta_large_predictions = predict_formality(
    deberta_large_model,
    deberta_large_tokenizer,
    binary_df,
    batch_size=1,
)
deberta_large_predicted_labels, deberta_large_predicted_logits = deberta_large_predictions

Using device: cuda


100%|██████████| 3369/3369 [10:21<00:00,  5.42it/s]


# Calculate Metrics (Binary Classification)

### XLM-RoBERTa Model

In [26]:
# Ground-truth labels are read once and passed to both metric helpers.
xlmr_true_labels = binary_df['formal'].values
xlmr_confusion_matrix = confusion_matrix(xlmr_true_labels, xlmr_predicted_labels)
(
    xlmr_accuracy,
    xlmr_precision,
    xlmr_recall,
    xlmr_f1,
) = calculate_metrics(xlmr_true_labels, xlmr_predicted_labels)

# Report the confusion matrix, then the four scalar metrics to 4 decimals.
print("XLM-RoBERTa Model Confusion Matrix:", xlmr_confusion_matrix, sep="\n")

print(
    "XLM-RoBERTa Model Metrics:",
    f"Accuracy: {xlmr_accuracy:.4f}",
    f"Precision: {xlmr_precision:.4f}",
    f"Recall: {xlmr_recall:.4f}",
    f"F1 Score: {xlmr_f1:.4f}",
    sep="\n",
)

XLM-RoBERTa Model Confusion Matrix:
[[1267  642]
 [  11 1449]]
XLM-RoBERTa Model Metrics:
Accuracy: 0.8062
Precision: 0.6930
Recall: 0.9925
F1 Score: 0.8161


### DistilBERT Model

In [27]:
# Ground-truth labels are read once and passed to both metric helpers.
distilbert_true_labels = binary_df['formal'].values
distilbert_confusion_matrix = confusion_matrix(distilbert_true_labels, distilbert_predicted_labels)
(
    distilbert_accuracy,
    distilbert_precision,
    distilbert_recall,
    distilbert_f1,
) = calculate_metrics(distilbert_true_labels, distilbert_predicted_labels)

# Report the confusion matrix, then the four scalar metrics to 4 decimals.
print("DistilBERT Model Confusion Matrix:", distilbert_confusion_matrix, sep="\n")

print(
    "DistilBERT Model Metrics:",
    f"Accuracy: {distilbert_accuracy:.4f}",
    f"Precision: {distilbert_precision:.4f}",
    f"Recall: {distilbert_recall:.4f}",
    f"F1 Score: {distilbert_f1:.4f}",
    sep="\n",
)

DistilBERT Model Confusion Matrix:
[[1305  604]
 [  18 1442]]
DistilBERT Model Metrics:
Accuracy: 0.8154
Precision: 0.7048
Recall: 0.9877
F1 Score: 0.8226


### mDeBERTa Base Model

In [28]:
# Ground-truth labels are read once and passed to both metric helpers.
mdeberta_true_labels = binary_df['formal'].values
mdeberta_confusion_matrix = confusion_matrix(mdeberta_true_labels, mdeberta_predicted_labels)
(
    mdeberta_accuracy,
    mdeberta_precision,
    mdeberta_recall,
    mdeberta_f1,
) = calculate_metrics(mdeberta_true_labels, mdeberta_predicted_labels)

# Report the confusion matrix, then the four scalar metrics to 4 decimals.
print("mDeBERTA Base Model Confusion Matrix:", mdeberta_confusion_matrix, sep="\n")

print(
    "mDeBERTA Base Model Metrics:",
    f"Accuracy: {mdeberta_accuracy:.4f}",
    f"Precision: {mdeberta_precision:.4f}",
    f"Recall: {mdeberta_recall:.4f}",
    f"F1 Score: {mdeberta_f1:.4f}",
    sep="\n",
)

ValueError: Found input variables with inconsistent numbers of samples: [3369, 572]

### DeBERTa Large Model

In [29]:
# Ground-truth labels are read once and passed to both metric helpers.
deberta_large_true_labels = binary_df['formal'].values
deberta_large_confusion_matrix = confusion_matrix(deberta_large_true_labels, deberta_large_predicted_labels)
(
    deberta_large_accuracy,
    deberta_large_precision,
    deberta_large_recall,
    deberta_large_f1,
) = calculate_metrics(deberta_large_true_labels, deberta_large_predicted_labels)

# Report the confusion matrix, then the four scalar metrics to 4 decimals.
print("DeBERTa Large Model Confusion Matrix:", deberta_large_confusion_matrix, sep="\n")

print(
    "DeBERTa Large Model Metrics:",
    f"Accuracy: {deberta_large_accuracy:.4f}",
    f"Precision: {deberta_large_precision:.4f}",
    f"Recall: {deberta_large_recall:.4f}",
    f"F1 Score: {deberta_large_f1:.4f}",
    sep="\n",
)


DeBERTa Large Model Confusion Matrix:
[[1477  432]
 [  22 1438]]
DeBERTa Large Model Metrics:
Accuracy: 0.8652
Precision: 0.7690
Recall: 0.9849
F1 Score: 0.8637
