# Imports

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from utils import load_data, predict_formality, confusion_matrix, calculate_metrics

# Loading the Data

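Change the `threshold` value (between 0 and 3) to alter the difficulty of the dataset. A low threshold keeps sentences whose average formality score is close to zero, which makes the classification task more challenging, while a high threshold keeps only strongly formal or strongly informal sentences, which makes it simpler.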

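Note that the threshold also directly controls the size of the dataset: only rows whose absolute average score exceeds the threshold are kept, so a higher threshold yields fewer examples (consult the documentation for a fuller explanation).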

In [2]:
# Minimum absolute `avg_score` a row needs in order to be kept. Rows closer to
# zero than this are dropped from the binary evaluation set.
threshold = 1.5

# load_data() returns two values; only the second (the test frame) is used here.
_, test_df = load_data()

# Keep only rows with |avg_score| > threshold. The explicit .copy() makes
# binary_df an independent frame, so adding a column below does not write into
# a slice of test_df.
binary_df = test_df[test_df['avg_score'].abs() > threshold].copy()

# Binary formality target: 1 when avg_score is positive, 0 otherwise.
# A vectorized comparison replaces the per-row lambda. Besides being faster, it
# keeps the column integer-typed even when the filter above leaves no rows
# (Series.apply on an empty Series returns the original float dtype).
binary_df['formal'] = (binary_df['avg_score'] > 0).astype(int)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  binary_df['formal'] = binary_df['avg_score'].apply(lambda x: 1 if x > 0 else 0)


# Load Models and Generate Predictions

### XLM-RoBERTa Model

In [4]:
# XLM-RoBERTa formality classifier, pulled from the Hugging Face Hub.
# The checkpoint id is named once so tokenizer and model cannot drift apart.
xlmr_checkpoint = "s-nlp/xlmr_formality_classifier"

xlmr_tokenizer = AutoTokenizer.from_pretrained(xlmr_checkpoint)
xlmr_model = AutoModelForSequenceClassification.from_pretrained(xlmr_checkpoint)

tokenizer_config.json:   0%|          | 0.00/398 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/896 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

In [5]:
# Run the XLM-RoBERTa model over the binary (extreme) dataset.
# predict_formality's two return values are unpacked as labels and logits;
# see utils.predict_formality for their exact types.
# The recorded run took roughly 13 minutes for 224 progress-bar steps.
(
    xlmr_predicted_labels,
    xlmr_predicted_logits,
) = predict_formality(xlmr_model, xlmr_tokenizer, binary_df)

100%|██████████| 224/224 [13:15<00:00,  3.55s/it]


### DistilBERT Model

In [None]:
# Multilingual DistilBERT formality ranker, pulled from the Hugging Face Hub.
# The checkpoint id is named once so tokenizer and model cannot drift apart.
distilbert_checkpoint = 's-nlp/mdistilbert-base-formality-ranker'

distilbert_tokenizer = AutoTokenizer.from_pretrained(distilbert_checkpoint)
distilbert_model = AutoModelForSequenceClassification.from_pretrained(distilbert_checkpoint)

In [None]:
# Run the DistilBERT model over the binary (extreme) dataset.
# predict_formality's two return values are unpacked as labels and logits;
# see utils.predict_formality for their exact types.
(
    distilbert_predicted_labels,
    distilbert_predicted_logits,
) = predict_formality(distilbert_model, distilbert_tokenizer, binary_df)

### mDeBERTa Base Model

In [None]:
# mDeBERTa base formality ranker, pulled from the Hugging Face Hub.
# The checkpoint id is named once so tokenizer and model cannot drift apart.
mdeberta_checkpoint = 's-nlp/mdeberta-base-formality-ranker'

mdeberta_tokenizer = AutoTokenizer.from_pretrained(mdeberta_checkpoint)
mdeberta_model = AutoModelForSequenceClassification.from_pretrained(mdeberta_checkpoint)

In [None]:
# Run the mDeBERTa base model over the binary (extreme) dataset.
# predict_formality's two return values are unpacked as labels and logits;
# see utils.predict_formality for their exact types.
(
    mdeberta_predicted_labels,
    mdeberta_predicted_logits,
) = predict_formality(mdeberta_model, mdeberta_tokenizer, binary_df)

### DeBERTa Large Model

In [None]:
# DeBERTa large formality ranker, pulled from the Hugging Face Hub.
# The checkpoint id is named once so tokenizer and model cannot drift apart.
deberta_large_checkpoint = 's-nlp/deberta-large-formality-ranker'

deberta_large_tokenizer = AutoTokenizer.from_pretrained(deberta_large_checkpoint)
deberta_large_model = AutoModelForSequenceClassification.from_pretrained(deberta_large_checkpoint)

In [None]:
# Run the DeBERTa large model over the binary (extreme) dataset.
# predict_formality's two return values are unpacked as labels and logits;
# see utils.predict_formality for their exact types.
(
    deberta_large_predicted_labels,
    deberta_large_predicted_logits,
) = predict_formality(deberta_large_model, deberta_large_tokenizer, binary_df)

# Calculate Metrics (Binary Classification)

### XLM-RoBERTa Model

In [6]:
# Score the XLM-RoBERTa predictions against the binary 'formal' column.
# NOTE(review): both helpers are called as (labels from binary_df, predictions);
# presumably that is (y_true, y_pred) — confirm against utils.
gold_labels = binary_df['formal'].values

xlmr_confusion_matrix = confusion_matrix(gold_labels, xlmr_predicted_labels)
(
    xlmr_accuracy,
    xlmr_precision,
    xlmr_recall,
    xlmr_f1,
) = calculate_metrics(gold_labels, xlmr_predicted_labels)

# Report the matrix, then the four scores rounded to four decimal places.
print("XLM-RoBERTa Model Confusion Matrix:", xlmr_confusion_matrix, sep="\n")

print("XLM-RoBERTa Model Metrics:")
print(
    f"Accuracy: {xlmr_accuracy:.4f}",
    f"Precision: {xlmr_precision:.4f}",
    f"Recall: {xlmr_recall:.4f}",
    f"F1 Score: {xlmr_f1:.4f}",
    sep="\n",
)

Confusion Matrix:
[[250 197]
 [  6 443]]
Accuracy: 0.7734375
Precision: 0.6921875
Recall: 0.9866369710467706
F1 Score: 0.8135904499540864


### DistilBERT Model

In [None]:
# Score the DistilBERT predictions against the binary 'formal' column.
# NOTE(review): both helpers are called as (labels from binary_df, predictions);
# presumably that is (y_true, y_pred) — confirm against utils.
gold_labels = binary_df['formal'].values

distilbert_confusion_matrix = confusion_matrix(gold_labels, distilbert_predicted_labels)
(
    distilbert_accuracy,
    distilbert_precision,
    distilbert_recall,
    distilbert_f1,
) = calculate_metrics(gold_labels, distilbert_predicted_labels)

# Report the matrix, then the four scores rounded to four decimal places.
print("DistilBERT Model Confusion Matrix:", distilbert_confusion_matrix, sep="\n")

print("DistilBERT Model Metrics:")
print(
    f"Accuracy: {distilbert_accuracy:.4f}",
    f"Precision: {distilbert_precision:.4f}",
    f"Recall: {distilbert_recall:.4f}",
    f"F1 Score: {distilbert_f1:.4f}",
    sep="\n",
)

### mDeBERTa Base Model

In [None]:
# Score the mDeBERTa base predictions against the binary 'formal' column.
# NOTE(review): both helpers are called as (labels from binary_df, predictions);
# presumably that is (y_true, y_pred) — confirm against utils.
gold_labels = binary_df['formal'].values

mdeberta_confusion_matrix = confusion_matrix(gold_labels, mdeberta_predicted_labels)
(
    mdeberta_accuracy,
    mdeberta_precision,
    mdeberta_recall,
    mdeberta_f1,
) = calculate_metrics(gold_labels, mdeberta_predicted_labels)

# Report the matrix, then the four scores rounded to four decimal places.
print("mDeBERTA Base Model Confusion Matrix:", mdeberta_confusion_matrix, sep="\n")

print("mDeBERTA Base Model Metrics:")
print(
    f"Accuracy: {mdeberta_accuracy:.4f}",
    f"Precision: {mdeberta_precision:.4f}",
    f"Recall: {mdeberta_recall:.4f}",
    f"F1 Score: {mdeberta_f1:.4f}",
    sep="\n",
)

### DeBERTa Large Model

In [None]:
# Score the DeBERTa large predictions against the binary 'formal' column.
# NOTE(review): both helpers are called as (labels from binary_df, predictions);
# presumably that is (y_true, y_pred) — confirm against utils.
gold_labels = binary_df['formal'].values

deberta_large_confusion_matrix = confusion_matrix(gold_labels, deberta_large_predicted_labels)
(
    deberta_large_accuracy,
    deberta_large_precision,
    deberta_large_recall,
    deberta_large_f1,
) = calculate_metrics(gold_labels, deberta_large_predicted_labels)

# Report the matrix, then the four scores rounded to four decimal places.
print("DeBERTa Large Model Confusion Matrix:", deberta_large_confusion_matrix, sep="\n")

print("DeBERTa Large Model Metrics:")
print(
    f"Accuracy: {deberta_large_accuracy:.4f}",
    f"Precision: {deberta_large_precision:.4f}",
    f"Recall: {deberta_large_recall:.4f}",
    f"F1 Score: {deberta_large_f1:.4f}",
    sep="\n",
)
