This notebook runs the fine-tuned DistilBERT model on the Ott et al. dataset and computes evaluation metrics (accuracy, precision, recall, and F1) from its predictions. Main reference: https://huggingface.co/docs/transformers/tasks/sequence_classification

In [None]:
# Attach Google Drive to this Colab runtime; the dataset CSV is read from under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
!pip install transformers datasets torch sentencepiece

Collecting transformers
  Downloading transformers-4.33.2-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m68.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.14.5-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m47.2 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m77.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.17.2-py3-none-any.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.9/294.9 kB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-

In [None]:
# Interactive Hugging Face Hub login: prompts for an access token and saves it in the local cache.
# NOTE(review): presumably only required if the model repo is private or gated — confirm.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Loading the model:

In [None]:
import pandas as pd
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    pipeline
)
from datasets import load_dataset, load_metric
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Checkpoints: the fine-tuned review classifier and the tokenizer it is paired with.
model_name = "dbauer1860/distilbert-base-fake-hotel-review-detector"
tokenizer_checkpoint = "distilbert-base-uncased"

# NOTE(review): the tokenizer is loaded from the base checkpoint rather than from
# `model_name` — presumably the fine-tuned model shares its vocabulary; confirm.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [None]:
# Bundle the loaded model and tokenizer into a text-classification pipeline,
# which is what the prediction cell calls on the review texts.
classifier = pipeline(
    task='text-classification',
    model=model,
    tokenizer=tokenizer,
)

# Ott et al. Dataset

In [None]:
# Read the processed Ott et al. reviews from the mounted Drive.
# The file is semicolon-delimited; its rows are accessed under the 'train' key below.
OTT_CSV_PATH = "/content/drive/MyDrive/data/ott_et_al_processed.csv"
ott_dataset = load_dataset("csv", data_files=OTT_CSV_PATH, delimiter=";")

In [None]:
# Display the dataset summary (feature names and row count) as the cell's output.
ott_dataset['train']

Dataset({
    features: ['text', 'label'],
    num_rows: 1600
})

In [None]:
# Classify every review text; inputs are truncated to at most 512 tokens.
review_texts = ott_dataset['train']['text']
predictions = classifier(review_texts, truncation=True, max_length=512)

In [None]:
# Preview a handful of predictions instead of printing one line per review
# (the full dump floods the notebook output), then summarise how often each
# label was predicted so a skewed classifier is visible at a glance.
from collections import Counter

PREVIEW_ROWS = 10  # number of individual predictions to print

for prediction in predictions[:PREVIEW_ROWS]:
    print(f"Label: {prediction['label']}, Score: {prediction['score']}")

print(f"... showing {min(PREVIEW_ROWS, len(predictions))} of {len(predictions)} predictions")
print("Predicted label counts:", dict(Counter(p['label'] for p in predictions)))

Label: REAL, Score: 0.9872697591781616
Label: REAL, Score: 0.9990527033805847
Label: REAL, Score: 0.9990529417991638
Label: REAL, Score: 0.9991476535797119
Label: REAL, Score: 0.99893718957901
Label: REAL, Score: 0.999082088470459
Label: REAL, Score: 0.9990625977516174
Label: REAL, Score: 0.9989809393882751
Label: REAL, Score: 0.9990137815475464
Label: REAL, Score: 0.9990899562835693
Label: REAL, Score: 0.9990819692611694
Label: REAL, Score: 0.9990787506103516
Label: REAL, Score: 0.9989707469940186
Label: REAL, Score: 0.9989302754402161
Label: REAL, Score: 0.9986068606376648
Label: REAL, Score: 0.9991331696510315
Label: REAL, Score: 0.9984367489814758
Label: REAL, Score: 0.9983528852462769
Label: REAL, Score: 0.9990696310997009
Label: REAL, Score: 0.9990260601043701
Label: REAL, Score: 0.9988837838172913
Label: REAL, Score: 0.9966962337493896
Label: REAL, Score: 0.9990768432617188
Label: REAL, Score: 0.9990221261978149
Label: REAL, Score: 0.9989903569221497
Label: REAL, Score: 0.998934

In [None]:
# Split each pipeline result into two parallel lists: the predicted label
# string and the score reported alongside it.
predicted_labels = []
scores = []
for result in predictions:
    predicted_labels.append(result['label'])
    scores.append(result['score'])

In [None]:
# Ground-truth labels are read straight from the dataset's 'label' column.
true_labels = ott_dataset['train']['label']

# Translate the pipeline's string labels into integers so they can be compared
# with the ground truth.
# NOTE(review): assumes the CSV encodes fake as 0 and real as 1 — confirm against the data.
label_dict = {'FAKE': 0, 'REAL': 1}
predicted_labels_numeric = []
for label in predicted_labels:
    predicted_labels_numeric.append(label_dict[label])

In [None]:
# Score the predictions against the ground-truth labels.
# average='binary' reports precision/recall/F1 for the positive class only, which
# scikit-learn takes to be label 1 by default (REAL in label_dict). pos_label is
# spelled out here so that choice is visible to the reader.
from sklearn.metrics import confusion_matrix

POSITIVE_LABEL = 1

accuracy = accuracy_score(true_labels, predicted_labels_numeric)
precision = precision_score(
    true_labels, predicted_labels_numeric, average='binary', pos_label=POSITIVE_LABEL
)
recall = recall_score(
    true_labels, predicted_labels_numeric, average='binary', pos_label=POSITIVE_LABEL
)
f1 = f1_score(
    true_labels, predicted_labels_numeric, average='binary', pos_label=POSITIVE_LABEL
)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# The confusion matrix exposes per-class behaviour that the positive-class-only
# metrics hide (e.g. a model that predicts one label for nearly every input).
label_confusion = confusion_matrix(true_labels, predicted_labels_numeric, labels=[0, 1])
print("Confusion matrix (rows = true label, columns = predicted label; order: 0, 1):")
print(label_confusion)

Accuracy: 0.50375
Precision: 0.5018844221105527
Recall: 0.99875
F1 Score: 0.6680602006688964
