In [1]:
from datasets import load_dataset
import string
from datasets import load_dataset, DatasetDict , load_from_disk
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load SNLI with its original punctuation, caching the download locally.
# The cache path is a raw string: in a normal literal the backslash pairs
# `\l`, `\d` and `\p` are invalid escape sequences, which Python reports with
# a warning and plans to reject outright. The resulting path is unchanged.
ds_punct = load_dataset("stanfordnlp/snli", cache_dir=r"D:\language resource\dataset\punctuation")

In [3]:
# Translation table that deletes every character in string.punctuation.
# Built once here instead of on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def remove_punctuation(text):
    """Return ``text`` with all ASCII punctuation characters removed.

    Args:
        text: The string to clean, or ``None``.

    Returns:
        The cleaned string. ``None`` yields ``""`` so that missing values do
        not raise, matching the later redefinition of this helper in the
        sentiment section of the notebook.
    """
    if text is None:
        return ""
    return text.translate(_PUNCTUATION_TABLE)

In [4]:
# Location of the punctuation-free copy of the dataset on disk.
NO_PUNCT_DIR = r"D:\language resource\dataset\no_punctuation"


def _strip_pair(example):
    """Return the premise/hypothesis of one example without punctuation.

    The label is passed through unchanged.
    """
    return {
        'premise': remove_punctuation(example['premise']),
        'hypothesis': remove_punctuation(example['hypothesis']),
        'label': example['label'],
    }


# Apply the stripping to each of the three splits.
ds_no_punct = DatasetDict({
    split_name: ds_punct[split_name].map(_strip_pair)
    for split_name in ('train', 'validation', 'test')
})

# Write the stripped copy to disk, then rebind the name to the on-disk version.
ds_no_punct.save_to_disk(NO_PUNCT_DIR)
ds_no_punct = load_from_disk(NO_PUNCT_DIR)

Saving the dataset (1/1 shards): 100%|██████████| 550152/550152 [00:00<00:00, 2451110.39 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 10000/10000 [00:00<00:00, 1185903.64 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 10000/10000 [00:00<00:00, 1368451.55 examples/s]


In [5]:
# Checkpoint shared by the pipeline, the tokenizer and the bare model below.
NLI_CHECKPOINT = "agentlans/all-MiniLM-L6-v2-nli"

pipe = pipeline("text-classification", model=NLI_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(NLI_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(NLI_CHECKPOINT)

Device set to use cuda:0


In [6]:
# Class names indexed by the model's output logit position.
label_mapping = ["entailment", "neutral", "contradiction"]


def predict(premises, hypotheses, batch_size=64):
    """Predict an NLI label for each (premise, hypothesis) pair.

    Uses the module-level ``tokenizer`` and ``model``. Pairs are processed in
    mini-batches so that a large input is not padded and run as one tensor.

    Args:
        premises: A list of premise strings, or a single string.
        hypotheses: A list of hypothesis strings, or a single string, aligned
            with ``premises``.
        batch_size: Number of pairs per forward pass (default 64).

    Returns:
        A list of label names taken from ``label_mapping``, one per pair, in
        input order. Empty input returns ``[]``.

    Raises:
        ValueError: If ``premises`` and ``hypotheses`` differ in length.
    """
    # A bare string is one pair, not a sequence of characters.
    premises = [premises] if isinstance(premises, str) else list(premises)
    hypotheses = [hypotheses] if isinstance(hypotheses, str) else list(hypotheses)

    if len(premises) != len(hypotheses):
        raise ValueError(
            f"premises and hypotheses must have the same length "
            f"(got {len(premises)} and {len(hypotheses)})"
        )
    if not premises:
        return []

    # Keep the inputs on whatever device the model currently lives on.
    device = model.device
    predicted_ids = []
    for start in range(0, len(premises), batch_size):
        stop = start + batch_size
        encoded_input = tokenizer(
            premises[start:stop],
            hypotheses[start:stop],
            padding=True,
            truncation=True,
            return_tensors="pt"
        ).to(device)
        with torch.no_grad():
            outputs = model(**encoded_input)
        # .tolist() yields plain ints, so label_mapping is not indexed by tensors.
        predicted_ids.extend(outputs.logits.argmax(dim=1).tolist())

    return [label_mapping[i] for i in predicted_ids]

In [16]:
N = 3000  # number of validation pairs to evaluate

# Slice each dataset once instead of fetching rows one at a time.
# A slice returns a dict mapping column name -> list of values.
val_punct = ds_punct['validation'][:N]
val_no = ds_no_punct['validation'][:N]

prem_punct = val_punct['premise']
hyp_punct = val_punct['hypothesis']

prem_no = val_no['premise']
hyp_no = val_no['hypothesis']

# Predictions on the original text and on the punctuation-free text.
labels_punct = predict(prem_punct, hyp_punct)

labels_no = predict(prem_no, hyp_no)

In [None]:
# Flip rate: share of pairs whose predicted label changes once punctuation is
# removed. The denominator is the number of predictions actually produced,
# not the global N, which a later cell reassigns.
n_compared = len(labels_punct)
flip_count = sum(l1 != l2 for l1, l2 in zip(labels_punct, labels_no))
flip_rate = flip_count / n_compared
print(f"Flip Rate: {flip_rate:.2f}")

label_mapping = ["entailment", "neutral", "contradiction"]

labels_true_int = ds_punct['validation'][:n_compared]['label']

# SNLI marks pairs without a gold label with -1. Indexing label_mapping with
# -1 would silently score them as "contradiction", so only pairs with a valid
# class id take part in the metrics.
scored_idx = [i for i, l in enumerate(labels_true_int) if 0 <= l < len(label_mapping)]
labels_true = [label_mapping[labels_true_int[i]] for i in scored_idx]
scored_punct = [labels_punct[i] for i in scored_idx]
scored_no = [labels_no[i] for i in scored_idx]

acc_punct = accuracy_score(labels_true, scored_punct)
acc_no = accuracy_score(labels_true, scored_no)

precision_punct, recall_punct, f1_punct, _ = precision_recall_fscore_support(labels_true, scored_punct, average='macro')
precision_no, recall_no, f1_no, _ = precision_recall_fscore_support(labels_true, scored_no, average='macro')

print(f"Accuracy: punct={acc_punct:.2f}, no punct={acc_no:.2f}")
print(f"F1: punct={f1_punct:.2f}, no punct={f1_no:.2f}")

Accuracy: punct=0.84, no punct=0.83
F1: punct=0.84, no punct=0.83


In [None]:
"""
N = 100
Flip Rate: 0.03
Accuracy: punct=0.86, no punct=0.87
F1: punct=0.86, no punct=0.87
N = 1000
Flip Rate: 0.04
Accuracy: punct=0.83, no punct=0.82
F1: punct=0.83, no punct=0.82
N = 2000
Flip Rate: 0.04
Accuracy: punct=0.84, no punct=0.83
F1: punct=0.84, no punct=0.83
N = 3000
"""

In [6]:
# Sentiment dataset with its original punctuation, fetched by hub id.
SENTIMENT_DATASET_ID = "Sp1786/multiclass-sentiment-analysis-dataset"
ds_sentiment_classification_punct = load_dataset(SENTIMENT_DATASET_ID)

In [7]:
def remove_punctuation(text):
    """Delete every ``string.punctuation`` character from ``text``.

    A ``None`` input is treated as missing text and produces ``""``.
    """
    cleaned = ""
    if text is not None:
        deletion_table = str.maketrans('', '', string.punctuation)
        cleaned = text.translate(deletion_table)
    return cleaned

def _strip_text(example):
    """Return the example's 'text' field with punctuation removed."""
    return {'text': remove_punctuation(example['text'])}


# Punctuation-free counterpart of every split of the sentiment dataset.
ds_sentiment_classification_no_punct = DatasetDict({
    split_name: ds_sentiment_classification_punct[split_name].map(_strip_text)
    for split_name in ('train', 'validation', 'test')
})

In [8]:
# Checkpoint shared by the sentiment pipeline, tokenizer and bare model.
SENTIMENT_CHECKPOINT = "delarosajav95/tw-roberta-base-sentiment-FT-v2"

pipe_sentiment_classification = pipeline("text-classification", model=SENTIMENT_CHECKPOINT)
tokenizer_sentiment_classification = AutoTokenizer.from_pretrained(SENTIMENT_CHECKPOINT)
model_sentiment_classification = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_CHECKPOINT)

Device set to use cuda:0


In [9]:
# Maps the pipeline's output label strings to integer class ids.
label_map = {
    "LABEL_0": 0,  # negative
    "LABEL_1": 1,  # neutral
    "LABEL_2": 2   # positive
}


def predict_sentiment(texts):
    """Predict an integer sentiment class for each input text.

    Runs the module-level ``pipe_sentiment_classification`` and converts its
    label strings through ``label_map``.

    Args:
        texts: A list of strings, or a single string. ``None`` entries are
            treated as empty text.

    Returns:
        A list of integer class ids, one per text, in input order. Empty
        input returns ``[]`` without calling the pipeline.

    Raises:
        KeyError: If the pipeline emits a label that is not in ``label_map``.
    """
    # A bare string is one text, not a sequence of characters.
    if isinstance(texts, str):
        texts = [texts]

    # remove_punctuation turns a missing text into ""; do the same here so the
    # punctuated and punctuation-free inputs are handled alike.
    texts = ["" if t is None else t for t in texts]
    if not texts:
        return []

    outputs = pipe_sentiment_classification(
        texts,
        truncation=True,
        padding=True
    )
    return [label_map[o["label"]] for o in outputs]

In [18]:
N = 100  # number of test examples to evaluate

# Slice each dataset once instead of fetching rows one at a time.
# A slice returns a dict mapping column name -> list of values.
test_punct = ds_sentiment_classification_punct["test"][:N]
test_no_punct = ds_sentiment_classification_no_punct["test"][:N]

texts_punct = test_punct["text"]

texts_no_punct = test_no_punct["text"]

# Labels are read from the punctuated dataset.
labels_true = test_punct["label"]


In [19]:
# Classify the original texts first, then their punctuation-free versions.
labels_pred_punct, labels_pred_no = [
    predict_sentiment(batch) for batch in (texts_punct, texts_no_punct)
]

In [20]:
# Fraction of texts whose predicted class differs between the two variants.
preds_with_punct = np.asarray(labels_pred_punct)
preds_without_punct = np.asarray(labels_pred_no)
flip_rate = (preds_with_punct != preds_without_punct).mean()

print(f"Flip Rate: {flip_rate:.2f}")


Flip Rate: 0.04


In [21]:
# Scores for predictions on the original (punctuated) texts.
acc_punct = accuracy_score(labels_true, labels_pred_punct)
p_p, r_p, f1_p, _ = precision_recall_fscore_support(
    labels_true, labels_pred_punct, average="macro"
)

# Scores for predictions on the punctuation-free texts.
acc_no = accuracy_score(labels_true, labels_pred_no)
p_n, r_n, f1_n, _ = precision_recall_fscore_support(
    labels_true, labels_pred_no, average="macro"
)

for metric_line in (
    f"Accuracy: punct={acc_punct:.3f}, no_punct={acc_no:.3f}",
    f"F1: punct={f1_p:.3f}, no_punct={f1_n:.3f}",
):
    print(metric_line)

Accuracy: punct=0.840, no_punct=0.810
F1: punct=0.840, no_punct=0.811


In [None]:
"""
Flip Rate: 0.05
Accuracy: punct=0.852, no_punct=0.830
F1: punct=0.854, no_punct=0.832
"""