In [1]:
from transformers import RobertaTokenizerFast, Trainer, TrainingArguments, RobertaForSequenceClassification
import numpy as np
import pandas as pd
from datasets import Dataset
from sklearn.metrics import classification_report
import evaluate

In [2]:
# Relative CSV paths of the three data splits.
TRAIN_DATA = '../Data/train.csv'
EVAL_DATA = '../Data/validate.csv'
TEST_DATA = '../Data/test.csv'

# Name of the pretrained checkpoint; the tokenizer is loaded from the same name.
MODEL = "roberta-base"
tokenizer = RobertaTokenizerFast.from_pretrained(MODEL)

In [3]:
# Show full text cells instead of truncating them in the rendered tables.
pd.set_option('display.max_colwidth', None)

# Read the three splits in the same order (train, eval, test) with the same encoding.
df_train, df_eval, df_test = (
    pd.read_csv(csv_path, encoding='latin')
    for csv_path in (TRAIN_DATA, EVAL_DATA, TEST_DATA)
)
df_train

Unnamed: 0,target,text
0,-1,my roomie called to inform me someone tried to break into our apartment when she was there today..awesome.
1,-1,i would of got the 16gb iphone but i didnt have the extra $100
2,-1,just stay home and boring day
3,1,been voting for the eu parliament and the heritage rules of the danish monarchy. then a spinning class. feel good about myself...
4,1,ooooh sbs2!! that's exciting and relevant to my media audiences research into psbs - look forward to checking it out
...,...,...
9995,1,my man has both sides..with me he's the sweetest... just dont f*ck with me cuz then theres troble.i'll take both sides please.
9996,-1,omg why is this weather so disgusting today???? looks like i'm going to have to pull out the rainboots
9997,-1,is watching the green mile... does not want john coffey to die
9998,1,am i one of the 1st 100 to tweet it? i'd really like to demo 2.0


In [4]:
def data_preprocess(df):
    """Return a copy of ``df`` with ``target`` recoded and renamed to ``label``.

    Values of -1 in ``target`` become 0; every other value is kept as is. The
    column is then renamed to ``label``. The input frame is not modified, so
    the result does not rely on an in-place edit made through ``df['target']``
    being propagated back to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``target`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with a ``label`` column in the position of ``target``.

    Raises
    ------
    KeyError
        If ``df`` has no ``target`` column.
    """
    recoded = df['target'].replace({-1: 0})
    # assign() keeps the column position and returns a new frame.
    return df.assign(target=recoded).rename(columns={'target': 'label'})

# Apply the same label recoding to every split, in train / eval / test order.
df_train, df_eval, df_test = map(data_preprocess, (df_train, df_eval, df_test))

df_train

Unnamed: 0,label,text
0,0,my roomie called to inform me someone tried to break into our apartment when she was there today..awesome.
1,0,i would of got the 16gb iphone but i didnt have the extra $100
2,0,just stay home and boring day
3,1,been voting for the eu parliament and the heritage rules of the danish monarchy. then a spinning class. feel good about myself...
4,1,ooooh sbs2!! that's exciting and relevant to my media audiences research into psbs - look forward to checking it out
...,...,...
9995,1,my man has both sides..with me he's the sweetest... just dont f*ck with me cuz then theres troble.i'll take both sides please.
9996,0,omg why is this weather so disgusting today???? looks like i'm going to have to pull out the rainboots
9997,0,is watching the green mile... does not want john coffey to die
9998,1,am i one of the 1st 100 to tweet it? i'd really like to demo 2.0


In [5]:
# Class balance of each split (train, eval, test), printed one after another.
for split_frame in (df_train, df_eval, df_test):
    print(split_frame['label'].value_counts())

0    5014
1    4986
Name: label, dtype: int64
0    1012
1     988
Name: label, dtype: int64
0    2521
1    2479
Name: label, dtype: int64


In [6]:
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of a batch with the module-level tokenizer.

    Truncation is enabled and padding uses the ``"max_length"`` strategy.

    NOTE(review): no explicit ``max_length`` is passed, so every example is
    presumably padded to the tokenizer's maximum length; confirm this is
    intended for short texts.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True, padding="max_length")
    return encoded

# Wrap each pandas split in a `Dataset`, keeping the train / eval / test order.
dataset_train, dataset_eval, dataset_test = (
    Dataset.from_pandas(frame) for frame in (df_train, df_eval, df_test)
)
dataset_train[0]

{'label': 0,
 'text': 'my roomie called to inform me someone tried to break into our apartment when she was there today..awesome. '}

In [7]:
# Tokenize every split in batches; the mapped datasets gain the tokenizer's output columns.
train_dataset, eval_dataset, test_dataset = (
    split.map(tokenize_function, batched=True)
    for split in (dataset_train, dataset_eval, dataset_test)
)
train_dataset

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Dataset({
    features: ['label', 'text', 'input_ids', 'attention_mask'],
    num_rows: 10000
})

In [8]:
# One combined metric object covering the four scores reported during evaluation.
metric_names = ["accuracy", "f1", "precision", "recall"]
metric = evaluate.combine(metric_names)

# Sequence-classification model loaded from the MODEL checkpoint with two labels.
model = RobertaForSequenceClassification.from_pretrained(MODEL, num_labels=2)

def compute_metrics(eval_pred):
    """Score a ``(logits, labels)`` pair with the module-level combined metric.

    The predicted class of each row is the argmax over the last axis of the
    logits; the result of ``metric.compute`` is returned unchanged.
    """
    raw_scores, references = eval_pred
    predicted_classes = np.asarray(raw_scores).argmax(axis=-1)
    return metric.compute(references=references, predictions=predicted_classes)



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

In [29]:
# Only the output directory and the evaluation schedule are set here;
# every other training argument keeps its library default.
training_args = TrainingArguments(
    evaluation_strategy="epoch",
    output_dir="test_trainer",
)

trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=eval_dataset,
    train_dataset=train_dataset,
)
trainer.train()



  0%|          | 0/3750 [00:00<?, ?it/s]

{'loss': 0.6128, 'learning_rate': 4.3333333333333334e-05, 'epoch': 0.4}
{'loss': 0.5213, 'learning_rate': 3.6666666666666666e-05, 'epoch': 0.8}


  0%|          | 0/250 [00:00<?, ?it/s]

{'eval_loss': 0.47290724515914917, 'eval_accuracy': 0.808, 'eval_f1': 0.8102766798418972, 'eval_precision': 0.7915057915057915, 'eval_recall': 0.8299595141700404, 'eval_runtime': 20.8341, 'eval_samples_per_second': 95.996, 'eval_steps_per_second': 12.0, 'epoch': 1.0}
{'loss': 0.4466, 'learning_rate': 3e-05, 'epoch': 1.2}
{'loss': 0.4322, 'learning_rate': 2.3333333333333336e-05, 'epoch': 1.6}
{'loss': 0.4301, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.0}


  0%|          | 0/250 [00:00<?, ?it/s]

{'eval_loss': 0.5027541518211365, 'eval_accuracy': 0.8325, 'eval_f1': 0.8261546445251687, 'eval_precision': 0.8477103301384451, 'eval_recall': 0.805668016194332, 'eval_runtime': 20.4716, 'eval_samples_per_second': 97.696, 'eval_steps_per_second': 12.212, 'epoch': 2.0}
{'loss': 0.3274, 'learning_rate': 1e-05, 'epoch': 2.4}
{'loss': 0.342, 'learning_rate': 3.3333333333333333e-06, 'epoch': 2.8}


  0%|          | 0/250 [00:00<?, ?it/s]

{'eval_loss': 0.5597289204597473, 'eval_accuracy': 0.8395, 'eval_f1': 0.8384499245093106, 'eval_precision': 0.8338338338338338, 'eval_recall': 0.8431174089068826, 'eval_runtime': 20.2336, 'eval_samples_per_second': 98.845, 'eval_steps_per_second': 12.356, 'epoch': 3.0}
{'train_runtime': 988.0571, 'train_samples_per_second': 30.363, 'train_steps_per_second': 3.795, 'train_loss': 0.43804378458658855, 'epoch': 3.0}


TrainOutput(global_step=3750, training_loss=0.43804378458658855, metrics={'train_runtime': 988.0571, 'train_samples_per_second': 30.363, 'train_steps_per_second': 3.795, 'train_loss': 0.43804378458658855, 'epoch': 3.0})

In [30]:
# Run inference on the tokenized test split with the current trainer.
predictions = trainer.predict(test_dataset=test_dataset)

  0%|          | 0/625 [00:00<?, ?it/s]

In [45]:
# Raw per-class logits for the test split, read through the named field.
logits = predictions.predictions

# Predicted class id per example and its human-readable name.
preds = logits.argmax(-1)
labels = pd.Series(preds).map({0: 'negative', 1: 'positive'})

# Confidence of the predicted class: the largest softmax probability per row.
# The row-wise maximum is subtracted before exponentiating so np.exp cannot
# overflow for large logits; the softmax value itself is unchanged by the shift.
shifted_logits = logits - logits.max(-1, keepdims=True)
exp_logits = np.exp(shifted_logits)
scores = (exp_logits / exp_logits.sum(-1, keepdims=True)).max(1)

In [47]:
# One row per test example: predicted class id, its text label and its score.
prediction_rows = list(zip(preds, labels, scores))
df = pd.DataFrame(prediction_rows, columns=['pred', 'label', 'score'])
df

Unnamed: 0,pred,label,score
0,0,negative,0.780179
1,1,positive,0.988955
2,1,positive,0.989522
3,0,negative,0.954995
4,0,negative,0.980910
...,...,...,...
4995,1,positive,0.986783
4996,1,positive,0.989224
4997,1,positive,0.962709
4998,0,negative,0.978101


In [48]:
# Reference labels come from the tokenized test split; predictions are the class ids in `preds`.
y_true, y_pred = test_dataset['label'], preds

In [49]:
# Per-class precision / recall / F1 on the test split, printed with four decimals.
report_text = classification_report(
    y_true,
    y_pred,
    digits=4,
    target_names=['negative','positive'],
)
print(report_text)

              precision    recall  f1-score   support

    negative       0.85      0.84      0.84      2521
    positive       0.84      0.85      0.84      2479

    accuracy                           0.84      5000
   macro avg       0.84      0.84      0.84      5000
weighted avg       0.84      0.84      0.84      5000



In [None]:
# Sweep run: the two hyperparameters this cell varies.
learning_rate = 0.0001
train_batch_size = 8

# Load the pretrained checkpoint again so this run starts from fresh weights.
model = RobertaForSequenceClassification.from_pretrained(MODEL, num_labels=2)
training_args = TrainingArguments(
    # Run-specific directory so this configuration's checkpoints are not
    # mixed with those written by other runs.
    output_dir=f"test_trainer/lr{learning_rate:g}_bs{train_batch_size}",
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Score the test split with the model trained in this cell.
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = test_dataset['label']

print(classification_report(y_true, y_pred, target_names=['negative','positive'], digits=4))

In [12]:
# Sweep run: the two hyperparameters this cell varies.
learning_rate = 0.00005
train_batch_size = 8

# Load the pretrained checkpoint again so this run starts from fresh weights.
model = RobertaForSequenceClassification.from_pretrained(MODEL, num_labels=2)
training_args = TrainingArguments(
    # Run-specific directory so this configuration's checkpoints are not
    # mixed with those written by other runs.
    output_dir=f"test_trainer/lr{learning_rate:g}_bs{train_batch_size}",
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Score the test split with the model trained in this cell.
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = test_dataset['label']

print(classification_report(y_true, y_pred, target_names=['negative','positive'], digits=4))

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

  0%|          | 0/3750 [00:00<?, ?it/s]

{'loss': 0.6128, 'learning_rate': 4.3333333333333334e-05, 'epoch': 0.4}
{'loss': 0.5213, 'learning_rate': 3.6666666666666666e-05, 'epoch': 0.8}


  0%|          | 0/250 [00:00<?, ?it/s]

{'eval_loss': 0.47290724515914917, 'eval_accuracy': 0.808, 'eval_f1': 0.8102766798418972, 'eval_precision': 0.7915057915057915, 'eval_recall': 0.8299595141700404, 'eval_runtime': 20.2973, 'eval_samples_per_second': 98.535, 'eval_steps_per_second': 12.317, 'epoch': 1.0}
{'loss': 0.4466, 'learning_rate': 3e-05, 'epoch': 1.2}
{'loss': 0.4322, 'learning_rate': 2.3333333333333336e-05, 'epoch': 1.6}
{'loss': 0.4301, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.0}


  0%|          | 0/250 [00:00<?, ?it/s]

{'eval_loss': 0.5027541518211365, 'eval_accuracy': 0.8325, 'eval_f1': 0.8261546445251687, 'eval_precision': 0.8477103301384451, 'eval_recall': 0.805668016194332, 'eval_runtime': 20.4844, 'eval_samples_per_second': 97.635, 'eval_steps_per_second': 12.204, 'epoch': 2.0}
{'loss': 0.3274, 'learning_rate': 1e-05, 'epoch': 2.4}
{'loss': 0.342, 'learning_rate': 3.3333333333333333e-06, 'epoch': 2.8}


  0%|          | 0/250 [00:00<?, ?it/s]

{'eval_loss': 0.5597289204597473, 'eval_accuracy': 0.8395, 'eval_f1': 0.8384499245093106, 'eval_precision': 0.8338338338338338, 'eval_recall': 0.8431174089068826, 'eval_runtime': 20.7411, 'eval_samples_per_second': 96.427, 'eval_steps_per_second': 12.053, 'epoch': 3.0}
{'train_runtime': 978.0621, 'train_samples_per_second': 30.673, 'train_steps_per_second': 3.834, 'train_loss': 0.43804378458658855, 'epoch': 3.0}
              precision    recall  f1-score   support

    negative       0.50      1.00      0.67      2521
    positive       0.00      0.00      0.00      2479

    accuracy                           0.50      5000
   macro avg       0.25      0.50      0.34      5000
weighted avg       0.25      0.50      0.34      5000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Sweep run: the two hyperparameters this cell varies.
learning_rate = 0.00001
train_batch_size = 8

# Load the pretrained checkpoint again so this run starts from fresh weights.
model = RobertaForSequenceClassification.from_pretrained(MODEL, num_labels=2)
training_args = TrainingArguments(
    # Run-specific directory so this configuration's checkpoints are not
    # mixed with those written by other runs.
    output_dir=f"test_trainer/lr{learning_rate:g}_bs{train_batch_size}",
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Score the test split with the model trained in this cell.
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = test_dataset['label']

print(classification_report(y_true, y_pred, target_names=['negative','positive'], digits=4))

In [None]:
# Sweep run: the two hyperparameters this cell varies.
learning_rate = 0.0001
train_batch_size = 6

# Load the pretrained checkpoint again so this run starts from fresh weights.
model = RobertaForSequenceClassification.from_pretrained(MODEL, num_labels=2)
training_args = TrainingArguments(
    # Run-specific directory so this configuration's checkpoints are not
    # mixed with those written by other runs.
    output_dir=f"test_trainer/lr{learning_rate:g}_bs{train_batch_size}",
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Score the test split with the model trained in this cell.
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = test_dataset['label']

print(classification_report(y_true, y_pred, target_names=['negative','positive'], digits=4))

In [None]:
# Sweep run: the two hyperparameters this cell varies.
learning_rate = 0.00005
train_batch_size = 6

# Load the pretrained checkpoint again so this run starts from fresh weights.
model = RobertaForSequenceClassification.from_pretrained(MODEL, num_labels=2)
training_args = TrainingArguments(
    # Run-specific directory so this configuration's checkpoints are not
    # mixed with those written by other runs.
    output_dir=f"test_trainer/lr{learning_rate:g}_bs{train_batch_size}",
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Score the test split with the model trained in this cell.
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = test_dataset['label']

print(classification_report(y_true, y_pred, target_names=['negative','positive'], digits=4))

In [None]:
# Sweep run: the two hyperparameters this cell varies.
learning_rate = 0.00001
train_batch_size = 6

# Load the pretrained checkpoint again so this run starts from fresh weights.
model = RobertaForSequenceClassification.from_pretrained(MODEL, num_labels=2)
training_args = TrainingArguments(
    # Run-specific directory so this configuration's checkpoints are not
    # mixed with those written by other runs.
    output_dir=f"test_trainer/lr{learning_rate:g}_bs{train_batch_size}",
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Score the test split with the model trained in this cell.
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = test_dataset['label']

print(classification_report(y_true, y_pred, target_names=['negative','positive'], digits=4))

In [None]:
# Sweep run: the two hyperparameters this cell varies.
learning_rate = 0.0001
train_batch_size = 10

# Load the pretrained checkpoint again so this run starts from fresh weights.
model = RobertaForSequenceClassification.from_pretrained(MODEL, num_labels=2)
training_args = TrainingArguments(
    # Run-specific directory so this configuration's checkpoints are not
    # mixed with those written by other runs.
    output_dir=f"test_trainer/lr{learning_rate:g}_bs{train_batch_size}",
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Score the test split with the model trained in this cell.
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = test_dataset['label']

print(classification_report(y_true, y_pred, target_names=['negative','positive'], digits=4))

In [13]:
# Sweep run: the two hyperparameters this cell varies.
learning_rate = 0.00005
train_batch_size = 10

# Load the pretrained checkpoint again so this run starts from fresh weights.
model = RobertaForSequenceClassification.from_pretrained(MODEL, num_labels=2)
training_args = TrainingArguments(
    # Run-specific directory so this configuration's checkpoints are not
    # mixed with those written by other runs.
    output_dir=f"test_trainer/lr{learning_rate:g}_bs{train_batch_size}",
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Score the test split with the model trained in this cell.
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = test_dataset['label']

print(classification_report(y_true, y_pred, target_names=['negative','positive'], digits=4))

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

  0%|          | 0/3000 [00:00<?, ?it/s]

{'loss': 0.5824, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.5}
{'loss': 0.5006, 'learning_rate': 3.3333333333333335e-05, 'epoch': 1.0}


  0%|          | 0/250 [00:00<?, ?it/s]

{'eval_loss': 0.5520062446594238, 'eval_accuracy': 0.804, 'eval_f1': 0.8140417457305503, 'eval_precision': 0.7660714285714286, 'eval_recall': 0.868421052631579, 'eval_runtime': 20.8512, 'eval_samples_per_second': 95.918, 'eval_steps_per_second': 11.99, 'epoch': 1.0}
{'loss': 0.4286, 'learning_rate': 2.5e-05, 'epoch': 1.5}
{'loss': 0.3769, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.0}


  0%|          | 0/250 [00:00<?, ?it/s]

{'eval_loss': 0.4454420804977417, 'eval_accuracy': 0.827, 'eval_f1': 0.8214654282765737, 'eval_precision': 0.8378947368421052, 'eval_recall': 0.805668016194332, 'eval_runtime': 20.3518, 'eval_samples_per_second': 98.271, 'eval_steps_per_second': 12.284, 'epoch': 2.0}
{'loss': 0.294, 'learning_rate': 8.333333333333334e-06, 'epoch': 2.5}
{'loss': 0.3045, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/250 [00:00<?, ?it/s]

{'eval_loss': 0.542754054069519, 'eval_accuracy': 0.8345, 'eval_f1': 0.8298200514138818, 'eval_precision': 0.8432601880877743, 'eval_recall': 0.8168016194331984, 'eval_runtime': 20.5197, 'eval_samples_per_second': 97.467, 'eval_steps_per_second': 12.183, 'epoch': 3.0}
{'train_runtime': 982.8651, 'train_samples_per_second': 30.523, 'train_steps_per_second': 3.052, 'train_loss': 0.4145046234130859, 'epoch': 3.0}


  0%|          | 0/625 [00:00<?, ?it/s]

              precision    recall  f1-score   support

    negative       0.83      0.85      0.84      2521
    positive       0.84      0.82      0.83      2479

    accuracy                           0.84      5000
   macro avg       0.84      0.84      0.84      5000
weighted avg       0.84      0.84      0.84      5000



In [None]:
# Sweep run: the two hyperparameters this cell varies.
learning_rate = 0.00001
train_batch_size = 10

# Load the pretrained checkpoint again so this run starts from fresh weights.
model = RobertaForSequenceClassification.from_pretrained(MODEL, num_labels=2)
training_args = TrainingArguments(
    # Run-specific directory so this configuration's checkpoints are not
    # mixed with those written by other runs.
    output_dir=f"test_trainer/lr{learning_rate:g}_bs{train_batch_size}",
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Score the test split with the model trained in this cell.
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = test_dataset['label']

print(classification_report(y_true, y_pred, target_names=['negative','positive'], digits=4))