In [21]:
!pip install torch torchvision torchaudio
!pip install transformers
!pip install git-lfs
!pip install evaluate
!pip install accelerate
!pip install --upgrade scikit-learn
# User must have git-lsf installed

In [22]:
from transformers import pipeline
import torch
from transformers import AutoTokenizer
import pandas as pd
from datasets import Dataset
from transformers import DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification
import numpy as np
import evaluate
from huggingface_hub import notebook_login,create_repo
from transformers import TrainingArguments, Trainer
import os
import sklearn

In [23]:
torch.cuda.is_available()

True

In [24]:
df_dataset = pd.read_csv("data/tweets_formatted.csv")
df_dataset = df_dataset[["text", "airline_sentiment"]]
df_dataset = df_dataset.rename(columns={"text": "text", "airline_sentiment": "label"})
df_dataset = df_dataset.replace({'label': {"negative": 0, "neutral": 1, "positive": 2}})

df_train = df_dataset.sample(frac = 0.75)
df_test = df_dataset.drop(df_train.index)


train_dict = df_train.to_dict("list")
test_dict = df_test.to_dict("list")

train_dataset = Dataset.from_dict(train_dict)
test_dataset = Dataset.from_dict(test_dict)



print(len(train_dataset))
print(len(test_dataset))
train_dataset[0]

{'text': '@AmericanAir Thanks to AA for the upgrade today and getting me on a new flight after my first one was Cancelled Flightled!',
 'label': 2}

In [38]:
small_train_dataset = train_dataset#.shuffle(seed=1).select([i for i in list(range(3000))])
small_test_dataset = test_dataset#.shuffle(seed=1).select([i for i in list(range(300))])

In [107]:
tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")

Downloading (…)lve/main/config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [108]:
def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True)
 
tokenized_train = small_train_dataset.map(preprocess_function, batched=True)
tokenized_test = small_test_dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/8417 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Map:   0%|          | 0/2806 [00:00<?, ? examples/s]

In [109]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [110]:
model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest", num_labels=3, 
                                                           id2label={
                                                                "0": "negative",
                                                                "1": "neutral",
                                                                "2": "positive"
                                                           })

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [111]:
def compute_metrics(eval_pred):
    load_accuracy = evaluate.load("accuracy")
    load_f1 = evaluate.load("f1")
  
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = load_accuracy.compute(predictions=predictions, references=labels)
    f1 = load_f1.compute(predictions=predictions, references=labels, average="weighted")
    return {"accuracy": accuracy, "f1": f1}

In [44]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [114]:
os.environ["TOKENIZERS_PARALLELISM"] = "false"
repo_name = "ma_wme_sentiment_analysis_BERT"
#create_repo(repo_name, private=True)

training_args = TrainingArguments(
   output_dir=repo_name,
   learning_rate=1e-5,
   per_device_train_batch_size=16,
   per_device_eval_batch_size=16,
   num_train_epochs=2,
   weight_decay=0.01,
   save_strategy="epoch",
   push_to_hub=True,
)
 
trainer = Trainer(
   model=model,
   args=training_args,
   train_dataset=tokenized_train,
   eval_dataset=tokenized_test,
   tokenizer=tokenizer,
   data_collator=data_collator,
   compute_metrics=compute_metrics,
)

/data1/home/mael.vial/ma_wme/projet/ml/ma_wme_sentiment_analysis_BERT is already a clone of https://huggingface.co/mael110/ma_wme_sentiment_analysis_BERT. Make sure you pull the latest changes with `repo.git_pull()`.


In [115]:
trainer.train()



Step,Training Loss
500,0.379




TrainOutput(global_step=528, training_loss=0.3770296609762943, metrics={'train_runtime': 211.6012, 'train_samples_per_second': 79.555, 'train_steps_per_second': 2.495, 'total_flos': 403160159354274.0, 'train_loss': 0.3770296609762943, 'epoch': 2.0})

In [116]:
trainer.evaluate()



{'eval_loss': 0.3352523148059845,
 'eval_accuracy': {'accuracy': 0.8763364219529579},
 'eval_f1': {'f1': 0.8761547472066853},
 'eval_runtime': 7.3663,
 'eval_samples_per_second': 380.923,
 'eval_steps_per_second': 11.946,
 'epoch': 2.0}

# Test with new data

In [117]:
from transformers import pipeline

In [118]:
trainer.push_to_hub()

Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
To https://huggingface.co/mael110/ma_wme_sentiment_analysis_BERT
   410aba3..1a6a2ea  main -> main



In [119]:
sentiment_model = pipeline("text-classification", model="ma_wme_sentiment_analysis")
sentiment_model(["I flew United last month and the experience was AWESOME!", "is flight 587 from DFW to ORD currently on-time?", "@united AND my luggage has been broken!! #youcouldntmakethis up #brokenwheel"])

[{'label': 'positive', 'score': 0.9289741516113281},
 {'label': 'neutral', 'score': 0.8915866613388062},
 {'label': 'negative', 'score': 0.9768679141998291}]