In [1]:
%load_ext autoreload
%autoreload 2
%env CUDA_VISIBLE_DEVICES=5

env: CUDA_VISIBLE_DEVICES=5


In [2]:
import torch
from tqdm import tqdm
from util import read_dataset
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import numpy as np
from sklearn.metrics import classification_report

In [3]:
if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())    
    print(f'We will use the GPU:{torch.cuda.get_device_name()} ({device})')

else:
    print('NO GPU AVAILABLE ERROR')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU:Tesla V100-SXM2-32GB (cuda)


In [4]:
training_file = "tam_sentiment"
print(f"Training file:{training_file}")
tam_labels_train, tam_texts_train = read_dataset(training_file+"_train.tsv")
tam_labels_dev, tam_texts_dev = read_dataset(training_file+"_dev.tsv")

Training file:tam_sentiment
Texts: 35656
Label names: Index(['Mixed_feelings', 'Negative', 'Positive', 'not-Tamil', 'unknown_state'], dtype='object')
Texts: 3962
Label names: Index(['Mixed_feelings', 'Negative', 'Positive', 'not-Tamil', 'unknown_state'], dtype='object')


In [5]:
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
model = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=5, output_attentions=True)
model.to(device)
optimizer = AdamW(model.parameters(), lr = 2e-5)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

In [6]:
def tokenize_input(texts):
    input_ids = []
    attention_masks = []

    for text in texts:
        encoded_dict = tokenizer.encode_plus(
                            text,            
                            add_special_tokens = True,
                            max_length = 512,
                            padding = 'max_length',
                            return_attention_mask = True,
                            truncation=True,
                            return_tensors = 'pt')
    
        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])

    return input_ids, attention_masks

In [7]:
train_input_ids, train_attention_masks =  tokenize_input(tam_texts_train)
val_input_ids, val_attention_masks =  tokenize_input(tam_texts_dev)

In [8]:
train_input_ids = torch.cat(train_input_ids, dim=0)
train_attention_masks = torch.cat(train_attention_masks, dim=0)
train_labels = torch.tensor(tam_labels_train, dtype=torch.long)

val_input_ids = torch.cat(val_input_ids, dim=0)
val_attention_masks = torch.cat(val_attention_masks, dim=0)
val_labels = torch.tensor(tam_labels_dev, dtype=torch.long)

train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
val_dataset = TensorDataset(val_input_ids, val_attention_masks, val_labels)

In [9]:
batch_size = 24

train_dataloader = DataLoader(
            train_dataset,
            sampler = RandomSampler(train_dataset),
            batch_size = batch_size)

validation_dataloader = DataLoader(
            val_dataset,
            sampler = SequentialSampler(val_dataset),
            batch_size = batch_size)

In [10]:
epochs = 4

total_steps = len(train_dataloader) * epochs

scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0,
                                            num_training_steps = total_steps)

In [11]:
for epoch_i in range(0, epochs):
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')
    total_train_loss = 0
    model.train()

    pbar = tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc='train')


    for step, batch in pbar:
        model.zero_grad()        

        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)


        outputs = model(input_ids=b_input_ids, attention_mask=b_input_mask, labels=b_labels)
        
        total_train_loss += outputs.loss.item()

        outputs.loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        avg_train_loss = total_train_loss / len(train_dataloader)
        mem = torch.cuda.memory_reserved(device)/1E9 if torch.cuda.is_available() else 0
        pbar.set_postfix(train_loss=f'{avg_train_loss:0.4f}',
                        gpu_mem=f'{mem:0.2f} GB')           

    print("Average training loss: {0:.2f}".format(avg_train_loss))
    print("Running Validation...")

    vbar = tqdm(enumerate(validation_dataloader), total=len(validation_dataloader), desc='valid')


    model.eval()
    total_eval_accuracy = 0
    total_eval_loss = 0
    nb_eval_steps = 0
    true_labels = []
    pred_labels = []
    for step, batch in vbar:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        with torch.no_grad(): 
            outputs = model(input_ids=b_input_ids, 
                                            attention_mask=b_input_mask,
                                            labels=b_labels)
            total_eval_loss += outputs.loss.item()
            logits = outputs.logits.detach().cpu().numpy().tolist()
            label_ids = b_labels.to('cpu').numpy().tolist()

            true_labels.extend(label_ids)
            pred_labels.extend(np.argmax(logits,axis=1))
        
    print(classification_report(pred_labels, true_labels))

train:   0%|          | 0/1486 [00:00<?, ?it/s]


Training...


train: 100%|██████████| 1486/1486 [18:28<00:00,  1.34it/s, gpu_mem=26.29 GB, train_loss=1.0569]
valid:   0%|          | 0/166 [00:00<?, ?it/s]

Average training loss: 1.06
Running Validation...


valid: 100%|██████████| 166/166 [00:41<00:00,  4.03it/s]
train:   0%|          | 0/1486 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.09      0.64      0.16        61
           1       0.31      0.49      0.38       302
           2       0.88      0.70      0.78      2826
           3       0.48      0.69      0.57       124
           4       0.44      0.42      0.43       649

    accuracy                           0.64      3962
   macro avg       0.44      0.59      0.46      3962
weighted avg       0.74      0.64      0.67      3962


Training...


train: 100%|██████████| 1486/1486 [18:30<00:00,  1.34it/s, gpu_mem=26.29 GB, train_loss=0.8977]
valid:   0%|          | 0/166 [00:00<?, ?it/s]

Average training loss: 0.90
Running Validation...


valid: 100%|██████████| 166/166 [00:41<00:00,  4.02it/s]
train:   0%|          | 0/1486 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.11      0.48      0.18       102
           1       0.30      0.52      0.38       275
           2       0.92      0.68      0.78      3019
           3       0.57      0.67      0.62       150
           4       0.35      0.52      0.42       416

    accuracy                           0.65      3962
   macro avg       0.45      0.58      0.48      3962
weighted avg       0.78      0.65      0.70      3962


Training...


train: 100%|██████████| 1486/1486 [18:30<00:00,  1.34it/s, gpu_mem=26.29 GB, train_loss=0.7738]
valid:   0%|          | 0/166 [00:00<?, ?it/s]

Average training loss: 0.77
Running Validation...


valid: 100%|██████████| 166/166 [00:41<00:00,  4.02it/s]
train:   0%|          | 0/1486 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.13      0.39      0.19       147
           1       0.46      0.44      0.45       493
           2       0.85      0.74      0.79      2599
           3       0.62      0.65      0.64       167
           4       0.42      0.46      0.44       556

    accuracy                           0.65      3962
   macro avg       0.49      0.54      0.50      3962
weighted avg       0.70      0.65      0.67      3962


Training...


train: 100%|██████████| 1486/1486 [18:30<00:00,  1.34it/s, gpu_mem=26.29 GB, train_loss=0.6658]
valid:   0%|          | 0/166 [00:00<?, ?it/s]

Average training loss: 0.67
Running Validation...


valid: 100%|██████████| 166/166 [00:41<00:00,  4.02it/s]

              precision    recall  f1-score   support

           0       0.18      0.36      0.24       222
           1       0.38      0.48      0.43       378
           2       0.86      0.73      0.79      2671
           3       0.59      0.66      0.62       157
           4       0.39      0.45      0.42       534

    accuracy                           0.64      3962
   macro avg       0.48      0.53      0.50      3962
weighted avg       0.70      0.64      0.67      3962






In [1]:
import torch
from tqdm import tqdm
from util import read_dataset, tokenize_input
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import numpy as np
from sklearn.metrics import classification_report
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AdamW, get_linear_schedule_with_warmup, XLMRobertaTokenizer, XLMRobertaForSequenceClassification
import os
from dataset import *


In [None]:
os.environ["CUDA_VISIBLE_DEVICES"]="5"

if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())    
    print(f'We will use the GPU:{torch.cuda.get_device_name()} ({device})')

else:
    print('NO GPU AVAILABLE ERROR')
    device = torch.device("cpu")

In [2]:
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-xlm-r-multilingual-v1')
model = AutoModelForSequenceClassification.from_pretrained("sentence-transformers/paraphrase-xlm-r-multilingual-v1", num_labels=5, output_attentions=True)
model.to(device)
optimizer = AdamW(model.parameters(), lr = 2e-5)

Some weights of the model checkpoint at sentence-transformers/paraphrase-xlm-r-multilingual-v1 were not used when initializing XLMRobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-xlm-r-multilingual-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_pro

NameError: name 'device' is not defined

In [None]:
tokenizer.