In [1]:
import torch
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import numpy as np
from sklearn.metrics import classification_report
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AdamW, get_linear_schedule_with_warmup, XLMRobertaTokenizer, XLMRobertaForSequenceClassification
import os
from dataset import MTL_Dataset
import transformers
from model import MultitaskModel
from data_trainer import *
from datasets import load_dataset

In [2]:
LEARNING_RATE = 3e-5

EPOCHS = 4
BATCH_SIZE = 24
os.environ["CUDA_VISIBLE_DEVICES"]="2,3,4"

if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())    
    print(f'We will use the GPU:{torch.cuda.get_device_name()} ({device})')

else:
    print('NO GPU AVAILABLE ERROR')
    device = torch.device("cpu")

There are 3 GPU(s) available.
We will use the GPU:Tesla V100-SXM2-32GB (cuda)


In [3]:
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-xlm-r-multilingual-v1')

model_name = "sentence-transformers/paraphrase-xlm-r-multilingual-v1"

In [4]:
multitask_model = MultitaskModel.create(
    model_name=model_name,
    model_type_dict={
        #"kan_sentiment": transformers.AutoModelForSequenceClassification,
        #"mal_sentiment": transformers.AutoModelForSequenceClassification,
        "tam_sentiment": transformers.AutoModelForSequenceClassification,
        # "eng_phobia": transformers.AutoModelForSequenceClassification,
         "tam_phobia": transformers.AutoModelForSequenceClassification,
        # "mal_phobia": transformers.AutoModelForSequenceClassification,
        # "eng_tam_phobia": transformers.AutoModelForSequenceClassification
    },
    model_config_dict={
        #"kan_sentiment": transformers.AutoConfig.from_pretrained(model_name, num_labels=5),
        #"mal_sentiment": transformers.AutoConfig.from_pretrained(model_name, num_labels=5),
        "tam_sentiment": transformers.AutoConfig.from_pretrained(model_name, num_labels=5),
        # "eng_phobia": transformers.AutoConfig.from_pretrained(model_name, num_labels=3),
         "tam_phobia": transformers.AutoConfig.from_pretrained(model_name, num_labels=3),
        # "mal_phobia": transformers.AutoConfig.from_pretrained(model_name, num_labels=3),
        # "eng_tam_phobia": transformers.AutoConfig.from_pretrained(model_name, num_labels=3)
    },
)

Some weights of the model checkpoint at sentence-transformers/paraphrase-xlm-r-multilingual-v1 were not used when initializing XLMRobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-xlm-r-multilingual-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_pro

In [5]:
dataset_dict = {
    'kan_sentiment': nlp.load_dataset('csv', delimiter='\t', data_files={'train': "../task_a/data/new_kan_train.tsv", 'test': "../task_a/data/kan_sentiment_dev.tsv"}),
    'mal_sentiment': nlp.load_dataset('csv', delimiter='\t', data_files={'train': "../task_a/data/new_mal_train.tsv", 'test': "../task_a/data/Mal_sentiment_dev.tsv"}),
    'tam_sentiment': nlp.load_dataset('csv', delimiter='\t', data_files={'train': "../task_a/data/new_tam_train.tsv", 'test': "../task_a/data/tam_sentiment_dev.tsv"}),

#    'eng_phobia': nlp.load_dataset('csv', delimiter='\t', data_files={'train': "../task_b/data/eng_3_train.tsv", 'test': "../task_b/data/eng_3_dev.tsv"}),
#    'tam_phobia': nlp.load_dataset('csv', delimiter='\t', data_files={'train': "../task_b/data/new_tam_train.tsv", 'test': "../task_b/data/tam_3_dev.tsv"}),
#    'mal_phobia': nlp.load_dataset('csv', delimiter='\t', data_files={'train': "../task_b/data/new_mal_train.tsv", 'test': "../task_b/data/mal_3_dev.tsv"}),
#    'eng_tam_phobia': nlp.load_dataset('csv', delimiter='\t', data_files={'train': "../task_b/data/new_eng_tam_train.tsv", 'test': "../task_b/data/eng-tam_3_dev.tsv"}),
}

Using custom data configuration default
Using custom data configuration default
Using custom data configuration default


In [6]:
def convert_to_mal(example_batch):
    features = {}
    features = tokenizer.batch_encode_plus(
                                    example_batch['text'],            
                                    add_special_tokens = True,
                                    max_length = 512,
                                    padding = 'max_length',
                                    return_attention_mask = True,
                                    truncation=True)
    new_labels = []
    for i in example_batch['category']:
        if i == "Positive":
            new_labels.append(0)
        elif i == "Negative":
            new_labels.append(1)
        elif i == "not-malayalam":
            new_labels.append(2)
        elif i == "unknown_state":
            new_labels.append(3)
        elif i == "Mixed_feelings":
            new_labels.append(4)
        else:
            print("Error", i, len(i))
    features["labels"] = new_labels
    return features
    
def convert_to_kan(example_batch):
    features = {}
    features = tokenizer.batch_encode_plus(
                                    example_batch['text'],            
                                    add_special_tokens = True,
                                    max_length = 512,
                                    padding = 'max_length',
                                    return_attention_mask = True,
                                    truncation=True)
    new_labels = []
    for i in example_batch['category']:
        if i == "Positive":
            new_labels.append(0)
        elif i == "Negative":
            new_labels.append(1)
        elif i == "not-Kannada":
            new_labels.append(2)
        elif i == "unknown state":
            new_labels.append(3)
        elif i == "Mixed feelings":
            new_labels.append(4)
        else:
            print("Error", i)

    features["labels"] = new_labels 
    return features

def convert_to_tam(example_batch):
    features = {}
    features = tokenizer.batch_encode_plus(
                                    example_batch['text'],            
                                    add_special_tokens = True,
                                    max_length = 512,
                                    padding = 'max_length',
                                    return_attention_mask = True,
                                    truncation=True)
    new_labels = []
    for i in example_batch['category']:
        if i == "Positive":
            new_labels.append(0)
        elif i == "Negative":
            new_labels.append(1)
        elif i == "not-Tamil":
            new_labels.append(2)
        elif i == "unknown_state":
            new_labels.append(3)
        elif i == "Mixed_feelings":
            new_labels.append(4)
        else:
            print("Error", i)

    features["labels"] = new_labels 
    return features

def convert_to_phobia(example_batch):
    features = {}
    features = tokenizer.batch_encode_plus(
                                    example_batch['text'],            
                                    add_special_tokens = True,
                                    max_length = 512,
                                    padding = 'max_length',
                                    return_attention_mask = True,
                                    truncation=True)
    new_labels = []
    for i in example_batch['category']:
        if i == "Non-anti-LGBT+ content":
            new_labels.append(0)
        elif i == "Homophobic":
            new_labels.append(1)
        elif i == "Transphobic":
            new_labels.append(2)
        else:
            print("Error", i)

    features["labels"] = new_labels 
    return features

In [7]:
convert_func_dict = {
    #"kan_sentiment": convert_to_kan,
    #"mal_sentiment": convert_to_mal,
    "tam_sentiment": convert_to_tam,
    # "eng_phobia": convert_to_phobia,
    "tam_phobia": convert_to_phobia,
    # "mal_phobia": convert_to_phobia,
    # "eng_tam_phobia": convert_to_phobia,
}

columns_dict = {
    #"kan_sentiment": ['input_ids', 'attention_mask', 'labels'],
    #"mal_sentiment": ['input_ids', 'attention_mask', 'labels'],
    "tam_sentiment": ['input_ids', 'attention_mask', 'labels'],
    
    # "eng_phobia": ['input_ids', 'attention_mask', 'labels'],
     "tam_phobia": ['input_ids', 'attention_mask', 'labels'],
    # "mal_phobia": ['input_ids', 'attention_mask', 'labels'],
    # "eng_tam_phobia": ['input_ids', 'attention_mask', 'labels'],
}


In [8]:
features_dict = {}
for task_name, dataset in dataset_dict.items():
    features_dict[task_name] = {}
    for phase, phase_dataset in dataset.items():
        features_dict[task_name][phase] = phase_dataset.map(
            convert_func_dict[task_name],
            batched=True,
            load_from_cache_file=False,
        )
        features_dict[task_name][phase].set_format(
            type="torch", 
            columns=columns_dict[task_name],
        )
        print(task_name, phase, len(phase_dataset), len(features_dict[task_name][phase]))

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))


kan_sentiment train 5951 5951


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


kan_sentiment test 691 691


HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))


mal_sentiment train 15726 15726


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


mal_sentiment test 1766 1766


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


tam_sentiment train 35575 35575


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


tam_sentiment test 3962 3962


In [9]:
train_dataset = {
		task_name: dataset["train"] for task_name, dataset in features_dict.items()
}

In [10]:
trainer = MultitaskTrainer(
    model=multitask_model,
    args=transformers.TrainingArguments(
        output_dir="output_trainer",
        overwrite_output_dir=True,
        learning_rate=1e-5,
        do_train=True,
        num_train_epochs=3,
        per_device_train_batch_size=32,
        save_steps=3000,
    ),
    data_collator=NLPDataCollator(),
    train_dataset=train_dataset,
)
trainer.train()

***** Running training *****
  Num examples = 57252
  Num Epochs = 3
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 96
  Gradient Accumulation steps = 1
  Total optimization steps = 1791


kan_sentiment 5951 96 <torch.utils.data.dataloader.DataLoader object at 0x7fb780011128>
mal_sentiment 15726 96 <torch.utils.data.dataloader.DataLoader object at 0x7fb400f76208>
tam_sentiment 35575 96 <torch.utils.data.dataloader.DataLoader object at 0x7fb400f76ba8>




Step,Training Loss
500,1.1431
1000,0.9554
1500,0.8971




Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=1791, training_loss=0.9765373668745202, metrics={'train_runtime': 2392.0489, 'train_samples_per_second': 71.803, 'train_steps_per_second': 0.749, 'total_flos': 4.581941053475635e+16, 'train_loss': 0.9765373668745202, 'epoch': 3.0})

In [1]:
preds_dict = {}
for task_name in ["tam_sentiment", "tam_phobia"]:
    print("Starting validation", task_name)
    eval_dataloader = DataLoaderWithTaskname(
        task_name,
        trainer.get_eval_dataloader(eval_dataset=features_dict[task_name]["test"])
    )
    print(eval_dataloader.data_loader.collate_fn)
    preds_dict[task_name] = trainer.evaluation_loop(
        eval_dataloader,
        description=f"Validation: {task_name}",
    )

Starting validation tam_sentiment


NameError: name 'DataLoaderWithTaskname' is not defined

In [41]:
from sklearn.metrics import classification_report

preds = np.argmax(preds_dict['mal_sentiment'].predictions ,axis=1)
ground_truth = features_dict['mal_sentiment']['test']['labels']

print("Mal Sentiment:\n", classification_report(preds, ground_truth))

Mal Sentiment:
               precision    recall  f1-score   support

           0       0.82      0.69      0.75       836
           1       0.50      0.52      0.51       226
           2       0.78      0.75      0.76       147
           3       0.69      0.72      0.70       557
           4       0.00      0.00      0.00         0

    accuracy                           0.68      1766
   macro avg       0.56      0.54      0.55      1766
weighted avg       0.74      0.68      0.71      1766



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [42]:
preds = np.argmax(preds_dict['tam_sentiment'].predictions ,axis=1)
ground_truth = features_dict['tam_sentiment']['test']['labels']

print("Tam Sentiment:\n", classification_report(preds, ground_truth))

Tam Sentiment:
               precision    recall  f1-score   support

           0       0.88      0.71      0.79      2784
           1       0.41      0.45      0.43       435
           2       0.48      0.64      0.55       133
           3       0.37      0.46      0.41       489
           4       0.12      0.44      0.19       121

    accuracy                           0.64      3962
   macro avg       0.45      0.54      0.47      3962
weighted avg       0.73      0.64      0.67      3962



In [43]:
preds = np.argmax(preds_dict['kan_sentiment'].predictions ,axis=1)
ground_truth = features_dict['kan_sentiment']['test']['labels']

print("Kan Sentiment:\n", classification_report(preds, ground_truth))

Kan Sentiment:
               precision    recall  f1-score   support

           0       0.78      0.68      0.73       366
           1       0.60      0.58      0.59       146
           2       0.75      0.62      0.67       133
           3       0.36      0.54      0.43        46
           4       0.00      0.00      0.00         0

    accuracy                           0.64       691
   macro avg       0.50      0.48      0.49       691
weighted avg       0.71      0.64      0.67       691



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
