In [1]:
%cd /scratch/mt/ashapiro/Hate_Speech/Multitask_trial/

/scratch/mt/ashapiro/Hate_Speech/Multitask_trial


In [7]:
%ls Data/

[0m[01;34mmultitask_data[0m/  [01;34mtrain[0m/             trainA_prepro_extra.csv  trainC.csv
[01;34mtest[0m/            trainA.csv         trainB.csv               train.txt
test.txt         trainA_prepro.csv  trainB_prepro.csv


In [2]:
import numpy as np
import torch
import torch.nn as nn
import transformers
import nlp
import logging
from datasets import load_dataset
from model import * 
logging.basicConfig(level=logging.INFO)

## Preparing Data

In [3]:
task_names = ['offensive', 'hatespeech', 'hatespeech_classes']

In [9]:
dataset_dict = {
    "offensive": load_dataset("csv", data_files={'train': "Data/trainA_prepro.csv", 'test': "Data/test/testA_prepro.csv" } ),
    "hatespeech": load_dataset("csv", data_files={'train': "Data/trainB_prepro.csv", 'test': "Data/test/testB_prepro.csv" } ),
    "hatespeech_classes": load_dataset("csv", data_files={'train': "Data/train/trainC_prepro.csv", 'test': "Data/test/testC_prepro.csv" } ),
}

100%|██████████| 2/2 [00:00<00:00, 328.36it/s]
100%|██████████| 2/2 [00:00<00:00, 361.14it/s]
100%|██████████| 2/2 [00:00<00:00, 202.80it/s]


## Setting Model

In [10]:
model_name = "/scratch/mt/ashapiro/Hate_Speech/Models/Marbertv2/"
multitask_model = MultitaskModel.create(
    model_name=model_name,
    model_type_dict={
        "offensive": transformers.AutoModelForSequenceClassification,
        "hatespeech": transformers.AutoModelForSequenceClassification,
        "hatespeech_classes": transformers.AutoModelForSequenceClassification,
    },
    model_config_dict={
        "offensive": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
        "hatespeech": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
        "hatespeech_classes": transformers.AutoConfig.from_pretrained(model_name, num_labels=7),
    },
)

In [11]:
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

In [12]:
max_length = 512

def convert_to_features(example_batch):
    inputs = list(example_batch['text'])
    features = tokenizer.batch_encode_plus(
        inputs, max_length=max_length, pad_to_max_length=True
    )
    features["labels"] = example_batch["labels"]
    return features

convert_func_dict = {
    "offensive": convert_to_features,
    "hatespeech": convert_to_features,
    "hatespeech_classes": convert_to_features,
}

In [13]:
columns_dict = {
    "offensive": ['input_ids', 'attention_mask', 'labels'],
    "hatespeech": ['input_ids', 'attention_mask', 'labels'],
    "hatespeech_classes": ['input_ids', 'attention_mask', 'labels'],
}

features_dict = {}
for task_name, dataset in dataset_dict.items():
    features_dict[task_name] = {}
    for phase, phase_dataset in dataset.items():
        features_dict[task_name][phase] = phase_dataset.map(
            convert_func_dict[task_name],
            batched=True,
            load_from_cache_file=False,
        )
        print(task_name, phase, len(phase_dataset), len(features_dict[task_name][phase]))
        features_dict[task_name][phase].set_format(
            type="torch", 
            columns=columns_dict[task_name],
        )
        print(task_name, phase, len(phase_dataset), len(features_dict[task_name][phase]))

100%|██████████| 9/9 [00:03<00:00,  2.38ba/s]


offensive train 8887 8887
offensive train 8887 8887


100%|██████████| 2/2 [00:00<00:00,  4.15ba/s]


offensive test 1270 1270
offensive test 1270 1270


100%|██████████| 9/9 [00:03<00:00,  2.29ba/s]


hatespeech train 8887 8887
hatespeech train 8887 8887


100%|██████████| 2/2 [00:00<00:00,  4.12ba/s]


hatespeech test 1270 1270
hatespeech test 1270 1270


100%|██████████| 9/9 [00:03<00:00,  2.37ba/s]


hatespeech_classes train 8887 8887
hatespeech_classes train 8887 8887


100%|██████████| 2/2 [00:00<00:00,  4.05ba/s]

hatespeech_classes test 1270 1270
hatespeech_classes test 1270 1270





In [9]:
eval_dataset = {
    task_name: dataset["test"] 
    for task_name, dataset in features_dict.items()
}

In [14]:
train_dataset = {
    task_name: dataset["train"] 
    for task_name, dataset in features_dict.items()
}
args = transformers.TrainingArguments(
        output_dir="./models/multitask_model/7_epochs",
        overwrite_output_dir=True,
        learning_rate=2e-5,
        do_train=True,
        num_train_epochs=7,
        # Adjust batch size if this doesn't fit on the Colab GPU
        per_device_train_batch_size=32,  
        save_steps=3000,)

trainer = MultitaskTrainer(
    model=multitask_model,
    args=args,
    data_collator=NLPDataCollator(),
    train_dataset=train_dataset,
)

[34m[1mwandb[0m: Currently logged in as: [33mahmadshapiro[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [18]:
trainer.train()

Epoch:   0%|          | 0/7 [00:00<?, ?it/s]
Iteration:   0%|          | 0/834 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/834 [00:01<16:21,  1.18s/it][A
Iteration:   0%|          | 2/834 [00:02<14:46,  1.07s/it][A
Iteration:   0%|          | 3/834 [00:03<14:17,  1.03s/it][A
Iteration:   0%|          | 4/834 [00:04<13:57,  1.01s/it][A
Iteration:   1%|          | 5/834 [00:05<13:40,  1.01it/s][A
Iteration:   1%|          | 6/834 [00:06<13:31,  1.02it/s][A
Iteration:   1%|          | 7/834 [00:07<13:29,  1.02it/s][A
Iteration:   1%|          | 8/834 [00:07<13:26,  1.02it/s][A
Iteration:   1%|          | 9/834 [00:08<13:17,  1.03it/s][A
Iteration:   1%|          | 10/834 [00:09<13:19,  1.03it/s][A
Iteration:   1%|▏         | 11/834 [00:10<13:15,  1.03it/s][A
Iteration:   1%|▏         | 12/834 [00:11<13:14,  1.04it/s][A
Iteration:   2%|▏         | 13/834 [00:12<13:13,  1.04it/s][A
Iteration:   2%|▏         | 14/834 [00:13<13:12,  1.04it/s][A
Iteration:   2%|▏         | 

{"loss": 0.3453030834086239, "learning_rate": 1.828708461801987e-05, "epoch": 0.5995203836930456, "step": 500}



Iteration:  60%|██████    | 501/834 [08:13<05:29,  1.01it/s][A
Iteration:  60%|██████    | 502/834 [08:14<05:28,  1.01it/s][A
Iteration:  60%|██████    | 503/834 [08:15<05:27,  1.01it/s][A
Iteration:  60%|██████    | 504/834 [08:15<05:22,  1.02it/s][A
Iteration:  61%|██████    | 505/834 [08:16<05:19,  1.03it/s][A
Iteration:  61%|██████    | 506/834 [08:17<05:20,  1.02it/s][A
Iteration:  61%|██████    | 507/834 [08:18<05:20,  1.02it/s][A
Iteration:  61%|██████    | 508/834 [08:19<05:20,  1.02it/s][A
Iteration:  61%|██████    | 509/834 [08:20<05:19,  1.02it/s][A
Iteration:  61%|██████    | 510/834 [08:21<05:19,  1.01it/s][A
Iteration:  61%|██████▏   | 511/834 [08:22<05:15,  1.02it/s][A
Iteration:  61%|██████▏   | 512/834 [08:23<05:14,  1.02it/s][A
Iteration:  62%|██████▏   | 513/834 [08:24<05:14,  1.02it/s][A
Iteration:  62%|██████▏   | 514/834 [08:25<05:14,  1.02it/s][A
Iteration:  62%|██████▏   | 515/834 [08:26<05:13,  1.02it/s][A
Iteration:  62%|██████▏   | 516/834 [08

{"loss": 0.2279191680359654, "learning_rate": 1.657416923603974e-05, "epoch": 1.1990407673860912, "step": 1000}



Iteration:  20%|██        | 167/834 [02:44<11:00,  1.01it/s][A
Iteration:  20%|██        | 168/834 [02:45<10:58,  1.01it/s][A
Iteration:  20%|██        | 169/834 [02:46<10:57,  1.01it/s][A
Iteration:  20%|██        | 170/834 [02:47<10:56,  1.01it/s][A
Iteration:  21%|██        | 171/834 [02:48<10:54,  1.01it/s][A
Iteration:  21%|██        | 172/834 [02:49<10:53,  1.01it/s][A
Iteration:  21%|██        | 173/834 [02:50<10:52,  1.01it/s][A
Iteration:  21%|██        | 174/834 [02:51<10:50,  1.01it/s][A
Iteration:  21%|██        | 175/834 [02:52<10:49,  1.01it/s][A
Iteration:  21%|██        | 176/834 [02:53<10:50,  1.01it/s][A
Iteration:  21%|██        | 177/834 [02:54<10:43,  1.02it/s][A
Iteration:  21%|██▏       | 178/834 [02:55<10:40,  1.02it/s][A
Iteration:  21%|██▏       | 179/834 [02:56<10:42,  1.02it/s][A
Iteration:  22%|██▏       | 180/834 [02:57<10:42,  1.02it/s][A
Iteration:  22%|██▏       | 181/834 [02:58<10:43,  1.01it/s][A
Iteration:  22%|██▏       | 182/834 [02

{"loss": 0.147330990701681, "learning_rate": 1.486125385405961e-05, "epoch": 1.7985611510791366, "step": 1500}



Iteration:  80%|███████▉  | 667/834 [10:57<02:44,  1.01it/s][A
Iteration:  80%|████████  | 668/834 [10:58<02:43,  1.02it/s][A
Iteration:  80%|████████  | 669/834 [10:59<02:42,  1.01it/s][A
Iteration:  80%|████████  | 670/834 [11:00<02:41,  1.01it/s][A
Iteration:  80%|████████  | 671/834 [11:01<02:40,  1.01it/s][A
Iteration:  81%|████████  | 672/834 [11:02<02:39,  1.02it/s][A
Iteration:  81%|████████  | 673/834 [11:03<02:38,  1.01it/s][A
Iteration:  81%|████████  | 674/834 [11:04<02:37,  1.02it/s][A
Iteration:  81%|████████  | 675/834 [11:05<02:36,  1.02it/s][A
Iteration:  81%|████████  | 676/834 [11:06<02:34,  1.02it/s][A
Iteration:  81%|████████  | 677/834 [11:07<02:33,  1.02it/s][A
Iteration:  81%|████████▏ | 678/834 [11:08<02:32,  1.02it/s][A
Iteration:  81%|████████▏ | 679/834 [11:09<02:32,  1.02it/s][A
Iteration:  82%|████████▏ | 680/834 [11:10<02:31,  1.02it/s][A
Iteration:  82%|████████▏ | 681/834 [11:11<02:30,  1.02it/s][A
Iteration:  82%|████████▏ | 682/834 [11

{"loss": 0.09924615517852363, "learning_rate": 1.3148338472079481e-05, "epoch": 2.3980815347721824, "step": 2000}



Iteration:  40%|███▉      | 333/834 [05:24<08:00,  1.04it/s][A
Iteration:  40%|████      | 334/834 [05:25<07:59,  1.04it/s][A
Iteration:  40%|████      | 335/834 [05:26<07:57,  1.04it/s][A
Iteration:  40%|████      | 336/834 [05:27<07:56,  1.04it/s][A
Iteration:  40%|████      | 337/834 [05:28<07:55,  1.05it/s][A
Iteration:  41%|████      | 338/834 [05:29<07:54,  1.04it/s][A
Iteration:  41%|████      | 339/834 [05:30<07:53,  1.05it/s][A
Iteration:  41%|████      | 340/834 [05:31<07:53,  1.04it/s][A
Iteration:  41%|████      | 341/834 [05:32<07:51,  1.04it/s][A
Iteration:  41%|████      | 342/834 [05:33<07:50,  1.05it/s][A
Iteration:  41%|████      | 343/834 [05:34<07:49,  1.05it/s][A
Iteration:  41%|████      | 344/834 [05:35<07:48,  1.05it/s][A
Iteration:  41%|████▏     | 345/834 [05:36<07:46,  1.05it/s][A
Iteration:  41%|████▏     | 346/834 [05:37<07:46,  1.05it/s][A
Iteration:  42%|████▏     | 347/834 [05:38<07:43,  1.05it/s][A
Iteration:  42%|████▏     | 348/834 [05

{"loss": 0.07890414835029515, "learning_rate": 1.1435423090099349e-05, "epoch": 2.997601918465228, "step": 2500}



Iteration: 100%|█████████▉| 833/834 [13:22<00:00,  1.16it/s][A
Iteration: 100%|██████████| 834/834 [13:22<00:00,  1.04it/s][A
Epoch:  43%|████▎     | 3/7 [40:43<54:09, 812.27s/it]  
Iteration:   0%|          | 0/834 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/834 [00:00<13:16,  1.05it/s][A
Iteration:   0%|          | 2/834 [00:01<13:12,  1.05it/s][A
Iteration:   0%|          | 3/834 [00:02<13:06,  1.06it/s][A
Iteration:   0%|          | 4/834 [00:03<13:08,  1.05it/s][A
Iteration:   1%|          | 5/834 [00:04<13:05,  1.06it/s][A
Iteration:   1%|          | 6/834 [00:05<13:05,  1.05it/s][A
Iteration:   1%|          | 7/834 [00:06<13:04,  1.05it/s][A
Iteration:   1%|          | 8/834 [00:07<13:01,  1.06it/s][A
Iteration:   1%|          | 9/834 [00:08<13:02,  1.05it/s][A
Iteration:   1%|          | 10/834 [00:09<13:01,  1.06it/s][A
Iteration:   1%|▏         | 11/834 [00:10<13:00,  1.05it/s][A
Iteration:   1%|▏         | 12/834 [00:11<13:01,  1.05it/s][A
Iteration:   2

{"loss": 0.04672879453405039, "learning_rate": 9.72250770811922e-06, "epoch": 3.597122302158273, "step": 3000}



Iteration:  60%|█████▉    | 498/834 [08:08<10:40,  1.91s/it][A
Iteration:  60%|█████▉    | 499/834 [08:09<09:07,  1.63s/it][A
Iteration:  60%|█████▉    | 500/834 [08:10<08:01,  1.44s/it][A
Iteration:  60%|██████    | 501/834 [08:11<07:15,  1.31s/it][A
Iteration:  60%|██████    | 502/834 [08:12<06:38,  1.20s/it][A
Iteration:  60%|██████    | 503/834 [08:13<06:14,  1.13s/it][A
Iteration:  60%|██████    | 504/834 [08:14<05:59,  1.09s/it][A
Iteration:  61%|██████    | 505/834 [08:15<05:47,  1.06s/it][A
Iteration:  61%|██████    | 506/834 [08:16<05:39,  1.03s/it][A
Iteration:  61%|██████    | 507/834 [08:17<05:33,  1.02s/it][A
Iteration:  61%|██████    | 508/834 [08:18<05:27,  1.01s/it][A
Iteration:  61%|██████    | 509/834 [08:19<05:24,  1.00it/s][A
Iteration:  61%|██████    | 510/834 [08:20<05:22,  1.00it/s][A
Iteration:  61%|██████▏   | 511/834 [08:21<05:21,  1.00it/s][A
Iteration:  61%|██████▏   | 512/834 [08:22<05:19,  1.01it/s][A
Iteration:  62%|██████▏   | 513/834 [08

{"loss": 0.036134968709491656, "learning_rate": 8.00959232613909e-06, "epoch": 4.196642685851319, "step": 3500}



Iteration:  20%|█▉        | 165/834 [02:42<11:04,  1.01it/s][A
Iteration:  20%|█▉        | 166/834 [02:43<11:03,  1.01it/s][A
Iteration:  20%|██        | 167/834 [02:44<11:01,  1.01it/s][A
Iteration:  20%|██        | 168/834 [02:45<10:59,  1.01it/s][A
Iteration:  20%|██        | 169/834 [02:46<10:56,  1.01it/s][A
Iteration:  20%|██        | 170/834 [02:47<10:51,  1.02it/s][A
Iteration:  21%|██        | 171/834 [02:48<10:50,  1.02it/s][A
Iteration:  21%|██        | 172/834 [02:49<10:55,  1.01it/s][A
Iteration:  21%|██        | 173/834 [02:50<10:56,  1.01it/s][A
Iteration:  21%|██        | 174/834 [02:51<10:56,  1.01it/s][A
Iteration:  21%|██        | 175/834 [02:52<10:58,  1.00it/s][A
Iteration:  21%|██        | 176/834 [02:53<10:59,  1.00s/it][A
Iteration:  21%|██        | 177/834 [02:54<11:00,  1.01s/it][A
Iteration:  21%|██▏       | 178/834 [02:55<10:55,  1.00it/s][A
Iteration:  21%|██▏       | 179/834 [02:56<10:52,  1.00it/s][A
Iteration:  22%|██▏       | 180/834 [02

{"loss": 0.028025242857285776, "learning_rate": 6.296676944158959e-06, "epoch": 4.796163069544365, "step": 4000}



Iteration:  80%|███████▉  | 665/834 [10:54<02:47,  1.01it/s][A
Iteration:  80%|███████▉  | 666/834 [10:55<02:46,  1.01it/s][A
Iteration:  80%|███████▉  | 667/834 [10:56<02:45,  1.01it/s][A
Iteration:  80%|████████  | 668/834 [10:57<02:44,  1.01it/s][A
Iteration:  80%|████████  | 669/834 [10:58<02:42,  1.01it/s][A
Iteration:  80%|████████  | 670/834 [10:59<02:41,  1.01it/s][A
Iteration:  80%|████████  | 671/834 [11:00<02:38,  1.03it/s][A
Iteration:  81%|████████  | 672/834 [11:01<02:38,  1.02it/s][A
Iteration:  81%|████████  | 673/834 [11:02<02:36,  1.03it/s][A
Iteration:  81%|████████  | 674/834 [11:02<02:34,  1.04it/s][A
Iteration:  81%|████████  | 675/834 [11:03<02:33,  1.03it/s][A
Iteration:  81%|████████  | 676/834 [11:04<02:32,  1.04it/s][A
Iteration:  81%|████████  | 677/834 [11:05<02:30,  1.04it/s][A
Iteration:  81%|████████▏ | 678/834 [11:06<02:29,  1.04it/s][A
Iteration:  81%|████████▏ | 679/834 [11:07<02:29,  1.04it/s][A
Iteration:  82%|████████▏ | 680/834 [11

{"loss": 0.0223789106036711, "learning_rate": 4.583761562178829e-06, "epoch": 5.39568345323741, "step": 4500}



Iteration:  40%|███▉      | 331/834 [05:25<08:17,  1.01it/s][A
Iteration:  40%|███▉      | 332/834 [05:26<08:16,  1.01it/s][A
Iteration:  40%|███▉      | 333/834 [05:27<08:14,  1.01it/s][A
Iteration:  40%|████      | 334/834 [05:28<08:13,  1.01it/s][A
Iteration:  40%|████      | 335/834 [05:29<08:09,  1.02it/s][A
Iteration:  40%|████      | 336/834 [05:30<08:09,  1.02it/s][A
Iteration:  40%|████      | 337/834 [05:30<08:09,  1.02it/s][A
Iteration:  41%|████      | 338/834 [05:31<08:06,  1.02it/s][A
Iteration:  41%|████      | 339/834 [05:32<08:06,  1.02it/s][A
Iteration:  41%|████      | 340/834 [05:33<08:06,  1.01it/s][A
Iteration:  41%|████      | 341/834 [05:34<08:06,  1.01it/s][A
Iteration:  41%|████      | 342/834 [05:35<08:06,  1.01it/s][A
Iteration:  41%|████      | 343/834 [05:36<08:01,  1.02it/s][A
Iteration:  41%|████      | 344/834 [05:37<07:55,  1.03it/s][A
Iteration:  41%|████▏     | 345/834 [05:38<07:56,  1.03it/s][A
Iteration:  41%|████▏     | 346/834 [05

{"loss": 0.012173666016897186, "learning_rate": 2.8708461801986986e-06, "epoch": 5.995203836930456, "step": 5000}



Iteration: 100%|█████████▉| 831/834 [13:36<00:02,  1.09it/s][A
Iteration: 100%|█████████▉| 832/834 [13:37<00:01,  1.07it/s][A
Iteration: 100%|█████████▉| 833/834 [13:37<00:00,  1.14it/s][A
Iteration: 100%|██████████| 834/834 [13:38<00:00,  1.02it/s][A
Epoch:  86%|████████▌ | 6/7 [1:21:39<13:37, 817.06s/it]
Iteration:   0%|          | 0/834 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/834 [00:00<13:38,  1.02it/s][A
Iteration:   0%|          | 2/834 [00:01<13:39,  1.02it/s][A
Iteration:   0%|          | 3/834 [00:02<13:31,  1.02it/s][A
Iteration:   0%|          | 4/834 [00:03<13:34,  1.02it/s][A
Iteration:   1%|          | 5/834 [00:04<13:36,  1.02it/s][A
Iteration:   1%|          | 6/834 [00:05<13:37,  1.01it/s][A
Iteration:   1%|          | 7/834 [00:06<13:36,  1.01it/s][A
Iteration:   1%|          | 8/834 [00:07<13:26,  1.02it/s][A
Iteration:   1%|          | 9/834 [00:08<13:27,  1.02it/s][A
Iteration:   1%|          | 10/834 [00:09<13:28,  1.02it/s][A
Iteration:  

{"loss": 0.009607026116660563, "learning_rate": 1.157930798218568e-06, "epoch": 6.594724220623501, "step": 5500}



Iteration:  60%|█████▉    | 497/834 [08:07<05:31,  1.02it/s][A
Iteration:  60%|█████▉    | 498/834 [08:08<05:27,  1.03it/s][A
Iteration:  60%|█████▉    | 499/834 [08:09<05:24,  1.03it/s][A
Iteration:  60%|█████▉    | 500/834 [08:10<05:21,  1.04it/s][A
Iteration:  60%|██████    | 501/834 [08:11<05:21,  1.03it/s][A
Iteration:  60%|██████    | 502/834 [08:12<05:23,  1.03it/s][A
Iteration:  60%|██████    | 503/834 [08:13<05:23,  1.02it/s][A
Iteration:  60%|██████    | 504/834 [08:14<05:23,  1.02it/s][A
Iteration:  61%|██████    | 505/834 [08:14<05:22,  1.02it/s][A
Iteration:  61%|██████    | 506/834 [08:15<05:20,  1.02it/s][A
Iteration:  61%|██████    | 507/834 [08:16<05:18,  1.03it/s][A
Iteration:  61%|██████    | 508/834 [08:17<05:17,  1.03it/s][A
Iteration:  61%|██████    | 509/834 [08:18<05:16,  1.03it/s][A
Iteration:  61%|██████    | 510/834 [08:19<05:16,  1.02it/s][A
Iteration:  61%|██████▏   | 511/834 [08:20<05:16,  1.02it/s][A
Iteration:  61%|██████▏   | 512/834 [08

TrainOutput(global_step=5838, training_loss=0.09068378738210991)

In [19]:
task_names = ['hatespeech_classes']

In [22]:
import datasets

In [23]:
f1 = datasets.load_metric('f1')

In [24]:
recall = datasets.load_metric('recall')

In [25]:
precision = datasets.load_metric('precision')

In [26]:
preds_dict = {}
for task_name in task_names:
    eval_dataloader = DataLoaderWithTaskname(
        task_name,
        trainer.get_eval_dataloader(eval_dataset=features_dict[task_name]["test"])
    )
    print(eval_dataloader.data_loader.collate_fn)
    preds_dict[task_name] = trainer._prediction_loop(
        eval_dataloader, 
        description=f"Test: {task_name}",
    )

<bound method NLPDataCollator.collate_batch of <model.NLPDataCollator object at 0x7fffaf64b610>>


Test: hatespeech_classes: 100%|██████████| 159/159 [00:13<00:00, 11.98it/s]


### 2 Epochs

In [18]:
f1.compute(predictions=np.argmax(preds_dict["offensive"].predictions, axis=1),  references=preds_dict["offensive"].label_ids, average='macro' )

{'f1': 0.8418723545933512}

In [19]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech"].predictions, axis=1),  references=preds_dict["hatespeech"].label_ids, average='macro' )

{'f1': 0.8371149406524729}

In [20]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech_classes"].predictions, axis=1),  references=preds_dict["hatespeech_classes"].label_ids, average='macro' )

{'f1': 0.3926597611174843}

### 3 Epochs

In [21]:
f1.compute(predictions=np.argmax(preds_dict["offensive"].predictions, axis=1),  references=preds_dict["offensive"].label_ids, average='macro' )

{'f1': 0.8418723545933512}

In [27]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech"].predictions, axis=1),  references=preds_dict["hatespeech"].label_ids, average='macro' )

{'f1': 0.8325985296056062}

In [28]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech_classes"].predictions, axis=1),  references=preds_dict["hatespeech_classes"].label_ids, average='macro' )

{'f1': 0.4176057674898591}

### 4 Epochs

In [25]:
f1.compute(predictions=np.argmax(preds_dict["offensive"].predictions, axis=1),  references=preds_dict["offensive"].label_ids, average='macro' )

{'f1': 0.8469622661091527}

In [27]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech"].predictions, axis=1),  references=preds_dict["hatespeech"].label_ids, average='macro' )

{'f1': 0.8325985296056062}

In [28]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech_classes"].predictions, axis=1),  references=preds_dict["hatespeech_classes"].label_ids, average='macro' )

{'f1': 0.4176057674898591}

### 5 Epochs

In [43]:
f1.compute(predictions=np.argmax(preds_dict["offensive"].predictions, axis=1),  references=preds_dict["offensive"].label_ids, average='macro' )

{'f1': 0.8387301587301587}

In [44]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech"].predictions, axis=1),  references=preds_dict["hatespeech"].label_ids, average='macro' )

{'f1': 0.8107379458902904}

In [45]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech_classes"].predictions, axis=1),  references=preds_dict["hatespeech_classes"].label_ids, average='macro' )

{'f1': 0.4355893433350976}

### 7 Epochs

In [27]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech_classes"].predictions, axis=1),  references=preds_dict["hatespeech_classes"].label_ids, average='macro' )

{'f1': 0.47217673634635726}

## Loading

In [46]:
def load_model(dot_bin_file):
    multitask_model = MultitaskModel.create(
                                            model_name=model_name,
                                            model_type_dict={
                                                "offensive": transformers.AutoModelForSequenceClassification,
                                                "hatespeech": transformers.AutoModelForSequenceClassification,
                                                "hatespeech_classes": transformers.AutoModelForSequenceClassification,
                                            },
                                            model_config_dict={
                                                "offensive": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
                                                "hatespeech": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
                                                "hatespeech_classes": transformers.AutoConfig.from_pretrained(model_name, num_labels=7),
                                            },)
    model = torch.load(dot_bin_file)
    multitask_model.load_state_dict(model)
    return multitask_model

# Without Last Task

In [1]:
%cd /scratch/mt/ashapiro/Hate_Speech/Multitask_trial/

/scratch/mt/ashapiro/Hate_Speech/Multitask_trial


In [2]:
import numpy as np
import torch
import torch.nn as nn
import transformers
import nlp
import logging
from datasets import load_dataset
from model import * 
logging.basicConfig(level=logging.INFO)

## Preparing Data

In [3]:
task_names = ['offensive', 'hatespeech', 'hatespeech_classes']

In [4]:
dataset_dict = {
    "offensive": load_dataset("csv", data_files={'train': "Data/trainA_prepro.csv", 'test': "Data/testA_prepro.csv" } ),
    "hatespeech": load_dataset("csv", data_files={'train': "Data/trainB_prepro.csv", 'test': "Data/testB_prepro.csv" } ),
    "hatespeech_classes": load_dataset("csv", data_files={'train': "Data/trainC_prepro.csv", 'test': "Data/testC_prepro.csv" } ),
}

100%|██████████| 2/2 [00:00<00:00, 193.53it/s]
100%|██████████| 2/2 [00:00<00:00, 208.36it/s]
100%|██████████| 2/2 [00:00<00:00, 206.70it/s]


## Setting Model

In [5]:
model_name = "/scratch/mt/ashapiro/Hate_Speech/Models/Marbertv2/"
multitask_model = MultitaskModel.create(
    model_name=model_name,
    model_type_dict={
        "offensive": transformers.AutoModelForSequenceClassification,
        "hatespeech": transformers.AutoModelForSequenceClassification,
        "hatespeech_classes": transformers.AutoModelForSequenceClassification,
    },
    model_config_dict={
        "offensive": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
        "hatespeech": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
        "hatespeech_classes": transformers.AutoConfig.from_pretrained(model_name, num_labels=7),
    },
)

In [6]:
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

In [7]:
max_length = 512

def convert_to_features(example_batch):
    inputs = list(example_batch['text'])
    features = tokenizer.batch_encode_plus(
        inputs, max_length=max_length, pad_to_max_length=True
    )
    features["labels"] = example_batch["labels"]
    return features

convert_func_dict = {
    "offensive": convert_to_features,
    "hatespeech": convert_to_features,
    "hatespeech_classes": convert_to_features,
}

In [8]:
columns_dict = {
    "offensive": ['input_ids', 'attention_mask', 'labels'],
    "hatespeech": ['input_ids', 'attention_mask', 'labels'],
    "hatespeech_classes": ['input_ids', 'attention_mask', 'labels'],
}

features_dict = {}
for task_name, dataset in dataset_dict.items():
    features_dict[task_name] = {}
    for phase, phase_dataset in dataset.items():
        features_dict[task_name][phase] = phase_dataset.map(
            convert_func_dict[task_name],
            batched=True,
            load_from_cache_file=False,
        )
        print(task_name, phase, len(phase_dataset), len(features_dict[task_name][phase]))
        features_dict[task_name][phase].set_format(
            type="torch", 
            columns=columns_dict[task_name],
        )
        print(task_name, phase, len(phase_dataset), len(features_dict[task_name][phase]))

100%|██████████| 9/9 [00:03<00:00,  2.52ba/s]


offensive train 8887 8887
offensive train 8887 8887


100%|██████████| 2/2 [00:00<00:00,  3.92ba/s]


offensive test 1270 1270
offensive test 1270 1270


100%|██████████| 9/9 [00:03<00:00,  2.35ba/s]


hatespeech train 8887 8887
hatespeech train 8887 8887


100%|██████████| 2/2 [00:00<00:00,  3.93ba/s]


hatespeech test 1270 1270
hatespeech test 1270 1270


100%|██████████| 9/9 [00:03<00:00,  2.35ba/s]


hatespeech_classes train 8887 8887
hatespeech_classes train 8887 8887


100%|██████████| 2/2 [00:00<00:00,  3.87ba/s]

hatespeech_classes test 1270 1270
hatespeech_classes test 1270 1270





In [9]:
eval_dataset = {
    task_name: dataset["test"] 
    for task_name, dataset in features_dict.items()
}

In [10]:
train_dataset = {
    task_name: dataset["train"] 
    for task_name, dataset in features_dict.items()
}
args = transformers.TrainingArguments(
        output_dir="./models/multitask_model/3_epochs",
        overwrite_output_dir=True,
        learning_rate=2e-5,
        do_train=True,
        num_train_epochs=2,
        # Adjust batch size if this doesn't fit on the Colab GPU
        per_device_train_batch_size=16,  
        save_steps=3000,)

trainer = MultitaskTrainer(
    model=multitask_model,
    args=args,
    data_collator=NLPDataCollator(),
    train_dataset=train_dataset,
)

[34m[1mwandb[0m: Currently logged in as: [33mahmadshapiro[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [11]:
trainer.train()

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]
Iteration:   0%|          | 0/1668 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/1668 [00:00<17:53,  1.55it/s][A
Iteration:   0%|          | 2/1668 [00:01<16:14,  1.71it/s][A
Iteration:   0%|          | 3/1668 [00:01<15:44,  1.76it/s][A
Iteration:   0%|          | 4/1668 [00:02<15:26,  1.80it/s][A
Iteration:   0%|          | 5/1668 [00:02<15:19,  1.81it/s][A
Iteration:   0%|          | 6/1668 [00:03<15:17,  1.81it/s][A
Iteration:   0%|          | 7/1668 [00:03<15:07,  1.83it/s][A
Iteration:   0%|          | 8/1668 [00:04<15:13,  1.82it/s][A
Iteration:   1%|          | 9/1668 [00:05<15:13,  1.82it/s][A
Iteration:   1%|          | 10/1668 [00:05<15:09,  1.82it/s][A
Iteration:   1%|          | 11/1668 [00:06<15:04,  1.83it/s][A
Iteration:   1%|          | 12/1668 [00:06<15:06,  1.83it/s][A
Iteration:   1%|          | 13/1668 [00:07<15:04,  1.83it/s][A
Iteration:   1%|          | 14/1668 [00:07<15:02,  1.83it/s][A
Iteration:   

{"loss": 0.41617555819451807, "learning_rate": 1.7002398081534774e-05, "epoch": 0.2997601918465228, "step": 500}



Iteration:  30%|███       | 501/1668 [04:32<11:34,  1.68it/s][A
Iteration:  30%|███       | 502/1668 [04:32<11:18,  1.72it/s][A
Iteration:  30%|███       | 503/1668 [04:33<11:07,  1.74it/s][A
Iteration:  30%|███       | 504/1668 [04:33<10:59,  1.77it/s][A
Iteration:  30%|███       | 505/1668 [04:34<10:52,  1.78it/s][A
Iteration:  30%|███       | 506/1668 [04:34<10:47,  1.80it/s][A
Iteration:  30%|███       | 507/1668 [04:35<10:45,  1.80it/s][A
Iteration:  30%|███       | 508/1668 [04:36<10:42,  1.80it/s][A
Iteration:  31%|███       | 509/1668 [04:36<10:39,  1.81it/s][A
Iteration:  31%|███       | 510/1668 [04:37<10:37,  1.82it/s][A
Iteration:  31%|███       | 511/1668 [04:37<10:35,  1.82it/s][A
Iteration:  31%|███       | 512/1668 [04:38<10:33,  1.82it/s][A
Iteration:  31%|███       | 513/1668 [04:38<10:33,  1.82it/s][A
Iteration:  31%|███       | 514/1668 [04:39<10:33,  1.82it/s][A
Iteration:  31%|███       | 515/1668 [04:39<10:32,  1.82it/s][A
Iteration:  31%|███     

{"loss": 0.2891820684103295, "learning_rate": 1.4004796163069546e-05, "epoch": 0.5995203836930456, "step": 1000}



Iteration:  60%|██████    | 1001/1668 [09:03<06:25,  1.73it/s][A
Iteration:  60%|██████    | 1002/1668 [09:04<06:18,  1.76it/s][A
Iteration:  60%|██████    | 1003/1668 [09:04<06:13,  1.78it/s][A
Iteration:  60%|██████    | 1004/1668 [09:05<06:10,  1.79it/s][A
Iteration:  60%|██████    | 1005/1668 [09:05<06:07,  1.80it/s][A
Iteration:  60%|██████    | 1006/1668 [09:06<05:59,  1.84it/s][A
Iteration:  60%|██████    | 1007/1668 [09:06<05:56,  1.85it/s][A
Iteration:  60%|██████    | 1008/1668 [09:07<05:56,  1.85it/s][A
Iteration:  60%|██████    | 1009/1668 [09:07<05:58,  1.84it/s][A
Iteration:  61%|██████    | 1010/1668 [09:08<05:58,  1.84it/s][A
Iteration:  61%|██████    | 1011/1668 [09:08<05:58,  1.83it/s][A
Iteration:  61%|██████    | 1012/1668 [09:09<05:57,  1.83it/s][A
Iteration:  61%|██████    | 1013/1668 [09:09<05:56,  1.84it/s][A
Iteration:  61%|██████    | 1014/1668 [09:10<05:56,  1.84it/s][A
Iteration:  61%|██████    | 1015/1668 [09:11<05:55,  1.84it/s][A
Iteration

{"loss": 0.22568737766169944, "learning_rate": 1.1007194244604318e-05, "epoch": 0.8992805755395683, "step": 1500}



Iteration:  90%|████████▉ | 1501/1668 [13:34<01:36,  1.73it/s][A
Iteration:  90%|█████████ | 1502/1668 [13:35<01:34,  1.76it/s][A
Iteration:  90%|█████████ | 1503/1668 [13:35<01:32,  1.78it/s][A
Iteration:  90%|█████████ | 1504/1668 [13:36<01:31,  1.79it/s][A
Iteration:  90%|█████████ | 1505/1668 [13:36<01:30,  1.80it/s][A
Iteration:  90%|█████████ | 1506/1668 [13:37<01:29,  1.81it/s][A
Iteration:  90%|█████████ | 1507/1668 [13:37<01:28,  1.81it/s][A
Iteration:  90%|█████████ | 1508/1668 [13:38<01:28,  1.80it/s][A
Iteration:  90%|█████████ | 1509/1668 [13:38<01:27,  1.83it/s][A
Iteration:  91%|█████████ | 1510/1668 [13:39<01:25,  1.84it/s][A
Iteration:  91%|█████████ | 1511/1668 [13:39<01:25,  1.83it/s][A
Iteration:  91%|█████████ | 1512/1668 [13:40<01:25,  1.83it/s][A
Iteration:  91%|█████████ | 1513/1668 [13:41<01:23,  1.87it/s][A
Iteration:  91%|█████████ | 1514/1668 [13:41<01:22,  1.88it/s][A
Iteration:  91%|█████████ | 1515/1668 [13:42<01:21,  1.87it/s][A
Iteration

{"loss": 0.16795708821783772, "learning_rate": 8.00959232613909e-06, "epoch": 1.1990407673860912, "step": 2000}



Iteration:  20%|█▉        | 333/1668 [03:01<13:12,  1.68it/s][A
Iteration:  20%|██        | 334/1668 [03:01<12:51,  1.73it/s][A
Iteration:  20%|██        | 335/1668 [03:02<12:37,  1.76it/s][A
Iteration:  20%|██        | 336/1668 [03:02<12:27,  1.78it/s][A
Iteration:  20%|██        | 337/1668 [03:03<12:19,  1.80it/s][A
Iteration:  20%|██        | 338/1668 [03:04<12:10,  1.82it/s][A
Iteration:  20%|██        | 339/1668 [03:04<12:04,  1.83it/s][A
Iteration:  20%|██        | 340/1668 [03:05<12:05,  1.83it/s][A
Iteration:  20%|██        | 341/1668 [03:05<12:06,  1.83it/s][A
Iteration:  21%|██        | 342/1668 [03:06<12:06,  1.83it/s][A
Iteration:  21%|██        | 343/1668 [03:06<12:05,  1.83it/s][A
Iteration:  21%|██        | 344/1668 [03:07<12:03,  1.83it/s][A
Iteration:  21%|██        | 345/1668 [03:07<12:04,  1.83it/s][A
Iteration:  21%|██        | 346/1668 [03:08<12:04,  1.83it/s][A
Iteration:  21%|██        | 347/1668 [03:08<12:02,  1.83it/s][A
Iteration:  21%|██      

{"loss": 0.13271094839542638, "learning_rate": 5.011990407673861e-06, "epoch": 1.498800959232614, "step": 2500}



Iteration:  50%|████▉     | 833/1668 [07:34<07:51,  1.77it/s][A
Iteration:  50%|█████     | 834/1668 [07:34<07:45,  1.79it/s][A
Iteration:  50%|█████     | 835/1668 [07:35<07:41,  1.81it/s][A
Iteration:  50%|█████     | 836/1668 [07:36<07:37,  1.82it/s][A
Iteration:  50%|█████     | 837/1668 [07:36<07:35,  1.83it/s][A
Iteration:  50%|█████     | 838/1668 [07:37<07:33,  1.83it/s][A
Iteration:  50%|█████     | 839/1668 [07:37<07:32,  1.83it/s][A
Iteration:  50%|█████     | 840/1668 [07:38<07:32,  1.83it/s][A
Iteration:  50%|█████     | 841/1668 [07:38<07:30,  1.84it/s][A
Iteration:  50%|█████     | 842/1668 [07:39<07:30,  1.83it/s][A
Iteration:  51%|█████     | 843/1668 [07:39<07:29,  1.84it/s][A
Iteration:  51%|█████     | 844/1668 [07:40<07:27,  1.84it/s][A
Iteration:  51%|█████     | 845/1668 [07:40<07:27,  1.84it/s][A
Iteration:  51%|█████     | 846/1668 [07:41<07:27,  1.84it/s][A
Iteration:  51%|█████     | 847/1668 [07:42<07:27,  1.84it/s][A
Iteration:  51%|█████   

{"loss": 0.11166536170669134, "learning_rate": 2.0143884892086333e-06, "epoch": 1.7985611510791366, "step": 3000}



Iteration:  80%|███████▉  | 1332/1668 [12:08<08:28,  1.51s/it][A
Iteration:  80%|███████▉  | 1333/1668 [12:09<06:50,  1.22s/it][A
Iteration:  80%|███████▉  | 1334/1668 [12:09<05:40,  1.02s/it][A
Iteration:  80%|████████  | 1335/1668 [12:10<04:51,  1.14it/s][A
Iteration:  80%|████████  | 1336/1668 [12:10<04:17,  1.29it/s][A
Iteration:  80%|████████  | 1337/1668 [12:11<03:53,  1.42it/s][A
Iteration:  80%|████████  | 1338/1668 [12:11<03:36,  1.52it/s][A
Iteration:  80%|████████  | 1339/1668 [12:12<03:25,  1.60it/s][A
Iteration:  80%|████████  | 1340/1668 [12:12<03:16,  1.67it/s][A
Iteration:  80%|████████  | 1341/1668 [12:13<03:10,  1.71it/s][A
Iteration:  80%|████████  | 1342/1668 [12:13<03:06,  1.75it/s][A
Iteration:  81%|████████  | 1343/1668 [12:14<03:03,  1.77it/s][A
Iteration:  81%|████████  | 1344/1668 [12:14<03:00,  1.79it/s][A
Iteration:  81%|████████  | 1345/1668 [12:15<02:58,  1.81it/s][A
Iteration:  81%|████████  | 1346/1668 [12:16<02:57,  1.82it/s][A
Iteration

TrainOutput(global_step=3336, training_loss=0.21281198457132977)

In [12]:
task_names = ['offensive','hatespeech','hatespeech_classes']

In [13]:
import datasets

In [14]:
f1 = datasets.load_metric('f1')

In [15]:
recall = datasets.load_metric('recall')

In [16]:
precision = datasets.load_metric('precision')

In [17]:
preds_dict = {}
for task_name in task_names:
    eval_dataloader = DataLoaderWithTaskname(
        task_name,
        trainer.get_eval_dataloader(eval_dataset=features_dict[task_name]["test"])
    )
    print(eval_dataloader.data_loader.collate_fn)
    preds_dict[task_name] = trainer._prediction_loop(
        eval_dataloader, 
        description=f"Test: {task_name}",
    )

Test: offensive:   1%|▏         | 2/159 [00:00<00:15, 10.40it/s]

<bound method NLPDataCollator.collate_batch of <model.NLPDataCollator object at 0x7ffef9fbb070>>


Test: offensive: 100%|██████████| 159/159 [00:13<00:00, 11.91it/s]
Test: hatespeech:   1%|▏         | 2/159 [00:00<00:13, 11.97it/s]

<bound method NLPDataCollator.collate_batch of <model.NLPDataCollator object at 0x7ffef9fbb070>>


Test: hatespeech: 100%|██████████| 159/159 [00:13<00:00, 11.88it/s]
Test: hatespeech_classes:   1%|▏         | 2/159 [00:00<00:13, 11.92it/s]

<bound method NLPDataCollator.collate_batch of <model.NLPDataCollator object at 0x7ffef9fbb070>>


Test: hatespeech_classes: 100%|██████████| 159/159 [00:13<00:00, 11.83it/s]


### 2 Epochs

In [18]:
f1.compute(predictions=np.argmax(preds_dict["offensive"].predictions, axis=1),  references=preds_dict["offensive"].label_ids, average='macro' )

{'f1': 0.8418723545933512}

In [19]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech"].predictions, axis=1),  references=preds_dict["hatespeech"].label_ids, average='macro' )

{'f1': 0.8371149406524729}

In [20]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech_classes"].predictions, axis=1),  references=preds_dict["hatespeech_classes"].label_ids, average='macro' )

{'f1': 0.3926597611174843}

### 3 Epochs

In [21]:
f1.compute(predictions=np.argmax(preds_dict["offensive"].predictions, axis=1),  references=preds_dict["offensive"].label_ids, average='macro' )

{'f1': 0.8418723545933512}

In [27]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech"].predictions, axis=1),  references=preds_dict["hatespeech"].label_ids, average='macro' )

{'f1': 0.8325985296056062}

In [28]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech_classes"].predictions, axis=1),  references=preds_dict["hatespeech_classes"].label_ids, average='macro' )

{'f1': 0.4176057674898591}

### 4 Epochs

In [25]:
f1.compute(predictions=np.argmax(preds_dict["offensive"].predictions, axis=1),  references=preds_dict["offensive"].label_ids, average='macro' )

{'f1': 0.8469622661091527}

In [27]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech"].predictions, axis=1),  references=preds_dict["hatespeech"].label_ids, average='macro' )

{'f1': 0.8325985296056062}

In [28]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech_classes"].predictions, axis=1),  references=preds_dict["hatespeech_classes"].label_ids, average='macro' )

{'f1': 0.4176057674898591}

### 5 Epochs

In [43]:
f1.compute(predictions=np.argmax(preds_dict["offensive"].predictions, axis=1),  references=preds_dict["offensive"].label_ids, average='macro' )

{'f1': 0.8387301587301587}

In [44]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech"].predictions, axis=1),  references=preds_dict["hatespeech"].label_ids, average='macro' )

{'f1': 0.8107379458902904}

In [45]:
f1.compute(predictions=np.argmax(preds_dict["hatespeech_classes"].predictions, axis=1),  references=preds_dict["hatespeech_classes"].label_ids, average='macro' )

{'f1': 0.4355893433350976}

## Loading

In [46]:
def load_model(dot_bin_file):
    multitask_model = MultitaskModel.create(
                                            model_name=model_name,
                                            model_type_dict={
                                                "offensive": transformers.AutoModelForSequenceClassification,
                                                "hatespeech": transformers.AutoModelForSequenceClassification,
                                                "hatespeech_classes": transformers.AutoModelForSequenceClassification,
                                            },
                                            model_config_dict={
                                                "offensive": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
                                                "hatespeech": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
                                                "hatespeech_classes": transformers.AutoConfig.from_pretrained(model_name, num_labels=7),
                                            },)
    model = torch.load(dot_bin_file)
    multitask_model.load_state_dict(model)
    return multitask_model

## Sumbission

In [17]:
%cd /scratch/mt/ashapiro/Hate_Speech/Multitask_trial/

/scratch/mt/ashapiro/Hate_Speech/Multitask_trial


In [2]:
import numpy as np
import torch
import torch.nn as nn
import transformers
import nlp
import logging
from datasets import load_dataset
from model import * 
logging.basicConfig(level=logging.INFO)

In [52]:
model_check_point = "./models/multitask_model/3_main_tasks/large_data/checkpoint-6000/pytorch_model.bin"

In [45]:
model_name = "/scratch/mt/ashapiro/Hate_Speech/Models/Marbertv2/"
multitask_model = MultitaskModel.create(
    model_name=model_name,
    model_type_dict={
        "offensive": transformers.AutoModelForSequenceClassification,
        "hatespeech": transformers.AutoModelForSequenceClassification,
        "hatespeech_classes": transformers.AutoModelForSequenceClassification,
    },
    model_config_dict={
        "offensive": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
        "hatespeech": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
        "hatespeech_classes": transformers.AutoConfig.from_pretrained(model_name, num_labels=7),
    },
)


FileNotFoundError: [Errno 2] No such file or directory: '/models/multitask_model/3_main_tasks/large_data/checkpoint-6000/'

In [53]:
model = torch.load(model_check_point)
multitask_model.load_state_dict(model)

<All keys matched successfully>

In [29]:
%pwd

'/scratch/mt/ashapiro/Hate_Speech/Multitask_trial'

In [30]:
import pandas as pd

In [31]:
df = pd.read_csv("test_final_B.csv",header=0)

In [59]:
df.to_csv("test_final_B.csv", index=False)

In [32]:
df

Unnamed: 0,text,labels
0,هتهزر معايا ولا ايه 😡,0
1,مشفتش العرض بتاعهم لا مش مهتمة لا 😩 🐸 😂,0
2,عندما تكون لوحدك تحس انك لحالك صح 😊 حتى انا مث...,0
3,ماشاء الله الرجال باين عليه محترم البنات متى ت...,0
4,شسالفة احد يفهمني 😤,0
...,...,...
2536,قله حيا وين اهلهم ذولي الله لايبلانا لهالدرجه ...,0
2537,ثم الطحلبه 🐸,0
2538,يا وجه الله 😷 من اليوم ورايح شاورما انسى 🔪,0
2539,متخلف حتى الحلال حرمتوه 😷,0


In [36]:
dataset_dict = {
    "hatespeech_classes": load_dataset("csv", data_files={'train': "Data/trainB_prepro.csv", 'test': "test_final_B.csv" } )}

100%|██████████| 2/2 [00:00<00:00, 592.12it/s]


In [37]:
max_length = 512

def convert_to_features(example_batch):
    inputs = list(example_batch['text'])
    features = tokenizer.batch_encode_plus(
        inputs, max_length=max_length, pad_to_max_length=True
    )
    features["labels"] = example_batch["labels"]
    return features

convert_func_dict = {
    "hatespeech_classes": convert_to_features,
}

In [38]:
columns_dict = {
    "hatespeech_classes": ['input_ids', 'attention_mask', 'labels'],
}

features_dict = {}
for task_name, dataset in dataset_dict.items():
    features_dict[task_name] = {}
    for phase, phase_dataset in dataset.items():
        features_dict[task_name][phase] = phase_dataset.map(
            convert_func_dict[task_name],
            batched=True,
            load_from_cache_file=False,
        )
        print(task_name, phase, len(phase_dataset), len(features_dict[task_name][phase]))
        features_dict[task_name][phase].set_format(
            type="torch", 
            columns=columns_dict[task_name],
        )
        print(task_name, phase, len(phase_dataset), len(features_dict[task_name][phase]))

100%|██████████| 9/9 [00:04<00:00,  2.08ba/s]


hatespeech_classes train 8887 8887
hatespeech_classes train 8887 8887


100%|██████████| 3/3 [00:01<00:00,  2.75ba/s]

hatespeech_classes test 2541 2541
hatespeech_classes test 2541 2541





In [39]:
preds_dict = {}
for task_name in ['hatespeech_classes']:
    eval_dataloader = DataLoaderWithTaskname(
        task_name,
        trainer.get_eval_dataloader(eval_dataset=features_dict[task_name]["test"])
    )
    print(eval_dataloader.data_loader.collate_fn)
    preds_dict[task_name] = trainer._prediction_loop(
        eval_dataloader, 
        description=f"Test: {task_name}",
    )

<bound method NLPDataCollator.collate_batch of <model.NLPDataCollator object at 0x7fffaf64b610>>


Test: hatespeech_classes: 100%|██████████| 318/318 [00:26<00:00, 11.91it/s]


In [40]:
predictions=np.argmax(preds_dict["hatespeech_classes"].predictions, axis=1)

In [46]:
labels = ['NOT_HS' if x == 0 else f'HS{x}' for x in predictions]

In [47]:
len(labels)

2541

In [49]:
file_to_write = "\n".join(labels)

In [50]:
f = open("testCpreds.txt", 'w') 

In [51]:
f.write(file_to_write)

17012

In [52]:
f.close()

In [53]:
!wc -l testBpreds.txt

2541 testBpreds.txt
