In [1]:
import pandas as pd
import torch
import os

from torch.utils.data import Dataset
from transformers import (BertTokenizer, BertForSequenceClassification, Trainer,
                          TrainingArguments, BertPreTrainedModel)
# from simpletransformers.language_modeling import LanguageModelingModel
from sklearn.metrics import accuracy_score, f1_score


In [2]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_properties(0))

True
NVIDIA GeForce RTX 3060
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3060', major=8, minor=6, total_memory=12050MB, multi_processor_count=28)


In [3]:
MODEL_NAME = '/home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert'
TRAIN_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/CogniSenti/twitter_fbPost_merged_train.tsv'
TEST_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/CogniSenti/twitter_fbPost_merged_test.tsv'
EVAL_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/CogniSenti/twitter_fbPost_merged_dev.tsv'


In [4]:
def tsv_to_text(tsv_file_loc):
    file_name = tsv_file_loc.split("/")[-1].split(".")[0]
    txt_name = tsv_file_loc.replace(".tsv", ".txt")
    txt_name = txt_name.replace("split_merged", "texts")

    if os.path.exists(txt_name):
        return [txt_name, file_name]

    df = pd.read_csv(tsv_file_loc, sep="\t")

    for txt in df["text"]:
        with open(txt_name, "a", encoding="utf8") as f:
            f.writelines(txt + "\n")
    return [txt_name, file_name]


tsv_to_text(TRAIN_FILE_LOC)


['/home/abdullah/Code/dl/bnlp-resources/sentiment/CogniSenti/twitter_fbPost_merged_train.txt',
 'twitter_fbPost_merged_train']

In [5]:
def tsv_to_df(csv_file_loc):
    df = pd.read_csv(csv_file_loc, sep='\t')
    # remove id column
    df = df.drop(columns=['id'])

    # replace neutral with 0, positive with 1, negative with 2 in class_label column
    df.loc[df['class_label'] == 'Neutral', 'class_label'] = 0
    df.loc[df['class_label'] == 'Positive', 'class_label'] = 1
    df.loc[df['class_label'] == 'Negative', 'class_label'] = 2

    return df


train_df = tsv_to_df(TEST_FILE_LOC)

texts = train_df['text'].tolist()
print(train_df)


                                                  text class_label
0    ফ্রাঁন্সের সেই শক্তী-ধর রাজা লুই-কেত্রোজ ( রাজ...           2
1    ৯৯% রাই ভুল করে এবার আপনাদের পালা!!! #আইকিউ_টে...           0
2    ঢাকা শহরের হাফ ভাগ মানুষ বাকি আড়াই ভাগ অমানুষ...           2
3    ছোট বেলায় দেখেছি চোর কে গলায় জুতার মালা পড়ি...           2
4    ..ভাব যদি কোথায় সে ঘাসের আশ্রয়ে চলে গেল - ভা...           0
..                                                 ...         ...
981  বিভিন্ন দেশের মুদ্রার নাম ➺ বাংলাদেশ – টাকা। ➺...           0
982  @Im_acs আমি ৮ বছর ধরে প্রবাসে আছি এখনো জাই নি ...           2
983  ১৭৫৭সন ২৩সে জুন পলাশী থেকে ১৬ ই ডিসেম্বর /১৯৭১...           0
984  সবুজ পাহাড়ে নজরকাড়া ফুল। পানখাইয়া পাড়া, খাগড়াছ...           1
985  ধা‌র কর‌তে কর‌তে জীবন টা ম‌নে হয় শেষ হ‌য়ে যা...           2

[986 rows x 2 columns]


In [6]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  acc = accuracy_score(labels, preds)
  return {
      'accuracy': acc,
  }


In [7]:
def f1_calculator(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  f1 = f1_score(labels, preds, average='weighted')
  return {
      'f1': f1,
  }


In [8]:
train_list = tsv_to_text(TRAIN_FILE_LOC)
test_list = tsv_to_text(TEST_FILE_LOC)
eval_list = tsv_to_text(EVAL_FILE_LOC)


In [9]:
os.environ["WANDB_DISABLED"] = "true"

train_df = tsv_to_df(TRAIN_FILE_LOC)
test_df = tsv_to_df(TEST_FILE_LOC)
eval_df = tsv_to_df(EVAL_FILE_LOC)


class TINYDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(value[idx])
                for key, value in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

train_encodings = tokenizer(train_df['text'].tolist(
), padding=True, truncation=True, max_length=512)
test_encodings = tokenizer(test_df['text'].tolist(
), padding=True, truncation=True, max_length=512)
eval_encodings = tokenizer(eval_df['text'].tolist(
), padding=True, truncation=True, max_length=512)

train_dataset = TINYDataset(train_encodings, train_df['class_label'].tolist())
test_dataset = TINYDataset(test_encodings, test_df['class_label'].tolist())
eval_dataset = TINYDataset(eval_encodings, eval_df['class_label'].tolist())

model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14


Some weights of the model checkpoint at /home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized fr

## 1 Epoch Train

In [10]:
# training_args = TrainingArguments(
#     output_dir=f"temp",
#     num_train_epochs=1,
#     per_device_train_batch_size=48,
#     per_device_eval_batch_size=96,
#     warmup_steps=500,
#     learning_rate=5e-5,
#     weight_decay=0.01,
#     overwrite_output_dir=True,
#     logging_dir=f"temp/logs",
#     logging_steps=96,
#     save_steps=96,
#     load_best_model_at_end=True,
#     evaluation_strategy="steps",
#     seed=14,
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,
#     eval_dataset=eval_dataset

# )

# trainer.train()


## 100 Epoch with 1 epoch model

In [11]:
MODEL_NAME = '/home/abdullah/Code/dl/499A/best_models/1/cogni_sentiment'
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14

for name, param in model.named_parameters():
    if 'bert.encoder.layer.0' in name:
        # print(name, param.requires_grad)
        param.requires_grad = False
# print("\n\n\n")
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)

training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=100,
    per_device_train_batch_size=192,
    per_device_eval_batch_size=384,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=400,
    save_steps=400,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=f1_calculator,

)

trainer.train()

# print(cool.metrics)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,

)

trainer.train()


Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
***** Running training *****
  Num examples = 4599
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 2400
 17%|█▋        | 400/2400 [01:10<05:49,  5.72it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 68.9622, 'learning_rate': 4e-05, 'epoch': 16.67}


                                                  
 17%|█▋        | 400/2400 [01:10<05:49,  5.72it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 29.214954376220703, 'eval_f1': 0.43137429667915755, 'eval_runtime': 0.3753, 'eval_samples_per_second': 2624.743, 'eval_steps_per_second': 7.994, 'epoch': 16.67}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
 33%|███▎      | 800/2400 [02:21<04:40,  5.70it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 44.9134, 'learning_rate': 4.210526315789474e-05, 'epoch': 33.33}


                                                  
 33%|███▎      | 800/2400 [02:22<04:40,  5.70it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 21.27397346496582, 'eval_f1': 0.4151399359918267, 'eval_runtime': 0.3749, 'eval_samples_per_second': 2627.068, 'eval_steps_per_second': 8.001, 'epoch': 33.33}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 50%|█████     | 1200/2400 [03:32<03:26,  5.80it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 39.6746, 'learning_rate': 3.157894736842105e-05, 'epoch': 50.0}


                                                   
 50%|█████     | 1200/2400 [03:32<03:26,  5.80it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 19.15365982055664, 'eval_f1': 0.43932719558128625, 'eval_runtime': 0.3749, 'eval_samples_per_second': 2627.497, 'eval_steps_per_second': 8.003, 'epoch': 50.0}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
 67%|██████▋   | 1600/2400 [04:43<02:17,  5.80it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 38.0885, 'learning_rate': 2.105263157894737e-05, 'epoch': 66.67}


                                                   
 67%|██████▋   | 1600/2400 [04:43<02:17,  5.80it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json


{'eval_loss': 16.503175735473633, 'eval_f1': 0.509064688004136, 'eval_runtime': 0.3648, 'eval_samples_per_second': 2700.38, 'eval_steps_per_second': 8.225, 'epoch': 66.67}


Model weights saved in temp/checkpoint-1600/pytorch_model.bin
 83%|████████▎ | 2000/2400 [05:53<01:10,  5.70it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 37.1556, 'learning_rate': 1.0526315789473684e-05, 'epoch': 83.33}


                                                   
 83%|████████▎ | 2000/2400 [05:53<01:10,  5.70it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 18.429956436157227, 'eval_f1': 0.47646174366795, 'eval_runtime': 0.3758, 'eval_samples_per_second': 2620.914, 'eval_steps_per_second': 7.982, 'epoch': 83.33}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
100%|██████████| 2400/2400 [07:05<00:00,  5.80it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 36.7251, 'learning_rate': 0.0, 'epoch': 100.0}


                                                   
100%|██████████| 2400/2400 [07:05<00:00,  5.80it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json
Model weights saved in temp/checkpoint-2400/pytorch_model.bin


{'eval_loss': 16.3358097076416, 'eval_f1': 0.48859930055151735, 'eval_runtime': 0.3752, 'eval_samples_per_second': 2624.931, 'eval_steps_per_second': 7.995, 'epoch': 100.0}




Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-2400 (score: 16.3358097076416).
100%|██████████| 2400/2400 [07:06<00:00,  5.63it/s]
***** Running training *****
  Num examples = 4599
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 2400


{'train_runtime': 426.2698, 'train_samples_per_second': 1078.894, 'train_steps_per_second': 5.63, 'train_loss': 44.253236083984376, 'epoch': 100.0}


 17%|█▋        | 400/2400 [01:09<05:50,  5.71it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 36.0846, 'learning_rate': 4e-05, 'epoch': 16.67}



 17%|█▋        | 400/2400 [01:10<05:50,  5.71it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 14.081541061401367, 'eval_accuracy': 0.6, 'eval_runtime': 0.378, 'eval_samples_per_second': 2605.698, 'eval_steps_per_second': 7.936, 'epoch': 16.67}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
 33%|███▎      | 800/2400 [02:21<04:38,  5.75it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 36.3287, 'learning_rate': 4.210526315789474e-05, 'epoch': 33.33}



 33%|███▎      | 800/2400 [02:21<04:38,  5.75it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 23.655437469482422, 'eval_accuracy': 0.6538071065989848, 'eval_runtime': 0.3748, 'eval_samples_per_second': 2627.768, 'eval_steps_per_second': 8.003, 'epoch': 33.33}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 50%|█████     | 1200/2400 [03:32<03:26,  5.81it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 35.9088, 'learning_rate': 3.157894736842105e-05, 'epoch': 50.0}



 50%|█████     | 1200/2400 [03:32<03:26,  5.81it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json
Model weights saved in temp/checkpoint-1200/pytorch_model.bin


{'eval_loss': 16.0231990814209, 'eval_accuracy': 0.6548223350253807, 'eval_runtime': 0.3804, 'eval_samples_per_second': 2589.396, 'eval_steps_per_second': 7.886, 'epoch': 50.0}


 67%|██████▋   | 1600/2400 [04:43<02:20,  5.69it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 35.1235, 'learning_rate': 2.105263157894737e-05, 'epoch': 66.67}



 67%|██████▋   | 1600/2400 [04:43<02:20,  5.69it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json


{'eval_loss': 15.355647087097168, 'eval_accuracy': 0.649746192893401, 'eval_runtime': 0.3679, 'eval_samples_per_second': 2677.579, 'eval_steps_per_second': 8.155, 'epoch': 66.67}


Model weights saved in temp/checkpoint-1600/pytorch_model.bin
 83%|████████▎ | 2000/2400 [05:53<01:09,  5.79it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 34.4118, 'learning_rate': 1.0526315789473684e-05, 'epoch': 83.33}



 83%|████████▎ | 2000/2400 [05:54<01:09,  5.79it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 14.646716117858887, 'eval_accuracy': 0.6761421319796954, 'eval_runtime': 0.3728, 'eval_samples_per_second': 2642.288, 'eval_steps_per_second': 8.048, 'epoch': 83.33}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
100%|██████████| 2400/2400 [07:04<00:00,  5.77it/s]***** Running Evaluation *****
  Num examples = 985
  Batch size = 384


{'loss': 34.3829, 'learning_rate': 0.0, 'epoch': 100.0}



100%|██████████| 2400/2400 [07:05<00:00,  5.77it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json


{'eval_loss': 15.76189136505127, 'eval_accuracy': 0.649746192893401, 'eval_runtime': 0.3642, 'eval_samples_per_second': 2704.338, 'eval_steps_per_second': 8.237, 'epoch': 100.0}


Model weights saved in temp/checkpoint-2400/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-400 (score: 14.081541061401367).
100%|██████████| 2400/2400 [07:05<00:00,  5.64it/s]

{'train_runtime': 425.8822, 'train_samples_per_second': 1079.876, 'train_steps_per_second': 5.635, 'train_loss': 35.37337849934896, 'epoch': 100.0}





TrainOutput(global_step=2400, training_loss=35.37337849934896, metrics={'train_runtime': 425.8822, 'train_samples_per_second': 1079.876, 'train_steps_per_second': 5.635, 'train_loss': 35.37337849934896, 'epoch': 100.0})