In [2]:
import pandas as pd
import torch
import os

from torch.utils.data import Dataset
from transformers import (BertTokenizer, BertForSequenceClassification, Trainer,
                          TrainingArguments, BertPreTrainedModel)
# from simpletransformers.language_modeling import LanguageModelingModel
from sklearn.metrics import accuracy_score, f1_score


In [3]:
# Report the CUDA setup. The device name/properties are only queried when a
# GPU is actually visible: both calls raise on a machine without CUDA, which
# would otherwise abort the notebook in its second cell.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
    print(torch.cuda.get_device_properties(0))

True
NVIDIA GeForce RTX 3060
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3060', major=8, minor=6, total_memory=12050MB, multi_processor_count=28)


In [4]:
# Local checkpoint directory that is passed to `from_pretrained` when the
# tokenizer and the initial classifier are created.
# NOTE(review): absolute, machine-specific path — the notebook only runs as-is on this machine.
MODEL_NAME = '/home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert'

In [5]:
def tsv_to_text(tsv_file_loc):
    """Dump the ``text`` column of a TSV file to a plain-text file, one entry per line.

    The output path is derived from the input path by replacing ``".tsv"`` with
    ``".txt"`` and ``"split_merged"`` with ``"texts"``. If that file already
    exists it is treated as a finished export and nothing is read or written.

    Args:
        tsv_file_loc: Path (using ``/`` separators) of a tab-separated file
            that has a ``text`` column of strings.

    Returns:
        ``[txt_path, file_name]`` where ``file_name`` is the part of the input's
        base name before its first dot.

    Raises:
        TypeError: If an entry of the ``text`` column is not a string
            (e.g. a missing value); no output file is created in that case.
    """
    file_name = tsv_file_loc.split("/")[-1].split(".")[0]
    txt_name = tsv_file_loc.replace(".tsv", ".txt").replace("split_merged", "texts")

    if os.path.exists(txt_name):
        return [txt_name, file_name]

    df = pd.read_csv(tsv_file_loc, sep="\t")

    # Build every line before touching the disk: a bad row then fails here
    # instead of leaving a truncated file that the existence check above would
    # later mistake for a complete export.
    lines = [txt + "\n" for txt in df["text"]]

    # The "split_merged" -> "texts" substitution can point at a directory that
    # does not exist yet.
    out_dir = os.path.dirname(txt_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Open the file once rather than re-opening it in append mode for every row.
    with open(txt_name, "w", encoding="utf8") as f:
        f.writelines(lines)
    return [txt_name, file_name]


# Export the training split's `text` column to a .txt file (a no-op when the
# file already exists); the returned [txt_path, base_name] is the cell output.
tsv_to_text("/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_restaurant_train.tsv")


['/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_restaurant_train.txt',
 'BASA_restaurant_train']

In [6]:
def tsv_to_df(csv_file_loc):
    """Load a sentiment TSV file and encode its labels as integer class ids.

    The ``id`` column is dropped and ``class_label`` is encoded as
    neutral -> 0, positive -> 1, negative -> 2. Labels that are already one of
    the ids 0/1/2 are accepted unchanged.

    Args:
        csv_file_loc: Path of a tab-separated file with ``id``, ``text`` and
            ``class_label`` columns.

    Returns:
        DataFrame without the ``id`` column whose ``class_label`` column has
        dtype ``int64``.

    Raises:
        KeyError: If the file has no ``id`` column.
        ValueError: If ``class_label`` contains a missing value or a label
            outside the three known classes.
    """
    label_to_id = {'neutral': 0, 'positive': 1, 'negative': 2}

    df = pd.read_csv(csv_file_loc, sep='\t')
    # remove id column
    df = df.drop(columns=['id'])

    # Build the encoded column in one pass instead of assigning integers into
    # the string column piece by piece; unknown values are passed through so
    # they can be reported below.
    encoded = df['class_label'].map(lambda label: label_to_id.get(label, label))

    # Anything that is not a valid class id now would only surface later as an
    # opaque tensor-conversion error, so fail here with the offending values.
    invalid = ~encoded.isin(list(label_to_id.values()))
    if invalid.any():
        unexpected = sorted({repr(value) for value in df.loc[invalid, 'class_label']})
        raise ValueError(
            "unexpected class_label value(s) in {}: {}".format(
                csv_file_loc, ", ".join(unexpected)))

    df['class_label'] = encoded.astype('int64')
    return df


# Load the training split; tsv_to_df drops the id column and replaces the
# sentiment names in class_label with the integers 0/1/2.
train_df = tsv_to_df(
    "/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_restaurant_train.tsv")

# Sentences of the training split as a plain Python list.
texts = train_df['text'].tolist()
print(train_df)


                                                   text class_label
0     তাই, ভাল খাবারের জন্য আমিসুপারিশ করতাম, কিন্তু...           1
1     খাঁটি খাবার এবং তাদের কাছে সত্যিকারের সেবা আছে...           1
2     আমি  পাচ বছরেরও বেশি সময় ধরে উপরের হার্টল্যান...           1
3                                 এটা সুপারিশ আমি করেছি           1
4     জানিনা পূর্ববর্তী সমালোচক কে ছিলেন, লং, ডাইন্ড...           1
...                                                 ...         ...
1360  কিন্তু একরকম একটু অপেক্ষা করতে পারতাম,আমরা উদ্...           1
1361              গ্রেট bagels পুরানো-ভাবে তৈরির উপায় ।           1
1362     অ্যাট্রাকিয়াস থাকার কারনে খাবারটি ভয়াবহ ছিল।           2
1363  ওয়েটার আমাদের উপেক্ষা করেছিলেন তা আমরা আমাদের...           2
1364                            এই জায়গাটি ভীশন খারাপ।           2

[1365 rows x 2 columns]


In [7]:
def compute_metrics(pred):
    """Accuracy metric callback.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores); the arg-max over the last
            axis of ``predictions`` is used as the predicted class.

    Returns:
        Dict with the single key ``'accuracy'`` holding sklearn's
        ``accuracy_score`` of the predicted classes against the labels.
    """
    predicted_classes = pred.predictions.argmax(-1)
    return {'accuracy': accuracy_score(pred.label_ids, predicted_classes)}


In [8]:
def f1_calculator(pred):
    """Weighted-F1 metric callback.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores); the arg-max over the last
            axis of ``predictions`` is used as the predicted class.

    Returns:
        Dict with the single key ``'f1'`` holding sklearn's ``f1_score``
        computed with ``average='weighted'``.
    """
    predicted_classes = pred.predictions.argmax(-1)
    weighted_f1 = f1_score(pred.label_ids, predicted_classes, average='weighted')
    return {'f1': weighted_f1}


In [9]:
# Export the text column of every split to a .txt file. Each call returns
# [txt_path, base_name] and skips the export when the .txt file already exists.
train_list = tsv_to_text("/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_restaurant_train.tsv")
test_list = tsv_to_text("/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_restaurant_test.tsv")
eval_list = tsv_to_text("/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_restaurant_dev.tsv")


In [10]:
# Presumably switches off the Weights & Biases reporting integration of the
# Trainer — confirm against the installed transformers version.
os.environ["WANDB_DISABLED"] = "true"

# Load the three splits with integer-encoded labels (train / test / dev).
# NOTE(review): train_df was already loaded from the same file in an earlier
# cell; this assignment replaces it with an identical frame.
train_df = tsv_to_df(
    "/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_restaurant_train.tsv")
test_df = tsv_to_df(
    "/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_restaurant_test.tsv")
eval_df = tsv_to_df(
    "/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_restaurant_dev.tsv")


class TINYDataset(Dataset):
    """Dataset that pairs pre-computed encodings with their labels.

    Args:
        encodings: Mapping from feature name to an indexable container holding
            one entry per example.
        labels: Indexable container with one label per example; its length is
            the length of the dataset.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, with the label under ``'labels'``."""
        sample = {}
        for feature, column in self.encodings.items():
            sample[feature] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample


tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

# Tokenize each split in one batch call: pad to the longest sentence of the
# split and truncate anything beyond 512 tokens.
train_encodings = tokenizer(
    train_df['text'].tolist(), padding=True, truncation=True, max_length=512)
test_encodings = tokenizer(
    test_df['text'].tolist(), padding=True, truncation=True, max_length=512)
eval_encodings = tokenizer(
    eval_df['text'].tolist(), padding=True, truncation=True, max_length=512)

# Wrap encodings and integer labels so the Trainer can index them.
train_dataset = TINYDataset(train_encodings, train_df['class_label'].tolist())
test_dataset = TINYDataset(test_encodings, test_df['class_label'].tolist())
eval_dataset = TINYDataset(eval_encodings, eval_df['class_label'].tolist())

# Seed torch's RNG *before* the model is built: the load warning below reports
# weights that are newly initialised, and those are drawn at construction time.
# (The former `model.manual_seed = 14` merely attached an attribute to the
# module after it had been created; it never seeded any generator.)
torch.manual_seed(14)
model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3).to('cuda')


Some weights of the model checkpoint at /home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized fr

## Train for 1 epoch (disabled: the cell below is commented out)

In [11]:
# training_args = TrainingArguments(
#     output_dir=f"temp",
#     num_train_epochs=1,
#     per_device_train_batch_size=48,
#     per_device_eval_batch_size=96,
#     warmup_steps=500,
#     learning_rate=5e-5,
#     weight_decay=0.01,
#     overwrite_output_dir=True,
#     logging_dir=f"temp/logs",
#     logging_steps=29,
#     save_steps=29,
#     load_best_model_at_end=True,
#     evaluation_strategy="steps",
#     seed=14,
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,
#     eval_dataset=eval_dataset

# )

# trainer.train()


## Train for 100 epochs, starting from the 1-epoch model

In [36]:
# Switch to the saved sentiment classifier — presumably the checkpoint produced
# by the 1-epoch run, going by the section heading; confirm.
# NOTE(review): this rebinds the notebook-wide MODEL_NAME, so re-running an
# earlier cell afterwards would load from this path instead.
MODEL_NAME = '/home/abdullah/Code/dl/499A/best_models/1/restaurant_sentiment'

# Seed torch's RNG before the model is constructed. The former
# `model.manual_seed = 14` only attached an attribute to the module and did
# not seed anything.
torch.manual_seed(14)
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=3).to('cuda')

# Freeze part of the encoder: every parameter whose name contains
# 'bert.encoder.layer.0' is excluded from gradient updates.
# (A disabled experiment that zeroed the gradients of weights with
# |w| < 5e-5 used to sit in this loop; it is not active.)
for name, param in model.named_parameters():
    if 'bert.encoder.layer.0' not in name:
        continue
    param.requires_grad = False

# Hyper-parameters used by the Trainer instances created further down.
training_args = TrainingArguments(
    # optimisation and schedule
    num_train_epochs=100,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    label_smoothing_factor=0.5,
    seed=14,
    # batch sizes
    per_device_train_batch_size=192,
    per_device_eval_batch_size=384,
    # evaluation, logging and checkpoint cadence
    evaluation_strategy="steps",
    logging_steps=100,
    save_steps=100,
    load_best_model_at_end=True,
    # output locations
    output_dir=f"temp",
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
)

# Run 1: train `model` with the arguments above; evaluations on eval_dataset
# report the weighted F1 from f1_calculator (logged as eval_f1).
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=f1_calculator,

)

trainer.train()

# print(cool.metrics)

# Run 2: a new Trainer around the SAME `model` object, this time reporting
# accuracy from compute_metrics (logged as eval_accuracy).
# NOTE(review): because the model is shared, this call trains it for another
# num_train_epochs on top of run 1 (the logged training loss continues from
# ~57 down to ~47), so the F1 and accuracy figures describe different model
# states. Both runs also write their checkpoints to the same output_dir.
# Confirm this is intended rather than a way to obtain accuracy in addition to
# F1 — a single metrics function could return both values in one run.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,

)

# Last expression of the cell: its TrainOutput is what the notebook displays.
trainer.train()



loading configuration file /home/abdullah/Code/dl/499A/best_models/1/restaurant_sentiment/config.json
Model config BertConfig {
  "_name_or_path": "/home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 14,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.12.5",
  "type_vocab_size": 2,
  "use_cache": true,
  "

{'loss': 127.9023, 'learning_rate': 1e-05, 'epoch': 12.5}



 12%|█▎        | 100/800 [00:07<00:46, 14.99it/s]Saving model checkpoint to temp/checkpoint-100
Configuration saved in temp/checkpoint-100/config.json


{'eval_loss': 104.49866485595703, 'eval_f1': 0.41876554726368154, 'eval_runtime': 0.0245, 'eval_samples_per_second': 9138.095, 'eval_steps_per_second': 40.795, 'epoch': 12.5}


Model weights saved in temp/checkpoint-100/pytorch_model.bin
 25%|██▌       | 200/800 [00:14<00:41, 14.39it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 106.0766, 'learning_rate': 2e-05, 'epoch': 25.0}



 25%|██▌       | 200/800 [00:14<00:41, 14.39it/s]Saving model checkpoint to temp/checkpoint-200
Configuration saved in temp/checkpoint-200/config.json


{'eval_loss': 81.363525390625, 'eval_f1': 0.44085130156457086, 'eval_runtime': 0.0304, 'eval_samples_per_second': 7371.071, 'eval_steps_per_second': 32.907, 'epoch': 25.0}


Model weights saved in temp/checkpoint-200/pytorch_model.bin
 38%|███▊      | 300/800 [00:22<00:35, 13.90it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 92.9538, 'learning_rate': 3e-05, 'epoch': 37.5}



 38%|███▊      | 300/800 [00:22<00:35, 13.90it/s]Saving model checkpoint to temp/checkpoint-300
Configuration saved in temp/checkpoint-300/config.json


{'eval_loss': 49.482242584228516, 'eval_f1': 0.47131867786127796, 'eval_runtime': 0.0286, 'eval_samples_per_second': 7834.329, 'eval_steps_per_second': 34.975, 'epoch': 37.5}


Model weights saved in temp/checkpoint-300/pytorch_model.bin
 50%|█████     | 400/800 [00:30<00:31, 12.70it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 77.2565, 'learning_rate': 4e-05, 'epoch': 50.0}



 50%|█████     | 400/800 [00:30<00:31, 12.70it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 48.27881622314453, 'eval_f1': 0.44067961800104655, 'eval_runtime': 0.0307, 'eval_samples_per_second': 7306.979, 'eval_steps_per_second': 32.62, 'epoch': 50.0}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
 62%|██████▎   | 500/800 [00:40<00:22, 13.20it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 68.6108, 'learning_rate': 5e-05, 'epoch': 62.5}



 62%|██████▎   | 500/800 [00:40<00:22, 13.20it/s]Saving model checkpoint to temp/checkpoint-500
Configuration saved in temp/checkpoint-500/config.json


{'eval_loss': 30.116487503051758, 'eval_f1': 0.4540488624527826, 'eval_runtime': 0.0363, 'eval_samples_per_second': 6174.337, 'eval_steps_per_second': 27.564, 'epoch': 62.5}


Model weights saved in temp/checkpoint-500/pytorch_model.bin
 75%|███████▌  | 600/800 [00:49<00:13, 14.30it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 61.8798, 'learning_rate': 3.3333333333333335e-05, 'epoch': 75.0}



 75%|███████▌  | 600/800 [00:49<00:13, 14.30it/s]Saving model checkpoint to temp/checkpoint-600
Configuration saved in temp/checkpoint-600/config.json


{'eval_loss': 31.25191307067871, 'eval_f1': 0.48534705811129897, 'eval_runtime': 0.0262, 'eval_samples_per_second': 8560.038, 'eval_steps_per_second': 38.214, 'epoch': 75.0}


Model weights saved in temp/checkpoint-600/pytorch_model.bin
 88%|████████▊ | 700/800 [00:57<00:06, 15.43it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 59.3871, 'learning_rate': 1.6666666666666667e-05, 'epoch': 87.5}



 88%|████████▊ | 700/800 [00:57<00:06, 15.43it/s]Saving model checkpoint to temp/checkpoint-700
Configuration saved in temp/checkpoint-700/config.json
Model weights saved in temp/checkpoint-700/pytorch_model.bin


{'eval_loss': 21.307676315307617, 'eval_f1': 0.5036866544667861, 'eval_runtime': 0.0263, 'eval_samples_per_second': 8513.421, 'eval_steps_per_second': 38.006, 'epoch': 87.5}


100%|██████████| 800/800 [01:05<00:00, 13.39it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 56.9525, 'learning_rate': 0.0, 'epoch': 100.0}



100%|██████████| 800/800 [01:05<00:00, 13.39it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 20.704652786254883, 'eval_f1': 0.5605856980135274, 'eval_runtime': 0.026, 'eval_samples_per_second': 8613.008, 'eval_steps_per_second': 38.451, 'epoch': 100.0}


Model weights saved in temp/checkpoint-800/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-800 (score: 20.704652786254883).
100%|██████████| 800/800 [01:06<00:00, 12.09it/s]
***** Running training *****
  Num examples = 1365
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 800


{'train_runtime': 66.1478, 'train_samples_per_second': 2063.559, 'train_steps_per_second': 12.094, 'train_loss': 81.37743713378906, 'epoch': 100.0}


 12%|█▎        | 100/800 [00:07<00:49, 14.02it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 55.5581, 'learning_rate': 1e-05, 'epoch': 12.5}



 12%|█▎        | 100/800 [00:07<00:49, 14.02it/s]Saving model checkpoint to temp/checkpoint-100
Configuration saved in temp/checkpoint-100/config.json


{'eval_loss': 20.108333587646484, 'eval_accuracy': 0.6071428571428571, 'eval_runtime': 0.0247, 'eval_samples_per_second': 9059.671, 'eval_steps_per_second': 40.445, 'epoch': 12.5}


Model weights saved in temp/checkpoint-100/pytorch_model.bin
 25%|██▌       | 200/800 [00:15<00:47, 12.51it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 54.4385, 'learning_rate': 2e-05, 'epoch': 25.0}



 25%|██▌       | 200/800 [00:15<00:47, 12.51it/s]Saving model checkpoint to temp/checkpoint-200
Configuration saved in temp/checkpoint-200/config.json


{'eval_loss': 21.28782844543457, 'eval_accuracy': 0.5803571428571429, 'eval_runtime': 0.0264, 'eval_samples_per_second': 8497.021, 'eval_steps_per_second': 37.933, 'epoch': 25.0}


Model weights saved in temp/checkpoint-200/pytorch_model.bin
 38%|███▊      | 300/800 [00:23<00:35, 13.92it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 53.4062, 'learning_rate': 3e-05, 'epoch': 37.5}



 38%|███▊      | 300/800 [00:23<00:35, 13.92it/s]Saving model checkpoint to temp/checkpoint-300
Configuration saved in temp/checkpoint-300/config.json
Model weights saved in temp/checkpoint-300/pytorch_model.bin


{'eval_loss': 17.73983383178711, 'eval_accuracy': 0.6071428571428571, 'eval_runtime': 0.0245, 'eval_samples_per_second': 9146.992, 'eval_steps_per_second': 40.835, 'epoch': 37.5}


 50%|█████     | 400/800 [00:30<00:29, 13.69it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 51.5842, 'learning_rate': 4e-05, 'epoch': 50.0}



 50%|█████     | 400/800 [00:30<00:29, 13.69it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json
Model weights saved in temp/checkpoint-400/pytorch_model.bin


{'eval_loss': 18.752735137939453, 'eval_accuracy': 0.5848214285714286, 'eval_runtime': 0.0247, 'eval_samples_per_second': 9073.933, 'eval_steps_per_second': 40.509, 'epoch': 50.0}


 62%|██████▎   | 500/800 [00:38<00:21, 14.20it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 49.7508, 'learning_rate': 5e-05, 'epoch': 62.5}



 62%|██████▎   | 500/800 [00:38<00:21, 14.20it/s]Saving model checkpoint to temp/checkpoint-500
Configuration saved in temp/checkpoint-500/config.json


{'eval_loss': 13.39850902557373, 'eval_accuracy': 0.6294642857142857, 'eval_runtime': 0.0242, 'eval_samples_per_second': 9264.336, 'eval_steps_per_second': 41.359, 'epoch': 62.5}


Model weights saved in temp/checkpoint-500/pytorch_model.bin
 75%|███████▌  | 600/800 [00:46<00:15, 13.19it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 48.8079, 'learning_rate': 3.3333333333333335e-05, 'epoch': 75.0}



 75%|███████▌  | 600/800 [00:46<00:15, 13.19it/s]Saving model checkpoint to temp/checkpoint-600
Configuration saved in temp/checkpoint-600/config.json


{'eval_loss': 12.328020095825195, 'eval_accuracy': 0.6339285714285714, 'eval_runtime': 0.0245, 'eval_samples_per_second': 9150.11, 'eval_steps_per_second': 40.849, 'epoch': 75.0}


Model weights saved in temp/checkpoint-600/pytorch_model.bin
 88%|████████▊ | 700/800 [00:54<00:07, 14.15it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 48.0512, 'learning_rate': 1.6666666666666667e-05, 'epoch': 87.5}



 88%|████████▊ | 700/800 [00:54<00:07, 14.15it/s]Saving model checkpoint to temp/checkpoint-700
Configuration saved in temp/checkpoint-700/config.json


{'eval_loss': 12.009588241577148, 'eval_accuracy': 0.6339285714285714, 'eval_runtime': 0.0263, 'eval_samples_per_second': 8517.898, 'eval_steps_per_second': 38.026, 'epoch': 87.5}


Model weights saved in temp/checkpoint-700/pytorch_model.bin
100%|██████████| 800/800 [01:02<00:00, 13.57it/s]***** Running Evaluation *****
  Num examples = 224
  Batch size = 384


{'loss': 47.5084, 'learning_rate': 0.0, 'epoch': 100.0}



100%|██████████| 800/800 [01:02<00:00, 13.57it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json
Model weights saved in temp/checkpoint-800/pytorch_model.bin


{'eval_loss': 12.150206565856934, 'eval_accuracy': 0.6339285714285714, 'eval_runtime': 0.0249, 'eval_samples_per_second': 9008.938, 'eval_steps_per_second': 40.218, 'epoch': 100.0}




Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-700 (score: 12.009588241577148).
100%|██████████| 800/800 [01:02<00:00, 12.76it/s]

{'train_runtime': 62.7148, 'train_samples_per_second': 2176.519, 'train_steps_per_second': 12.756, 'train_loss': 51.13815979003906, 'epoch': 100.0}





TrainOutput(global_step=800, training_loss=51.13815979003906, metrics={'train_runtime': 62.7148, 'train_samples_per_second': 2176.519, 'train_steps_per_second': 12.756, 'train_loss': 51.13815979003906, 'epoch': 100.0})