# Download MARBERT checkpoint

In [None]:
!wget https://huggingface.co/UBC-NLP/MARBERT/resolve/main/MARBERT_pytorch_verison.tar.gz

In [None]:
# Unpack the downloaded archive (-v lists every extracted file).
# Presumably this yields the MARBERT_pytorch_verison/ directory that the run config
# passes as "pretrained_model_path" — verify after extraction.
!tar -xvf MARBERT_pytorch_verison.tar.gz

In [None]:
!wget https://raw.githubusercontent.com/UBC-NLP/marbert/main/examples/UBC_AJGT_final_shuffled_train.tsv
!wget https://raw.githubusercontent.com/UBC-NLP/marbert/main/examples/UBC_AJGT_final_shuffled_test.tsv

In [10]:
# Create the directory that the run config uses as "data_dir"
# (-p: no error when it already exists).
!mkdir -p AJGT


In [None]:
!pip install GPUtil pytorch_pretrained_bert transformers pytorch-transformers pyarabic textblob tashaphyne

# Fine-tuning code

In [None]:
# (1)load libraries 
import json, sys, regex
import re
import torch
import GPUtil
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from pytorch_pretrained_bert import BertTokenizer, BertConfig, BertAdam, BertForSequenceClassification
from tqdm import tqdm, trange
import pandas as pd
import os
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, classification_report, confusion_matrix
##----------------------------------------------------
from transformers import *
from transformers import XLMRobertaConfig
from transformers import XLMRobertaModel
from transformers import AutoTokenizer, AutoModelWithLMHead
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer, XLMRobertaModel
from tokenizers import Tokenizer, models, pre_tokenizers, decoders, processors
from transformers import AdamW, get_linear_schedule_with_warmup
from transformers import AutoTokenizer, AutoModel
from textblob import TextBlob
from nltk.corpus import stopwords
from tashaphyne.stemming import ArabicLightStemmer
import nltk
nltk.download('stopwords')
nltk.download('punkt')


In [None]:
# Select the compute device once; train() and evaluate() read this module-level name.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("your device ", device)

In [8]:

def create_label2ind_file(file, label_col):
    """Write a label -> index mapping for ``label_col`` of a TSV file as JSON.

    The distinct values of the column are sorted in ascending order and numbered
    from 0. The mapping is written to the path held in the module-level global
    ``label2idx_file`` (set by ``fine_tuning`` before this function is called).

    Args:
        file: path of a tab-separated file with a header row.
        label_col: name of the column holding the labels.
    """
    frame = pd.read_csv(file, sep="\t")
    # Distinct labels, sorted in place so the numbering is stable across runs
    distinct_labels = frame[label_col].unique()
    distinct_labels.sort()
    label_to_index = {label: position for position, label in enumerate(distinct_labels)}
    # Persist the mapping so it can be reloaded later
    with open(label2idx_file, 'w') as handle:
        json.dump(label_to_index, handle)


In [84]:

def data_prepare_BERT(file_path, lab2ind, tokenizer, content_col, label_col, MAX_LEN):
    """Load a TSV data set and convert it into padded BERT input tensors.

    Rows with a missing text or label are dropped. Each text is passed through
    ``data_cleaning``, wrapped in ``[CLS] ... [SEP]``, tokenized, mapped to
    vocabulary ids and padded/truncated to ``MAX_LEN + 2`` positions.

    Args:
        file_path: path of a tab-separated file with a header row.
        lab2ind: mapping from label value to class index.
        tokenizer: object providing ``tokenize`` and ``convert_tokens_to_ids``.
        content_col: name of the text column.
        label_col: name of the label column.
        MAX_LEN: maximum number of content tokens; two extra positions are
            reserved for ``[CLS]`` and ``[SEP]``.

    Returns:
        ``(inputs, labels, masks)`` as torch tensors: token ids, class indices
        and a float attention mask (1.0 for real tokens, 0.0 for padding).

    Raises:
        KeyError: if a label in the file is not a key of ``lab2ind``.
    """
    # Use pandas to load dataset and drop rows with a missing text or label
    df = pd.read_csv(file_path, delimiter='\t', header=0)
    df = df[df[content_col].notnull()]
    df = df[df[label_col].notnull()]
    print("Data size ", df.shape)
    # Clean every sentence and add the BERT boundary markers
    sentences = df[content_col].values
    sentences = ["[CLS] " + data_cleaning(sentence) + " [SEP]" for sentence in sentences]
    print ("The first sentence:")
    print (sentences[0])
    # Map label values to class indices
    labels = [lab2ind[i] for i in df[label_col].values]
    # Convert the text into tokens that correspond to BERT's vocabulary
    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]
    print ("Tokenize the first sentence:")
    print (tokenized_texts[0])
    # Convert the tokens to their index numbers in the BERT vocabulary
    input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
    print ("Index numbers of the first sentence:")
    print (input_ids[0])
    pad_ind = tokenizer.convert_tokens_to_ids(['[PAD]'])[0]
    sep_ind = tokenizer.convert_tokens_to_ids(['[SEP]'])[0]
    total_len = MAX_LEN + 2
    # Remember the true lengths: the mask is derived from them rather than from the
    # id values, so it stays correct whatever id the tokenizer assigns to [PAD].
    seq_lengths = [min(len(ids), total_len) for ids in input_ids]
    was_truncated = [len(ids) > total_len for ids in input_ids]
    # Pad our input sequence to the fixed length with the index of the [PAD] token
    input_ids = pad_sequences(input_ids, maxlen=total_len, dtype="long", truncating="post", padding="post", value=pad_ind)
    # Post-truncation cuts off the trailing [SEP]; put it back on the last position
    for row, truncated in enumerate(was_truncated):
        if truncated:
            input_ids[row, -1] = sep_ind
    print ("Index numbers of the first sentence after padding:\n",input_ids[0])
    # Attention mask: 1.0 for each real token followed by 0.0 for padding
    attention_masks = [[1.0] * length + [0.0] * (total_len - length) for length in seq_lengths]
    # Convert all of our data into torch tensors, the required datatype for our model
    inputs = torch.tensor(input_ids)
    labels = torch.tensor(labels)
    masks = torch.tensor(attention_masks)
    return inputs, labels, masks


In [83]:
# Data-cleaning helpers.
# NLTK's Arabic stop-word list, held as a set; remove_stop_words() tests tokens against it.
stops = set(stopwords.words("arabic"))
stop_word_comp = {"،","آض","آمينَ","آه","آهاً","آي","أ","أب","أجل","أجمع","أخ","أخذ","أصبح","أضحى","أقبل","أقل","أكثر","ألا","أم","أما","أمامك","أمامكَ","أمسى","أمّا","أن","أنا","أنت","أنتم","أنتما","أنتن","أنتِ","أنشأ","أنّى","أو","أوشك","أولئك","أولئكم","أولاء","أولالك","أوّهْ","أي","أيا","أين","أينما","أيّ","أَنَّ","أََيُّ","أُفٍّ","إذ","إذا","إذاً","إذما","إذن","إلى","إليكم","إليكما","إليكنّ","إليكَ","إلَيْكَ","إلّا","إمّا","إن","إنّما","إي","إياك","إياكم","إياكما","إياكن","إيانا","إياه","إياها","إياهم","إياهما","إياهن","إياي","إيهٍ","إِنَّ","ا","ابتدأ","اثر","اجل","احد","اخرى","اخلولق","اذا","اربعة","ارتدّ","استحال","اطار","اعادة","اعلنت","اف","اكثر","اكد","الألاء","الألى","الا","الاخيرة","الان","الاول","الاولى","التى","التي","الثاني","الثانية","الذاتي","الذى","الذي","الذين","السابق","الف","اللائي","اللاتي","اللتان","اللتيا","اللتين","اللذان","اللذين","اللواتي","الماضي","المقبل","الوقت","الى","اليوم","اما","امام","امس","ان","انبرى","انقلب","انه","انها","او","اول","اي","ايار","ايام","ايضا","ب","بات","باسم","بان","بخٍ","برس","بسبب","بسّ","بشكل","بضع","بطآن","بعد","بعض","بك","بكم","بكما","بكن","بل","بلى","بما","بماذا","بمن","بن","بنا","به","بها","بي","بيد","بين","بَسْ","بَلْهَ","بِئْسَ","تانِ","تانِك","تبدّل","تجاه","تحوّل","تلقاء","تلك","تلكم","تلكما","تم","تينك","تَيْنِ","تِه","تِي","ثلاثة","ثم","ثمّ","ثمّة","ثُمَّ","جعل","جلل","جميع","جير","حار","حاشا","حاليا","حاي","حتى","حرى","حسب","حم","حوالى","حول","حيث","حيثما","حين","حيَّ","حَبَّذَا","حَتَّى","حَذارِ","خلا","خلال","دون","دونك","ذا","ذات","ذاك","ذانك","ذانِ","ذلك","ذلكم","ذلكما","ذلكن","ذو","ذوا","ذواتا","ذواتي","ذيت","ذينك","ذَيْنِ","ذِه","ذِي","راح","رجع","رويدك","ريث","رُبَّ","زيارة","سبحان","سرعان","سنة","سنوات","سوف","سوى","سَاءَ","سَاءَمَا","شبه","شخصا","شرع","شَتَّانَ","صار","صباح","صفر","صهٍ","صهْ","ضد","ضمن","طاق","طالما","طفق","طَق","ظلّ","عاد","عام","عاما","عامة","عدا","عدة","عدد","عدم","عسى","عشر","عشرة","علق","على","عليك","عليه","عليها","علًّ","عن","عند","عندما","عوض","عين","عَدَسْ","عَمَّا","
غدا","غير","ـ","ف","فان","فلان","فو","فى","في","فيم","فيما","فيه","فيها","قال","قام","قبل","قد","قطّ","قلما","قوة","كأنّما","كأين","كأيّ","كأيّن","كاد","كان","كانت","كذا","كذلك","كرب","كل","كلا","كلاهما","كلتا","كلم","كليكما","كليهما","كلّما","كلَّا","كم","كما","كي","كيت","كيف","كيفما","كَأَنَّ","كِخ","لئن","لا","لات","لاسيما","لدن","لدى","لعمر","لقاء","لك","لكم","لكما","لكن","لكنَّما","لكي","لكيلا","للامم","لم","لما","لمّا","لن","لنا","له","لها","لو","لوكالة","لولا","لوما","لي","لَسْتَ","لَسْتُ","لَسْتُم","لَسْتُمَا","لَسْتُنَّ","لَسْتِ","لَسْنَ","لَعَلَّ","لَكِنَّ","لَيْتَ","لَيْسَ","لَيْسَا","لَيْسَتَا","لَيْسَتْ","لَيْسُوا","لَِسْنَا","ما","ماانفك","مابرح","مادام","ماذا","مازال","مافتئ","مايو","متى","مثل","مذ","مساء","مع","معاذ","مقابل","مكانكم","مكانكما","مكانكنّ","مكانَك","مليار","مليون","مما","ممن","من","منذ","منها","مه","مهما","مَنْ","مِن","نحن","نحو","نعم","نفس","نفسه","نهاية","نَخْ","نِعِمّا","نِعْمَ","ها","هاؤم","هاكَ","هاهنا","هبّ","هذا","هذه","هكذا","هل","هلمَّ","هلّا","هم","هما","هن","هنا","هناك","هنالك","هو","هي","هيا","هيت","هيّا","هَؤلاء","هَاتانِ","هَاتَيْنِ","هَاتِه","هَاتِي","هَجْ","هَذا","هَذانِ","هَذَيْنِ","هَذِه","هَذِي","هَيْهَاتَ","و","و6","وا","واحد","واضاف","واضافت","واكد","وان","واهاً","واوضح","وراءَك","وفي","وقال","وقالت","وقد","وقف","وكان","وكانت","ولا","ولم","ومن","مَن","وهو","وهي","ويكأنّ","وَيْ","وُشْكَانََ","يكون","يمكن","يوم","ّأيّان"}
# Light stemmer instance; no other code visible in this notebook references it.
ArListem = ArabicLightStemmer()


def data_cleaning(sentence):
    """Run one raw sentence through the four cleaning stages and return the result.

    Stages, in order: strip ASCII-letter words, normalize Arabic characters,
    remove stop words, apply the tweet-specific cleaning.

    NOTE(review): the ASCII-letter strip runs first, so by the time clean_tweet()
    looks for "http...", "RT"/"cc" or Latin @mentions those letters are already
    gone — confirm whether the tweet-specific step was meant to run earlier.
    """
    # Stage 1: delete each run of ASCII letters that ends at a word boundary (together
    # with the whitespace in front of it), then trim trailing whitespace
    without_latin = re.sub(r'\s*[A-Za-z]+\b', '' , sentence).rstrip()
    # Stages 2-4: normalization -> stop-word removal -> tweet-specific cleaning
    return clean_tweet(remove_stop_words(normalize_arabic(without_latin)))

def clean_tweet(text):
    """Remove tweet artefacts from ``text`` and return the cleaned string.

    Each match of the patterns below is replaced by a single space: "#<digits>K<digits>"
    tags, URLs starting with "http", the stand-alone markers "RT" and "cc", and
    @mentions. The result is then passed through clean_hashtag() and clean_emoji().
    """
    # Raw strings keep "\d" / "\S" from being parsed as (invalid) string escapes
    text = re.sub(r'#\d+K\d+', ' ', text)  # years like 2K19
    text = re.sub(r'http\S+\s*', ' ', text)  # remove URLs
    # Word boundaries: only the stand-alone markers are removed, not the letters
    # "cc"/"RT" inside ordinary words (e.g. "soccer")
    text = re.sub(r'\b(?:RT|cc)\b', ' ', text)  # remove RT and cc
    text = re.sub(r'@[^\s]+', ' ', text)  # remove @mentions
    text = clean_hashtag(text)
    text = clean_emoji(text)
    return text

def clean_emoji(text):
    """Return ``text`` with every character in the code-point ranges below deleted.

    NOTE(review): the last range runs from U+24C2 to U+1F251, so it also covers
    everything in between (e.g. CJK ideographs and the Arabic Presentation Forms
    blocks, U+FB50-U+FDFF / U+FE70-U+FEFF) — confirm that this is intended.
    """
    code_point_ranges = (
        u"\U0001F600-\U0001F64F",  # emoticons
        u"\U0001F300-\U0001F5FF",  # symbols & pictographs
        u"\U0001F680-\U0001F6FF",  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF",  # flags (iOS)
        u"\U00002702-\U000027B0",
        u"\U000024C2-\U0001F251",
    )
    # One character class built from all ranges; a run of matches is removed at once
    matcher = re.compile("[" + "".join(code_point_ranges) + "]+", flags=re.UNICODE)
    return matcher.sub(r'', text)

def clean_hashtag(text):
    """Replace every hashtag in ``text`` by the words it is made of.

    The text is split on whitespace; words starting with "#" are expanded through
    extract_hashtag(), all other words are kept unchanged. The pieces are joined
    with single spaces. An empty or whitespace-only input gives "".
    """
    expanded = []
    for word in text.split():
        if is_hashtag(word):
            expanded.extend(extract_hashtag(word))
        else:
            expanded.append(word)
    # Join only after the loop has seen every word (returning inside the loop would
    # keep just the first word and yield None for empty input)
    return " ".join(expanded)

def is_hashtag(word):
    """Return True when ``word`` begins with "#", otherwise False."""
    return word.startswith("#")

def extract_hashtag(text):
    """Collect the words of every hashtag found in ``text``.

    Whitespace-separated tokens that start with "#" have trailing non-word
    characters stripped and are then split by split_hashtag_to_words(); the
    resulting words are returned as one flat list in order of appearance.
    """
    collected = []
    for token in text.split():
        if not token.startswith("#"):
            continue
        # Drop trailing punctuation such as "!" or "," from the tag
        trimmed = re.sub(r"(\W+)$", "", token)
        collected.extend(split_hashtag_to_words(trimmed))
    return collected

def split_hashtag_to_words(tag):
    """Split one hashtag into its component words.

    "#" characters are removed first. A tag containing "_" is split on the
    underscores (empty pieces are dropped). Otherwise the tag is split into
    Capitalized words, digit runs and upper-case runs. When that finds nothing
    (e.g. an Arabic or all-lower-case tag) the whole tag is returned as a single
    word instead of being silently discarded.

    Returns:
        A list of words; empty only when the tag has no content.
    """
    tag = tag.replace('#','')
    if '_' in tag:
        # snake_case style tag; skip empty pieces from leading/trailing/double "_"
        return [piece for piece in tag.split('_') if piece]
    pattern = re.compile(r"[A-Z][a-z]+|\d+|[A-Z]+(?![a-z])")
    words = pattern.findall(tag)
    if words:
        return words
    # Nothing CamelCase-like matched: keep the tag text itself
    return [tag] if tag else []




def normalize_arabic(text):
  """Return ``text`` with Arabic letter variants unified and diacritics removed.

  Steps, in order: trim surrounding whitespace; map the alef variants to bare
  alef, alef maqsura to ya, hamza-on-waw / hamza-on-ya to a stand-alone hamza and
  ta marbuta to ha; delete the diacritics and tatweel listed in ``noise``;
  shorten every run of a repeated character to two; finally pass the result
  through ``pyarabic.araby.strip_tashkeel``.
  """
  # Imported locally: pyarabic is not part of the notebook's top-level import cell
  import pyarabic.araby as araby
  text = text.strip()
  # Letter unification
  text = re.sub("[إأٱآا]", "ا", text)
  text = re.sub("ى", "ي", text)
  text = re.sub("ؤ", "ء", text)
  text = re.sub("ئ", "ء", text)
  text = re.sub("ة", "ه", text)
  # Verbose-mode pattern: whitespace and the "#" remarks inside the literal are
  # ignored by the regex engine, leaving an alternation of the listed marks
  noise = re.compile(""" ّ    | # Tashdid
                             َ    | # Fatha
                             ً    | # Tanwin Fath
                             ُ    | # Damma
                             ٌ    | # Tanwin Damm
                             ِ    | # Kasra
                             ٍ    | # Tanwin Kasr
                             ْ    | # Sukun
                             ـ     # Tatwil/Kashida
                         """, re.VERBOSE)
  text = re.sub(noise, '', text)
  text = re.sub(r'(.)\1+', r"\1\1", text) # Remove elongation: a run of one repeated character is cut down to two
  return araby.strip_tashkeel(text)
  
def remove_stop_words(text):
    """Return ``text`` without stop words and without tokens shorter than two characters.

    The text is tokenized with TextBlob; a token is kept only when it appears in
    neither ``stops`` nor ``stop_word_comp``. Kept tokens are joined with single spaces.

    NOTE(review): data_cleaning() normalizes the text before calling this, while the
    stop-word sets are used as-is — entries containing diacritics or hamza forms
    presumably no longer match; confirm.
    """
    kept = []
    for token in TextBlob(text).words:
        if len(token) < 2:
            continue
        if token in stops or token in stop_word_comp:
            continue
        kept.append(token)
    return " ".join(kept)



In [12]:
# Function to calculate the accuracy of our predictions vs labels
# def flat_accuracy(preds, labels):
#	  pred_flat = np.argmax(preds, axis=1).flatten()
#	  labels_flat = labels.flatten()
#	  return np.sum(pred_flat == labels_flat) / len(labels_flat)
def flat_pred(preds, labels):
    """Turn per-class scores and labels into two flat Python lists.

    Args:
        preds: 2-D array of scores; the arg-max along axis 1 is the predicted class.
        labels: array of true labels (any shape; it is flattened).

    Returns:
        ``(predicted_classes, true_labels)`` as plain lists.
    """
    predicted = np.argmax(preds, axis=1)
    return predicted.flatten().tolist(), labels.flatten().tolist()

In [13]:

def train(model, iterator, optimizer, scheduler, criterion):
    """Run one training epoch and return the mean loss per batch.

    Reads the module-level globals ``device`` and ``max_grad_norm``.

    Args:
        model: called as ``model(input_ids, input_mask, labels=labels)``; the first
            element of its output is the loss.
        iterator: iterable of ``(input_ids, input_mask, labels)`` tensor batches.
        optimizer: optimizer providing ``zero_grad`` and ``step``.
        scheduler: learning-rate scheduler, stepped once per batch.
        criterion: unused; the loss comes from the model output. Kept so existing
            callers keep working.

    Returns:
        Sum of the batch losses divided by ``len(iterator)``.
    """
    model.train()
    epoch_loss = 0
    for batch in iterator:
        # Add batch to the target device
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        input_ids, input_mask, labels = batch
        outputs = model(input_ids, input_mask, labels=labels)
        # With several GPUs the model returns one loss per replica; sum() reduces them
        # and leaves a scalar loss unchanged
        loss = outputs[0].sum()
        # delete used variables to free GPU memory
        del batch, input_ids, input_mask, labels
        optimizer.zero_grad()
        loss.backward()
        epoch_loss += loss.item()
        # Clip BEFORE the optimizer step: clipping afterwards would modify gradients
        # that have already been applied and are reset on the next iteration
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()
    # free GPU memory; `device` is a torch.device, so compare its string form rather
    # than the object itself against 'cuda'
    if str(device).startswith('cuda'):
        torch.cuda.empty_cache()
    return epoch_loss / len(iterator)

In [14]:

def evaluate(model, iterator, criterion):
    """Evaluate ``model`` on every batch of ``iterator`` without gradient tracking.

    Reads the module-level global ``device``.

    Args:
        model: called as ``model(input_ids, input_mask, labels=labels)``; the first
            two elements of its output are the loss and the logits.
        iterator: iterable of ``(input_ids, input_mask, labels)`` tensor batches.
        criterion: unused; the loss comes from the model output. Kept so existing
            callers keep working.

    Returns:
        ``(mean_loss, accuracy, macro_f1, macro_recall, macro_precision)``.
    """
    model.eval()
    epoch_loss = 0
    all_pred = []
    all_label = []
    with torch.no_grad():
        for batch in iterator:
            # Add batch to the target device
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            input_ids, input_mask, labels = batch
            outputs = model(input_ids, input_mask, labels=labels)
            loss, logits = outputs[:2]
            # delete used variables to free GPU memory
            del batch, input_ids, input_mask
            # sum() reduces per-replica losses (multi-GPU) and leaves a scalar unchanged
            epoch_loss += loss.sum().item()
            # index of the highest logit = predicted class of each example
            _, predicted = torch.max(logits.detach().cpu(), 1)
            # store plain Python ints rather than 0-dim tensors for the metric functions
            all_pred.extend(predicted.tolist())
            all_label.extend(labels.cpu().tolist())
    accuracy = accuracy_score(all_label, all_pred)
    f1score = f1_score(all_label, all_pred, average='macro')
    recall = recall_score(all_label, all_pred, average='macro')
    precision = precision_score(all_label, all_pred, average='macro')
    return (epoch_loss / len(iterator)), accuracy, f1score, recall, precision



In [40]:

def fine_tuning(config):
    """Fine-tune a BERT sequence classifier as described by ``config``.

    Steps: build and reload the label -> index map from the training file, tokenize
    the train and dev files, train for the configured number of epochs, and after
    every epoch save a checkpoint and append the validation metrics to a report.

    Args:
        config: dict with the keys ``task_name``, ``data_dir``, ``train_file``,
            ``dev_file``, ``pretrained_model_path``, ``epochs``, ``content_col``,
            ``label_col``, ``lr``, ``max_seq_length``, ``batch_size`` and ``sortby``
            (a column of the report, e.g. "val_acc" or "val_f1").

    Returns:
        DataFrame with every row of the report file, sorted by ``sortby`` in
        descending order (an empty DataFrame when ``epochs`` is 0).

    Side effects:
        Sets the module-level globals ``label2idx_file`` and ``max_grad_norm``
        (read by create_label2ind_file() and train()). Writes the label map, one
        checkpoint directory per epoch and the report files below ``data_dir``.

    NOTE(review): the report file is opened in append mode, so rows from earlier
    runs with the same task_name remain in the returned frame.
    NOTE(review): the training DataLoader iterates in file order (no shuffling) —
    confirm the training file is already shuffled.
    NOTE(review): BertTokenizer / BertForSequenceClassification are imported from
    pytorch_pretrained_bert and then presumably re-bound by the later
    `from transformers import *`; save_pretrained() relies on that — confirm.
    """
    global label2idx_file
    global max_grad_norm
    #---------------------------------------
    print ("[INFO] step (1) load train_test config file")
    task_name = config["task_name"]
    content_col = config["content_col"]
    label_col = config["label_col"]
    train_file = config["data_dir"]+config["train_file"]
    dev_file = config["data_dir"]+config["dev_file"]
    sortby = config["sortby"]
    max_seq_length = int(config["max_seq_length"])
    batch_size = int(config["batch_size"])
    lr_var = float(config["lr"])
    model_path = config['pretrained_model_path']
    # Number of training epochs; cast like the other numeric settings
    num_epochs = int(config['epochs'])
    label2idx_file = config["data_dir"]+config["task_name"]+"_labels-dict.json"
    #-------------------------------------------------------
    print ("[INFO] step (2) convert labels2index")
    create_label2ind_file(train_file, label_col)
    print (label2idx_file)
    #---------------------------------------------------------
    print ("[INFO] step (3) check checkpoit directory and report file")
    ckpt_dir = config["data_dir"]+task_name+"_bert_ckpt/"
    report = ckpt_dir+task_name+"_report.tsv"
    sorted_report = ckpt_dir+task_name+"_report_sorted.tsv"
    # makedirs also creates missing parent directories and tolerates an existing one
    os.makedirs(ckpt_dir, exist_ok=True)
    #-------------------------------------------------------
    print ("[INFO] step (4) load label to number dictionary")
    # Context manager so the file handle is closed right after reading
    with open(label2idx_file) as label_file:
        lab2ind = json.load(label_file)
    print ("[INFO] train_file", train_file)
    print ("[INFO] dev_file", dev_file)
    print ("[INFO] num_epochs", num_epochs)
    print ("[INFO] model_path", model_path)
    print ("max_seq_length", max_seq_length, "batch_size", batch_size)
    #-------------------------------------------------------
    print ("[INFO] step (5) Use defined funtion to extract tokanize data")
    # tokenizer from pre-trained BERT model
    print ("loading BERT setting")
    tokenizer = BertTokenizer.from_pretrained(model_path)
    train_inputs, train_labels, train_masks = data_prepare_BERT(train_file, lab2ind, tokenizer, content_col, label_col, max_seq_length)
    validation_inputs, validation_labels, validation_masks = data_prepare_BERT(dev_file, lab2ind, tokenizer, content_col, label_col, max_seq_length)
    # Load BertForSequenceClassification, the pretrained BERT model with a single linear classification layer on top.
    model = BertForSequenceClassification.from_pretrained(model_path, num_labels=len(lab2ind))
    #--------------------------------------
    print ("[INFO] step (6) Create an iterator of data with torch DataLoader.")
    # The DataLoader hands the tensors to the training loop one batch at a time
    train_data = TensorDataset(train_inputs, train_masks, train_labels)
    train_dataloader = DataLoader(train_data, batch_size=batch_size)
    #---------------------------
    validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
    validation_dataloader = DataLoader(validation_data, batch_size=batch_size)
    #------------------------------------------
    print ("[INFO] step (7) run with parallel GPUs")
    if torch.cuda.is_available():
        if torch.cuda.device_count() == 1:
            print("Run", "with one GPU")
            model = model.to(device)
        else:
            n_gpu = torch.cuda.device_count()
            print("Run", "with", n_gpu, "GPUs with max 4 GPUs")
            device_ids = GPUtil.getAvailable(limit = 4)
            torch.backends.cudnn.benchmark = True
            model = model.to(device)
            model = nn.DataParallel(model, device_ids=device_ids)
    else:
        print("Run", "with CPU")
    #---------------------------------------------------
    print ("[INFO] step (8) set Parameters, schedules, and loss function")
    max_grad_norm = 1.0
    warmup_proportion = 0.1
    num_training_steps = len(train_dataloader) * num_epochs
    # The scheduler counts whole steps, so truncate the fractional product
    num_warmup_steps = int(num_training_steps * warmup_proportion)
    ### In Transformers, optimizer and schedules are instantiated like this:
    # Note: AdamW is a class from the huggingface library
    # the 'W' stands for 'Weight Decay"
    optimizer = AdamW(model.parameters(), lr=lr_var, correct_bias=False)
    # schedules
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps)  # PyTorch scheduler
    # Passed on to train()/evaluate(), which take the loss from the model output instead
    criterion = nn.CrossEntropyLoss()
    #---------------------------------------------------
    print ("[INFO] step (9) start fine_tuning")
    # Defined up front so the function still returns a frame when num_epochs is 0
    report_df = pd.DataFrame()
    for epoch in trange(num_epochs, desc="Epoch"):
        train_loss = train(model, train_dataloader, optimizer, scheduler, criterion)
        val_loss, val_acc, val_f1, val_recall, val_precision = evaluate(model, validation_dataloader, criterion)
        # Create checkpoint at end of each epoch
        epoch_dir = ckpt_dir + 'model_' + str(int(epoch + 1)) + '/'
        os.makedirs(epoch_dir, exist_ok=True)
        # nn.DataParallel wraps the real model in `.module`; only the wrapped model
        # provides save_pretrained()
        model_to_save = model.module if isinstance(model, nn.DataParallel) else model
        model_to_save.save_pretrained(epoch_dir)
        epoch_eval_results = {"epoch_num":int(epoch + 1),"train_loss":train_loss,
                      "val_acc":val_acc, "val_recall":val_recall, "val_precision":val_precision, "val_f1":val_f1,"lr":lr_var }
        with open(report,"a") as fOut:
            fOut.write(json.dumps(epoch_eval_results)+"\n")
            fOut.flush()
        #------------------------------------
        # Re-read the whole report (one JSON object per line) and store a sorted copy
        report_df = pd.read_json(report, orient='records', lines=True)
        report_df = report_df.sort_values(by=[sortby], ascending=False)
        report_df.to_csv(sorted_report,sep="\t",index=False)
    return report_df

# Run fine-tuning (the number of epochs is set by `epochs` in the config below)

In [39]:

# Settings read by fine_tuning(); file names are appended to data_dir.
# NOTE(review): the download cell fetches UBC_AJGT_final_shuffled_{train,test}.tsv into the
# working directory, while this config reads DA_*_labeled.tsv from ./AJGT/ — confirm the data files.
config = {
    "task_name": "AJGT_MARBERT",                         # prefix of the label map, checkpoint directory and report files
    "data_dir": "./AJGT/",                               # data directory
    "train_file": "DA_train_labeled.tsv",                # training file, relative to data_dir
    "dev_file": "DA_dev_labeled.tsv",                    # dev (or test) file, relative to data_dir
    "pretrained_model_path": 'MARBERT_pytorch_verison',  # MARBERT checkpoint path
    "epochs": 1,                                         # number of epochs
    "content_col": "#2_tweet",                           # text column
    "label_col": "#3_country_label",                     # label column
    "lr": 2e-06,                                         # learning rate
    "max_seq_length": 128,                               # maximum sequence length
    "batch_size": 16,                                    # batch size
    "sortby": "val_acc",                                 # sort results by val_acc or val_f1
}


In [42]:
# Run the whole pipeline with the settings above; the returned frame is the sorted report
report_df = fine_tuning(config)

Didn't find file MARBERT_pytorch_verison/added_tokens.json. We won't load it.
Didn't find file MARBERT_pytorch_verison/special_tokens_map.json. We won't load it.
Didn't find file MARBERT_pytorch_verison/tokenizer_config.json. We won't load it.
loading file MARBERT_pytorch_verison/vocab.txt
loading file None
loading file None
loading file None
loading configuration file MARBERT_pytorch_verison/config.json


[INFO] step (1) load train_test config file
[INFO] step (2) convert labels2index
./AJGT/AJGT_MARBERT_labels-dict.json
[INFO] step (3) check checkpoit directory and report file
[INFO] step (4) load label to number dictionary
[INFO] train_file ./AJGT/DA_train_labeled.tsv
[INFO] dev_file ./AJGT/DA_dev_labeled.tsv
[INFO] num_epochs 1
[INFO] model_path MARBERT_pytorch_verison
max_seq_length 128 batch_size 16
[INFO] step (5) Use defined funtion to extract tokanize data
loading BERT setting
Data size  (21000, 4)
The first sentence:
[CLS] حاجة حلوة اكيد [SEP]
Tokenize the first sentence:
['[CLS]', 'حاجة', 'حلوة', 'اكيد', '[SEP]']
Index numbers of the first sentence:
[2, 2827, 4650, 4151, 3]
Index numbers of the first sentence after padding:
 [   2 2827 4650 4151    3    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0   

loading configuration file MARBERT_pytorch_verison/config.json
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_13",
    "14": "LABEL_14",
    "15": "LABEL_15",
    "16": "LABEL_16",
    "17": "LABEL_17",
    "18": "LABEL_18",
    "19": "LABEL_19",
    "20": "LABEL_20"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_11": 11,
    "LABEL_12": 12,
    "LABEL_13": 13,
    "LABEL_14": 14,
    "LABEL_15": 15,
    "LABEL_16": 16,
    "LABEL_17": 17,
    "L

[INFO] step (6) Create an iterator of data with torch DataLoader.
[INFO] step (7) run with parallel GPUs
Run with one GPU




[INFO] step (8) set Parameters, schedules, and loss function
[INFO] step (9) start fine_tuning


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Configuration saved in ./AJGT/AJGT_MARBERT_bert_ckpt/model_1/config.json
Model weights saved in ./AJGT/AJGT_MARBERT_bert_ckpt/model_1/pytorch_model.bin
Epoch: 100%|██████████| 1/1 [09:21<00:00, 561.09s/it]


In [43]:
# Top rows of the report, which fine_tuning() sorts by config["sortby"] in descending order
report_df.head(5)

Unnamed: 0,epoch_num,train_loss,val_acc,val_recall,val_precision,val_f1,lr
1,2,1.747341,0.4958,0.240542,0.209736,0.210788,2e-06
2,1,2.082762,0.4906,0.22454,0.204548,0.198038,2e-06
0,1,2.115478,0.4886,0.220809,0.208834,0.196122,2e-06
