In [None]:
# Install required packages
!pip install transformers datasets torch seaborn evaluate sklearn sentencepiece accelerate kornia

In [None]:
!git clone https://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@github.com/ftakelait/ArabicNLI.git

In [5]:
# Import necessary modules

# For warning control
import warnings

# For manipulating data
import os
import pandas as pd

# For numerical operations
import numpy as np

# For plotting and visualizing data
import matplotlib.pyplot as plt

# For deep learning and data processing
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from transformers import AutoModel, BertTokenizerFast, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from tqdm import tqdm_notebook
from datasets import load_dataset, load_metric, Dataset
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report, precision_recall_fscore_support
from sklearn.model_selection import train_test_split

# For logging and tracking experiment metrics
import wandb

# For additional data processing and model training
import re
import seaborn as sns
import sentencepiece as spm

# For advanced training functionality
from accelerate import Accelerator

from src.loss_function import *  # assuming you have a loss_function file in the src directory

warnings.filterwarnings('ignore')

In [None]:
# Authenticate with Weights & Biases without storing the API key in the notebook.
# SECURITY: an API key used to be hardcoded on this line; any key that was ever
# saved or committed here must be revoked in the W&B account settings.
# wandb.login() uses the WANDB_API_KEY environment variable (or a previously
# stored login) and otherwise prompts for the key.
wandb.login()

# Initialize wandb run
wandb.init(project="ArabicNLI", entity='ftakelait')

In [6]:
# Candidate pretrained model identifiers
list_of_models = [
    "UBC-NLP/MARBERT",
    "qarib/bert-base-qarib",
    "aubmindlab/bert-base-arabertv02-twitter",
    "aubmindlab/bert-large-arabertv02",
]

# This run uses the first candidate; change the index to switch models
model_name = list_of_models[0]

# Short name: the text after the last "/" of the identifier
model_checkpoint = model_name.rsplit("/", 1)[-1]

# Training hyperparameters
batch_size, learning_rate, epochs = 16, 2e-5, 4

In [7]:
# Load the four evaluation metrics in one pass (accuracy, F1, precision, recall).
# NOTE(review): `load_metric` appears to be deprecated in recent `datasets`
# releases in favour of the `evaluate` package — confirm before upgrading.
acc_metric, f1_metric, precision_metric, recall_metric = [
    load_metric(metric_id)
    for metric_id in ('accuracy', 'f1', 'precision', 'recall')
]

  acc_metric = load_metric('accuracy')


In [8]:
def compute_metrics(eval_pred: tuple) -> dict:
    """
    Score a batch of model outputs against the reference labels.

    The class with the highest score along axis 1 is taken as the prediction.
    Accuracy is computed as-is; F1, precision and recall are macro-averaged.

    Args:
        eval_pred (tuple): Pair of (raw model scores, reference labels).

    Returns:
        dict: Scalar values under the keys "accuracy", "f1", "precision"
        and "recall".
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=1)

    # Keyword arguments shared by every metric, plus the macro-averaged variant.
    plain_kwargs = {"predictions": predicted, "references": gold}
    macro_kwargs = dict(plain_kwargs, average='macro')

    return {
        "accuracy": acc_metric.compute(**plain_kwargs)['accuracy'],
        "f1": f1_metric.compute(**macro_kwargs)['f1'],
        "precision": precision_metric.compute(**macro_kwargs)['precision'],
        "recall": recall_metric.compute(**macro_kwargs)['recall'],
    }

In [9]:
def upload_dataset(train_file: str, valid_file: str, test_file: str) -> 'DatasetDict':
    """
    Load the train, validation and test CSV files into one split container.

    The training file is read with `load_dataset`, which is expected to return a
    dict-like container holding a single "train" split when no split is
    requested. The validation and test files are read with pandas and attached
    to that container under the keys "validation" and "test".

    Args:
        train_file (str): Path to the training data file.
        valid_file (str): Path to the validation data file.
        test_file (str): Path to the testing data file.

    Returns:
        DatasetDict: The container returned by `load_dataset`, with the
        "validation" and "test" splits added next to "train".
    """
    dataset = load_dataset("csv", data_files=train_file)

    # `Dataset` must be the Hugging Face `datasets.Dataset` here (it provides
    # `from_pandas`). The import cell also imports `torch.utils.data.Dataset`
    # under the same name, so the `datasets` import has to come last.
    for split_name, csv_path in (("validation", valid_file), ("test", test_file)):
        dataset[split_name] = Dataset.from_pandas(pd.read_csv(csv_path))

    return dataset

In [None]:
# Maximum sequence length handed to the tokenizer as `max_length`; inputs are
# truncated or padded to this many tokens. The right value depends on the
# model/dataset pair, so only one assignment is active at a time.
MAX_LEN = 90      # MARBERT on ArbTEDS (active setting)

# Values noted for other runs (dataset not recorded for this group — TODO confirm):
# MAX_LEN = 96    # MARBERT
# MAX_LEN = 60    # qarib/bert-base-qarib
# MAX_LEN = 52    # aubmindlab/bert-base-arabertv02-twitter / aubmindlab/bert-large-arabertv02

# Presumably XNLI (spelled "XLNI" / "X_NLI" in the original notes — TODO confirm):
# MAX_LEN = 329   # MARBERT, XNLI
# MAX_LEN = 359   # qarib/bert-base-qarib
# MAX_LEN = 329   # aubmindlab/bert-base-arabertv02-twitter / aubmindlab/bert-large-arabertv02, XNLI

# Second set of values for the same models (TODO confirm which dataset/run these belong to):
# MAX_LEN = 97    # qarib/bert-base-qarib
# MAX_LEN = 88    # aubmindlab/bert-base-arabertv02-twitter / aubmindlab/bert-large-arabertv02, XNLI

def preprocess_function(examples: dict) -> dict:
    """
    Tokenize the 't' / 'h' text pair of one example (or batch of examples).

    Relies on the module-level `tokenizer` and `MAX_LEN`. Every encoded pair is
    truncated and padded to exactly `MAX_LEN` tokens.

    Args:
        examples (dict): Record(s) with the keys 't' and 'h' (presumably the
            premise text and the hypothesis — confirm against the CSV schema).

    Returns:
        dict: The tokenizer output for the ('t', 'h') pair.
    """
    # The body is indented consistently at four spaces: the previous three-space
    # `return` made this definition fail with an IndentationError.
    return tokenizer(
        examples['t'],
        examples['h'],
        truncation=True,
        padding="max_length",
        max_length=MAX_LEN,
    )

# Tokenizer for the selected checkpoint (fast implementation requested)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# Sequence-classification model with a two-label output head
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

# Read the three ArbTEDS CSV splits into one dataset object
dataset = upload_dataset(
    train_file='dataset/ArbTEDS/train_ArbTEDS.csv',
    valid_file='dataset/ArbTEDS/valid_ArbTEDS.csv',
    test_file='dataset/ArbTEDS/test_ArbTEDS.csv',
)

# Apply the tokenization function to the loaded dataset
encoded_dataset = dataset.map(preprocess_function)

In [11]:
# Training configuration: evaluate and checkpoint once per epoch, and reload the
# best checkpoint when training ends
args = TrainingArguments(
    output_dir=f"{model_checkpoint}_checkpoints",
    num_train_epochs=epochs,
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Wire model, data splits, tokenizer and metric function into the trainer
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset['validation'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Run fine-tuning; the returned training summary is shown as the cell output
trainer.train()

Some weights of the model checkpoint at UBC-NLP/MARBERT were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at U

  0%|          | 0/1 [00:00<?, ?it/s]



Map:   0%|          | 0/90 [00:00<?, ? examples/s]

Map:   0%|          | 0/90 [00:00<?, ? examples/s]

In [12]:
# Make predictions on the test dataset
def predict_test_dataset(trainer: 'Trainer', dataset: 'DatasetDict') -> 'PredictionOutput':
    """
    Run the trainer's prediction loop on the "test" split.

    Args:
        trainer (Trainer): The trainer instance; only its `predict` method is used.
        dataset (DatasetDict): The preprocessed splits. It is indexed with
            "test", so it must be the split container, not a single split.

    Returns:
        PredictionOutput: The object returned by `trainer.predict`, unmodified.
        For a Hugging Face `Trainer` this is expected to be a named tuple with
        `predictions`, `label_ids` and `metrics`, not a bare array; use its
        `.predictions` field to get the raw model outputs.
    """
    return trainer.predict(dataset["test"])

# Run inference on the test split and print the object that comes back
predictions = predict_test_dataset(trainer=trainer, dataset=encoded_dataset)
print(predictions)