In [None]:
!pip install evaluate

In [None]:

import csv

import torch
from torch.utils.data import Dataset
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

# Names of the evaluation datasets; each one is read from "<data_path>/<name>.csv".
test_dataset_names = ["ISEAR", "DR", "Dreaddit", "SDCNL", "DepSeverity"]

# Directory that holds the dataset CSV files. Must be filled in before running.
data_path = ""
if not data_path:
    raise Exception("Path for dataset not found")

# Hugging Face identifiers (or local paths) of the tokenizer and model to evaluate.
token_path = "Qwen/Qwen2-0.5B"
model_path = "Qwen/Qwen2-0.5B"

# Maximum number of tokens kept per text when tokenizing.
# NOTE(review): this name is used when the tokenizer is loaded but was never
# defined anywhere in the notebook; 512 is a placeholder -- confirm against the
# length the evaluated model was trained with.
model_max_length = 512

In [None]:
def _get_text_label_from_path(path):
    """Read a dataset CSV file and return its texts and integer label ids.

    The first row is treated as a header and skipped. In every following row
    the first column is taken as the text and the second column as the label
    name; rows with fewer than two columns are ignored. Label names are
    converted to ids through the module-level ``label2id`` mapping, which must
    be defined before this function is called.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``(texts, labels)`` tuple of two equally long lists: the texts and
        their label ids.

    Raises:
        KeyError: If a label found in the file is missing from ``label2id``.
    """
    text_col, label_col = 0, 1
    texts = []
    labels = []
    labels_seen = set()
    # newline="" leaves line-ending handling to the csv module, which is
    # required for line breaks inside quoted fields to be parsed correctly.
    with open(path, "r", encoding="utf-8", newline="") as test_file:
        reader = csv.reader(test_file)
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            if len(row) < 2:
                continue
            label_name = row[label_col]
            texts.append(row[text_col])
            labels.append(label2id[label_name])
            labels_seen.add(label_name)
    print("Finishing process dataset")
    print("There are {} texts and label".format(len(texts)))
    # Sorted so the summary is printed in a stable order between runs.
    print("This dataset contains the following set of labels {}".format(
        {name: label2id[name] for name in sorted(labels_seen)}
    ))
    return texts, labels


In [None]:
class CustomDataset(Dataset):
    """Dataset pairing per-example encodings with integer labels.

    Args:
        labels: Sequence of label ids, one per example. Its length defines the
            length of the dataset.
        encoding: Mapping from feature name to a per-example sequence of values
            (for instance the output of a tokenizer call). May be ``None``, in
            which case items contain only the ``labels`` entry.
    """

    def __init__(self, labels, encoding=None):
        self.encodings = encoding
        self.labels = labels

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors.

        The dict holds one tensor per encoding feature plus a ``labels``
        tensor built from the label at the same index.
        """
        # `encoding` defaults to None; treat that as "no features" instead of
        # failing on `.items()`.
        features = self.encodings if self.encodings is not None else {}
        item = {key: torch.tensor(values[idx]) for key, values in features.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

In [None]:
def compute_metrics(p):
    """Compute weighted F1, precision and recall for one evaluation run.

    Args:
        p: Prediction object exposing ``predictions`` (an array of class
            scores, or a tuple whose first element is that array) and
            ``label_ids`` (the reference labels).

    Returns:
        A single dict merging the result dicts of the F1, precision and recall
        metrics, in that order.
    """
    # Load every metric up front, in the order their results are merged below.
    metrics = [
        evaluate.load(metric_name, trust_remote_code = True)
        for metric_name in ("f1", "precision", "recall")
    ]

    scores = p.predictions
    if isinstance(scores, tuple):
        scores = scores[0]
    # The index of the highest score along the last axis is the predicted class.
    preds = torch.from_numpy(scores).argmax(dim=-1).detach().cpu().numpy()
    labels = p.label_ids
    print("preds", preds)
    print("labels", labels)

    merged = {}
    for metric in metrics:
        merged.update(metric.compute(predictions=preds, references=labels, average = 'weighted'))
    return merged

In [None]:
import os

import evaluate


# Hugging Face access token, read from the HF_TOKEN environment variable rather
# than being hard-coded in the notebook. An unset or empty value becomes None
# so that no blank credential is passed on.
token = os.environ.get("HF_TOKEN") or None

# Load the tokenizer; texts longer than `model_max_length` are truncated from the left.
tokenizer = AutoTokenizer.from_pretrained(token_path, token = token, model_max_length = model_max_length, truncation_side = "left")

model_config = AutoConfig.from_pretrained(model_path, token = token)
# `num_labels` must be an integer; the label mapping itself is a dict.
new_model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels = model_config.num_labels,
    token = token,
)

new_model.eval()

# `_get_text_label_from_path` converts label names through a module-level
# `label2id`. The ids have to match the model's output indices for the metrics
# to be meaningful, so the mapping is taken from the model configuration.
# NOTE(review): no other definition of `label2id` is visible in the notebook --
# confirm that the checkpoint's config carries the dataset label names.
label2id = model_config.label2id

# The arguments and the Trainer do not depend on the dataset, so build them once.
# NOTE(review): the encodings are not padded and no data collator is supplied,
# so the batch size is presumably kept at 1 on purpose -- confirm before raising it.
_training_args = TrainingArguments(
    output_dir='./results',
    per_device_eval_batch_size=1,
    logging_dir='./logs',
    eval_accumulation_steps=500
)

_evaluater = Trainer(
    model=new_model,
    args=_training_args,
    compute_metrics=compute_metrics,
)

# Metrics of every dataset, keyed by dataset name, kept for later inspection.
results = {}
for test_dataset_name in test_dataset_names:
    _test_texts, _test_labels = _get_text_label_from_path(os.path.join(data_path, f"{test_dataset_name}.csv"))
    _test_encodings = tokenizer(_test_texts , truncation = True)
    _test_dataset = CustomDataset(_test_labels, _test_encodings)
    print("input_example", _test_dataset[0])

    result = _evaluater.evaluate(_test_dataset)
    results[test_dataset_name] = result
    print(f"for dataset {test_dataset_name} the evaluate result is {result}")