In [None]:
import json
import sys

import accelerate
import evaluate
import numpy as np
import seqeval
import torch
from datasets import load_dataset, Dataset, DatasetDict
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
from transformers import AutoTokenizer
from transformers import BitsAndBytesConfig
from transformers import DataCollatorForTokenClassification
from transformers import TrainingArguments, Trainer

from modeling_llama import LlamaForTokenClassification

# Load the "seqeval" metric once at import time; compute_metrics() calls
# metric.compute(...) on the decoded tag sequences.
metric = evaluate.load("seqeval")


In [None]:
def load_ontonotesv5(data_dir='/home/vikrant/Desktop/NER/DNRTI',
                     split_names=('train_json', 'dev_json', 'test_json')):
    """Load JSON-lines splits from ``data_dir`` into a ``DatasetDict``.

    Despite the historical name, the default directory points at the DNRTI
    data, not OntoNotes. Each split is read from ``<data_dir>/<split>.jsonl``
    with one JSON object per line.

    Args:
        data_dir: Directory containing the ``.jsonl`` files. Defaults to the
            path the notebook was originally written against, so existing
            zero-argument calls behave as before.
        split_names: Names of the splits to load; each name is both the file
            stem and the key in the returned ``DatasetDict``.

    Returns:
        A ``DatasetDict`` with one ``Dataset`` per entry in ``split_names``.

    Raises:
        FileNotFoundError: If a split file does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    splits = {}
    for split_name in split_names:
        path = f'{data_dir}/{split_name}.jsonl'
        # Explicit UTF-8 so the result does not depend on the machine's locale.
        with open(path, 'r', encoding='utf-8') as reader:
            # Blank lines (e.g. a trailing newline at EOF) are skipped instead
            # of being handed to json.loads, which would raise on them.
            records = [json.loads(line) for line in reader if line.strip()]
        splits[split_name] = Dataset.from_list(records)
    return DatasetDict(splits)


# Jupyter starts the kernel with its own argv ("-f <connection-file>"), so the
# positional arguments are only meaningful when this code runs as a script.
# Reading them inside a kernel yields values such as task == "-f".
if "ipykernel" in sys.modules:
    # Notebook run: use defaults that match the data directory and model_id
    # configured elsewhere in this notebook.
    task, model_size = 'dnrti', '7b'
elif len(sys.argv) == 3:
    task, model_size = sys.argv[1], sys.argv[2].lower()
else:
    print('usage python %.py task model_size')
    # Non-zero status so shells and schedulers see the usage error as a failure.
    sys.exit(1)

print(f'handling task {task}')

handling task -f


In [None]:
# --- Model selection -------------------------------------------------------
# Checkpoint name used for both the model and the tokenizer.
model_id = 'NousResearch/Llama-2-7b-hf'
# LoRA rank, passed as `r` to LoraConfig.
lora_r = 12

# --- Optimisation ----------------------------------------------------------
learning_rate = 1e-4
epochs = 4
# Used for both the per-device train and eval batch size.
batch_size = 8

# --- Tokenisation ----------------------------------------------------------
# Sequences are truncated to this many tokens in tokenize_and_align_labels.
max_length = 64


In [None]:
# Read every JSONL split into a DatasetDict and print its summary
# (split names, feature columns, row counts).
ds = load_ontonotesv5()
print(ds)
# The tag -> id mapping used for training is the `label2id` dict defined in the
# following cell.

DatasetDict({
    train_json: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 5260
    })
    dev_json: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 661
    })
    test_json: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 663
    })
})


In [None]:
# Tag name -> integer class id. A tag's position in the list below is its id,
# so the list order must not be changed.
label2id = {
    tag: class_id
    for class_id, tag in enumerate([
        'I-Org',
        'I-SecTeam',
        'O',
        'B-Exp',
        'I-Purp',
        'B-Purp',
        'I-Features',
        'I-Time',
        'B-SecTeam',
        'B-SamFile',
        'B-Area',
        'I-Area',
        'B-HackOrg',
        'B-Way',
        'B-OffAct',
        'B-Org',
        'I-Exp',
        'I-OffAct',
        'B-Features',
        'B-Time',
        'I-SamFile',
        'I-Way',
        'I-HackOrg',
    ])
}

In [None]:
# Inverse mapping: integer class id -> tag name.
id2label = {class_id: tag for tag, class_id in label2id.items()}
# compute_metrics() indexes label_list by class id, so build it by id rather
# than by dict insertion order. If the ids are ever not exactly 0..N-1 this
# raises KeyError instead of silently decoding predictions to the wrong tags.
label_list = [id2label[class_id] for class_id in range(len(id2label))]

In [None]:
# Display the tag -> id mapping as the cell output for a quick visual check.
label2id

{'I-Org': 0,
 'I-SecTeam': 1,
 'O': 2,
 'B-Exp': 3,
 'I-Purp': 4,
 'B-Purp': 5,
 'I-Features': 6,
 'I-Time': 7,
 'B-SecTeam': 8,
 'B-SamFile': 9,
 'B-Area': 10,
 'I-Area': 11,
 'B-HackOrg': 12,
 'B-Way': 13,
 'B-OffAct': 14,
 'B-Org': 15,
 'I-Exp': 16,
 'I-OffAct': 17,
 'B-Features': 18,
 'B-Time': 19,
 'I-SamFile': 20,
 'I-Way': 21,
 'I-HackOrg': 22}

In [None]:
# Compute dtype for the de-quantized 4-bit matmuls. bfloat16 has the same
# exponent range as float32 and is far less prone to overflow than float16,
# so use it whenever the GPU supports it and fall back to float16 otherwise.
compute_dtype = (
    torch.bfloat16
    if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
    else torch.float16
)

# 4-bit NF4 quantization of the base model weights (no double quantization).
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

# Base model with a token-classification head sized to the tag inventory.
model = LlamaForTokenClassification.from_pretrained(
    model_id,
    quantization_config=quant_config,
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
)

# Freeze the base weights and upcast the remaining half-precision parameters
# (norms, embeddings, the freshly initialised classifier) to float32 before
# adapters are attached. Without this the trainable head may be optimised in
# half precision with no loss scaling.
# NOTE(review): the recorded run (training loss 0.0, no validation loss) is
# consistent with that failure mode -- confirm after re-running.
# Gradient checkpointing is left off so memory/speed behaviour is unchanged.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

# LoRA adapters for token classification; rank comes from the config cell.
peft_config = LoraConfig(
    task_type=TaskType.TOKEN_CLS,
    inference_mode=False,
    r=lora_r,
    lora_alpha=32,
    lora_dropout=0.1,
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()





`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForTokenClassification were not initialized from the model checkpoint at NousResearch/Llama-2-7b-hf and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 6,385,687 || all params: 6,613,823,534 || trainable%: 0.09655061050801843


In [None]:
def tokenize_and_align_labels(examples):
    """Tokenize pre-split words and align word-level tags to sub-tokens.

    Uses the module-level ``tokenizer`` and ``max_length``. For every sequence
    in the batch, only the first sub-token of each word receives that word's
    tag; special tokens (word id ``None``) and continuation sub-tokens are
    marked with -100, the value compute_metrics() filters out.

    Args:
        examples: Batch mapping with "tokens" (list of word lists) and
            "ner_tags" (list of per-word tag lists).

    Returns:
        The tokenizer output with an added "labels" entry holding one aligned
        label list per sequence.
    """
    encoded = tokenizer(
        examples["tokens"],
        is_split_into_words=True,
        padding='longest',
        max_length=max_length,
        truncation=True,
    )

    aligned = []
    for row, word_labels in enumerate(examples["ner_tags"]):
        word_ids = encoded.word_ids(batch_index=row)
        # Pair every position with the word id of the position before it
        # (None for the very first position).
        preceding = [None, *word_ids[:-1]]
        aligned.append([
            -100 if current is None or current == before else word_labels[current]
            for before, current in zip(preceding, word_ids)
        ])

    encoded["labels"] = aligned
    return encoded

In [None]:
# Tokenizer belonging to the checkpoint named by model_id.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Collator used by the Trainer to assemble batches for token classification.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Apply tokenization and label alignment to every split, batch by batch.
tokenized_ds = ds.map(
    tokenize_and_align_labels,
    batched=True,
)

Map:   0%|          | 0/5260 [00:00<?, ? examples/s]

Map:   0%|          | 0/661 [00:00<?, ? examples/s]

Map:   0%|          | 0/663 [00:00<?, ? examples/s]

In [None]:
# Display the tokenized splits to confirm the added columns
# (input_ids, attention_mask, labels) and the row counts.
tokenized_ds

DatasetDict({
    train_json: Dataset({
        features: ['tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 5260
    })
    dev_json: Dataset({
        features: ['tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 661
    })
    test_json: Dataset({
        features: ['tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 663
    })
})

In [None]:
def compute_metrics(p):
    """Turn raw model outputs into overall seqeval scores.

    Uses the module-level ``label_list`` (class id -> tag name by position)
    and ``metric`` objects.

    Args:
        p: A ``(predictions, labels)`` pair. ``predictions`` holds per-class
            scores with the class dimension on axis 2; ``labels`` holds gold
            class ids, with -100 marking positions to ignore.

    Returns:
        Dict with "precision", "recall", "f1" and "accuracy", taken from the
        metric's ``overall_*`` entries.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    true_predictions = []
    true_labels = []
    for predicted_row, gold_row in zip(predicted_ids, gold_ids):
        kept_predictions = []
        kept_labels = []
        for predicted_id, gold_id in zip(predicted_row, gold_row):
            # Positions labelled -100 carry no gold tag and are skipped.
            if gold_id == -100:
                continue
            kept_predictions.append(label_list[predicted_id])
            kept_labels.append(label_list[gold_id])
        true_predictions.append(kept_predictions)
        true_labels.append(kept_labels)

    all_metrics = metric.compute(
        predictions=true_predictions,
        references=true_labels,
        zero_division=0,
    )

    # (reported name, key in the metric result)
    reported = (
        ("precision", "overall_precision"),
        ("recall", "overall_recall"),
        ("f1", "overall_f1"),
        ("accuracy", "overall_accuracy"),
    )
    return {name: all_metrics[source] for name, source in reported}

In [None]:
training_args = TrainingArguments(
    # Output location; nothing is uploaded to the Hub.
    output_dir="my_awesome_ds_model",
    push_to_hub=False,
    # Optimisation settings, taken from the configuration cell.
    num_train_epochs=epochs,
    learning_rate=learning_rate,
    weight_decay=0.01,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Evaluate and checkpoint once per epoch, then reload the best checkpoint.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

In [None]:
# Wire the model, data and metric function into the Hugging Face Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds["train_json"],
    # Evaluate on the dev split. training_args sets load_best_model_at_end=True,
    # so the per-epoch evaluation picks the checkpoint that is kept; doing that
    # on the test split would leak test data into model selection. Keep
    # "test_json" for a single final evaluation after training.
    eval_dataset=tokenized_ds["dev_json"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning; the per-epoch table below comes from compute_metrics().
# NOTE(review): in the recorded run the training loss falls to 0.0 by epoch 2,
# no validation loss is reported, and precision/recall/F1 stay at 0 -- this
# looks like numerical divergence rather than learning; confirm the dtype and
# quantization settings before trusting any result from this run.
trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,2.8847,,0.0,0.0,0.0,0.007849
2,0.0,,0.0,0.0,0.0,0.007849


