<a href="https://colab.research.google.com/github/avanhooser/biobert-ner-sagemaker/blob/main/run0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
# DMRsLOg6FfRU

%run /content/biobert-ner-sagemaker/training/train_ner.py \
  --model dmis-lab/biobert-v1.1 \
  --train /content/biobert-ner-sagemaker/training/data/sample.conll \
  --valid /content/biobert-ner-sagemaker/training/data/sample.conll \
  --epochs 1 \
  --use_lora

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using `include_inputs_for_metrics` is deprecated and will be removed in version 5 of 🤗 Transformers. Please use `include_for_metrics` list argument instead.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,1.36438,0.222222,0.5,0.307692,0.222222


  _warn_prf(average, modifier, msg_start, len(result))


Saved model to training/output


In [36]:
#cd82bd8d

file_content = """
# BioBERT NER fine-tuning (CoNLL-style) with optional PEFT/LoRA adapters.
# Usage (CPU okay for tiny samples; use GPU on Colab/Studio Lab for real runs):
#   python training/train_ner.py --model dmis-lab/biobert-v1.1 --train data/train.conll --valid data/dev.conll --use_lora
import os, argparse, numpy as np
from typing import List, Dict, Tuple
from datasets import Dataset
from transformers import (AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification,
                          TrainingArguments, Trainer)
import evaluate

try:
    from peft import LoraConfig, get_peft_model
    PEFT_AVAILABLE = True
except Exception:
    PEFT_AVAILABLE = False

def read_conll(path: str) -> Tuple[List[List[str]], List[List[str]]]:
    tokens, tags, cur_t, cur_y = [], [], [], []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                if cur_t:
                    tokens.append(cur_t); tags.append(cur_y)
                    cur_t, cur_y = [], []
                continue
            # token [tab or space] tag
            parts = line.split()
            if len(parts) >= 2:
                cur_t.append(parts[0])
                cur_y.append(parts[-1])
    if cur_t:
        tokens.append(cur_t); tags.append(cur_y)
    return tokens, tags

def build_dataset(tokens: List[List[str]], tags: List[List[str]]):
    return Dataset.from_dict({"tokens": tokens, "ner_tags": tags})

def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    prev_word_id = None
    for word_id in word_ids:
        if word_id is None:
            new_labels.append(-100)
        elif word_id != prev_word_id:
            new_labels.append(label2id[labels[word_id]])
        else:
            # Inside subword -> assign -100 or I-*; we keep -100 for simplicity
            new_labels.append(-100)
        prev_word_id = word_id
    return new_labels

def compute_metrics(p):
    preds, refs = p
    preds = np.argmax(preds, axis=-1)
    true_preds, true_labels = [], []
    for pred, label, attention in zip(preds, p.label_ids, p.inputs["attention_mask"]):
        # align
        pred_tags, label_tags = [], []
        for p_i, l_i in zip(pred, label):
            if l_i != -100:
                pred_tags.append(id2label[p_i])
                label_tags.append(id2label[l_i])
        true_preds.append(pred_tags); true_labels.append(label_tags)
    metric = evaluate.load("seqeval")
    results = metric.compute(predictions=true_preds, references=true_labels)
    return {"precision": results["overall_precision"],
            "recall": results["overall_recall"],
            "f1": results["overall_f1"],
            "accuracy": results["overall_accuracy"]}

if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="dmis-lab/biobert-v1.1")
    ap.add_argument("--train", default="training/data/sample.conll")
    ap.add_argument("--valid", default="training/data/sample.conll")
    ap.add_argument("--out", default="training/output")
    ap.add_argument("--epochs", type=int, default=1)
    ap.add_argument("--batch", type=int, default=8)
    ap.add_argument("--use_lora", action="store_true")
    args = ap.parse_args()

    train_tokens, train_tags = read_conll(args.train)
    valid_tokens, valid_tags = read_conll(args.valid)

    # derive labels from training set
    unique_labels = sorted({t for seq in train_tags for t in seq})
    global label2id, id2label
    label2id = {l:i for i,l in enumerate(unique_labels)}
    id2label = {i:l for l,i in label2id.items()}

    tokenizer = AutoTokenizer.from_pretrained(args.model, use_fast=True)
    def tok(examples):
        tokenized = tokenizer(examples["tokens"], is_split_into_words=True, truncation=True, padding=False)
        labels = []
        for i, word_ids in enumerate([tokenized.word_ids(k) for k in range(len(examples["tokens"]))]):
            labels.append(align_labels_with_tokens(examples["ner_tags"][i], word_ids))
        tokenized["labels"] = labels
        return tokenized

    ds_train = build_dataset(train_tokens, train_tags).map(tok, batched=True)
    ds_valid = build_dataset(valid_tokens, valid_tags).map(tok, batched=True)

    model = AutoModelForTokenClassification.from_pretrained(
        args.model, num_labels=len(unique_labels), id2label=id2label, label2id=label2id
    )

    if args.use_lora:
        if not PEFT_AVAILABLE:
            raise RuntimeError("peft not installed but --use_lora was provided.")
        lora = LoraConfig(r=8, lora_alpha=16, target_modules=["query","value","key","dense"], lora_dropout=0.05, bias="none")
        model = get_peft_model(model, lora)

    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
    os.makedirs(args.out, exist_ok=True)
    training_args = TrainingArguments(
        output_dir=args.out,
        learning_rate=2e-5,
        per_device_train_batch_size=args.batch,
        per_device_eval_batch_size=args.batch,
        num_train_epochs=args.epochs,
        weight_decay=0.01,
        eval_strategy="epoch", # Changed from evaluation_strategy
        save_strategy="epoch",
        logging_steps=50,
        report_to=[],
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds_train,
        eval_dataset=ds_valid,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    trainer.save_model(args.out)
    tokenizer.save_pretrained(args.out)
    print("Saved model to", args.out)
"""

with open('/content/biobert-ner-sagemaker/training/train_ner.py', 'w') as f:
    f.write(file_content)

print("Modified train_ner.py saved.")

Modified train_ner.py saved.


In [37]:
#d21b8402

# Read the file content to confirm the changes are present
with open('/content/biobert-ner-sagemaker/training/train_ner.py', 'r') as f:
    current_file_content = f.read()

# Print confirmation (you can add a check here to see if the specific fix is in the content)
print("Content of train_ner.py before running:")
print(current_file_content)
print("-" * 30)

# Now run the script using %run
%run /content/biobert-ner-sagemaker/training/train_ner.py \
  --model dmis-lab/biobert-v1.1 \
  --train /content/biobert-ner-sagemaker/training/data/sample.conll \
  --valid /content/biobert-ner-sagemaker/training/data/sample.conll \
  --epochs 1 \
  --use_lora

Content of train_ner.py before running:

# BioBERT NER fine-tuning (CoNLL-style) with optional PEFT/LoRA adapters.
# Usage (CPU okay for tiny samples; use GPU on Colab/Studio Lab for real runs):
#   python training/train_ner.py --model dmis-lab/biobert-v1.1 --train data/train.conll --valid data/dev.conll --use_lora
import os, argparse, numpy as np
from typing import List, Dict, Tuple
from datasets import Dataset
from transformers import (AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification,
                          TrainingArguments, Trainer)
import evaluate

try:
    from peft import LoraConfig, get_peft_model
    PEFT_AVAILABLE = True
except Exception:
    PEFT_AVAILABLE = False

def read_conll(path: str) -> Tuple[List[List[str]], List[List[str]]]:
    tokens, tags, cur_t, cur_y = [], [], [], []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                if cur_t:
     

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss


TypeError: 'NoneType' object is not subscriptable

In [40]:
# BioBERT NER fine-tuning (CoNLL-style) with optional PEFT/LoRA adapters.
# Usage (CPU okay for tiny samples; use GPU on Colab/Studio Lab for real runs):
#   python training/train_ner.py --model dmis-lab/biobert-v1.1 --train data/train.conll --valid data/dev.conll --use_lora
import os, argparse, numpy as np
from typing import List, Dict, Tuple
from datasets import Dataset
from transformers import (AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification,
                          TrainingArguments, Trainer)
import evaluate

try:
    from peft import LoraConfig, get_peft_model
    PEFT_AVAILABLE = True
except Exception:
    PEFT_AVAILABLE = False

def read_conll(path: str) -> Tuple[List[List[str]], List[List[str]]]:
    tokens, tags, cur_t, cur_y = [], [], [], []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                if cur_t:
                    tokens.append(cur_t); tags.append(cur_y)
                    cur_t, cur_y = [], []
                continue
            # token [tab or space] tag
            parts = line.split()
            if len(parts) >= 2:
                cur_t.append(parts[0])
                cur_y.append(parts[-1])
    if cur_t:
        tokens.append(cur_t); tags.append(cur_y)
    return tokens, tags

def build_dataset(tokens: List[List[str]], tags: List[List[str]]):
    return Dataset.from_dict({"tokens": tokens, "ner_tags": tags})

def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    prev_word_id = None
    for word_id in word_ids:
        if word_id is None:
            new_labels.append(-100)
        elif word_id != prev_word_id:
            new_labels.append(label2id[labels[word_id]])
        else:
            # Inside subword -> assign -100 or I-*; we keep -100 for simplicity
            new_labels.append(-100)
        prev_word_id = word_id
    return new_labels

def compute_metrics(p):
    # Correctly unpack predictions and labels from EvalPrediction object
    preds, refs = p.predictions, p.label_ids
    preds = np.argmax(preds, axis=-1)
    true_preds, true_labels = [], []
    # Modify to iterate only over preds and labels, relying on -100 for alignment
    for pred, label in zip(preds, p.label_ids):
        # align
        pred_tags, label_tags = [], []
        for p_i, l_i in zip(pred, label):
            if l_i != -100: # Use -100 label to filter tokens
                pred_tags.append(id2label[p_i])
                label_tags.append(id2label[l_i])
        true_preds.append(pred_tags); true_labels.append(label_tags)

    metric = evaluate.load("seqeval")
    results = metric.compute(predictions=true_preds, references=true_labels)
    return {"precision": results["overall_precision"],
            "recall": results["overall_recall"],
            "f1": results["overall_f1"],
            "accuracy": results["overall_accuracy"]}


if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="dmis-lab/biobert-v1.1")
    ap.add_argument("--train", default="training/data/sample.conll")
    ap.add_argument("--valid", default="training/data/sample.conll")
    ap.add_argument("--out", default="training/output")
    ap.add_argument("--epochs", type=int, default=1)
    ap.add_argument("--batch", type=int, default=8)
    ap.add_argument("--use_lora", action="store_true")
    args = ap.parse_args()

    train_tokens, train_tags = read_conll(args.train)
    valid_tokens, valid_tags = read_conll(args.valid)

    # derive labels from training set
    unique_labels = sorted({t for seq in train_tags for t in seq})
    global label2id, id2label
    label2id = {l:i for i,l in enumerate(unique_labels)}
    id2label = {i:l for l,i in label2id.items()}

    tokenizer = AutoTokenizer.from_pretrained(args.model, use_fast=True)
    def tok(examples):
        tokenized = tokenizer(examples["tokens"], is_split_into_words=True, truncation=True, padding=False)
        labels = []
        for i, word_ids in enumerate([tokenized.word_ids(k) for k in range(len(examples["tokens"]))]):
            labels.append(align_labels_with_tokens(examples["ner_tags"][i], word_ids))
        tokenized["labels"] = labels
        return tokenized

    ds_train = build_dataset(train_tokens, train_tags).map(tok, batched=True)
    ds_valid = build_dataset(valid_tokens, valid_tags).map(tok, batched=True)

    model = AutoModelForTokenClassification.from_pretrained(
        args.model, num_labels=len(unique_labels), id2label=id2label, label2id=label2id
    )

    if args.use_lora:
        if not PEFT_AVAILABLE:
            raise RuntimeError("peft not installed but --use_lora was provided.")
        lora = LoraConfig(r=8, lora_alpha=16, target_modules=["query","value","key","dense"], lora_dropout=0.05, bias="none")
        model = get_peft_model(model, lora)

    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
    os.makedirs(args.out, exist_ok=True)
    training_args = TrainingArguments(
        output_dir=args.out,
        learning_rate=2e-5,
        per_device_train_batch_size=args.batch,
        per_device_eval_batch_size=args.batch,
        num_train_epochs=args.epochs,
        weight_decay=0.01,
        eval_strategy="epoch", # Changed from evaluation_strategy
        save_strategy="epoch",
        logging_steps=50,
        report_to=[],
        include_inputs_for_metrics=True, # Added to include attention_mask in compute_metrics
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds_train,
        eval_dataset=ds_valid,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    trainer.save_model(args.out)
    tokenizer.save_pretrained(args.out)
    print("Saved model to", args.out)

usage: colab_kernel_launcher.py [-h] [--model MODEL] [--train TRAIN]
                                [--valid VALID] [--out OUT] [--epochs EPOCHS]
                                [--batch BATCH] [--use_lora]
colab_kernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-14a04b4a-08a0-4df4-9e2c-69dde287ffb0.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [42]:
#b8b9a660

file_content = """
# BioBERT NER fine-tuning (CoNLL-style) with optional PEFT/LoRA adapters.
# Usage (CPU okay for tiny samples; use GPU on Colab/Studio Lab for real runs):
#   python training/train_ner.py --model dmis-lab/biobert-v1.1 --train data/train.conll --valid data/dev.conll --use_lora
import os, argparse, numpy as np
from typing import List, Dict, Tuple
from datasets import Dataset
from transformers import (AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification,
                          TrainingArguments, Trainer)
import evaluate

try:
    from peft import LoraConfig, get_peft_model
    PEFT_AVAILABLE = True
except Exception:
    PEFT_AVAILABLE = False

def read_conll(path: str) -> Tuple[List[List[str]], List[List[str]]]:
    tokens, tags, cur_t, cur_y = [], [], [], []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                if cur_t:
                    tokens.append(cur_t); tags.append(cur_y)
                    cur_t, cur_y = [], []
                continue
            # token [tab or space] tag
            parts = line.split()
            if len(parts) >= 2:
                cur_t.append(parts[0])
                cur_y.append(parts[-1])
    if cur_t:
        tokens.append(cur_t); tags.append(cur_y)
    return tokens, tags

def build_dataset(tokens: List[List[str]], tags: List[List[str]]):
    return Dataset.from_dict({"tokens": tokens, "ner_tags": tags})

def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    prev_word_id = None
    for word_id in word_ids:
        if word_id is None:
            new_labels.append(-100)
        elif word_id != prev_word_id:
            new_labels.append(label2id[labels[word_id]])
        else:
            # Inside subword -> assign -100 or I-*; we keep -100 for simplicity
            new_labels.append(-100)
        prev_word_id = word_id
    return new_labels

def compute_metrics(p):
    preds, refs = p
    preds = np.argmax(preds, axis=-1)
    true_preds, true_labels = [], []
    # Modify to iterate only over preds and labels, relying on -100 for alignment
    for pred, label in zip(preds, p.label_ids):
        # align
        pred_tags, label_tags = [], []
        for p_i, l_i in zip(pred, label):
            if l_i != -100: # Use -100 label to filter tokens
                pred_tags.append(id2label[p_i])
                label_tags.append(id2label[l_i])
        true_preds.append(pred_tags); true_labels.append(label_tags)

    metric = evaluate.load("seqeval")
    results = metric.compute(predictions=true_preds, references=true_labels)
    return {"precision": results["overall_precision"],
            "recall": results["overall_recall"],
            "f1": results["overall_f1"],
            "accuracy": results["overall_accuracy"]}


if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="dmis-lab/biobert-v1.1")
    ap.add_argument("--train", default="training/data/sample.conll")
    ap.add_argument("--valid", default="training/data/sample.conll")
    ap.add_argument("--out", default="training/output")
    ap.add_argument("--epochs", type=int, default=1)
    ap.add_argument("--batch", type=int, default=8)
    ap.add_argument("--use_lora", action="store_true")
    args = ap.parse_args()

    train_tokens, train_tags = read_conll(args.train)
    valid_tokens, valid_tags = read_conll(args.valid)

    # derive labels from training set
    unique_labels = sorted({t for seq in train_tags for t in seq})
    global label2id, id2label
    label2id = {l:i for i,l in enumerate(unique_labels)}
    id2label = {i:l for l,i in label2id.items()}

    tokenizer = AutoTokenizer.from_pretrained(args.model, use_fast=True)
    def tok(examples):
        tokenized = tokenizer(examples["tokens"], is_split_into_words=True, truncation=True, padding=False)
        labels = []
        for i, word_ids in enumerate([tokenized.word_ids(k) for k in range(len(examples["tokens"]))]):
            labels.append(align_labels_with_tokens(examples["ner_tags"][i], word_ids))
        tokenized["labels"] = labels
        return tokenized

    ds_train = build_dataset(train_tokens, train_tags).map(tok, batched=True)
    ds_valid = build_dataset(valid_tokens, valid_tags).map(tok, batched=True)

    model = AutoModelForTokenClassification.from_pretrained(
        args.model, num_labels=len(unique_labels), id2label=id2label, label2id=label2id
    )

    if args.use_lora:
        if not PEFT_AVAILABLE:
            raise RuntimeError("peft not installed but --use_lora was provided.")
        lora = LoraConfig(r=8, lora_alpha=16, target_modules=["query","value","key","dense"], lora_dropout=0.05, bias="none")
        model = get_peft_model(model, lora)

    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
    os.makedirs(args.out, exist_ok=True)
    training_args = TrainingArguments(
        output_dir=args.out,
        learning_rate=2e-5,
        per_device_train_batch_size=args.batch,
        per_device_eval_batch_size=args.batch,
        num_train_epochs=args.epochs,
        weight_decay=0.01,
        eval_strategy="epoch", # Changed from evaluation_strategy
        save_strategy="epoch",
        logging_steps=50,
        report_to=[],
        include_inputs_for_metrics=True, # Added to include attention_mask in compute_metrics
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds_train,
        eval_dataset=ds_valid,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    trainer.save_model(args.out)
    tokenizer.save_pretrained(args.out)
    print("Saved model to", args.out)
"""

with open('/content/biobert-ner-sagemaker/training/train_ner.py', 'w') as f:
    f.write(file_content)

print("Modified train_ner.py saved.")

Modified train_ner.py saved.


In [44]:
#875d6a61

file_content = """
# BioBERT NER fine-tuning (CoNLL-style) with optional PEFT/LoRA adapters.
# Usage (CPU okay for tiny samples; use GPU on Colab/Studio Lab for real runs):
#   python training/train_ner.py --model dmis-lab/biobert-v1.1 --train data/train.conll --valid data/dev.conll --use_lora
import os, argparse, numpy as np
from typing import List, Dict, Tuple
from datasets import Dataset
from transformers import (AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification,
                          TrainingArguments, Trainer)
import evaluate

try:
    from peft import LoraConfig, get_peft_model
    PEFT_AVAILABLE = True
except Exception:
    PEFT_AVAILABLE = False

def read_conll(path: str) -> Tuple[List[List[str]], List[List[str]]]:
    tokens, tags, cur_t, cur_y = [], [], [], []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                if cur_t:
                    tokens.append(cur_t); tags.append(cur_y)
                    cur_t, cur_y = [], []
                continue
            # token [tab or space] tag
            parts = line.split()
            if len(parts) >= 2:
                cur_t.append(parts[0])
                cur_y.append(parts[-1])
    if cur_t:
        tokens.append(cur_t); tags.append(cur_y)
    return tokens, tags

def build_dataset(tokens: List[List[str]], tags: List[List[str]]):
    return Dataset.from_dict({"tokens": tokens, "ner_tags": tags})

def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    prev_word_id = None
    for word_id in word_ids:
        if word_id is None:
            new_labels.append(-100)
        elif word_id != prev_word_id:
            new_labels.append(label2id[labels[word_id]])
        else:
            # Inside subword -> assign -100 or I-*; we keep -100 for simplicity
            new_labels.append(-100)
        prev_word_id = word_id
    return new_labels

def compute_metrics(p):
    # Correctly unpack predictions and labels from EvalPrediction object
    preds, refs = p.predictions, p.label_ids
    preds = np.argmax(preds, axis=-1)
    true_preds, true_labels = [], []
    # Modify to iterate only over preds and labels, relying on -100 for alignment
    for pred, label in zip(preds, p.label_ids):
        # align
        pred_tags, label_tags = [], []
        for p_i, l_i in zip(pred, label):
            if l_i != -100: # Use -100 label to filter tokens
                pred_tags.append(id2label[p_i])
                label_tags.append(id2label[l_i])
        true_preds.append(pred_tags); true_labels.append(label_tags)

    metric = evaluate.load("seqeval")
    results = metric.compute(predictions=true_preds, references=true_labels)
    return {"precision": results["overall_precision"],
            "recall": results["overall_recall"],
            "f1": results["overall_f1"],
            "accuracy": results["overall_accuracy"]}


if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="dmis-lab/biobert-v1.1")
    ap.add_argument("--train", default="training/data/sample.conll")
    ap.add_argument("--valid", default="training/data/sample.conll")
    ap.add_argument("--out", default="training/output")
    ap.add_argument("--epochs", type=int, default=1)
    ap.add_argument("--batch", type=int, default=8)
    ap.add_argument("--use_lora", action="store_true")
    args = ap.parse_args()

    train_tokens, train_tags = read_conll(args.train)
    valid_tokens, valid_tags = read_conll(args.valid)

    # derive labels from training set
    unique_labels = sorted({t for seq in train_tags for t in seq})
    global label2id, id2label
    label2id = {l:i for i,l in enumerate(unique_labels)}
    id2label = {i:l for l,i in label2id.items()}

    tokenizer = AutoTokenizer.from_pretrained(args.model, use_fast=True)
    def tok(examples):
        tokenized = tokenizer(examples["tokens"], is_split_into_words=True, truncation=True, padding=False)
        labels = []
        for i, word_ids in enumerate([tokenized.word_ids(k) for k in range(len(examples["tokens"]))]):
            labels.append(align_labels_with_tokens(examples["ner_tags"][i], word_ids))
        tokenized["labels"] = labels
        return tokenized

    ds_train = build_dataset(train_tokens, train_tags).map(tok, batched=True)
    ds_valid = build_dataset(valid_tokens, valid_tags).map(tok, batched=True)

    model = AutoModelForTokenClassification.from_pretrained(
        args.model, num_labels=len(unique_labels), id2label=id2label, label2id=label2id
    )

    if args.use_lora:
        if not PEFT_AVAILABLE:
            raise RuntimeError("peft not installed but --use_lora was provided.")
        lora = LoraConfig(r=8, lora_alpha=16, target_modules=["query","value","key","dense"], lora_dropout=0.05, bias="none")
        model = get_peft_model(model, lora)

    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
    os.makedirs(args.out, exist_ok=True)
    training_args = TrainingArguments(
        output_dir=args.out,
        learning_rate=2e-5,
        per_device_train_batch_size=args.batch,
        per_device_eval_batch_size=args.batch,
        num_train_epochs=args.epochs,
        weight_decay=0.01,
        eval_strategy="epoch", # Changed from evaluation_strategy
        save_strategy="epoch",
        logging_steps=50,
        report_to=[],
        include_inputs_for_metrics=True, # Added to include attention_mask in compute_metrics
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds_train,
        eval_dataset=ds_valid,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    trainer.save_model(args.out)
    tokenizer.save_pretrained(args.out)
    print("Saved model to", args.out)
"""

with open('/content/biobert-ner-sagemaker/training/train_ner.py', 'w') as f:
    f.write(file_content)

print("Modified train_ner.py saved.")

Modified train_ner.py saved.


In [46]:
!pip install seqeval

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16162 sha256=b32c7e0b1d927cb35007dea938a217a4d52fb689474a1219fe7c21e04187c122
  Stored in directory: /root/.cache/pip/wheels/5f/b8/73/0b2c1a76b701a677653dd79ece07cfabd7457989dbfbdcd8d7
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [9]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.5-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.5


In [49]:
!ls

biobert-ner-sagemaker  sample_data  training
