In [None]:
import json
import os

import torch
from transformers import (
    AutoModelForMaskedLM,
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    pipeline,
)

In [None]:
# Hugging Face organisation that the exported model is pushed under.
ORG_ID = "aieng-lab"

# Root folder holding the fine-tuning runs (read from).
RESULTS_BASE_PATH = "./results/finetuning"

# Root folder for the locally staged export (written to).
TEMP_BASE_PATH = "./results/models"

In [None]:
# --- Task selection ---------------------------------------------------------
# problem_type picks the model head that gets loaded further down.
# Accepted values: None, multi_label_classification, regression,
# token-classification, fill-mask
problem_type = "fill-mask"
task_name = "requirement_completion"
# Forwarded only to the sequence-classification loader.
num_labels = 1

# --- Checkpoint selection ---------------------------------------------------
model_name = "ModernBERT-large"
# When True the model is loaded with attn_implementation="flash_attention_2".
flash_attention = True

In [None]:
# Directory of the fine-tuned checkpoint:
#   <RESULTS_BASE_PATH>/<task_name>/<model_name>/test/best
checkpoint_parts = (task_name, model_name, "test", "best")
model_path = os.path.join(RESULTS_BASE_PATH, *checkpoint_parts)
model_path

In [None]:
# Read the checkpoint's config.json and pull out the identifier stored under
# "_name_or_path"; it is used below to load the tokenizer.
config_path = os.path.join(model_path, "config.json")
with open(config_path) as config_file:
    config = json.load(config_file)

base_model_id = config["_name_or_path"]
base_model_id

In [None]:
# The tokenizer is loaded from the identifier recorded in config.json, while
# the weights are loaded from the fine-tuned checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Keyword arguments shared by every head type.
load_kwargs = {
    "torch_dtype": torch.bfloat16,
    "attn_implementation": "flash_attention_2" if flash_attention else None,
}

# Map the task string to its auto-class; anything not listed here (including
# None) falls back to sequence classification.
head_classes = {
    "token-classification": AutoModelForTokenClassification,
    "fill-mask": AutoModelForMaskedLM,
}
model_cls = head_classes.get(problem_type, AutoModelForSequenceClassification)

if model_cls is AutoModelForSequenceClassification:
    # Only the sequence-classification loader receives these two settings.
    load_kwargs.update(num_labels=num_labels, problem_type=problem_type)

model = model_cls.from_pretrained(model_path, **load_kwargs)

In [None]:
# Translate the notebook's problem_type into a pipeline task name: the two
# values below are passed through unchanged, every other value (including
# None) is served by the "text-classification" pipeline.
passthrough_tasks = ("token-classification", "fill-mask")
problem_type_ = (
    problem_type if problem_type in passthrough_tasks else "text-classification"
)

# Wrap the already-loaded model and tokenizer in an inference pipeline on
# device 0.
pipeline_ = pipeline(problem_type_, model=model, tokenizer=tokenizer, device=0)

In [None]:
# Smoke-test the pipeline on one short sentence.
# The mask placeholder is read from the tokenizer rather than hard-coded,
# because not every checkpoint spells its mask token "[MASK]".
if problem_type == "fill-mask":
    sample_text = f"Hello {tokenizer.mask_token}!"
else:
    sample_text = "Hello world!"

# Keep the call as the cell's final top-level expression so the notebook
# renders the prediction; a call nested inside if/else has its result dropped.
pipeline_(sample_text)
In [None]:
# Repository name for the export: "<model_name>_<task-name>", with the task's
# underscores turned into hyphens.
# The inner string literals use single quotes: reusing the f-string's own
# double quotes inside a replacement field only parses on Python 3.12+.
new_model_id = f"{model_name}_{task_name.replace('_', '-')}"
new_model_id

In [None]:
# Local staging directory for the export.
new_model_path = os.path.join(
    TEMP_BASE_PATH,  # staging root
    new_model_id,  # one sub-folder per exported model
)
new_model_path

In [None]:
# Write the tokenizer first, then the model, into the staging directory.
for artifact in (tokenizer, model):
    artifact.save_pretrained(new_model_path)

In [None]:
# Both artifacts are pushed to the same repository under the organisation.
repo_id = f"{ORG_ID}/{new_model_id}"

model.push_to_hub(repo_id, commit_message="Uploading model")
# Kept last so the tokenizer push result remains the cell's displayed output.
tokenizer.push_to_hub(repo_id, commit_message="Uploading tokenizer")