In [None]:
import json
import os
from tqdm.notebook import tqdm

from huggingface_hub import (
    create_repo,
    Repository
)
import pandas as pd
import torch
from transformers import (
    AutoModelForMaskedLM,
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    pipeline,
)

In [None]:
# Hugging Face Hub namespace; repositories are created as "<ORG_ID>/<new_model_id>".
ORG_ID = "aieng-lab"
# Root of the fine-tuning outputs; checkpoints are read from
# <RESULTS_BASE_PATH>/<task_name>/<model_name>/test/best.
RESULTS_BASE_PATH = "./results/finetuning"
# Root of the local working copies of the Hub repositories
# (<TEMP_BASE_PATH>/<task_name>/<new_model_id>) that get pushed.
TEMP_BASE_PATH = "./results/models"

In [None]:
# Substrings matched against the base model ID: when any of them occurs in the ID,
# the model is loaded with attn_implementation="flash_attention_2".
FLASH_ATTENTION = ["ModernBERT", "Llama-3.2", "CodeLlama", "starcoder2"]

# TODO: Make something similar for licensed models

In [None]:
# Model catalogue; the "Model ID" and "Model name" columns are used further down
# to turn a base model ID into the display name shown in the model card title.
models_df = pd.read_csv("../assets/models.csv")

In [None]:
# Selects the AutoModel class in load_model() and the pipeline task used for the smoke test.
problem_type = "fill-mask"  # None, multi_label_classification, regression, token-classification, fill-mask
# Task directory under RESULTS_BASE_PATH; also becomes the suffix of the published model ID.
task_name = "requirement_completion"
# Only forwarded to AutoModelForSequenceClassification (i.e. not used for
# token-classification or fill-mask).
num_labels = 1

# Candidate models; entries without a checkpoint on disk for this task are skipped
# by the push loop ("Model not found!").
model_names = ["bert-base-cased", "bert-large-cased", "roberta-base", "roberta-large", "ModernBERT-base", "ModernBERT-large",
               "gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl", "Llama-3.2-1B", "Llama-3.2-3B",
               "t5-small", "t5-base", "t5-large", "t5-3b",
               "codebert-base",
               "CodeLlama-7b-hf", "starcoder2-3b", "starcoder2-7b",
               "codet5p-220m", "codet5p-770m"
               ]

In [None]:
# README.md template for the published model card. It is rendered with str.format()
# using the placeholders {base_model_id}, {problem_type} and {base_model_name};
# the literal braces of the BibTeX entry are doubled ({{ }}) so they survive formatting.
model_card = """---
library_name: transformers
license: mit
language:
- en
metrics:
- accuracy
- perplexity
base_model:
- {base_model_id}
pipeline_tag: {problem_type}
---

# {base_model_name} for filling user actions in requirement specifications

This model fills masks ([MASK]) in requirements specifications. During the fine-tuning process, POS verbs were used as a proxy of user actions.

- **Developed by:** Fabian C. Peña, Steffen Herbold
- **Finetuned from:** [{base_model_id}](https://huggingface.co/{base_model_id})
- **Paper:** [Evaluating Large Language Models on Non-Code Software Engineering Tasks](https://arxiv.org/abs/2506.10833)
- **Replication kit:** [https://github.com/aieng-lab/senlp-benchmark](https://github.com/aieng-lab/senlp-benchmark)
- **Language:** English
- **License:** MIT

## Cite as

```
@misc{{peña2025evaluatinglargelanguagemodels,
  title={{Evaluating Large Language Models on Non-Code Software Engineering Tasks}}, 
  author={{Fabian C. Peña and Steffen Herbold}},
  year={{2025}},
  eprint={{2506.10833}},
  archivePrefix={{arXiv}},
  primaryClass={{cs.SE}},
  url={{https://arxiv.org/abs/2506.10833}}, 
}
```
"""

In [None]:
def load_model(model_path, flash_attention):
    """Load the fine-tuned checkpoint with the head matching the global ``problem_type``.

    Args:
        model_path: Directory (or model ID) handed to ``from_pretrained``.
        flash_attention: When truthy, request the ``flash_attention_2`` attention
            implementation; otherwise ``attn_implementation`` is passed as ``None``.

    Returns:
        An ``AutoModelForTokenClassification`` for ``"token-classification"``, an
        ``AutoModelForMaskedLM`` for ``"fill-mask"``, and otherwise an
        ``AutoModelForSequenceClassification`` configured with the global
        ``num_labels`` and ``problem_type``. All variants are loaded in bfloat16.
    """
    # Arguments common to every head type.
    shared_kwargs = {
        "torch_dtype": torch.bfloat16,
        "attn_implementation": "flash_attention_2" if flash_attention else None,
    }

    if problem_type == "token-classification":
        return AutoModelForTokenClassification.from_pretrained(model_path, **shared_kwargs)

    if problem_type == "fill-mask":
        return AutoModelForMaskedLM.from_pretrained(model_path, **shared_kwargs)

    # Every other problem type is handled as sequence classification.
    return AutoModelForSequenceClassification.from_pretrained(
        model_path,
        num_labels=num_labels,
        problem_type=problem_type,
        **shared_kwargs,
    )

In [None]:
# Publishes every fine-tuned checkpoint of the current task to the Hugging Face Hub:
# load -> smoke-test with a pipeline -> save into a local clone of the Hub repo
# together with a generated model card -> push.
print("Models to push:", len(model_names))
for model_name in tqdm(model_names):
    print("Working on model:", model_name)

    model_path = os.path.join(RESULTS_BASE_PATH, task_name, model_name, "test", "best")
    print(" - Model path:", model_path)

    try:
        # The checkpoint config records the ID of the base model it was fine-tuned from.
        # A missing checkpoint raises FileNotFoundError here and the model is skipped.
        with open(os.path.join(model_path, "config.json"), encoding="utf-8") as f:
            config = json.load(f)
        base_model_id = config["_name_or_path"]
        print(" - Base model ID:", base_model_id)

        # Resolve the display name for the model card up front, i.e. before any
        # repository is created, so a missing catalogue row cannot leave an empty
        # repository behind. "org/name" IDs are looked up by their "name" part.
        base_model_id_parts = base_model_id.split("/")
        if len(base_model_id_parts) > 1:
            base_model_id_ = base_model_id_parts[1]
        else:
            base_model_id_ = base_model_id_parts[0]
        matching_names = models_df.loc[models_df["Model ID"] == base_model_id_, "Model name"].values
        if len(matching_names) > 0:
            base_model_name = matching_names[0]
        else:
            # Fall back to the bare ID instead of aborting the whole loop with an IndexError.
            print("- Base model not listed in models.csv, using its ID as name:", base_model_id_)
            base_model_name = base_model_id_

        # Checking whether model has flash attention support
        flash_attention = any(attn in base_model_id for attn in FLASH_ATTENTION)

        # Loading model and tokenizer
        tokenizer = AutoTokenizer.from_pretrained(base_model_id)
        model = load_model(model_path, flash_attention)

        # (Workaround) Updating config
        model.config._name_or_path = base_model_id

        # Testing the model before pushing; anything that is neither token
        # classification nor fill-mask is served by the text-classification pipeline.
        if problem_type in ("token-classification", "fill-mask"):
            problem_type_ = problem_type
        else:
            problem_type_ = "text-classification"

        pipeline_ = pipeline(
            problem_type_,
            model=model,
            tokenizer=tokenizer,
            device=0
        )
        if problem_type == "fill-mask":
            # The mask token is tokenizer specific (e.g. "[MASK]" for BERT, "<mask>" for
            # RoBERTa); the fill-mask pipeline rejects inputs without the right one.
            print("- Test inference:", pipeline_(f"Hello {tokenizer.mask_token}!"))
        else:
            print("- Test inference:",pipeline_("Hello world!"))

        # Single quotes inside the replacement field keep this valid on Python < 3.12,
        # where reusing the enclosing quote character in an f-string is a SyntaxError.
        new_model_id = f"{model_name}_{task_name.replace('_', '-')}"
        print("- New model ID:", new_model_id)

        new_model_path = os.path.join(TEMP_BASE_PATH, task_name, new_model_id)
        print("- New model path:", new_model_path)

        # Moving the model and tokenizer
        # exist_ok=True lets the notebook be re-run after a partial failure without
        # create_repo raising for repositories that were already created.
        create_repo(repo_id=f"{ORG_ID}/{new_model_id}", repo_type="model", private=False, exist_ok=True)
        repo = Repository(local_dir=new_model_path, clone_from=f"{ORG_ID}/{new_model_id}")
        tokenizer.save_pretrained(new_model_path)
        model.save_pretrained(new_model_path)

        # Creating model card
        model_card_ = model_card.format(base_model_id=base_model_id, problem_type=problem_type_, base_model_name=base_model_name)
        # The card contains non-ASCII characters, so the encoding is fixed to UTF-8
        # rather than left to the platform default.
        with open(os.path.join(new_model_path, "README.md"), "w", encoding="utf-8") as f:
            f.write(model_card_)

        # Pushing to the hub
        repo.push_to_hub(commit_message="Pushing model, tokenizer and model card")
    except FileNotFoundError:
        print("- Model not found!")