In [None]:
%%capture
!pip install adapters datasets

In [None]:
import torch
from huggingface_hub import login
from tqdm import tqdm
from datasets import load_dataset
from transformers import set_seed, AutoModelForCausalLM, AutoTokenizer, pipeline
from adapters import init
from adapters.composition import Stack

In [None]:
# --- Run configuration -------------------------------------------------------
SEED = 42
modelpath = "gpt2-medium"                  # base checkpoint for tokenizer and model
domain_adapter = "hf_path_to_trained_DA"   # adapter later loaded under the name "domain"
task_adapter = "hf_path_to_trained_TA"     # adapter later loaded under the name "task"
# Placeholder token passed to `login`. Do not commit a real token to the notebook;
# prefer reading it from an environment variable or a prompt.
HF_KEY = "hf_key"

# generation params (forwarded to `model.generate` in `generate_answer`)
temperature = 0.1       # sampling temperature
rp = 1.03               # repetition penalty
max_new_tokens = 100    # upper bound on the number of generated tokens

# --- Reproducibility, device selection and Hub authentication ----------------
set_seed(SEED)
torch.manual_seed(SEED)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
login(HF_KEY)

# Load the tokenizer and reuse the EOS token for padding.
tokenizer = AutoTokenizer.from_pretrained(modelpath)
tokenizer.pad_token = tokenizer.eos_token

# Load the base causal LM and enable adapter support on it.
model = AutoModelForCausalLM.from_pretrained(modelpath).to(DEVICE)
init(model)

# Load both adapters: the domain adapter without its head, the task adapter with it.
model.load_adapter(domain_adapter, load_as="domain", with_head=False)
model.load_adapter(task_adapter, load_as="task", with_head=True)  # fixed typo: was `load_adaptert`

# Activate the adapters as a stack: "domain" first, then "task".
model.active_adapters = Stack("domain", "task")

# Move the whole model (base weights, adapters and head) to the target device.
# `adapter_to` expects an adapter *name* as its first argument, so calling it
# with only a device does not move the freshly loaded adapter weights.
model.to(DEVICE)
# print(model.adapter_summary())

def generate_answer(text: str) -> str:
  """Generate an answer to `text` with the adapter-augmented model.

  The question is wrapped in the prompt ``'Question: ' + text + "Answer: "``,
  tokenized, and passed to ``model.generate`` with sampling enabled, using the
  module-level ``temperature``, ``rp`` (repetition penalty) and
  ``max_new_tokens`` settings.

  Args:
    text: The question to ask.

  Returns:
    The first generated sequence decoded to a string with special tokens
    removed. NOTE(review): for a causal LM this is expected to include the
    prompt itself — confirm whether callers want it stripped.
  """
  # NOTE(review): there is no separator between the question and "Answer: ";
  # kept as-is because it presumably matches the adapters' training format.
  prompt = 'Question: ' + text + "Answer: "
  # Use the module-level DEVICE constant (the original referenced an undefined `device`).
  encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)

  model.eval()
  with torch.inference_mode():
    outputs = model.generate(
      input_ids=encoding.input_ids,
      # Pass the mask and pad id explicitly: pad == EOS here, so generate()
      # cannot infer the mask reliably and would otherwise emit warnings.
      attention_mask=encoding.attention_mask,
      pad_token_id=tokenizer.eos_token_id,
      max_new_tokens=max_new_tokens,
      do_sample=True,
      temperature=temperature,
      repetition_penalty=rp,
    )
  return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Smoke test: the returned string is displayed as the cell output.
generate_answer(text="What is adapter fine-tuning?")