# lingering problems:
GPU problems
* check raw HF (i.e. without sentence-transformers) memory usage for the forward and backward passes, in fp16, with and without LoRA
* check the effect of not loading the model to the GPU (automatically) during the first load
* MultipleNegativesRankingLoss might influence GPU VRAM usage


# data
all-MiniLM-L6-v2 (sentence-transformers/all-MiniLM-L6-v2)
* gpu idle: 206MB
* inference mode: 234MB
* inference mode and autocast: 266MB

alibaba (Alibaba-NLP/gte-large-en-v1.5)
* gpu idle: 1768MB
* inference mode: 1818MB
* inference mode and autocast: 2590MB
* standard (no LoRA), NO inference mode: 2100MB
* LoRA, NO inference mode: 2160MB

alibaba backward (units presumably MB, as in the figures above):
* vanilla: 3956MB
* lora (r=8): 2182MB  (yes indeed lora helps!!)
* lora (r=64): 2286MB  (yes indeed lora helps!!)

alibaba backward, but using sentence-transformers (the SentenceTransformer wrapper)
* vanilla: TODO (measurement not recorded)

In [1]:
from sentence_transformers import SentenceTransformer
from transformers import AutoModel, AutoTokenizer
from transformers import BertModel
import torch
import re
from peft import LoraConfig, TaskType, get_peft_model  # type: ignore

  from tqdm.autonotebook import tqdm, trange


In [2]:
def get_target_modules(model) -> list[str]:
    """Return the submodule names that LoRA adapters should be attached to.

    The list depends on the architecture of ``model``:

    * a ``BertModel`` instance gets ``query``, ``key``, ``value`` and ``dense``;
    * a model whose type string matches ``Alibaba-NLP.+NewModel`` gets
      ``qkv_proj``, ``o_proj``, ``up_gate_proj`` and ``down_proj``.

    Args:
        model: The underlying transformer model to inspect.

    Returns:
        Module-name strings suitable for ``LoraConfig(target_modules=...)``.

    Raises:
        ValueError: If the model type matches neither supported architecture.
    """
    # The BERT check runs first, exactly as in the original if/elif chain.
    if isinstance(model, BertModel):
        return ["query", "key", "value", "dense"]

    # This architecture is recognised by matching the textual form of its
    # type rather than by isinstance.
    type_repr = str(type(model))
    if re.search(r"Alibaba-NLP.+NewModel", type_repr) is not None:
        return ["qkv_proj", "o_proj", "up_gate_proj", "down_proj"]

    raise ValueError(
        f"Model with type {type(model)} is unsupported, please manually inspect and add lora modules."
    )

In [3]:
# Checkpoint under test. Swap which `model_name` line is commented out to
# switch between the small MiniLM model and the larger Alibaba GTE model.
# model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_name = "Alibaba-NLP/gte-large-en-v1.5"


# Load through the sentence-transformers wrapper. `trust_remote_code=True`
# allows the checkpoint's own modelling code to be executed on load.
st_model = SentenceTransformer(model_name, trust_remote_code=True)  # automatically moved to cuda here by sentence transformer
# Raw Hugging Face alternative (disabled), kept for comparing against the
# sentence-transformers wrapper.
# hf_model = AutoModel.from_pretrained(model_name, trust_remote_code=True).eval()
# Tokenizer for the same checkpoint; a later cell uses it to build the input.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# LoRA: wrap the transformer inside the SentenceTransformer with PEFT adapters.
# `st_model[0]._modules["auto_model"]` is the model object handed to
# `get_target_modules`, which picks the layer names to adapt.
lora_modules = get_target_modules(st_model[0]._modules["auto_model"])
peft_config = LoraConfig(
    task_type=TaskType.FEATURE_EXTRACTION,
    target_modules=lora_modules,
    inference_mode=False,
    r=8,
    # NOTE(review): the original note read "2 * alpha as a rule of thumb",
    # presumably meaning 2 * r; with r=8 this value is actually 4 * r.
    # Confirm which was intended.
    lora_alpha=32,
    lora_dropout=0.2,
)
# Replace the wrapped model in place so later calls to `st_model` run
# through the PEFT-wrapped version.
st_model[0]._modules["auto_model"] = get_peft_model(
    st_model[0]._modules["auto_model"],
    peft_config,
)

In [5]:
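# Debugging aid (disabled): print each parameter's requires_grad flag, device and name.
# NOTE(review): `lora_model` is not defined in the cells shown here; confirm
# which model this should iterate over before re-enabling.
# for name, param in lora_model.named_parameters():
#     print(param.requires_grad, "\t", param.device, "\t", name)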

In [4]:
# One fixed paragraph used as the sample input for the probe below.
text = "The dog (Canis familiaris or Canis lupus familiaris) is a domesticated descendant of the wolf. Also called the domestic dog, it was selectively bred from an extinct population of wolves during the Late Pleistocene by hunter-gatherers. The dog was the first species to be domesticated by humans, over 14,000 years ago and before the development of agriculture. Experts estimate that due to their long association with humans, dogs have gained the ability to thrive on a starch-rich diet that would be inadequate for other canids."

# Tokenize to PyTorch tensors and move them to the GPU.
enc = tokenizer(text, return_tensors="pt").to("cuda")
# Forward pass through the SentenceTransformer with the tokenized features.
out = st_model(enc)
# Reduce the token embeddings to a scalar and backpropagate.
# NOTE(review): presumably done so the VRAM cost of a backward pass can be
# read off externally; no memory figure is captured in this cell.
out["token_embeddings"].sum().backward()

In [None]:
# Bare attribute reference: evaluates to the `encode` method without calling it.
# NOTE(review): presumably left here to inspect the method from the notebook;
# confirm it is still needed.
SentenceTransformer.encode

{'input_ids': tensor([[  101,  1996,  3899,  1006,  2064,  2483,  5220,  2483,  2030,  2064,
          2483, 11320, 12207,  5220,  2483,  1007,  2003,  1037,  4968,  4383,
         12608,  1997,  1996,  4702,  1012,  2036,  2170,  1996,  4968,  3899,
          1010,  2009,  2001, 13228,  2135, 13680,  2013,  2019,  8548,  2313,
          1997,  8588,  2076,  1996,  2397, 25080,  2011,  4477,  1011,  8587,
          2545,  1012,  1996,  3899,  2001,  1996,  2034,  2427,  2000,  2022,
          4968,  4383,  2011,  4286,  1010,  2058,  2403,  1010,  2199,  2086,
          3283,  1998,  2077,  1996,  2458,  1997,  5237,  1012,  8519, 10197,
          2008,  2349,  2000,  2037,  2146,  2523,  2007,  4286,  1010,  6077,
          2031,  4227,  1996,  3754,  2000, 25220,  2006,  1037,  2732,  2818,
          1011,  4138,  8738,  2008,  2052,  2022, 14710,  2005,  2060,  2064,
          9821,  1012,   102]], device='cuda:0'), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [None]:
# Bare attribute reference: evaluates to the `forward` method without calling it.
# NOTE(review): presumably left here to inspect the method from the notebook;
# confirm it is still needed.
SentenceTransformer.forward