In [1]:
from scripts.model import EvalModel
import os
from scripts.datasets import VQA_dataset
from scripts.datasets import SQUAD_dataset

%load_ext autoreload
%autoreload 2

In [2]:
CHECKPOINT_DIR = (
    "/mnt/d/models/"
    if os.environ.get("CHECKPOINT_DIR") == None
    else os.environ["CHECKPOINT_DIR"]
)

In [3]:
model_args = {
    "vision_encoder_path": "ViT-L-14",
    "vision_encoder_pretrained": "openai",
    "lm_path": "anas-awadalla/mpt-1b-redpajama-200b",
    "lm_tokenizer_path": "anas-awadalla/mpt-1b-redpajama-200b",
    "checkpoint_path": f"{CHECKPOINT_DIR}/OpenFlamingo-3B-vitl-mpt1b/checkpoint.pt",
    "cross_attn_every_n_layers": 1,
    "precision": "bf16",
    "device": 0,
}

print(f"Loading Checkpoint from {CHECKPOINT_DIR}")
model = EvalModel(model_args)

Loading Checkpoint from /mnt/d/models/


In [None]:
data = SQUAD_dataset()

def collate_fn(batch):

    final = {'text': []}
    for d in batch:
        contexts = d['context']
        questions = d['question']
        answers = d['answers']['text'][0]
        temp = data.qa_prompt(contexts, questions, answers)
        final['text'].append(temp)

    return final

In [None]:
from torch.utils.data import DataLoader

train_d = DataLoader(data.train_dataset, batch_size=4, collate_fn=collate_fn)

In [None]:
def print_trainable_parameters(model):
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.2f}"
    )

print_trainable_parameters(model.model)

trainable params: 1046992944 || all params: 2559117360 || trainable%: 40.91


In [None]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r = 32,
    lora_alpha = 32,
    target_modules = ['to_q', 'to_kv', 'to_out', 'ff.1', 'ff.3'],
    lora_dropout=0.1,
    bias='none',
)
lora_model = get_peft_model(model.model, config)
print_trainable_parameters(lora_model)

trainable params: 22216704 || all params: 2581334064 || trainable%: 0.86


In [None]:
from torch import optim
from torch import nn
import torch
import tqdm

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(lora_model.parameters(), lr=0.001, weight_decay=0.01)

loss_vals = []
for epoch in range(5):
    counter = 0
    for batch in tqdm.tqdm(train_d):

        if counter > 2000:
            continue
        counter += 1

        token = model.tokenizer(batch['text'])
        image = data.palceholder_image
        image_token = data.image_preprocess_batch(model.image_processor, [image])
        image_tokens = torch.cat([image_token] * len(batch['text']), dim=0)
        input_ids, attention_mask = model._prepare_text(batch['text'])

        output = lora_model(
                    image_tokens.to(0, dtype=torch.bfloat16),
                    input_ids[:, :-1],
                    attention_mask[:, :-1]
                )
        loss = criterion(output.logits.reshape(4, 50280, -1), input_ids[:, :-1])
        

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_vals.append(loss.cpu().detach().cpu().item())
        torch.cuda.empty_cache()
    print(f"loss: {sum(loss_vals)/len(loss_vals)}")

        



  0%|          | 0/21900 [00:00<?, ?it/s]


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [25]:
lora_model.push_to_hub("ToviTu/fine-tuned-nl-flamingo")

adapter_model.safetensors:   0%|          | 0.00/174M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ToviTu/fine-tuned-nl-flamingo/commit/3a47056055b63209ee8cce8620c149776c90bf46', commit_message='Upload model', commit_description='', oid='3a47056055b63209ee8cce8620c149776c90bf46', pr_url=None, pr_revision=None, pr_num=None)

In [24]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [27]:
merged = lora_model.merge_and_unload()

In [28]:
merged.push_to_hub("ToviTu/fine-tuned-nl-flamingo")

AttributeError: 'Flamingo' object has no attribute 'push_to_hub'

In [38]:
from transformers import AutoModelForCausalLM

m = AutoModelForCausalLM.from_pretrained("/mnt/d/models/OpenFlamingo-3B-vitl-mpt1b-ft-squad/", local_files_only=True)

OSError: /mnt/d/models/OpenFlamingo-3B-vitl-mpt1b-ft-squad/ does not appear to have a file named config.json. Checkout 'https://huggingface.co//mnt/d/models/OpenFlamingo-3B-vitl-mpt1b-ft-squad//None' for available files.

In [45]:
model.model.lang_encoder.save_pretrained("/mnt/d/models/OpenFlamingo-3B-vitl-mpt1b-ft-squad/")



In [46]:
model.model.lang_encoder.push_to_hub("ToviTu/fine-tuned-nl-flamingo")

model.safetensors:   0%|          | 0.00/4.38G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ToviTu/fine-tuned-nl-flamingo/commit/51d58c5fd57ca92f5bcdd7d65afd0b0a2f1e3c72', commit_message='Upload MosaicGPT', commit_description='', oid='51d58c5fd57ca92f5bcdd7d65afd0b0a2f1e3c72', pr_url=None, pr_revision=None, pr_num=None)