In [1]:
import wandb
import os
# Restrict this kernel to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

api = wandb.Api()
project = "lora"
workspace = "username"

# Tags identifying the experiments whose runs should be evaluated.
experiment_tags = ["mt_eval"]

# Get all runs that both: 1. match any experiment tag and 2. are finished.
# The filter dict uses W&B's MongoDB-style query syntax.
runs = api.runs(
    f"{workspace}/{project}",
    {"$and": [
        {"tags": {"$in": experiment_tags}},
        {"state": "finished"},
    ]},
)


In [2]:
import os
import lightning as L
from lit_gpt.lora import GPT,  Config
from lit_gpt.utils import (
    check_valid_checkpoint_dir,
    lazy_load,
)
from pathlib import Path

import gc

import lightning as L
from functools import partial

from scripts.convert_lit_checkpoint import check_conversion_supported, copy_weights_llama, incremental_save

from lit_gpt.lora import GPT, Config, lora_filter, merge_lora_weights
from lit_gpt.model import Config as ModelConfig
from lit_gpt.utils import check_valid_checkpoint_dir,  lazy_load

import contextlib

from finetune.lora import validate, get_max_seq_length

from lit_gpt.tokenizer import Tokenizer

import torch

def setup(run, checkpoints="checkpoints/meta-llama/Llama-2-7b-hf",
          adapter_path="downloads/", out_dir="merged/", out_name="model.pth"):
    """Rebuild a run's LoRA model, merge the adapter into the base weights and validate it.

    Args:
        run: Object exposing a ``config`` mapping that holds the LoRA
            hyper-parameters read below (``lora_r``, ``alpha``, ``lora_dropout``,
            ``lora_query``, ...).
        checkpoints: Directory of the base checkpoint. It must contain
            ``lit_model.pth``; its last path component is used as the model name
            and the tokenizer is loaded from it.
        adapter_path: Path of the fine-tuned adapter checkpoint. Its state dict
            is read from the ``"model"`` entry.
        out_dir: Directory that is created if missing. Nothing is written to it
            by this function.
        out_name: Unused; kept so existing callers keep working.

    Returns:
        The value returned by ``validate`` for the merged model evaluated on
        ``data/alpaca/test.pt``.
    """
    checkpoints = Path(checkpoints)
    # Fail fast: check the checkpoint directory before allocating the model.
    check_valid_checkpoint_dir(checkpoints)

    run_cfg = run.config
    conf = Config.from_name(
        name=checkpoints.name,
        r=run_cfg["lora_r"],
        alpha=run_cfg["alpha"],
        dropout=run_cfg["lora_dropout"],
        to_query=run_cfg["lora_query"],
        to_key=run_cfg["lora_key"],
        to_value=run_cfg["lora_value"],
        to_projection=run_cfg["lora_projection"],
        to_mlp=run_cfg["lora_mlp"],
        to_head=run_cfg["lora_head"],
        joint_qkvp=run_cfg["joint_qkvp"],
        tensor_lora=run_cfg["tensor_lora"],
    )

    # A single Fabric instance is used for both model creation and validation.
    fabric = L.Fabric(devices=1, strategy="auto", precision="bf16-true")
    fabric.seed_everything(0)  # same seed for every process to init model (FSDP)

    with fabric.init_module(empty_init=False):
        model = GPT(conf)

    if fabric.global_rank == 0:
        os.makedirs(out_dir, exist_ok=True)

    adapter = lazy_load(Path(adapter_path))
    base = lazy_load(checkpoints / "lit_model.pth")
    # Combine adapter and base tensors into one state dict; if a key appears in
    # both, the base checkpoint's entry is the one kept (it is unpacked last).
    params = {**adapter.sd["model"], **base.sd}
    model.load_state_dict(params, strict=True)

    print("merging weights")
    merge_lora_weights(model)

    print("evaluating")
    data_dir = Path("data/alpaca")
    val_data = torch.load(data_dir / "test.pt")
    train_data = torch.load(data_dir / "train.pt")
    tokenizer = Tokenizer(checkpoints)
    # Only the longest sequence length of the training set is needed here.
    _, longest_seq_length, _ = get_max_seq_length(train_data)

    return validate(fabric, model, val_data, tokenizer, longest_seq_length)


    
    #wandb.init(id=run.id, project=run.project, resume="allow")
    #wandb.log(results['results'])
    #wandb.finish()




  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [3]:
# Merge and evaluate the adapter of one hand-picked run. The merge/eval pipeline
# itself lives in `setup`, so it is called here instead of being duplicated.
for run in runs:
    print(run)
    if run.id != 'jsyf5mt1':
        continue

    weights = run.file(f"checkpoints/meta-llama/Llama-2-7b-hf/{run.name}_lora_finetuned.pth")
    # create the directory if it doesn't exist
    os.makedirs(f"download/{run.id}", exist_ok=True)
    # `download` hands back an open file object; only its path is needed, so it
    # is closed right away (`out.name` stays readable on the closed handle).
    with weights.download(f"download/{run.id}", replace=True) as out:
        adapter_path = out.name

    result = setup(
        run,
        checkpoints="checkpoints/meta-llama/Llama-2-7b-hf",
        adapter_path=adapter_path,
        out_dir="sandbox/",
    )



    

<Run hounie/lora/jsyf5mt1 (finished)>


Seed set to 0


merging weights
evaluating
Validating ...
Recommend a movie for me to watch during the weekend and explain the reason.
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Recommend a movie for me to watch during the weekend and explain the reason.

### Response:
The movie is 'The Avengers', the superheroes from the Marvel Universe.

The movie is a must-watch because it is based on the comic books of the same name and shows an epic battle between good and evil.

The plot revolves around the superheroes from the Marvel Universe as they fight against an evil alien race called the Chitauri. The superheroes fight for the survival of the human race and


NameError: name 'val_loss' is not defined

In [None]:
# Display the value bound to `result` by the evaluation loop.
# NOTE(review): the recorded run of that loop ended in a NameError, so the value
# shown here presumably comes from a different execution — re-run to confirm.
result

tensor(12.1462)

In [None]:
# Show the local path of the downloaded adapter checkpoint.
# NOTE(review): the recorded output names a different run id than the one the
# loop selects ('jsyf5mt1'), so it looks stale — re-run to confirm.
print(out.name)

download/wieemkak/checkpoints/meta-llama/Llama-2-7b-hf/tensor_lora_r_48_joint_heads_joint_layers_joint_qkvp_lora_finetuned.pth
