In [4]:
import torch
# Report whether PyTorch can see a CUDA device (shown as the cell output).
torch.cuda.is_available()

True

In [5]:
import os
from typing import List
from tqdm import tqdm
import fire
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM, AutoModelForCausalLM,  BitsAndBytesConfig
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_kbit_training,
)
from fed_utils import FedAvg, client_selection, global_evaluation, GeneralClient
import datasets
from utils.prompter import Prompter
# Restrict the `datasets` library's logging to error-level messages.
datasets.utils.logging.set_verbosity_error()


In [None]:
# NOTE(review): this cell reads `tokenizer` and `model`, which are only created
# by cells further down; on a fresh kernel it has to be run after them.
# Encode some text and move it to the device the model lives on, so that
# `generate` does not receive CPU inputs for a GPU-resident model.
text = "What are all the positions in hockey?"
input_ids = tokenizer.encode(text, return_tensors="pt").to(model.device)
# Generate predictions
output = model.generate(input_ids, max_length=50, num_return_sequences=1)
# Decode the generated tokens to get the text
predicted_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(predicted_text)

In [None]:
# Load the tokenizer of the base checkpoint (the model itself is loaded in a
# later cell).
global_model = "chavinlo/alpaca-native"
# The cache location must be passed to `from_pretrained`; assigning
# `tokenizer.cache_dir` after loading does not influence where files are fetched.
tokenizer = LlamaTokenizer.from_pretrained(
    global_model,
    cache_dir="/dpc/kunf0007/amine/tokenizer",
)
tokenizer.pad_token_id = 0
tokenizer.padding_side = "right"

In [None]:
# 4-bit quantization settings; this object is passed as `quantization_config`
# when the checkpoints are loaded.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

In [None]:
# Load the checkpoint named by `global_model` with the quantization settings
# from `bnb_config`; `device_map={"": 0}` maps the root module to device 0.
model = AutoModelForCausalLM.from_pretrained(
    global_model,
    cache_dir="/dpc/kunf0007/amine/model",
    device_map={"": 0},
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
)

In [None]:
# Prepare the quantized model for k-bit training, then wrap it with LoRA
# adapters on the q/k/v projection modules.
# NOTE(review): this cell rebinds `model`, so running it a second time applies
# both calls to the already-wrapped model.
model = prepare_model_for_kbit_training(model)
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj"],
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)
model = get_peft_model(model, config)

In [None]:
# Prompter instance (created with 'alpaca' and verbose=True) whose
# `generate_prompt` method is used to build the training prompts.
prompter = Prompter('alpaca', verbose=True)

In [None]:
cutoff_len = 512  # maximum number of tokens kept per example


def tokenize(prompt, add_eos_token=True):
    """Tokenize ``prompt`` and attach labels for causal-LM training.

    The prompt is truncated to ``cutoff_len`` tokens and not padded. When
    ``add_eos_token`` is true, the sequence is shorter than ``cutoff_len`` and
    it does not already end with the tokenizer's EOS id, one EOS token (with
    attention mask 1) is appended.

    Args:
        prompt: Text to tokenize.
        add_eos_token: Whether to append EOS when there is room for it.

    Returns:
        The tokenizer output with an extra ``"labels"`` entry holding a copy
        of ``"input_ids"``.
    """
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=cutoff_len,
        padding=False,
        return_tensors=None,
    )
    input_ids = result["input_ids"]
    # Test for emptiness before inspecting the last token: indexing `[-1]` on
    # an empty id list would raise IndexError.
    ends_with_eos = bool(input_ids) and input_ids[-1] == tokenizer.eos_token_id
    if add_eos_token and not ends_with_eos and len(input_ids) < cutoff_len:
        input_ids.append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)

    result["labels"] = input_ids.copy()
    return result

def generate_and_tokenize_prompt(data_point):
    """Build the full prompt for one record and tokenize it.

    Reads the ``"instruction"``, ``"context"`` and ``"response"`` entries of
    ``data_point``, passes them in that order to ``prompter.generate_prompt``
    and returns what ``tokenize`` produces for the resulting text.
    """
    instruction = data_point["instruction"]
    context = data_point["context"]
    response = data_point["response"]
    prompt_text = prompter.generate_prompt(instruction, context, response)
    return tokenize(prompt_text)

In [None]:
from datasets import load_dataset
# Directory later given to the trainer as `output_dir`.
local_output_dir = '/dpc/kunf0007/amine/output/local_output_0'
# JSON file holding the local training records.
local_data_path = './data/1/local_training_0.json'
# Read the file with the "json" dataset builder.
local_data = load_dataset("json", data_files=local_data_path)


In [None]:
# Shuffle with a fixed seed so the example order is reproducible across runs,
# then build and tokenize the prompt for every record.
local_train_dataset = (
    local_data["train"]
    .shuffle(seed=42)
    .map(generate_and_tokenize_prompt)
)

In [None]:
import transformers
from trl import SFTTrainer

# Module-level value (evaluates to 2). The function parameter of the same name
# has its own default and does not read this variable.
gradient_accumulation_steps = 8 // 4


def build_local_trainer(
    tokenizer=tokenizer,
    local_micro_batch_size=4,
    gradient_accumulation_steps=2,
    local_learning_rate=3e-4,
    group_by_length=False,
):
    """Create the ``SFTTrainer`` used for local fine-tuning.

    The trainer is built around the global ``model`` and
    ``local_train_dataset`` and writes to the global ``local_output_dir``.

    Args:
        tokenizer: Tokenizer handed to the trainer (defaults to the global
            ``tokenizer`` as it exists when this function is defined).
        local_micro_batch_size: Per-device training batch size.
        gradient_accumulation_steps: Number of accumulation steps.
        local_learning_rate: Learning rate.
        group_by_length: Forwarded to ``TrainingArguments``.

    Returns:
        The configured ``SFTTrainer``.
    """
    train_args = transformers.TrainingArguments(
        output_dir=local_output_dir,
        num_train_epochs=1,
        per_device_train_batch_size=local_micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        learning_rate=local_learning_rate,
        warmup_steps=1,
        optim="adamw_torch",
        fp16=False,
        logging_steps=1,
        group_by_length=group_by_length,
        dataloader_drop_last=False,
    )
    # NOTE(review): `local_train_dataset` already carries tokenized columns
    # produced by `generate_and_tokenize_prompt`, while `dataset_text_field`
    # names the raw "instruction" column — confirm which of the two the
    # installed TRL version actually trains on.
    return SFTTrainer(
        model=model,
        args=train_args,
        train_dataset=local_train_dataset,
        tokenizer=tokenizer,
        dataset_text_field="instruction",
        max_seq_length=512,
    )

In [None]:
# Build the trainer using the default arguments of `build_local_trainer`.
local_trainer = build_local_trainer()

In [None]:
from collections import OrderedDict
import copy
def initiate_local_training():
    """Prepare the global ``model`` for local training.

    Sets ``model.config.use_cache`` to ``False`` and replaces
    ``model.state_dict`` on this instance with a bound function that returns
    ``get_peft_model_state_dict`` applied to the parameters whose name
    contains ``"default"``. The parameter tensors are captured (detached)
    once, at the time this function runs.

    Returns:
        None. The global ``model`` is modified in place.
    """
    model.config.use_cache = False
    # Only the detached parameter references are needed here; no copy of the
    # current values is taken because nothing in this function reads one.
    adapter_params = OrderedDict(
        (name, param.detach())
        for name, param in model.named_parameters()
        if "default" in name
    )

    def _adapter_state_dict(instance, *_, **__):
        # Any arguments passed to `state_dict` are accepted and ignored.
        return get_peft_model_state_dict(instance, adapter_params, "default")

    # Bind as a method on this instance only.
    model.state_dict = _adapter_state_dict.__get__(model, type(model))
# Run the setup on the global `model`.
initiate_local_training()

In [None]:
import gc
def train():
    """Run two garbage-collection passes, then call ``local_trainer.train()``.

    Returns None; the value returned by the trainer is not propagated.
    """
    for _ in range(2):
        gc.collect()
    local_trainer.train()

In [None]:
# Start local training.
train()

In [None]:
# Write the model and the tokenizer into the same output directory.
# NOTE(review): `model.state_dict` was replaced in `initiate_local_training` —
# confirm the files written here contain the expected weights.
model.save_pretrained("/dpc/kunf0007/amine/model/mylora-shepherd-v0")
tokenizer.save_pretrained("/dpc/kunf0007/amine/model/mylora-shepherd-v0")

In [None]:
# Shell command: create the `mylora-shepherd` repository with transformers-cli.
# NOTE(review): this cell runs before the `notebook_login()` cell — confirm
# credentials are already available when it executes.
!transformers-cli repo create mylora-shepherd

In [None]:
from huggingface_hub import notebook_login

# Prompt for Hugging Face Hub credentials inside the notebook.
notebook_login()

In [None]:
# Upload the model to the "mylora-shepherd" repository on the Hub.
model.push_to_hub("mylora-shepherd")

In [None]:
# `local_path` is not defined by any earlier cell of this notebook, so without
# this assignment the cell raises NameError on a fresh kernel.
# NOTE(review): assumed to be the directory written by the `save_pretrained`
# cell — confirm this is the intended checkpoint.
local_path = "/dpc/kunf0007/amine/model/mylora-shepherd-v0"
# Reload tokenizer and model from disk, reusing the 4-bit quantization config.
tokenizer_ = LlamaTokenizer.from_pretrained(local_path, cache_dir="/dpc/kunf0007/amine")
model_ = AutoModelForCausalLM.from_pretrained(
    local_path,
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True
)

In [None]:
# Encode some text and move it to the device of the reloaded model, so that
# `generate` does not receive CPU inputs for a GPU-resident model.
text_ = "What are all the positions in hockey?"
input_ids_ = tokenizer_.encode(text_, return_tensors="pt").to(model_.device)

# Generate predictions
output_ = model_.generate(input_ids_, max_length=50, num_return_sequences=1)

# Decode the generated tokens to get the text
predicted_text_ = tokenizer_.decode(output_[0], skip_special_tokens=True)
print(predicted_text_)

In [None]:
def infer(tokenizer, model, text):
    """Generate a completion for ``text`` and return it as a string.

    Args:
        tokenizer: Object providing ``encode`` and ``decode``.
        model: Object providing ``generate``. If it exposes a ``device``
            attribute, the encoded prompt is moved there before generation.
        text: Prompt to complete.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    input_ids = tokenizer.encode(text, return_tensors="pt")
    # A model that lives on another device needs its inputs moved there
    # before `generate` is called.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)
    output = model.generate(
        input_ids,
        max_length=50,
        num_return_sequences=1,
        num_beams=5,  # Using beam search with 5 beams
        temperature=1.0,  # Default temperature
        top_k=50,  # Top-K sampling
        top_p=0.95,  # Nucleus sampling
        do_sample=True  # Enable stochastic sampling
    )

    # Decode the generated tokens to get the text
    predicted_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return predicted_text


In [None]:
# Encode a sample prompt so the resulting token ids can be inspected.
input_ids = tokenizer.encode( "give me a summary of what a kidney donor chain is.", return_tensors="pt")

In [None]:
# Display the encoded token ids as the cell output.
input_ids

In [None]:
# Run `infer` on a sample prompt with the fine-tuned model; the returned text
# is shown as the cell output.
text = "give me a summary of what a kidney donor chain is."
infer(tokenizer=tokenizer, model=model, text=text)