In [1]:
import torch
torch.cuda.is_available()

True

In [48]:
import os
from typing import List
from tqdm import tqdm
import fire
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM, AutoModelForCausalLM,  BitsAndBytesConfig
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_kbit_training,
)
from fed_utils import FedAvg, client_selection, global_evaluation, GeneralClient
import datasets
from utils.prompter import Prompter
datasets.utils.logging.set_verbosity_error()


In [None]:
# Encode some text
text = "What are all the positions in hockey?"
input_ids = tokenizer.encode(text, return_tensors="pt")
# Generate predictions
output = model.generate(input_ids, max_length=50, num_return_sequences=1)
# Decode the generated tokens to get the text
predicted_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(predicted_text)

In [45]:
# Load the tokenizer and model
global_model = "chavinlo/alpaca-native"
tokenizer = LlamaTokenizer.from_pretrained(global_model)
tokenizer.cache_dir = "/dpc/kunf0007/amine/tokenizer"
tokenizer.pad_token_id = (0)
tokenizer.padding_side = "right"

In [7]:
# Quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

In [8]:
model = AutoModelForCausalLM.from_pretrained(
    global_model,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map={"": 0},
    cache_dir="/dpc/kunf0007/amine/model"
)

config.json:   0%|          | 0.00/556 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

pytorch_model-00001-of-00003.bin:   0%|          | 0.00/9.88G [00:00<?, ?B/s]

pytorch_model-00002-of-00003.bin:   0%|          | 0.00/9.89G [00:00<?, ?B/s]

pytorch_model-00003-of-00003.bin:   0%|          | 0.00/7.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [11]:
model = prepare_model_for_kbit_training(model)
config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules= ["q_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)

In [9]:
prompter = Prompter('alpaca', verbose=True)

Using prompt template alpaca: Template used by Alpaca-LoRA.


In [12]:
cutoff_len = 512
def tokenize(prompt, add_eos_token=True):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=cutoff_len,
        padding=False,
        return_tensors=None,
    )
    if (
            result["input_ids"][-1] != tokenizer.eos_token_id
            and len(result["input_ids"]) < cutoff_len
            and add_eos_token
    ):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)

    result["labels"] = result["input_ids"].copy()
    return result

def generate_and_tokenize_prompt(data_point):
    full_prompt = prompter.generate_prompt(
        data_point["instruction"],
        data_point["context"],
        data_point["response"],
    )
    tokenized_full_prompt = tokenize(full_prompt)
    return tokenized_full_prompt

In [15]:
from datasets import load_dataset
local_data_path = './data/1/local_training_0.json'
local_output_dir = '/dpc/kunf0007/amine/output/local_output_0'
local_data = load_dataset("json", data_files=local_data_path)


In [22]:
local_train_dataset = local_data["train"].shuffle().map(generate_and_tokenize_prompt)

Map:   0%|          | 0/1493 [00:00<?, ? examples/s]

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Who is Steven Spielberg?

### Input:
Steven Allan Spielberg KBE (/ˈspiːlbɜːrɡ/; born December 18, 1946) is an American film director, writer and producer.[1] A major figure of the New Hollywood era and pioneer of the modern blockbuster, he is the most commercially successful director of all time.[2] He is the recipient of various accolades, including three Academy Awards, two BAFTA Awards, and four Directors Guild of America Awards, as well as the AFI Life Achievement Award in 1995, the Kennedy Center Honor in 2006, the Cecil B. DeMille Award in 2009 and the Presidential Medal of Freedom in 2015. Seven of his films have been inducted into the National Film Registry by the Library of Congress as "culturally, historically or aesthetically significant".[3][4]

### Response:
Steven Spielberg is a celebrated American 

In [59]:
import transformers
from trl import SFTTrainer

gradient_accumulation_steps = 8 // 4
def build_local_trainer(
    tokenizer=tokenizer,
    local_micro_batch_size=4,
    gradient_accumulation_steps=2,
    local_learning_rate=3e-4,
    group_by_length=False,
):
    train_args = transformers.TrainingArguments(
        per_device_train_batch_size=local_micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        warmup_steps=1,
        num_train_epochs=1,
        learning_rate=local_learning_rate,
        fp16=False,
        logging_steps=1,
        optim="adamw_torch",
        output_dir=local_output_dir,
        group_by_length=group_by_length,
        dataloader_drop_last=False,
    )
    local_trainer = SFTTrainer(
        model=model,
        train_dataset=local_train_dataset,
        args=train_args,
        tokenizer=tokenizer,
        dataset_text_field="instruction",
        max_seq_length=512,
    )
    return local_trainer

In [60]:
local_trainer = build_local_trainer()

Map:   0%|          | 0/1493 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [61]:
from collections import OrderedDict
import copy
def initiate_local_training():
    model.config.use_cache = False
    params_dict_old = copy.deepcopy(
        OrderedDict(
            (name, param.detach())
            for name, param in model.named_parameters()
            if "default" in name
        )
    )
    params_dict_new = OrderedDict(
        (name, param.detach())
        for name, param in model.named_parameters()
        if "default" in name
    )
    model.state_dict = (
        lambda instance, *_, **__: get_peft_model_state_dict(
            instance, params_dict_new, "default"
        )
    ).__get__(model, type(model))
initiate_local_training()

In [62]:
import gc
def train():

    gc.collect()
    gc.collect()
    local_trainer.train()

In [63]:
train()



Step,Training Loss
1,4.3744
2,4.7249
3,4.5267
4,4.5996
5,4.4441
6,5.7807
7,4.4505
8,4.611
9,4.5866
10,4.4205


In [72]:
model.save_pretrained("/dpc/kunf0007/amine/model/mylora-shepherd-v0")
tokenizer.save_pretrained("/dpc/kunf0007/amine/model/mylora-shepherd-v0")

AttributeError: 'SFTTrainer' object has no attribute 'save_pretrained'

In [67]:
!transformers-cli repo create mylora-shepherd

/bin/bash: /dpc/kunf0007/amine/.conda/envs/llama2/bin/transformers-cli: /home/kunet.ae/100053678/.conda/envs/llama2/bin/python: bad interpreter: No such file or directory


In [73]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [74]:
model.push_to_hub("mylora-shepherd")

adapter_model.safetensors:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/amew0/mylora-shepherd/commit/d39f9c817ff24cc08791e019c65230f810b038da', commit_message='Upload model', commit_description='', oid='d39f9c817ff24cc08791e019c65230f810b038da', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
tokenizer_ = LlamaTokenizer.from_pretrained(local_path, cache_dir="/dpc/kunf0007/amine")
model_ = AutoModelForCausalLM.from_pretrained(
    local_path,
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True
)

In [None]:
# Encode some text
text_ = "What are all the positions in hockey?"
input_ids_ = tokenizer_.encode(text_, return_tensors="pt")

# Generate predictions
output_ = model_.generate(input_ids_, max_length=50, num_return_sequences=1)

# Decode the generated tokens to get the text
predicted_text_ = tokenizer_.decode(output_[0], skip_special_tokens=True)
print(predicted_text_)

In [87]:
def infer(tokenizer, model, text):
    input_ids = tokenizer.encode(text, return_tensors="pt")
    output = model.generate(
        input_ids,
        max_length=50,
        num_return_sequences=1,
        num_beams=5,  # Using beam search with 5 beams
        temperature=1.0,  # Default temperature
        top_k=50,  # Top-K sampling
        top_p=0.95,  # Nucleus sampling
        do_sample=True  # Enable stochastic sampling
    )

    # Decode the generated tokens to get the text
    predicted_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return predicted_text


In [88]:
input_ids = tokenizer.encode( "give me a summary of what a kidney donor chain is.", return_tensors="pt")

In [80]:
input_ids

tensor([[    2,  2367,   592,   263, 15837,   310,   825,   263, 26397,  3801,
          1016,   272,  9704,   338, 29889]])

In [89]:
text=  "give me a summary of what a kidney donor chain is."
infer(tokenizer, model, text)

TypeError: PeftModelForCausalLM.generate() takes 1 positional argument but 2 were given