In [1]:
from datetime import datetime
import os
import sys
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq
import torch
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
)

# Create a dictionary with your data
data = {
    'instruction': ['Turn on the light', 'Play some music', 'What is the weather like?'],
    'prompt': ['Turn on the light.', 'Play some music.', 'What is the weather like?'],
    'response': ['Okay, turning on the light.', 'Okay, playing music.', 'The weather is sunny.']
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)
df

Unnamed: 0,instruction,prompt,response
0,Turn on the light,Turn on the light.,"Okay, turning on the light."
1,Play some music,Play some music.,"Okay, playing music."
2,What is the weather like?,What is the weather like?,The weather is sunny.


In [2]:
from datasets import Dataset

def data_preparation(df):

    # Convert to dictionary 
    data_dict = df.to_dict('list')

    # To Huggingface Dataset
    dataset = Dataset.from_dict(data_dict)
    
    return dataset


def data_tokenization(dataset):

    # Tokenization
    tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")
    tokenizer.add_eos_token = True
    tokenizer.pad_token_id = 0
    tokenizer.padding_side = "left"
    
    def tokenize(text):
        result = tokenizer(
            text,
            truncation=True,
            max_length=512,
            padding=False,
            return_tensors=None,
        )

        # "self-supervised learning" means the labels are also the inputs:
        result["labels"] = result["input_ids"].copy()

        return result


    def tokenize_chatbot_text(data_point):
        full_text =f"""You are an AI assistant. Your job is to create a correct json format based on the questions.

                        If you do not know, please say "I am sorry, please try again.".

                    ### Instruction:
                    {data_point["instruction"]}

                    ### Prompt:
                    {data_point["prompt"]}

                    ### Response:
                    {data_point["response"]}
                    """
        return tokenize(full_text)
    
    tokenized_dataset = dataset.map(tokenize_chatbot_text)
    
    return tokenizer, tokenized_dataset


def model_preparation():
    base_model = "codellama/CodeLlama-7b-hf"
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=True,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    model.train() # put model back into training mode
    model = prepare_model_for_int8_training(model)

    config = LoraConfig(
        r=16,
        lora_alpha=16,
        target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
    ],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, config)

    resume_from_checkpoint = "" # set this to the adapter_model.bin file you want to resume from

    if resume_from_checkpoint:
        if os.path.exists(resume_from_checkpoint):
            print(f"Restarting from {resume_from_checkpoint}")
            adapters_weights = torch.load(resume_from_checkpoint)
            set_peft_model_state_dict(model, adapters_weights)
        else:
            print(f"Checkpoint {resume_from_checkpoint} not found")


    if torch.cuda.device_count() > 1:
        # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
        model.is_parallelizable = True
        model.model_parallel = True
        
    return model


def model_training(model, tokenizer, tokenized_dataset_train, tokenized_dataset_val):

    batch_size = 4
    per_device_train_batch_size = 2
    gradient_accumulation_steps = batch_size // per_device_train_batch_size
    output_dir = "checkpoints"

    training_args = TrainingArguments(
            per_device_train_batch_size=per_device_train_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            warmup_steps=100,
            max_steps=400,
            learning_rate=3e-4,
            fp16=True,
            logging_steps=10,
            optim="adamw_torch",
            evaluation_strategy="steps",
            save_strategy="steps",
            eval_steps=20,
            save_steps=20,
            output_dir=output_dir,
            load_best_model_at_end=False,
            group_by_length=True, # group sequences of roughly the same length together to speed up training
            run_name=f"codellama-{datetime.now().strftime('%Y-%m-%d-%H-%M')}", # if use_wandb else None,
        )

    trainer = Trainer(
        model=model,
        train_dataset=tokenized_dataset_train,
        eval_dataset=tokenized_dataset_val,
        args=training_args,
        data_collator=DataCollatorForSeq2Seq(
            tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
        ),
    )
    
    model.config.use_cache = False

    old_state_dict = model.state_dict
    model.state_dict = (lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())).__get__(
        model, type(model)
    )
    if torch.__version__ >= "2" and sys.platform != "win32":
        print("compiling the model")
        model = torch.compile(model)
        
    # Train now
    trainer.train()
    
    return None

In [None]:
dataset_train = data_preparation(df)

tokenizer, tokenized_dataset_train = data_tokenization(dataset_train)

model = model_preparation()

model_training(model, tokenizer, tokenized_dataset_train, tokenized_dataset_train)

In [1]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
from peft import PeftModel

base_model = "codellama/CodeLlama-7b-hf"
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
output_dir = 'checkpoints/checkpoint-100'
model = PeftModel.from_pretrained(model, output_dir)

In [9]:
eval_prompt = """You are an AI assistant. Your job is to create a correct json format based on the questions.

                If you do not know, please say "I am sorry, please try again.
                ### Instruction:
                This is Instruction.

                ### Prompt:
                How are you?

                ### Response:
                """

model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=200)[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


You are an AI assistant. Your job is to create a correct json format based on the questions.

                If you do not know, please say "I am sorry, please try again.
                ### Instruction:
                This is Instruction.

                ### Prompt:
                How are you?

                ### Response:
                 You are an AI assistant. You are an AI assistant. You are an AI assistant. You are an AI assistant. You are an AI assistant.
                 You are an AI assistant. You are an AI assistant. You are an AI assistant. You are an AI assistant. You are an AI assistant.
                 You are an AI assistant. You are an AI assistant. You are an AI assistant. You are an AI assistant. You are an AI assistant.
                 You are an AI assistant. You are an AI assistant. You are an AI assistant. You are an AI assistant. You are an AI assistant.
                 You are an AI assistant. You are an AI assistant. You are an AI assistant. You are a