In [None]:
import os

# Restrict this kernel to the GPU with index 1. Set before torch/transformers
# are imported in the next cell so the setting is in place when CUDA starts.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

# Location of the local Hugging Face model cache.
os.environ["TRANSFORMERS_CACHE"] = "/data/../llm_cache"
from huggingface_hub import login

# Do not hardcode the access token in the notebook: take it from the HF_TOKEN
# environment variable. When the variable is unset, token=None makes `login`
# fall back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
import os
import sys
import math
import torch
import numpy as np
import pandas as pd
import argparse
import textwrap
import transformers
from peft import PeftModel
from transformers import GenerationConfig, TextStreamer, BitsAndBytesConfig
from llama_attn_replace import replace_llama_attn


from dataclasses import dataclass, field
from typing import Dict, Optional

from accelerate import Accelerator
import datasets
from datasets import Dataset, load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, TrainingArguments, set_seed

from trl import DPOTrainer


import torch
import torch.nn as nn
import transformers
from torch.utils.data import Dataset
from transformers import Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig
from llama_attn_replace_sft import replace_llama_attn
from gptneox_attn_replace import replace_gpt_neox_attn
from peft import LoraConfig, get_peft_model
from torch.distributed import barrier
from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl

### Merge and unload with Llama 3.1

#### **Set the parameters below**

In [None]:

# Training step whose checkpoint should be merged.
step = 330

# Base model to load and the folder that receives the merged model.
base_model = "../your/model/path/"
save_path = "../your/model/path/"

# Adapter checkpoint folder: the adapter output root followed by "checkpoint-<step>".
adapter_model_name = "../your/model/path/" + f"checkpoint-{step}"


In [None]:
import argparse

# step = 80 # 160 240 320 
# NOTE: `adapter_model_name` was already built from `step` in the parameter
# cell, so every iteration of this loop loads the same adapter checkpoint.
steps = [step,]
for step in steps:
    # Run settings, bundled so they can be read as attributes (args.base_model, ...).
    args_dict = {
        'base_model': base_model,  # "meta-llama/Meta-Llama-3.1-8B-Instruct",
        'cache_dir': None,
        'peft_model': adapter_model_name,
        'save_path': save_path,
    }
    args = argparse.Namespace(**args_dict)

    # 4-bit NF4 quantization settings. Currently NOT applied: the matching
    # `quantization_config=` argument below is commented out.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    # Show the effective settings
    print(args)

    device = "cuda:0"
    torch.cuda.set_device(device)

    print("base model", args.base_model)
    print("cache dir", args.cache_dir)
    print("peft model", args.peft_model)

    # Load the base model. `cache_dir` is forwarded so that the value configured
    # (and printed) above is actually honoured; None keeps the library default.
    model = AutoModelForCausalLM.from_pretrained(
        args.base_model,
        cache_dir=args.cache_dir,
        torch_dtype=torch.float16,  # "auto",
        device_map="auto",
        # quantization_config=quantization_config,
    )

    # Load the tokenizer from the same base model (and the same cache location)
    tokenizer = AutoTokenizer.from_pretrained(args.base_model, cache_dir=args.cache_dir)
    # set pad_token_id equal to the eos_token_id if not set
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    # Set reasonable default for models without max length
    if tokenizer.model_max_length > 100_000:
        tokenizer.model_max_length = 4096  # 16384 # 2048

    # The whole response template is registered as ONE additional special token.
    # NOTE(review): presumably this mirrors the tokenizer setup used when the
    # adapter was trained -- confirm, since it changes the vocabulary size.
    response_template = "Rewrite:<|eot_id|><|start_header_id|>assistant<|end_header_id|>"

    # Define special tokens
    special_tokens_dict = {
        "additional_special_tokens": [response_template],
    }
    # Add the special tokens to the tokenizer
    tokenizer.add_special_tokens(special_tokens_dict)

    # Resize the model embeddings to accommodate the new special tokens.
    # This has to happen before the adapter is attached below.
    model.resize_token_embeddings(len(tokenizer))

    # Load the adapter model on top of the (resized) base model
    model = PeftModel.from_pretrained(
        model,
        args.peft_model,
        device_map="auto",
        torch_dtype=torch.float16,  # "auto",
    )
    
#########################
##### Example usage
#########################
# input_text = "Rewrite:<|eot_id|><|start_header_id|>assistant<|end_header_id|> What are aspects of the story of Ed the Happy Clown that involve Ronald Reagan?"

# Few-shot prompt in the Llama 3.1 chat format. It is written as adjacent string
# literals (joined by Python at compile time) so each section stays on its own
# line; a raw line break inside a single "..." literal is a SyntaxError.
# NOTE(review): the text already starts with <|begin_of_text|>; confirm whether
# the tokenizer prepends another BOS token and whether that matches training.
input_text = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
    "Cutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\n"
    "<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
    "Given a question, its previous questions (Q) and answers (A), decontextualize the question by addressing coreference and omission issues. The resulting question should retain its original meaning and be as informative as possible, and should not duplicate any previously asked questions in the context.\n\n"
    # Example 1
    "Context: [Q: When was Born to Fly released? A: Sara Evans's third studio album, Born to Fly, was released on October 10, 2000.]\n"
    "Question: Was Born to Fly well received by critics?\n"
    "Rewrite: Was Born to Fly well received by critics?\n\n"
    # Example 2
    "Context: [Q: When was Keith Carradine born? A: Keith Ian Carradine was born August 8, 1949. Q: Is he married? A: Keith Carradine married Sandra Will on February 6, 1982.]\n"
    "Question: Do they have any children?\n"
    "Rewrite: Do Keith Carradine and Sandra Will have any children?\n\n"
    # Example 3
    "Context: [Q: Who proposed that atoms are the basic units of matter? A: John Dalton proposed that each chemical element is composed of atoms of a single, unique type, and they can combine to form more complex structures called chemical compounds.]\n"
    "Question: How did the proposal come about?\n"
    "Rewrite: How did John Dalton's proposal that each chemical element is composed of atoms of a single unique type, and they can combine to form more complex structures called chemical compounds come about?\n\n"
    # Example 4 (the stray line break used to sit between the next two literals)
    "Context: [Q: What is it called when two liquids separate? A: Decantation is a process for the separation of mixtures of immiscible liquids or of a liquid and a solid mixture such as a suspension. Q: How does the separation occur? "
    "A: The layer closer to the top of the container-the less dense of the two liquids, or the liquid from which the precipitate or sediment has settled out-is poured off.]\n"
    "Question: Then what happens?\n"
    "Rewrite: Then what happens after the layer closer to the top of the container is poured off with decantation?\n\n"
    # Actual query: the assistant header is left open for the model to complete
    "Context: [Q: What was Faith Hill's first country album? A: Take Me as I Am is the debut studio album by country singer Faith Hill. Q: What was a single from the album? A: The first single from Faith Hill's Take Me as I am is Wild One. Q: Was the song a success? A: Hill's rendition was also her first Number One, spending the first four chart weeks of 1994 at the top of the Billboard Hot Country Singles & Tracks chart. Q: Did the album perform well? A: Take Me as I am has been certified 3× platinum in the United States for shipments of three million copies. Q: Did she write her own songs? A: Faith Hill performs songs other people wrote. Q: Did she tour? A: Faith Hill's Soul2Soul II Tour 2006 with McGraw became the highest-grossing country tour of all time. Q: Who did she tour with? A: Faith Hill toured with Tim Mcgraw in 2006.]\n"
    "Question: Did they sing together?\n"
    "Rewrite:<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
)
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
# Prints tokens to stdout as they are generated
streamer = TextStreamer(tokenizer)

# Generate text with the adapter-wrapped (not yet merged) model
model.eval()
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    # temperature / top_p only take effect when sampling is enabled
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    use_cache=True,
    streamer=streamer,
)
# Full sequence (prompt + completion), special tokens kept
generated_text = tokenizer.decode(outputs[0],) # skip_special_tokens=True)

# print("Generated Text:", generated_text)


# print("Generated Text:", generated_text)

# Fold the adapter weights into the base network. The result can be used like a
# normal transformers model, but the checkpoint will be significantly bigger.
merged_model = model.merge_and_unload()

# Persist the merged weights first, then the tokenizer, into the same folder.
for artifact in (merged_model, tokenizer):
    artifact.save_pretrained(args.save_path)

# Example usage (post-merge sanity check)
# input_text = "Rewrite:<|eot_id|><|start_header_id|>assistant<|end_header_id|> What are aspects of the story of Ed the Happy Clown that involve Ronald Reagan?"

# Same few-shot prompt as the pre-merge example, in the Llama 3.1 chat format.
# It is written as adjacent string literals (joined by Python at compile time);
# a raw line break inside a single "..." literal is a SyntaxError.
# NOTE(review): the text already starts with <|begin_of_text|>; confirm whether
# the tokenizer prepends another BOS token and whether that matches training.
input_text = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
    "Cutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\n"
    "<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
    "Given a question, its previous questions (Q) and answers (A), decontextualize the question by addressing coreference and omission issues. The resulting question should retain its original meaning and be as informative as possible, and should not duplicate any previously asked questions in the context.\n\n"
    # Example 1
    "Context: [Q: When was Born to Fly released? A: Sara Evans's third studio album, Born to Fly, was released on October 10, 2000.]\n"
    "Question: Was Born to Fly well received by critics?\n"
    "Rewrite: Was Born to Fly well received by critics?\n\n"
    # Example 2
    "Context: [Q: When was Keith Carradine born? A: Keith Ian Carradine was born August 8, 1949. Q: Is he married? A: Keith Carradine married Sandra Will on February 6, 1982.]\n"
    "Question: Do they have any children?\n"
    "Rewrite: Do Keith Carradine and Sandra Will have any children?\n\n"
    # Example 3
    "Context: [Q: Who proposed that atoms are the basic units of matter? A: John Dalton proposed that each chemical element is composed of atoms of a single, unique type, and they can combine to form more complex structures called chemical compounds.]\n"
    "Question: How did the proposal come about?\n"
    "Rewrite: How did John Dalton's proposal that each chemical element is composed of atoms of a single unique type, and they can combine to form more complex structures called chemical compounds come about?\n\n"
    # Example 4 (the stray line break used to sit between the next two literals)
    "Context: [Q: What is it called when two liquids separate? A: Decantation is a process for the separation of mixtures of immiscible liquids or of a liquid and a solid mixture such as a suspension. Q: How does the separation occur? "
    "A: The layer closer to the top of the container-the less dense of the two liquids, or the liquid from which the precipitate or sediment has settled out-is poured off.]\n"
    "Question: Then what happens?\n"
    "Rewrite: Then what happens after the layer closer to the top of the container is poured off with decantation?\n\n"
    # Actual query: the assistant header is left open for the model to complete
    "Context: [Q: What was Faith Hill's first country album? A: Take Me as I Am is the debut studio album by country singer Faith Hill. Q: What was a single from the album? A: The first single from Faith Hill's Take Me as I am is Wild One. Q: Was the song a success? A: Hill's rendition was also her first Number One, spending the first four chart weeks of 1994 at the top of the Billboard Hot Country Singles & Tracks chart. Q: Did the album perform well? A: Take Me as I am has been certified 3× platinum in the United States for shipments of three million copies. Q: Did she write her own songs? A: Faith Hill performs songs other people wrote. Q: Did she tour? A: Faith Hill's Soul2Soul II Tour 2006 with McGraw became the highest-grossing country tour of all time. Q: Who did she tour with? A: Faith Hill toured with Tim Mcgraw in 2006.]\n"
    "Question: Did they sing together?\n"
    "Rewrite:<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
)
# Run the check on the model returned by merge_and_unload() -- the one that was
# saved -- rather than on the PEFT wrapper it was derived from.
inputs = tokenizer(input_text, return_tensors="pt").to(merged_model.device)
# Prints tokens to stdout as they are generated
streamer = TextStreamer(tokenizer)

# Generate text
merged_model.eval()
outputs = merged_model.generate(
    **inputs,
    max_new_tokens=512,
    # temperature / top_p only take effect when sampling is enabled
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    use_cache=True,
    streamer=streamer,
)
# Full sequence (prompt + completion), special tokens kept
generated_text = tokenizer.decode(outputs[0],) # skip_special_tokens=True)
