In [None]:
import torch
import os
from datasets import load_dataset, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, GPTQConfig
from peft import LoraConfig, PeftModel, AutoPeftModelForCausalLM, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer

In [None]:
# Load the "train" split of the `knkarthick/dialogsum` dataset.
data = load_dataset("knkarthick/dialogsum", split="train")


In [None]:
# Show the dataset summary (feature names and number of rows).
data

Dataset({
    features: ['id', 'dialogue', 'summary', 'topic'],
    num_rows: 12460
})

In [None]:
# Convert the dataset to a pandas DataFrame so derived columns can be added.
data_df = data.to_pandas()

In [None]:
# The raw DialogSum frame only carries 'id', 'dialogue', 'summary' and 'topic',
# so the 'prompt' and 'target' columns must be derived before they can be
# combined. The template matches the prompt used at inference time in this
# notebook ("###Human: ...\n<dialogue>\n###Assistant:").
# The guards keep any pre-existing 'prompt'/'target' columns untouched.
if "prompt" not in data_df.columns:
    data_df["prompt"] = (
        "###Human: Summarize this following dialogue:\n"
        + data_df["dialogue"]
        + "\n###Assistant:"
    )
if "target" not in data_df.columns:
    data_df["target"] = data_df["summary"]

# Full training text: the prompt followed by the reference summary.
data_df["formatted"] = data_df.apply(
    lambda row: f"{row['prompt']} {row['target']}", axis=1
)


In [None]:
# Display the DataFrame to inspect the derived columns.
# (Do not rebind `data_df` to the Hugging Face dataset here: that would discard
# the 'formatted' column and break the later Dataset.from_pandas(data_df) call.)
data_df

Unnamed: 0,id,dialogue,summary,topic,text,prompt,target,formatted
0,train_0,"#Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. ...","Mr. Smith's getting a check-up, and Doctor Haw...",get a check-up,###Human: Summarize this following dialogue: #...,###Human: Summarize this following dialogue:\n...,"Mr. Smith's getting a check-up, and Doctor Haw...",###Human: Summarize this following dialogue:\n...
1,train_1,"#Person1#: Hello Mrs. Parker, how have you bee...",Mrs Parker takes Ricky for his vaccines. Dr. P...,vaccines,###Human: Summarize this following dialogue: #...,###Human: Summarize this following dialogue:\n...,Mrs Parker takes Ricky for his vaccines. Dr. P...,###Human: Summarize this following dialogue:\n...
2,train_2,"#Person1#: Excuse me, did you see a set of key...",#Person1#'s looking for a set of keys and asks...,find keys,###Human: Summarize this following dialogue: #...,###Human: Summarize this following dialogue:\n...,#Person1#'s looking for a set of keys and asks...,###Human: Summarize this following dialogue:\n...
3,train_3,#Person1#: Why didn't you tell me you had a gi...,#Person1#'s angry because #Person2# didn't tel...,have a girlfriend,###Human: Summarize this following dialogue: #...,###Human: Summarize this following dialogue:\n...,#Person1#'s angry because #Person2# didn't tel...,###Human: Summarize this following dialogue:\n...
4,train_4,"#Person1#: Watsup, ladies! Y'll looking'fine t...",Malik invites Nikki to dance. Nikki agrees if ...,dance,###Human: Summarize this following dialogue: #...,###Human: Summarize this following dialogue:\n...,Malik invites Nikki to dance. Nikki agrees if ...,###Human: Summarize this following dialogue:\n...
...,...,...,...,...,...,...,...,...
12455,train_12455,#Person1#: Excuse me. You are Mr. Green from M...,Tan Ling picks Mr. Green up who is easily reco...,pick up someone,###Human: Summarize this following dialogue: #...,###Human: Summarize this following dialogue:\n...,Tan Ling picks Mr. Green up who is easily reco...,###Human: Summarize this following dialogue:\n...
12456,train_12456,#Person1#: Mister Ewing said we should show up...,#Person1# and #Person2# plan to take the under...,conference center,###Human: Summarize this following dialogue: #...,###Human: Summarize this following dialogue:\n...,#Person1# and #Person2# plan to take the under...,###Human: Summarize this following dialogue:\n...
12457,train_12457,#Person1#: How can I help you today?\n#Person2...,#Person2# rents a small car for 5 days with th...,rent a car,###Human: Summarize this following dialogue: #...,###Human: Summarize this following dialogue:\n...,#Person2# rents a small car for 5 days with th...,###Human: Summarize this following dialogue:\n...
12458,train_12458,#Person1#: You look a bit unhappy today. What'...,#Person2#'s mom lost her job. #Person2# hopes ...,job losing,###Human: Summarize this following dialogue: #...,###Human: Summarize this following dialogue:\n...,#Person2#'s mom lost her job. #Person2# hopes ...,###Human: Summarize this following dialogue:\n...


In [None]:
# Inspect the full training text of the row labelled 0.
data_df.loc[0, "formatted"]

"###Human: Summarize this following dialogue:\n#Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?\n#Person2#: I found it would be a good idea to get a check-up.\n#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.\n#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?\n#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.\n#Person2#: Ok.\n#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?\n#Person2#: Yes.\n#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.\n#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.\n#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.\n#Person2#: Ok, thanks doctor.\n###Assist

In [None]:
# Convert the DataFrame back into a `Dataset`; this rebinds the name `data`,
# replacing the originally loaded dataset object.
data = Dataset.from_pandas(data_df)

In [None]:
# 4-bit quantization settings for loading the base model.
# The quantization type is set explicitly to NF4 (instead of relying on the
# library default) so that training uses the same quantization as the config
# used when the fine-tuned model is reloaded for inference later in this notebook.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

In [None]:
# Load the base checkpoint with the quantization settings defined in
# `quantization_config`, letting `device_map="auto"` decide module placement.
base_model_id = "mistralai/Mistral-7B-Instruct-v0.1"
model = AutoModelForCausalLM.from_pretrained(
    base_model_id, device_map="auto", quantization_config=quantization_config
)

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [None]:
# Print the module tree of the freshly loaded (quantized) base model.
print(model)

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Mist

In [None]:
# Run PEFT's k-bit training preparation helper on the quantized model before
# adapters are attached. Note: this rebinds `model`, so re-running the cell
# applies the helper to its own previous result.
model = prepare_model_for_kbit_training(model)

In [None]:
# Print the module tree again after the k-bit training preparation step.
print(model)

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Mist

In [None]:
from peft import LoraConfig, TaskType

# LoRA adapter hyperparameters: rank-16 adapters on the attention query and
# value projections of a causal language model, with no bias terms trained.
lora_settings = {
    "task_type": TaskType.CAUSAL_LM,
    "target_modules": ["q_proj", "v_proj"],
    "r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "bias": "none",
}
peft_config = LoraConfig(**lora_settings)


In [None]:
# Attach the LoRA adapters described by `peft_config`; `model` is rebound to
# the resulting PEFT wrapper (a `PeftModelForCausalLM`, per the printed output).
model = get_peft_model(model, peft_config)

In [None]:
# Print the module tree of the PEFT-wrapped model to confirm where the LoRA
# layers were injected.
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_pro

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer that belongs to the base checkpoint.
# `token=True` replaces the deprecated `use_auth_token=True`: it authenticates
# with the locally stored Hugging Face credentials.
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    token=True,
)




tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer

# Hyperparameters for the LoRA fine-tuning run.
# NOTE(review): the output directory and Hub repo are named "samsum" although
# the data loaded in this notebook is DialogSum; the names are kept unchanged
# so existing artifacts and links still resolve.
training_args = TrainingArguments(
    output_dir="mistral-finetuned-samsum",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_strategy="epoch",
    logging_steps=100,
    # Training length is controlled by max_steps alone. A positive max_steps
    # overrides num_train_epochs, so the previous `num_train_epochs=5` was
    # never honoured (the run stops at step 250) and has been removed.
    max_steps=250,
    fp16=True,
    push_to_hub=True,
    hub_model_id="iamAbhishek01/mistral-samsum-v1",
    report_to="none",
)



In [None]:
# Rename 'target' to 'completion' so the dataset exposes 'prompt'/'completion'
# columns for supervised fine-tuning.
# The guard makes the cell safe to re-run: after the first run 'target' no
# longer exists and an unconditional rename would raise.
if "target" in data.column_names:
    data = data.rename_columns({"target": "completion"})


In [None]:
import os
from getpass import getpass

from huggingface_hub import HfApi

# Never hard-code the token in the notebook. Reuse HF_TOKEN when it is already
# set in the environment; otherwise prompt for it without echoing the value.
# (Assigning "" here would overwrite a valid token with an empty one.)
# The token needs write access because it is used to create the repository.
if not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = getpass("Hugging Face write-access token: ")

api = HfApi()
# exist_ok=True makes the cell idempotent: re-running it after the repository
# has been created returns the existing repo URL instead of raising.
api.create_repo(
    repo_id="iamAbhishek01/mistral-samsum-v1",  # full repo name with username
    token=os.environ["HF_TOKEN"],
    repo_type="model",
    private=False,
    exist_ok=True,
)




RepoUrl('https://huggingface.co/iamAbhishek01/mistral-samsum-v1', endpoint='https://huggingface.co', repo_type='model', repo_id='iamAbhishek01/mistral-samsum-v1')

In [None]:
# Build the supervised fine-tuning trainer.
# `model` has already been wrapped with the LoRA adapters by get_peft_model(),
# so `peft_config` is deliberately NOT passed again: handing the trainer both
# an existing PeftModel and a peft_config asks it to apply the adapter
# configuration a second time.
trainer = SFTTrainer(
    model=model,
    train_dataset=data,
    args=training_args,
)


Adding EOS to train dataset:   0%|          | 0/12460 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/12460 [00:00<?, ? examples/s]



Truncating train dataset:   0%|          | 0/12460 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# Run fine-tuning; the returned TrainOutput (global step, loss, runtime
# metrics) is shown as the cell output.
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
100,0.9761
200,0.8834


TrainOutput(global_step=250, training_loss=0.9238211059570313, metrics={'train_runtime': 1678.9578, 'train_samples_per_second': 1.191, 'train_steps_per_second': 0.149, 'total_flos': 3.98743772479488e+16, 'train_loss': 0.9238211059570313})

In [None]:
# Directory that receives the fine-tuned model files and the tokenizer files.
save_path = "./mistral-finetuned-samsum"

# Save the trainer's model, then the tokenizer, into the same directory.
# The tokenizer call is the last expression, so the tuple of written file
# paths it returns is displayed as the cell output.
trainer.model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)


('./mistral-finetuned-samsum/tokenizer_config.json',
 './mistral-finetuned-samsum/special_tokens_map.json',
 './mistral-finetuned-samsum/chat_template.jinja',
 './mistral-finetuned-samsum/tokenizer.model',
 './mistral-finetuned-samsum/added_tokens.json',
 './mistral-finetuned-samsum/tokenizer.json')

In [None]:
# Example inference prompt. It follows the same "###Human: ... ###Assistant:"
# layout as the training texts, ending right where the summary should begin.
prompt = """###Human: Summarize this following dialogue:
#Person1#: Hello! How are you doing today?
#Person2#: I'm doing fine. Just wanted to get a quick check-up.
###Assistant:"""


In [None]:
import gc

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_path = "./mistral-finetuned-samsum"

# Release the training-time objects before loading a second copy of the model.
# While `trainer` and `model` are still referenced, their weights stay
# allocated on the GPU, and loading again can fail with
# "Some modules are dispatched on the CPU or the disk".
# globals().pop() is used instead of `del` so the cell also runs on a fresh
# kernel where these names were never defined.
for _stale_name in ("trainer", "model"):
    globals().pop(_stale_name, None)
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# 4-bit NF4 quantization settings for inference.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# Reload the tokenizer and the fine-tuned model from the local save directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, quantization_config=bnb_config, device_map="auto")
# Switch to evaluation mode; as the last expression, the returned model is
# displayed as the cell output.
model.eval()


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
import torch
# Ask PyTorch to release cached GPU memory, then reset the peak-memory counters.
# NOTE(review): this cannot free memory that is still referenced by live
# objects (e.g. a model or trainer variable) — those references must be
# dropped first for the cleanup to have an effect.
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
