# LLAMA3
https://ai.meta.com/blog/meta-llama-3/

https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md

# 1. GPU detection to prevent version conflicts

In [1]:
%%capture
# Install the fine-tuning dependencies. %%capture (which must stay on the first
# line of the cell) suppresses the pip output.
# NOTE(review): none of the packages are version-pinned, so a fresh run may
# resolve different versions than the ones this notebook was executed with.
import torch
!pip install bitsandbytes
!pip install datasets
# CUDA compute capability of the current GPU as a (major, minor) pair.
major_version, minor_version = torch.cuda.get_device_capability()
if major_version >= 8:
    # Compute capability 8 or higher: also install flash-attn together with
    # packaging, ninja and einops. --no-deps stops pip from pulling in (and
    # possibly replacing) the dependencies of these packages.
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Lower compute capability: same package set without flash-attn.
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

# 2. Import Python Packages

In [2]:
import torch, os, json, random, bitsandbytes as bnb, torch.nn as nn, psutil
from datasets import Dataset, DatasetDict, load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel, TrainingArguments, BitsAndBytesConfig
from trl import SFTTrainer
import re
from pprint import pprint
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, get_peft_model, LoraConfig

2024-07-21 00:08:28.860705: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-21 00:08:28.860836: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-21 00:08:29.004429: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# 3. Load Llama 3 8B with 8-bit quantization

In [3]:
# Hugging Face access token for the gated Llama 3 repository. It is read from
# the HF_TOKEN environment variable so that no credential is stored in the
# notebook; export it (for example from your platform's secret store) before
# running this cell. When the variable is unset the value is None, which lets
# the Hugging Face libraries fall back to credentials cached by a prior login.
# NOTE(review): the token that was previously hardcoded here has been exposed
# and should be revoked.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Hub repository id of the base model to fine-tune.
model_name = "meta-llama/Meta-Llama-3-8B"

# NOTE(review): not referenced by any visible cell - confirm whether it is
# meant to cap the tokenized sequence length.
max_seq_length = 2048

def load_model_and_tokenizer():
    """Load the tokenizer and the 8-bit quantized causal LM, ready for k-bit training.

    Reads the module-level ``model_name`` and ``HF_TOKEN``.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` has gradient checkpointing
        enabled and has been passed through ``prepare_model_for_kbit_training``.

    Raises:
        Exception: any error raised while loading is reported and re-raised.
            Previously the error was only printed and ``None`` was returned,
            which made the caller's ``model, tokenizer = ...`` unpacking fail
            with an unrelated ``TypeError`` that hid the real cause.
    """
    try:
        print("Loading tokenizer...")
        # `token` replaces the deprecated `use_auth_token` argument.
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
        # Pad with the EOS token, on the right-hand side of each sequence.
        # NOTE(review): presumably the base tokenizer defines no pad token - confirm.
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"

        print("EOS Token:", tokenizer.eos_token)
        print("EOS Token ID:", tokenizer.eos_token_id)

        # Configure Quantization
        quantization_config = BitsAndBytesConfig(load_in_8bit=True)

        # Load Pretrained Model with Quantization
        print("Loading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map='auto',  # Automatically distribute to CPU and GPU
            low_cpu_mem_usage=True,
            token=HF_TOKEN
        )

        # Enable Gradient Checkpointing and Prepare for k-bit Training
        print("Applying gradient checkpointing and preparing for k-bit training...")
        model.gradient_checkpointing_enable()
        model = prepare_model_for_kbit_training(model)

        print("Model and tokenizer loaded and configured successfully.")
        return model, tokenizer

    except Exception as e:
        print("An error occurred:", e)
        # Re-raise so the notebook stops here with the original traceback.
        raise

# Load the model and tokenizer once; the resulting module-level `model` and
# `tokenizer` are used by the later cells.
model, tokenizer = load_model_and_tokenizer()


Loading tokenizer...




tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


EOS Token: <|end_of_text|>
EOS Token ID: 128001
Loading model...




config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

Applying gradient checkpointing and preparing for k-bit training...
Model and tokenizer loaded and configured successfully.


# 4. LoRA Config and Test

In [4]:
from peft import LoraConfig, get_peft_model

def apply_lora_config(model):
    """Wrap ``model`` with LoRA adapters via ``get_peft_model``.

    The adapters use rank 16, alpha 16 and dropout 0.05, target the modules
    named ``q_proj`` and ``v_proj``, and train no bias terms.

    Args:
        model: The base model to adapt.

    Returns:
        The PEFT-wrapped model returned by ``get_peft_model``.

    Raises:
        Exception: any error raised while building or applying the LoRA
            configuration is reported and re-raised. Previously the original,
            un-adapted model was returned on failure, so later cells would
            silently continue without any LoRA adapters.
    """
    try:
        print("Applying LoRA configuration...")

        # Define LoRA configuration
        lora_config = LoraConfig(
            r=16,
            lora_alpha=16,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )

        # Apply LoRA configuration to the model
        peft_model = get_peft_model(model, lora_config)

        print("LoRA configuration applied successfully.")
        return peft_model

    except Exception as e:
        print("An error occurred while applying LoRA configuration:", e)
        # Do not fall back to the un-adapted model: fail loudly instead.
        raise

# Apply the LoRA configuration; `model` is rebound to the value returned by
# apply_lora_config, so re-running this cell passes the already-wrapped model in again.
model = apply_lora_config(model)


Applying LoRA configuration...
LoRA configuration applied successfully.


# 5. Data Preparation

In [5]:
# Install gdown quietly (-q); it is used below to fetch a file from Google Drive.
!pip install -q gdown
import gdown
# Google Drive file ID of the file to download
file_id = '1yl9K1M_Ey86DCU26jHX69v2GQhoURIcf'
# Local file path where the downloaded file will be saved (relative to the
# current working directory)
output_path = 'qa_pairs.json'
# Download the file from Google Drive. As the last expression of the cell, the
# call's return value is displayed as the cell output.
gdown.download(f'https://drive.google.com/uc?id={file_id}', output_path, quiet=False)

  pid, fd = os.forkpty()
Downloading...
From: https://drive.google.com/uc?id=1yl9K1M_Ey86DCU26jHX69v2GQhoURIcf
To: /kaggle/working/qa_pairs.json
100%|██████████| 43.0k/43.0k [00:00<00:00, 42.1MB/s]


'qa_pairs.json'

In [6]:
from datasets import Dataset
import json
from transformers import AutoTokenizer
# Special-token handles taken from the tokenizer loaded earlier; `eos_token`
# is appended to every training text further down in this cell.
special_tokens = tokenizer.special_tokens_map_extended
eos_token = tokenizer.eos_token
eos_token_id = tokenizer.eos_token_id

# Load your data. The encoding is given explicitly because the answers contain
# non-ASCII characters and the default encoding of open() is platform-dependent.
with open("qa_pairs.json", encoding="utf-8") as json_file:
    data = json.load(json_file)

# Check if 'questions' key exists and if it has the required structure
if "questions" not in data or not isinstance(data["questions"], list):
    raise ValueError("The data does not contain the 'questions' key or it is not a list.")

# Reject malformed entries here with a clear message instead of failing later
# with a bare KeyError while the dataset columns are built.
invalid_indices = [
    index
    for index, item in enumerate(data["questions"])
    if not isinstance(item, dict) or "question" not in item or "answer" not in item
]
if invalid_indices:
    raise ValueError(
        f"Entries at indices {invalid_indices} are missing the 'question' or 'answer' field."
    )

# Prompt template: the first placeholder receives the question, the second the answer.
ecommerce_prompt = """Below is a question paired with an answer. Write a response that appropriately completes the request.

### Question:
{}

### Answer:
{}"""


def formatting_prompts_func(examples):
    """Render a batch of question/answer pairs into training texts.

    Args:
        examples: A batch mapping with parallel "question" and "answer" sequences.

    Returns:
        dict: {"text": [...]} with one rendered prompt per pair, each ending
        with the module-level `eos_token`.
    """
    qa_pairs = zip(examples["question"], examples["answer"])
    # Every text ends with the EOS token so that generation learns where to stop.
    return {
        "text": [
            ecommerce_prompt.format(question, answer) + eos_token
            for question, answer in qa_pairs
        ]
    }

# Turn the list of records into column-oriented form, build the dataset and
# add the formatted "text" column in one batched pass.
qa_records = data["questions"]
dataset_dict = {
    column: [record[column] for record in qa_records]
    for column in ("question", "answer")
}
dataset = Dataset.from_dict(dataset_dict).map(formatting_prompts_func, batched=True)

# Spot-check one formatted example
print(dataset[3])


Map:   0%|          | 0/147 [00:00<?, ? examples/s]

{'question': 'What is the name of the CNSC?', 'answer': 'The Canadian Nuclear Safety Commission (CNSC; French: Commission Canadienne de sûreté nucléaire) is the federal regulator of nuclear power and materials in Canada.', 'text': 'Below is a question paired with an answer. Write a response that appropriately completes the request.\n\n### Question:\nWhat is the name of the CNSC?\n\n### Answer:\nThe Canadian Nuclear Safety Commission (CNSC; French: Commission Canadienne de sûreté nucléaire) is the federal regulator of nuclear power and materials in Canada.<|end_of_text|>'}


In [11]:
# # Check the formatted dataset
# for i in range(5):  # Show the first 5 examples
#     print(f"Example {i + 1}:")
#     print(f"Text: {dataset[i]['text']}")
#     print()

Example 1:
Text: Below is a question paired with an answer. Write a response that appropriately completes the request.

### Question:
What is the name of the CNSC?

### Answer:
The CNSC is an agency of the Government of Canada which reports to the Parliament of Canada through the Minister of Natural Resources.<|end_of_text|>

Example 2:
Text: Below is a question paired with an answer. Write a response that appropriately completes the request.

### Question:
What is the role of the CNSC?

### Answer:
The Participant Funding Program allows the public, Indigenous groups, and other stakeholders to request funding from the CNSC to participate in its regulatory processes.<|end_of_text|>

Example 3:
Text: Below is a question paired with an answer. Write a response that appropriately completes the request.

### Question:
When did she become the President and CEO?

### Answer:
Rumina Velshi joined the organisation in 2011 and in 2018 she became the President and CEO.<|end_of_text|>

Example 4:


# 6. Training

In [29]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

# Training configuration
# Checkpoints and logs are written below this directory (relative to the
# current working directory).
OUTPUT_DIR = "experiments"

training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # gradients are accumulated over 4 batches of size 1
    num_train_epochs=1,  # has no effect: max_steps below overrides it (see the warning in the cell output)
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=1,  # log the training loss at every optimizer step
    output_dir=OUTPUT_DIR,
    max_steps=80,
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    report_to="tensorboard",
)  

# Create the Trainer
# NOTE(review): `train_dataset` and `val_dataset` are not defined in any
# visible cell, so this cell raises NameError after Restart & Run All. The
# split/tokenization cell that produced them must be restored above.
# NOTE(review): an eval dataset is passed but no evaluation schedule is set in
# `training_args` - confirm whether evaluation is expected to run.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    # mlm=False: batches are built for causal (non-masked) language modeling.
    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# NOTE(review): presumably disabled because the generation cache is not needed
# during training and conflicts with gradient checkpointing - confirm.
model.config.use_cache = False
# Train the model
trainer.train()


max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss
1,0.8638
2,0.8293
3,1.1284
4,1.1116
5,0.8293
6,1.0238
7,1.0167
8,1.0702
9,0.9391
10,1.1752



Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3-8B/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3-8B is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in meta-llama/Meta-Llama-3-8B.


TrainOutput(global_step=80, training_loss=0.927222053706646, metrics={'train_runtime': 1169.7346, 'train_samples_per_second': 0.274, 'train_steps_per_second': 0.068, 'total_flos': 7384341298544640.0, 'train_loss': 0.927222053706646, 'epoch': 2.4242424242424243})

In [32]:
import os

# Confirm that training produced its output directory and show what is inside.
log_dir = "/kaggle/working/experiments"
if os.path.exists(log_dir):
    print(f"Log directory '{log_dir}' exists.")
    print("Files in log directory:")
    print(os.listdir(log_dir))
else:
    print(f"Log directory '{log_dir}' does not exist.")
# To inspect the logs, download the run folder and start TensorBoard from a
# terminal, for example:
#   tensorboard --logdir=C:\Users\engba\Desktop\runs\Jul21_01-35-02_ed8e81291c4a
# then open http://localhost:6007/ in a browser.

Log directory '/kaggle/working/experiments' exists.
Files in log directory:
['checkpoint-80', 'runs']


In [68]:
from huggingface_hub import login, HfApi

# Authenticate against the Hugging Face Hub with the token configured in the
# model-loading cell (`HF_TOKEN`) instead of repeating the secret as a literal
# in this cell.
login(token=HF_TOKEN)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [69]:
# # `config.json` cheking
# config = model.config
# print(config)

LlamaConfig {
  "_name_or_path": "meta-llama/Meta-Llama-3-8B",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": false,
    "_load_in_8bit": true,
    "bnb_4bit_compute_dtype": "float32",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "fp4",
    "bnb_4bit_use_double_quant": false,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": false,
    "load_in_8bit": true,
    "quant_method": "bitsandbytes"
  },
  "rms

In [71]:
from transformers import Trainer

# Directory that receives the saved model files. The absolute path is specific
# to the Kaggle working directory.
output_dir = "/kaggle/working/trained-model"

# Save the model held by the trainer. The directory listing recorded further
# down shows adapter files and tokenizer files in this directory.
trainer.save_model(output_dir)

In [72]:
# import os
# # Directory where the model is saved
# output_dir = "trained-model"

# # List files in the directory
# files = os.listdir(output_dir)
# print(f"Files in '{output_dir}': {files}")

Files in 'trained-model': ['config.json', 'special_tokens_map.json', 'adapter_config.json', 'adapter_model.safetensors', 'README.md', 'training_args.bin', 'tokenizer.json', 'tokenizer_config.json']


In [80]:
import shutil

# Bundle the saved model directory into a single ZIP archive.
zip_file_path = "/kaggle/working/trained-model.zip"
shutil.make_archive(
    base_name="/kaggle/working/trained-model",  # make_archive appends ".zip"
    format="zip",
    root_dir=output_dir,  # archive the contents of the saved-model directory
)

print(f"Model saved to {zip_file_path}.")


Model  /kaggle/working/trained-model.zip saved.


In [81]:
# import os

# # List of files in the output directory
# print("Output directory contents:")
# for filename in os.listdir('/kaggle/working'):
#     print(filename)

Output directory contents:
trained-model.zip
config.json
peft_lab_outputs
qa_pairs.json
.virtual_documents
wandb
experiments.zip
experiments
trained-model
