#### Start

In [1]:
# mount
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Load token from file
import os
with open('/content/drive/MyDrive/hf_token.txt', 'r') as file:
    hf_token = file.read().strip()

In [3]:
import torch
print(torch.__version__)

2.5.1+cu121


### Finetuning

In [4]:
!pip install datasets bitsandbytes peft

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
import torch
import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
folder_path = "/content/drive/MyDrive/data_argi_llm/kvk_pop/LAKSHADWEEP/lakshadweep/lakshadweep"

In [6]:
# Model and dataset configuration
model_name = "NousResearch/Llama-2-7b-chat-hf"
dataset_path = os.path.join(folder_path, 'data_ft.txt')
new_model = "Llama-2-7b-chat-ft"

In [7]:
# Load tokenizer and model with 4-bit precision and configure LoRA
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    use_auth_token=hf_token
)
model = prepare_model_for_kbit_training(model)

# Configure LoRA parameters and apply LoRA to the model
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
model.config.use_cache = False


tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

In [8]:
# Load dataset
dataset = load_dataset("text", data_files=dataset_path, split="train")

# Function to format the dataset for training
def format_dataset(example):
    # Extract the relevant parts from the string
    text = example['text']
    return {
        'input_ids': tokenizer(text, padding='max_length', truncation=True, max_length=512).input_ids,
        'labels': tokenizer(text, padding='max_length', truncation=True, max_length=512).input_ids
    }

# Format the dataset
dataset = dataset.map(format_dataset, remove_columns=["text"])

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

In [9]:
# Training arguments with checkpointing
training_args = TrainingArguments(
    output_dir=os.path.join(folder_path, "results"),
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    fp16=False,
    bf16=False,
    learning_rate=2e-4,
    weight_decay=0.001,
    logging_steps=25,
    max_grad_norm=0.3,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    save_steps=500,
    save_total_limit=3,
    report_to="tensorboard"
)


# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
)

# Start fine-tuning
trainer.train()


  trainer = Trainer(


Step,Training Loss


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


TrainOutput(global_step=3, training_loss=13.647689819335938, metrics={'train_runtime': 81.7841, 'train_samples_per_second': 0.147, 'train_steps_per_second': 0.037, 'total_flos': 244810065641472.0, 'train_loss': 13.647689819335938, 'epoch': 1.0})

In [10]:
# Save trained model
trainer.model.save_pretrained(os.path.join(folder_path, new_model))
tokenizer.save_pretrained(os.path.join(folder_path, new_model))

('/content/drive/MyDrive/data_argi_llm/kvk_pop/LAKSHADWEEP/lakshadweep/lakshadweep/Llama-2-7b-chat-ft/tokenizer_config.json',
 '/content/drive/MyDrive/data_argi_llm/kvk_pop/LAKSHADWEEP/lakshadweep/lakshadweep/Llama-2-7b-chat-ft/special_tokens_map.json',
 '/content/drive/MyDrive/data_argi_llm/kvk_pop/LAKSHADWEEP/lakshadweep/lakshadweep/Llama-2-7b-chat-ft/tokenizer.model',
 '/content/drive/MyDrive/data_argi_llm/kvk_pop/LAKSHADWEEP/lakshadweep/lakshadweep/Llama-2-7b-chat-ft/added_tokens.json',
 '/content/drive/MyDrive/data_argi_llm/kvk_pop/LAKSHADWEEP/lakshadweep/lakshadweep/Llama-2-7b-chat-ft/tokenizer.json')

### Output

In [11]:
!pip install transformers pipeline

Collecting pipeline
  Downloading pipeline-0.1.0-py3-none-any.whl.metadata (483 bytes)
Downloading pipeline-0.1.0-py3-none-any.whl (2.6 kB)
Installing collected packages: pipeline
Successfully installed pipeline-0.1.0


In [12]:
import logging
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

In [13]:
# Load the fine-tuned model and tokenizer with quantization
model_name = os.path.join(folder_path, new_model)  # Ensure this path is correct and contains required files

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model with 4-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,  # Directly specify loading in 4-bit mode
    torch_dtype=torch.float16  # Set the compute dtype
)

# Run text generation pipeline with our fine-tuned model
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)

# Function to generate response
def generate_response(prompt):
    result = pipe(f"<s>[INST] {prompt} [/INST]")
    return result[0]['generated_text']

# Example usage
prompt = "What is the bio bin made of?"
output = generate_response(prompt)
print(output)


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


<s>[INST] What is the bio bin made of? [/INST]  A bio bag, also known as a bio bin or biodegradable bag, is typically made from a variety of materials, including:
 Unterscheidung between different types of bio bags:

1. Polylactic acid (PLA) - This is a biodegradable plastic made from renewable resources such as corn starch or sugarcane. PLA is a popular choice for bio bags because it is biodegradable, compostable, and has a lower carbon footprint than traditional plastics.
2. Starch-based biodegradable bags - These bags are made from starches such as potato starch, tapioca starch, or corn starch. They are biodegradable and compostable, but may not be as strong as PLA bags.
3. Sugarcane/bag
