# seallm

##### Step 1: Setup Environment

In [None]:
!pip install torch transformers datasets peft accelerate bitsandbytes matplotlib scikit-learn pandas

In [1]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer

# Load dataset
csv_path = r"C:\Users\BNC\Documents\ITC-Internship\LLM\LLM-Model\visal\split_data\data\test\test.csv"  # Change to your file path
df = pd.read_csv(csv_path, names=["input_text", "output_text"])

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-1.5B-Chat")

# Tokenize function
def tokenize_function(examples):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": examples["input_text"]},
        {"role": "assistant", "content": examples["output_text"]}
    ]
    formatted_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    return tokenizer(formatted_text, truncation=True, padding="max_length", max_length=512)

# Convert to Hugging Face dataset
dataset = Dataset.from_pandas(df)
tokenized_datasets = dataset.map(tokenize_function, batched=False)

# Split dataset
train_test_split = tokenized_datasets.train_test_split(test_size=0.1)  # 90% train, 10% test
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]


tokenizer_config.json:   0%|          | 0.00/1.64k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Map:   0%|          | 0/421 [00:00<?, ? examples/s]

##### Step 2: Prepare Your Dataset

In [2]:
import torch
from transformers import TrainingArguments, Trainer, AutoModelForCausalLM
from peft import get_peft_model, LoraConfig, TaskType

# Load base model with 8-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    "SeaLLMs/SeaLLMs-v3-1.5B-Chat",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    load_in_8bit=True  # Reduce VRAM usage
)

# Configure LoRA
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM, 
    r=16,  # LoRA rank
    lora_alpha=32,
    lora_dropout=0.05
)
model = get_peft_model(model, lora_config)

# Training arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    evaluation_strategy="steps",
    save_strategy="steps",
    save_steps=100,
    eval_steps=100,
    logging_steps=10,
    learning_rate=2e-4,
    weight_decay=0.01,
    fp16=True,
    num_train_epochs=100,  # Train for 100 epochs
    report_to="none"
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset
)

# Train model
trainer.train()


config.json:   0%|          | 0.00/767 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


RuntimeError: CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend

#### Step 3: Fine-Tune with LoRA

In [None]:
import numpy as np
from transformers import pipeline
from datasets import load_metric

# Load metric
metric = load_metric("sacrebleu")

# Load model
model.eval()

# Evaluation function
def evaluate_model(dataset):
    references, predictions = [], []
    for example in dataset:
        input_text = example["input_text"]
        true_output = example["output_text"]

        # Generate response
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": input_text}
        ]
        formatted_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        input_ids = tokenizer([formatted_text], return_tensors="pt").to("cuda")

        with torch.no_grad():
            output_ids = model.generate(input_ids.input_ids, max_new_tokens=50)
        
        pred_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        references.append([true_output])
        predictions.append(pred_text)

    bleu_score = metric.compute(predictions=predictions, references=references)
    print(f"BLEU Score: {bleu_score['score']}")

# Evaluate
evaluate_model(eval_dataset)


#### Step 4: Evaluate Model Performance

In [None]:
import numpy as np
from transformers import pipeline
from datasets import load_metric

# Load metric
metric = load_metric("sacrebleu")

# Load model
model.eval()

# Evaluation function
def evaluate_model(dataset):
    references, predictions = [], []
    for example in dataset:
        input_text = example["input_text"]
        true_output = example["output_text"]

        # Generate response
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": input_text}
        ]
        formatted_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        input_ids = tokenizer([formatted_text], return_tensors="pt").to("cuda")

        with torch.no_grad():
            output_ids = model.generate(input_ids.input_ids, max_new_tokens=50)
        
        pred_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        references.append([true_output])
        predictions.append(pred_text)

    bleu_score = metric.compute(predictions=predictions, references=references)
    print(f"BLEU Score: {bleu_score['score']}")

# Evaluate
evaluate_model(eval_dataset)


#### Step 5: Plot Loss Curves

In [None]:
import matplotlib.pyplot as plt

# Load training logs
logs = trainer.state.log_history
train_loss = [log["loss"] for log in logs if "loss" in log]
eval_loss = [log["eval_loss"] for log in logs if "eval_loss" in log]
steps = range(len(train_loss))

# Plot loss
plt.figure(figsize=(10,5))
plt.plot(steps, train_loss, label="Train Loss")
plt.plot(steps, eval_loss, label="Eval Loss")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.title("Training and Evaluation Loss")
plt.legend()
plt.show()


#### Step 6: Test the Model

In [None]:
def chat_with_model(prompt):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    formatted_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    input_ids = tokenizer([formatted_text], return_tensors="pt").to("cuda")

    output_ids = model.generate(input_ids.input_ids, max_new_tokens=100, do_sample=True)
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return response

# Example test
print(chat_with_model("តោះទៅមុខ ៣.២៤ ម៉ែត ដោយល្បឿន ៣.៧៥ ម៉ែត្រក្នុង១វិនាទី បន្ត ងាកទៅស្ដាំ ២៣ ដឺក្រេ។"))
