<a href="https://colab.research.google.com/github/albrecht0210/CebuanoStoryTellerAI/blob/master/notebook/StoryTellingCebuano.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
# Environment setup. The %%capture magic above suppresses this cell's output.
# Install Unsloth (with its "colab-new" extra) straight from the GitHub repository.
!pip install "unsloth [colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Choose the xformers requirement from the installed torch version:
# pin xformers 0.0.27 when torch is older than 2.4.0, otherwise leave it unpinned.
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
# --no-deps: install these packages without pulling in their own dependencies.
# {xformers} is interpolated from the Python variable set on the previous line.
!pip install --no-deps {xformers} trl peft accelerate bitsandbytes triton
# transformers is installed separately, with normal dependency resolution.
# NOTE(review): none of these packages (other than the conditional xformers pin)
# is version-pinned, so a later run may resolve to different releases.
!pip install transformers

In [None]:
from unsloth import FastLanguageModel
import torch

# Loading options; max_seq_length is also passed to the trainer further down.
max_seq_length = 2048  # sequence length handed to the loader and the trainer
dtype = None           # None = auto-detect (float16 on Tesla T4/V100, bfloat16 on Ampere+)
load_in_4bit = True    # 4-bit quantization to reduce memory usage; may be set to False

# Fetch the base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
    model_name="unsloth/Meta-Llama-3.1-8B",
    # token="hf_...",  # only required for gated models such as meta-llama/Llama-2-7b-hf
)

In [None]:
# Names of the projection modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model with LoRA adapters; `model` is rebound to the wrapped
# model, so this cell is meant to run once per freshly loaded base model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,                                  # LoRA rank; any value > 0 (8-128 suggested)
    lora_alpha=16,
    lora_dropout=0,                        # 0 is the optimized setting
    bias="none",                           # "none" is the optimized setting
    target_modules=lora_target_modules,
    use_gradient_checkpointing="unsloth",  # True or "unsloth" (for very long context)
    use_rslora=False,                      # rank-stabilized LoRA disabled
    loftq_config=None,                     # LoftQ disabled
    random_state=3407,
)

In [None]:
# Cebuano Alpaca-style prompt template with three positional "{}" slots, filled
# in order by str.format: instruction, input (extra context), response.
# NOTE(review): the template ends with a newline after the response slot, so a
# formatted training example has a newline between the response and whatever is
# appended afterwards, and a prompt formatted with an empty response ends in a
# blank line — confirm this is intended.
alpaca_prompt = """Ania ang usa ka instruksyon nga naghulagway sa usa ka buluhaton, gipares sa usa ka input nga naghatag og dugang nga konteksto. Paghimo og tubag nga angayng makompleto sa hangyo.

### Instruksiyon:
{}

### Input:
{}

### Tubag:
{}
"""

# End-of-sequence marker appended to every training example; without it the
# fine-tuned model does not learn where a response stops.
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Render a batch of records into full training prompts.

    Intended for ``Dataset.map(..., batched=True)``, where each column arrives
    as a list.

    Args:
        examples: Mapping with "instruction", "input" and "output" keys, each
            holding a list of strings. Rows are paired positionally with
            ``zip``, so pairing stops at the shortest list.

    Returns:
        dict: ``{"text": [...]}`` where every entry is ``alpaca_prompt`` filled
        with one row, followed by ``EOS_TOKEN``.
    """
    texts = [
        alpaca_prompt.format(instruction, context, response) + EOS_TOKEN
        for instruction, context, response in zip(
            examples["instruction"], examples["input"], examples["output"]
        )
    ]
    return {"text": texts}

from datasets import load_dataset, concatenate_datasets

# Read the three JSONL sources (general data, translation pairs, stories) in
# that order, each as a "train" split.
dataset_cebuano_data, dataset_cebuano_translation, dataset_cebuano_story = (
    load_dataset("json", data_files=jsonl_path, split="train")
    for jsonl_path in (
        "datasets/chatgpt_cebuano_data.jsonl",
        "datasets/chatgpt_cebuano_translation.jsonl",
        "datasets/chatgpt_cebuano_story.jsonl",
    )
)

# Merge the sources and shuffle with a fixed seed so the order is repeatable.
combined_dataset = concatenate_datasets(
    [dataset_cebuano_data, dataset_cebuano_translation, dataset_cebuano_story]
).shuffle(seed=32)

# Add the "text" column holding the fully formatted prompt for every row.
dataset = combined_dataset.map(formatting_prompts_func, batched=True)

In [None]:
# Sanity check: display the first record, including the "text" field added by
# formatting_prompts_func.
dataset[0]

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Optimisation settings for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    num_train_epochs=1,               # one full pass over the dataset
    # max_steps=200,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    warmup_steps=5,
    weight_decay=0.01,
    optim="adamw_8bit",
    fp16=not is_bfloat16_supported(),  # float16 only when bfloat16 is unavailable
    bf16=is_bfloat16_supported(),
    logging_steps=1,
    report_to="none",                 # set to a tracker name (e.g. WandB) to enable reporting
)

# Supervised fine-tuning over the pre-formatted "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,                    # packing can speed up training on short sequences
    args=training_args,
)

In [None]:
# Run the fine-tuning job; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
# Mount Google Drive at /content/drive (requires the Colab-only google.colab package).
# NOTE(review): none of the visible cells read from or write to /content/drive —
# confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

from unsloth import FastLanguageModel

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Directory produced by this notebook's save_pretrained cell. That cell sits
# further down, so on a fresh runtime the directory does not exist yet when this
# cell runs; loading it unconditionally would abort a top-to-bottom run.
lora_model_dir = "cebuano_lora_epoch_model"

if os.path.isdir(lora_model_dir):
    # A previously saved fine-tuned model is available: load it from disk.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = lora_model_dir, # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
elif "model" not in globals() or "tokenizer" not in globals():
    # Nothing on disk and nothing in memory: fail with an actionable message.
    raise FileNotFoundError(
        f"'{lora_model_dir}' was not found and no in-memory model is available; "
        "run the training cells or the save cell first."
    )
# Otherwise keep using the model/tokenizer that are already in memory.

# Switch the model to inference mode. The trailing ';' stops the notebook from
# echoing the call's return value as cell output.
FastLanguageModel.for_inference(model);

In [None]:
# Prompt template used for generation: three positional "{}" slots filled by
# str.format in the order instruction, input (extra context), response.
# The template deliberately ends right after the response slot. In the training
# text the answer starts immediately after "### Tubag:\n", so a generation
# prompt (formatted with an empty response) must end at that same point rather
# than with an extra blank line.
alpaca_prompt = """Ania ang usa ka instruksyon nga naghulagway sa usa ka buluhaton, gipares sa usa ka input nga naghatag og dugang nga konteksto. Paghimo og tubag nga angayng makompleto sa hangyo.

### Instruksiyon:
{}

### Input:
{}

### Tubag:
{}"""

In [None]:
# Sampled story generation; uses the alpaca_prompt template defined in an earlier cell.
FastLanguageModel.for_inference(model)  # put the model into inference mode

story_prompt = alpaca_prompt.format(
    "Pagsulat ug usa ka istorya bahin ni Juan og iyang mga amigo.",  # instruction
    "",  # input
    "",  # response slot left empty so the model writes it
)
inputs = tokenizer([story_prompt], return_tensors="pt").to("cuda")

# do_sample=True: the output differs from run to run.
outputs = model.generate(
    **inputs,
    use_cache=True,
    do_sample=True,
    max_new_tokens=2000,
)
tokenizer.batch_decode(outputs)

In [None]:
from transformers import TextStreamer

# Streamed story generation: tokens are printed as they are produced.
FastLanguageModel.for_inference(model)  # put the model into inference mode

cat_story_prompt = alpaca_prompt.format(
    "Pagsulat ug usa ka istorya bahin ni Ian og iyang iring.",  # instruction
    "",  # input
    "",  # response slot left empty so the model writes it
)
inputs = tokenizer([cat_story_prompt], return_tensors="pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
# The returned token ids are discarded; the streamer already printed the text.
_ = model.generate(**inputs, max_new_tokens=2000, streamer=text_streamer)

In [None]:
# Streamed Cebuano-to-English translation demo; relies on alpaca_prompt and
# TextStreamer made available by earlier cells.
FastLanguageModel.for_inference(model)  # put the model into inference mode

translation_prompt = alpaca_prompt.format(
    "Translate to English: 'Si Ian adunay iring nga si Bibo. Kada buntag, magdula sila og bola sa gawas. Ang iring magdali ug magdula, ug si Ian magpasalamat sa iyang kalipay. Ang moral sa istorya mao nga ang pag-atiman sa mga hayop maghatag og kalipay.'",  # instruction
    "",  # input
    "",  # response slot left empty so the model writes it
)
inputs = tokenizer([translation_prompt], return_tensors="pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
# The returned token ids are discarded; the streamer already printed the text.
_ = model.generate(**inputs, max_new_tokens=2000, streamer=text_streamer)

In [None]:
# Save the model and its tokenizer to the local "cebuano_lora_epoch_model" directory.
# NOTE(review): presumably only the LoRA adapter weights are written, since the
# model was wrapped with get_peft_model — confirm.
# NOTE(review): the reload-for-inference cell earlier in the notebook loads this
# same directory, so on a fresh runtime this cell has to run before that one.
model.save_pretrained("cebuano_lora_epoch_model")
tokenizer.save_pretrained("cebuano_lora_epoch_model")

In [None]:
# Export the model together with its tokenizer in GGUF format under "model_epoch".
# NOTE(review): no quantization method is passed, so the library default is
# used — confirm that is the intended one.
model.save_pretrained_gguf("model_epoch", tokenizer,)