# **Installation**

In [None]:
!pip uninstall unsloth -y
!pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
!pip install unsloth_zoo bitsandbytes

Found existing installation: unsloth 2025.7.1
Uninstalling unsloth-2025.7.1:
  Successfully uninstalled unsloth-2025.7.1
Collecting git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-req-build-rl_2yrym
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-req-build-rl_2yrym
  Resolved https://github.com/unslothai/unsloth.git to commit 6ac4e2e36f2f8bd0bc63a6eb85afa7097948ff3d
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... done
  Created wheel for unsloth: filename=unsloth-2025.7.1-py3-none-any.whl size=296136 sha256=3c40adf589264b3269aa1da1c75a982abb60b95b71f05195015bc1cb8c808bc2
  Stored in directory: /tmp/pip-ephem-wheel-cache-n0n0yatm/wheels/d1/17/

# **Load the fine-tuned model (base model + LoRA adapter)**

In [None]:
# NOTE(review): unsloth is deliberately imported before torch, as in the
# original cell; its startup banner says it patches other libraries — confirm
# against the Unsloth docs before reordering.
from unsloth import FastLanguageModel
import torch

# Loading options for the fine-tuned checkpoint.
max_seq_length = 1024   # maximum sequence length passed to from_pretrained
dtype = None            # None leaves the dtype choice to Unsloth
load_in_4bit = True     # load the weights 4-bit quantized

# Download and load the model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "aismaanly/ai_synthetic",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

# Switch the model to Unsloth's inference mode. The trailing semicolon keeps
# the notebook from echoing the returned model's full module tree as output.
FastLanguageModel.for_inference(model);

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.7.1: Fast Llama patching. Transformers: 4.53.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/97.3M [00:00<?, ?B/s]

Unsloth 2025.7.1 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 3072, padding_idx=128004)
        (layers): ModuleList(
          (0-27): 28 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

# **Data Prep**

## **Load CSV**

In [None]:
from datasets import load_dataset
from google.colab import drive

# Mount Google Drive so the CSV stored there can be read from this runtime.
drive.mount('/content/drive')

# Reload the original dataset; the file uses ';' as its field separator.
csv_path = '/content/drive/MyDrive/Dataset/dataset_sosmed_new.csv'
dataset = load_dataset('csv', data_files=csv_path, sep=';')

Mounted at /content/drive


Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Prompt template with three positional slots, filled via str.format in this
# order: instruction, input, response. Each section is introduced by a
# "### <Name>:" header line.
data_prompt = (
    "Below is an instruction that describes a task, paired with an input that "
    "provides further context. Write a response that appropriately completes "
    "the request.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Input:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}\n"
)

## **Split dataset**

In [None]:
# Fixed seed so the train/eval/test partition is identical on every run;
# without it the test set (and every metric computed on it) changes each time.
# NOTE(review): for a leak-free evaluation this partition must match the one
# used when the model was fine-tuned — that notebook is not visible here, so
# confirm the seed and split sizes against it.
SPLIT_SEED = 42

# 80% train / 20% held out, then the held-out part is halved into eval and test.
dataset_dict = dataset["train"].train_test_split(test_size=0.2, seed=SPLIT_SEED)
eval_test_split = dataset_dict["test"].train_test_split(test_size=0.5, seed=SPLIT_SEED)

dataset_dict["eval"] = eval_test_split["train"]
dataset_dict["test"] = eval_test_split["test"]

test_dataset = dataset_dict["test"]
print(test_dataset)

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 115
})


# **Evaluation**

In [None]:
from tqdm import tqdm
import pandas as pd
from transformers import pipeline, logging

# Restrict transformers logging to CRITICAL-level messages.
logging.set_verbosity(logging.CRITICAL)

# Generation settings forwarded to the pipeline: sampled decoding with a cap
# of 64 new tokens per call.
generation_settings = {
    "max_new_tokens": 64,
    "return_full_text": False,
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.9,
}

# Initialize the text-generation pipeline around the already-loaded model.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_settings,
)

In [None]:
import re
from tqdm import tqdm
from transformers import set_seed

# The pipeline is configured with do_sample=True, so the RNGs are seeded right
# before generation to make reruns of this cell comparable.
GENERATION_SEED = 42
RESPONSE_START_TAG = "### Response:"


def _as_text(value):
    """Return a dataset field as a string, mapping a missing value (None) to "".

    A plain str(None) would yield the literal text "None", which would then be
    treated as a real instruction/input/reference downstream.
    """
    return "" if value is None else str(value)


def _clean_generated_comment(generated_text, eos_token):
    """Reduce raw pipeline output to the bare comment text.

    Args:
        generated_text: The "generated_text" string returned by the pipeline.
        eos_token: The tokenizer's EOS token string, or None if it has none.

    Returns:
        The text after the first "### Response:" tag when the tag is present
        (otherwise the whole text), with the EOS token and any "<|eot_id|>"
        markers removed and surrounding whitespace stripped.
    """
    if RESPONSE_START_TAG in generated_text:
        comment = generated_text.split(RESPONSE_START_TAG, 1)[1].strip()
    else:
        comment = generated_text.strip()

    # str.replace(None, "") raises TypeError, so only strip a defined EOS token.
    if eos_token:
        comment = comment.replace(eos_token, "").strip()
    return re.sub(r'<\|eot_id\|>', '', comment).strip()


predictions = []   # one generated comment per test example
references = []    # one single-element list of reference comments per example
rows = []          # per-example records for later inspection as a DataFrame

print("Generating comments for evaluation...")

num_examples_to_evaluate = len(test_dataset)
set_seed(GENERATION_SEED)

for i in tqdm(range(num_examples_to_evaluate)):
    example = test_dataset[i]

    instruction = _as_text(example.get("instruction"))
    input_text = _as_text(example.get("input"))
    original_comment = _as_text(example.get("output"))

    # The response slot is left empty so the model writes the comment itself.
    formatted_prompt = data_prompt.format(instruction, input_text, "")

    outputs = pipe(formatted_prompt)
    generated_comment = _clean_generated_comment(
        outputs[0]["generated_text"], tokenizer.eos_token
    )

    # Substitute explicit placeholders for empty strings before scoring.
    if not generated_comment:
        generated_comment = "[EMPTY_GENERATED_COMMENT]"
    if not original_comment:
        original_comment = "[EMPTY_ORIGINAL_COMMENT]"

    predictions.append(generated_comment)
    references.append([original_comment])

    rows.append({
        "instruction": instruction,
        "input_post": input_text,
        "original_comment": original_comment,
        "generated_comment": generated_comment,
    })

Generating comments for evaluation...


100%|██████████| 115/115 [03:50<00:00,  2.00s/it]


In [None]:
# Tabulate the per-example records and preview five randomly chosen rows.
predictions_df = pd.DataFrame(data=rows)
predictions_df.sample(n=5)

Unnamed: 0,instruction,input_post,original_comment,generated_comment
2,Buat komentar positif untuk seniman jalanan ya...,Seorang seniman jalanan melukis mural besar di...,"Wow, muralnya realistis banget, jadi tahu bany...",Komukasa dan seniman jalanan menggambar ekologi
108,Berikan komentar netral pada postingan tentang...,Seorang penggemar drama Korea membagikan dafta...,"Rekomendasi drakornya lumayan banyak, bisa jad...",Berikan komentar positatakan postingan tentang...
32,Berikan komentar netral pada postingan tentang...,Lembaga Tes Masuk Perguruan Tinggi (LTMPT) men...,"Info jalur SNMPTN ya, lumayan buat yang mau co...",Jadwal pendaftaran kuliah jalur SMSMPTN tahunM...
48,Buat komentar positif untuk program beasiswa b...,Yayasan sosial meluncurkan program beasiswa pe...,"Program beasiswa ini mulia banget, semoga berk...",Program beasiswa sekolah menengah ini bagus ba...
60,Tulis komentar negatif tentang kualitas layana...,Pelanggan mengeluh tentang lamanya respons dar...,Layanan pelanggan maskapai kok lambat banget y...,Berikan komentar negatifPengurusaran penerbara...


In [None]:
# Instalasi Perhitungan Metrik Evaluasi
!pip install evaluate
!pip install bert_score
!pip install numpy



In [None]:
import evaluate
import numpy as np

# The comments being scored are Indonesian, so BERTScore is told lang="id".
# With lang="en" the metric uses its English default model, which is not meant
# for Indonesian text.
# NOTE(review): for languages without a dedicated model bert_score is expected
# to fall back to its multilingual default — confirm in the bert_score docs.
# Scores produced with this setting are not comparable to earlier lang="en" runs.
BERTSCORE_LANG = "id"

# Every prediction must be paired with exactly one list of references.
assert len(predictions) == len(references), "predictions/references length mismatch"

# BERTScore
bertscore = evaluate.load("bertscore")
bertscore_result = bertscore.compute(
    predictions=predictions,
    references=references,
    lang=BERTSCORE_LANG,
)

# Report the mean of the per-example scores.
print("\n=== BERTScore ===")
print(f"BERTScore (Precision)    : {np.mean(bertscore_result['precision']):.4f}")
print(f"BERTScore (Recall)       : {np.mean(bertscore_result['recall']):.4f}")
print(f"BERTScore (F1)           : {np.mean(bertscore_result['f1']):.4f}")

Downloading builder script: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]


=== BERTScore ===
BERTScore (Precision)    : 0.8110
BERTScore (Recall)       : 0.8006
BERTScore (F1)           : 0.8054
