# Load Library

In [1]:
import torch

In [2]:
torch.cuda.current_device()

0

In [3]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB_MODE"] = "offline"

In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

import torch
import torch.nn as nn

import transformers
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          pipeline,
                          logging)
from datasets import Dataset
from peft import LoraConfig, PeftConfig
import bitsandbytes as bnb
from trl import SFTTrainer

from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from sklearn.model_selection import train_test_split

⚙️  Running in WANDB offline mode


In [6]:
from transformers import logging

logging.set_verbosity_error()

In [7]:
from datasets import load_dataset, DatasetDict

## Load Dataset

In [8]:
data = load_dataset('../dataset/thai-sentiment/')

In [9]:
data

DatasetDict({
    train: Dataset({
        features: ['label', 'text'],
        num_rows: 500
    })
    validation: Dataset({
        features: ['label', 'text'],
        num_rows: 100
    })
    test: Dataset({
        features: ['label', 'text'],
        num_rows: 200
    })
})

In [10]:
data['train'][0]

{'label': 0,
 'text': 'อยากได้ลิปรววแต่กูไม่ไหวใจอีทูดี้ละ การที่คิ้วกูหายระหว่างวันแม่งไม่ตลก'}

# Data PreProcess

In [11]:
def generate_prompt(data_point):
    return f"""
            Analyze the sentiment of the tweet enclosed in square brackets,
            determine if it is positive or negative, and return the answer as
            the corresponding sentiment label "positive" or  "negative"

            [{data_point["text"]}] = {data_point["label"]}
            """.strip()

def generate_test_prompt(data_point):
    return f"""
            Analyze the sentiment of the tweet enclosed in square brackets,
            determine if it is positive or negative, and return the answer as
            the corresponding sentiment label "positive" or  "negative"

            [{data_point["text"]}] =

            """.strip()

In [12]:
test = data['test'].to_pandas()
validation = data['validation'].to_pandas()
train = data['train'].to_pandas()

In [13]:
def convert(x):
    if x==1:
        return 'positive'
    elif x==0:
        return 'negative'
    else:
        return 'none'

In [14]:
train['label'] = train['label'].apply(lambda x: convert(x))
validation['label'] = validation['label'].apply(lambda x: convert(x))
test['label'] = test['label'].apply(lambda x: convert(x))

In [15]:
train.head()

Unnamed: 0,label,text
0,negative,อยากได้ลิปรววแต่กูไม่ไหวใจอีทูดี้ละ การที่คิ้ว...
1,positive,คัดจนเกลี้ยง
2,negative,=+10 กุ้งเทมปูระ ก็แหม่งๆรสชาติแปลกๆไปเหมือนกั...
3,positive,ดีพอใจมากครับ เบรคนุ่มไปไกลๆมาแล้วเบรคมั่นใจ ข...
4,negative,ไม่โอเคกับการเมาแสงโสมเลยอะ


# Apply Prompt

In [16]:
X_train = pd.DataFrame(train.apply(generate_prompt, axis=1),
                       columns=["text"])
X_eval = pd.DataFrame(validation.apply(generate_prompt, axis=1),
                      columns=["text"])

In [17]:
y_true = test.label
X_test = pd.DataFrame(test.apply(generate_test_prompt, axis=1), columns=["text"])

In [18]:
train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

In [19]:
print(train_data['text'][0])

Analyze the sentiment of the tweet enclosed in square brackets,
            determine if it is positive or negative, and return the answer as
            the corresponding sentiment label "positive" or  "negative"

            [อยากได้ลิปรววแต่กูไม่ไหวใจอีทูดี้ละ การที่คิ้วกูหายระหว่างวันแม่งไม่ตลก] = negative


In [20]:
print(eval_data['text'][0])

Analyze the sentiment of the tweet enclosed in square brackets,
            determine if it is positive or negative, and return the answer as
            the corresponding sentiment label "positive" or  "negative"

            [เดี๋ยวเจอ 285 ขนาดยังไม่แดกกูก็รู้สึกปวดหัวละ 55] = negative


In [21]:
def evaluate(y_true, y_pred):

    labels = ['positive',  'negative']
    mapping = {'positive': 1, 'negative': 0, 'none':1,}
    def map_func(x):
        return mapping.get(x, 1)

    y_true = np.vectorize(map_func)(y_true)
    y_pred = np.vectorize(map_func)(y_pred)

    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')

    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels

    for label in unique_labels:
        label_indices = [i for i in range(len(y_true))
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [22]:
model_name = "../model/llama-Meta-Llama-3-8B-Instruct/" #Qwen/Qwen3-1.7B #Qwen/Qwen2.5-1.5B

compute_dtype = getattr(torch, "float16")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [23]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((409

# Predoct Function

In [24]:
def predict(X_test, model, tokenizer):
    y_pred = []
    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["text"]
        input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(**input_ids, max_new_tokens=1, temperature=0.0,
                                 pad_token_id=tokenizer.eos_token_id, do_sample=False)
        result = tokenizer.decode(outputs[0])
        answer = result.split("=")[-1].lower()
        if "positive" in answer:
            y_pred.append("positive")
        elif "negative" in answer:
            y_pred.append("negative")
        elif "neutral" in answer:
            y_pred.append("neutral")
        else:
            y_pred.append("none")
    return y_pred

In [25]:
y_pred = predict(X_test , model, tokenizer)

100%|██████████| 200/200 [00:20<00:00,  9.94it/s]


In [26]:
evaluate(y_true, y_pred)

Accuracy: 0.415
Accuracy for label 0: 0.017
Accuracy for label 1: 1.000

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.02      0.03       119
           1       0.41      1.00      0.58        81

    accuracy                           0.41       200
   macro avg       0.70      0.51      0.31       200
weighted avg       0.76      0.41      0.25       200


Confusion Matrix:
[[  2 117]
 [  0  81]]


# Inference

In [29]:
def inference(text, model, tokenizer):
    prompt = f"""
            Analyze the sentiment of the tweet enclosed in square brackets,
            determine if it is positive or negative, and return the answer as
            the corresponding sentiment label "positive" or  "negative"

            [{text}] =

            """.strip()
    input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**input_ids, max_new_tokens=1, temperature=0.01)
    result = tokenizer.decode(outputs[0])
    answer = result.split("=")[-1].lower().strip()

    return answer

In [30]:
inference('แฟนบอกว่าไม่เป็นไรหรอก คิดมาก', model, tokenizer)

'"'

In [31]:
train_data

Dataset({
    features: ['text'],
    num_rows: 500
})

# FineTune with LoRa

In [32]:
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear",
)

training_arguments = TrainingArguments(
    output_dir="logs",
    num_train_epochs=2,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    gradient_checkpointing=False,
    optim="paged_adamw_8bit",
    save_steps=0,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    do_eval=False,
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    peft_config=peft_config,
    args=training_arguments,
)

Adding EOS to train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

In [33]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.1, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=4096, out_features=64, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=64, out_features=4096, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False)
            (lora_dropout): ModuleDict(
       

In [34]:
import os
os.environ["WANDB_MODE"] = "offline"

In [35]:
trainer.train()

{'loss': 1.9227, 'grad_norm': 0.4714037775993347, 'learning_rate': 0.00018702852410301554, 'entropy': 2.0744064939022064, 'num_tokens': 17931.0, 'mean_token_accuracy': 0.6533071425557136, 'epoch': 0.4}
{'loss': 1.2449, 'grad_norm': 0.6389788389205933, 'learning_rate': 0.00014004539056512667, 'entropy': 1.4031887224316597, 'num_tokens': 34860.0, 'mean_token_accuracy': 0.7601819917559623, 'epoch': 0.8}
{'loss': 1.0982, 'grad_norm': 0.4026164412498474, 'learning_rate': 7.703122578682046e-05, 'entropy': 1.2945905805242306, 'num_tokens': 50700.0, 'mean_token_accuracy': 0.7813233623699266, 'epoch': 1.192}
{'loss': 1.0709, 'grad_norm': 0.5453364849090576, 'learning_rate': 2.3211955396340002e-05, 'entropy': 1.183849729001522, 'num_tokens': 68264.0, 'mean_token_accuracy': 0.7834264385700226, 'epoch': 1.592}
{'loss': 0.9212, 'grad_norm': 0.6280507445335388, 'learning_rate': 1.3259101151694708e-07, 'entropy': 1.0929750800132751, 'num_tokens': 83546.0, 'mean_token_accuracy': 0.8091595894098282, 'e

TrainOutput(global_step=126, training_loss=1.2457847131623163, metrics={'train_runtime': 130.6997, 'train_samples_per_second': 7.651, 'train_steps_per_second': 0.964, 'train_loss': 1.2457847131623163, 'entropy': 0.6650262176990509, 'num_tokens': 83786.0, 'mean_token_accuracy': 0.8774086833000183, 'epoch': 2.0})

# Evaluate

In [36]:
y_pred = predict(X_test, model, tokenizer)
evaluate(y_true, y_pred)

100%|██████████| 200/200 [00:20<00:00,  9.86it/s]

Accuracy: 0.885
Accuracy for label 0: 0.899
Accuracy for label 1: 0.864

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.90      0.90       119
           1       0.85      0.86      0.86        81

    accuracy                           0.89       200
   macro avg       0.88      0.88      0.88       200
weighted avg       0.89      0.89      0.89       200


Confusion Matrix:
[[107  12]
 [ 11  70]]





In [38]:
inference('แฟนบอกว่าไม่เป็นไรหรอก คิดมาก', model, tokenizer)

'negative'

# Save Adapter

In [40]:
trainer.model.save_pretrained("../train_model/adapter-sentiment")