In [1]:
import IPython
import sys

def clean_notebook():
    IPython.display.clear_output(wait=True)
    print("Notebook cleaned.")

# Run the installation commands
if 'google.colab' in sys.modules:
    print("Running in Google Colab")
    !pip install bitsandbytes accelerate
    !pip install gradio
else:
    print("Not running in Google Colab")
    !pip install transformers accelerate datasets bitsandbytes
    !pip install gradio

# Clean up the notebook
clean_notebook()


Notebook cleaned.


In [22]:
import os
os.environ['HF_TOKEN'] ="HF_TOKEN"
hf_token               = os.environ['HF_TOKEN']

In [20]:

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False



# Load the entire model on the GPU 0
device_map = {"": 0}


In [26]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)

import torch

# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",token=hf_token
)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [10]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id,quantization_config=bnb_config,)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [30]:

messages = [
    {"role": "system", "content": "You are an AI content thai language writer.Generate a well structured 256 words blog for the given topics"},
    {"role": "user", "content": "บอกชื่อนายกในประเทศไทยมา 5 คน"},
]



terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

outputs = model.generate(
    input_ids,
    max_new_tokens=512,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


**Title:** "The Leaders of Thailand: 5 Notable Prime Ministers"

**Introduction:**

Thailand has had its fair share of notable leaders throughout its history. From the early days of the country's monarchy to the present day, Thailand has been ruled by a series of prime ministers who have shaped the country's politics, economy, and culture. In this blog, we'll take a look at five of the most notable prime ministers in Thailand's history.

**1. Field Marshal Plaek Phibunsongkhram (1942-1944)**

Field Marshal Plaek Phibunsongkhram was Thailand's prime minister during World War II. He was a key figure in Thailand's relations with Japan and played a crucial role in the country's neutrality during the war.

**2. Sarit Thanarat (1959-1964)**

Sarit Thanarat was a military dictator who ruled Thailand with an iron fist. He was known for his authoritarian rule and his efforts to modernize the country's economy and infrastructure.

**3. Thanin Kraivichien (1975-1976)**

Thanin Kraivichien was a l