In [None]:
pip install torch transformers

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os
import time

# --- SETUP: change this path to wherever you want the results stored ---
# Folder on the local machine where the model files are written
output_folder = "hasil_model"
# exist_ok=True makes this a no-op when the folder already exists, so there is
# no separate existence check that could race with the creation.
os.makedirs(output_folder, exist_ok=True)

# --- PART 1: LOAD MODEL ---
# Checkpoint identifier handed to both from_pretrained() calls below.
model_id = "mdhugol/indonesia-bert-sentiment-classification"

print(f"[1/4] Sedang mendownload/load model: {model_id}...")
print("      (Pertama kali akan butuh internet, selanjutnya offline bisa)")

# Tokenizer and classifier are both resolved from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model_fp32 = AutoModelForSequenceClassification.from_pretrained(model_id)

# Class index -> human-readable sentiment name (position in the tuple is the
# class id). NOTE(review): ordering is taken as given by the original script;
# confirm against the checkpoint's model card.
labels = dict(enumerate(("POSITIVE", "NEUTRAL", "NEGATIVE")))

def predict(model, text):
    """Classify ``text`` with ``model`` and time the forward pass.

    Uses the module-level ``tokenizer`` to encode the input and the
    module-level ``labels`` mapping to name the predicted class.

    Args:
        model: Callable model that accepts the tokenizer output as keyword
            arguments and returns an object exposing ``.logits``.
        text: The sentence to classify.

    Returns:
        A ``(label, confidence, elapsed_seconds)`` tuple: the label name of
        the highest-scoring class, its softmax probability, and the time
        spent in the forward pass only (tokenization is not included).
    """
    # truncation=True asks the tokenizer to cut inputs that exceed the
    # maximum length it knows for the model, instead of passing an over-long
    # sequence to the forward pass.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short intervals; time.time() is wall-clock and can be coarse.
    start_time = time.perf_counter()
    with torch.no_grad():
        outputs = model(**inputs)
    elapsed = time.perf_counter() - start_time

    # Work on the first row explicitly. A bare logits.argmax() indexes the
    # flattened tensor, which only equals the class id when there is one row.
    row_logits = outputs.logits[0]
    predicted_class_id = row_logits.argmax(dim=-1).item()
    confidence = torch.softmax(row_logits, dim=-1)[predicted_class_id].item()
    return labels[predicted_class_id], confidence, elapsed

# --- PART 2: TEST THE ORIGINAL MODEL (FP32) ---
text_sample = "Pelayanan toko ini sangat lambat dan mengecewakan."
print("\n[2/4] Tes Prediksi Awal (FP32)")
print("      Kalimat: '{}'".format(text_sample))

# Baseline prediction with the unquantized model.
label_fp32, conf_fp32, time_fp32 = predict(model_fp32, text_sample)
print(
    "      Hasil: {} (Yakin: {:.4f}) | Waktu: {:.4f}s".format(
        label_fp32, conf_fp32, time_fp32
    )
)

# Write the FP32 weights to the local folder and record the file size in MB.
path_fp32 = os.path.join(output_folder, "model_fp32.pth")
torch.save(model_fp32.state_dict(), path_fp32)
size_fp32 = os.stat(path_fp32).st_size / (1024 * 1024)

# --- PART 3: QUANTIZATION (PTQ) ---
print("\n[3/4] Melakukan Kuantisasi (FP32 -> INT8)...")
# This is a CPU-only process, so it is safe to run on an ordinary laptop.
# Only torch.nn.Linear modules are selected for quantization to qint8.
model_int8 = torch.quantization.quantize_dynamic(
    model_fp32,
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8,
)

# Write the INT8 weights to the local folder and record the file size in MB.
path_int8 = os.path.join(output_folder, "model_int8.pth")
torch.save(model_int8.state_dict(), path_int8)
size_int8 = os.stat(path_int8).st_size / (1024 * 1024)

# --- PART 4: COMPARE ---
# Run the same sentence through the quantized model so the label, confidence
# and forward-pass time can be compared with the FP32 baseline.
label_int8, conf_int8, time_int8 = predict(model_int8, text_sample)

print("\n=== HASIL EKSPERIMEN LOKAL ===")
print(f"Lokasi File        : {os.path.abspath(output_folder)}")
print(f"Ukuran Asli (FP32) : {size_fp32:.2f} MB")
print(f"Ukuran Baru (INT8) : {size_int8:.2f} MB")
print(f"Penghematan Space  : {(size_fp32 - size_int8) / size_fp32 * 100:.2f}%")
# The INT8 timing was previously measured and then discarded; report both so
# the comparison covers latency as well as size. These are single-run
# measurements, so treat them as indicative only.
print(f"Waktu FP32         : {time_fp32:.4f}s")
print(f"Waktu INT8         : {time_int8:.4f}s")
print("-" * 30)
print(f"Prediksi INT8      : {label_int8} (Yakin: {conf_int8:.4f})")

# The verdict only compares the predicted label for this one sentence.
if label_fp32 == label_int8:
    print("✅ KESIMPULAN: Kuantisasi BERHASIL mempertahankan akurasi pada kalimat ini.")
else:
    print("⚠️ KESIMPULAN: Prediksi berubah.")

  from .autonotebook import tqdm as notebook_tqdm


[1/4] Sedang mendownload/load model: mdhugol/indonesia-bert-sentiment-classification...
      (Pertama kali akan butuh internet, selanjutnya offline bisa)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`



[2/4] Tes Prediksi Awal (FP32)
      Kalimat: 'Pelayanan toko ini sangat lambat dan mengecewakan.'
      Hasil: NEGATIVE (Yakin: 0.9981) | Waktu: 0.3116s


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`



[3/4] Melakukan Kuantisasi (FP32 -> INT8)...


For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  model_int8 = torch.quantization.quantize_dynamic(



=== HASIL EKSPERIMEN LOKAL ===
Lokasi File        : c:\CoolYEAH\model-quantization-sentiment-analysis\quantization-ptq\ptq-finetuned\hasil_model
Ukuran Asli (FP32) : 474.79 MB
Ukuran Baru (INT8) : 230.15 MB
Penghematan Space  : 51.53%
------------------------------
Prediksi INT8      : NEGATIVE (Yakin: 0.9981)
✅ KESIMPULAN: Kuantisasi BERHASIL mempertahankan akurasi pada kalimat ini.
