In [2]:
import os
import shutil
from huggingface_hub import snapshot_download
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Hugging Face Hub repository that snapshot_download() pulls the Chroma store from.
REPO_ID = "iskandarmrp/nlp-papermatch-dataset"
# Passed as repo_type to snapshot_download().
REPO_TYPE = "dataset"
# Local download target; the vector store is read from its "chroma_db" subfolder.
TEST_DIR = "./chroma_download"

def test_chroma_from_hf(query="tiongkok debt trap for developing countries", top_k=5, label=7):
    """Smoke-test the Chroma vector store published on the Hugging Face Hub.

    Downloads the ``chroma_db`` folder of ``REPO_ID`` into ``TEST_DIR`` unless a
    non-empty local copy already exists, opens the ``paper_abstracts``
    collection and prints the hits for ``query``.

    Args:
        query: Free-text search query.
        top_k: Maximum number of hits to retrieve.
        label: Value the ``label`` metadata field must equal. ``None`` disables
            the metadata filter.

    Returns:
        None. Progress, errors and results are reported through ``print``.
    """
    chroma_path = os.path.join(TEST_DIR, "chroma_db")

    # isdir (rather than exists) so that a stray *file* named "chroma_db" does
    # not make os.listdir raise NotADirectoryError; we fall back to downloading.
    if os.path.isdir(chroma_path) and len(os.listdir(chroma_path)) > 0:
        print(f"Folder database ditemukan di '{chroma_path}'.")
        print("   Melewati proses download dan menggunakan data lokal.\n")
    else:
        print(f"Folder tidak ditemukan. Sedang mendownload 'chroma_db' dari {REPO_ID}...")
        try:
            snapshot_download(
                repo_id=REPO_ID,
                repo_type=REPO_TYPE,
                local_dir=TEST_DIR,
                allow_patterns="chroma_db/*",
                local_dir_use_symlinks=False
            )
            print("Download selesai!\n")
        except Exception as e:
            # Best-effort test helper: report the failure and stop quietly.
            print(f"Gagal download: {e}")
            return

    print("Memulai tes query...")

    # The download can succeed without producing the folder (e.g. no file
    # matched allow_patterns), so verify again before opening the store.
    if not os.path.isdir(chroma_path):
        print(f"Error: Folder {chroma_path} tetap tidak ditemukan.")
        return

    EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
    # NOTE(review): the query embedding model presumably has to match the one
    # used when the collection was built -- confirm against the indexing code.
    embedding_function = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        encode_kwargs={"normalize_embeddings": True}
    )

    vector_db = Chroma(
        persist_directory=chroma_path,
        embedding_function=embedding_function,
        collection_name="paper_abstracts"
    )

    # Only restrict by metadata when a label was requested.
    metadata_filter = {"label": label} if label is not None else None

    print("Mencari di Vector DB...")
    results = vector_db.similarity_search_with_score(query, k=top_k, filter=metadata_filter)

    print(f"\nQuery: '{query}'\n")
    print(f"{'SCORE':<10} | {'JUDUL PAPER'}")
    print("-" * 80)

    if not results:
        print("Tidak ada hasil yang ditemukan.")
        return

    # NOTE(review): the score is whatever Chroma returns for this collection;
    # presumably a distance where lower means closer -- confirm the metric.
    for doc, score in results:
        title = doc.metadata.get("title", "No Title")
        doc_label = doc.metadata.get("label", "-")
        raw_abstract = doc.metadata.get("raw_abstract", "-")

        print(f"{score:.4f}     | {title} | {doc_label} | {raw_abstract}")
        # page_content is truncated to keep the listing readable.
        print(f"Abstract   : {doc.page_content[:200]}...")
        print("-" * 80)

# Run the smoke test when this cell/script is executed directly.
if __name__ == "__main__":
    test_chroma_from_hf()

Folder database ditemukan di './chroma_download\chroma_db'.
   Melewati proses download dan menggunakan data lokal.

Memulai tes query...
Mencari di Vector DB...

Query: 'tiongkok debt trap for developing countries'

SCORE      | JUDUL PAPER
--------------------------------------------------------------------------------
1.6227     | Physics of eta-prime with rooted staggered quarks | 7 | The quark-mass dependence of the eta in the Schwinger model, which -- like the eta-prime in QCD -- becomes massive through the axial anomaly, is studied on the lattice with N_f=0,1,2. Staggered quarks are used, with a rooted determinant for N_f=1. In the chiral limit the Schwinger mass is reproduced, which suggests that the anomaly is being treated correctly.
Abstract   : quark mass dependence eta schwinger like eta prime qcd becomes massive axial anomaly studied lattice staggered quark used rooted determinant chiral limit schwinger mass reproduced suggests anomaly tre...
-----------------------------

In [3]:
import torch
import json
import os
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch.nn.functional as F

# Hugging Face Hub id of the PEFT adapter; also used to look up its base model.
MODEL_ID = "iskandarmrp/distilbert-lora-paper-topic-classification"

# Optional local JSON file; its key/value pairs are inverted to map class id -> label.
LABEL_MAPPING_FILE = "label_mapping.json"

def test_inference(text=None):
    """Load the PEFT topic classifier and print its prediction for one abstract.

    The label mapping is read from ``LABEL_MAPPING_FILE`` when that file exists.
    Otherwise the prediction is reported as a bare class id. The base model
    named in the adapter config is loaded first, then the adapter ``MODEL_ID``
    is applied on top and inference runs on CPU.

    Args:
        text: Abstract to classify. When ``None`` a built-in sample abstract
            about fiscal policy is used.

    Returns:
        None. The predicted id, label and confidence are printed.
    """
    print(f"Sedang meload model dari Hugging Face: {MODEL_ID}...")

    if os.path.exists(LABEL_MAPPING_FILE):
        # Explicit encoding so the result does not depend on the OS locale.
        with open(LABEL_MAPPING_FILE, "r", encoding="utf-8") as f:
            label_mapping = json.load(f)
        # The file is inverted here, so it is read as label -> id.
        id2label = {v: k for k, v in label_mapping.items()}
        num_labels = len(label_mapping)
        print(f"Label mapping ditemukan ({num_labels} kelas).")
    else:
        print("Warning: 'label_mapping.json' tidak ditemukan. Hasil akan berupa angka.")
        id2label = {}
        # NOTE(review): fallback class count; presumably has to equal the
        # number of classes the adapter was trained with -- confirm.
        num_labels = 20

    peft_config = PeftConfig.from_pretrained(MODEL_ID)

    tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)

    base_model = AutoModelForSequenceClassification.from_pretrained(
        peft_config.base_model_name_or_path,
        num_labels=num_labels,
        ignore_mismatched_sizes=True
    )

    model = PeftModel.from_pretrained(base_model, MODEL_ID)

    # device = "cuda" if torch.cuda.is_available() else "cpu"
    device = "cpu"
    model.to(device)
    model.eval()
    print(f"Model sukses dimuat di {device}!")

    if text is None:
        # Built-in sample used when the caller does not supply an abstract.
        text = """
    This study investigates the impact of fiscal policy on economic growth in developing nations 
    during periods of high inflation. Using a panel data approach covering 50 countries from 2000 to 2020, 
    we analyze the relationship between government spending, tax revenues, and GDP per capita. 
    Our results indicate that while fiscal stimulus can boost short-term growth, excessive public debt 
    negatively correlates with long-term economic stability.
    """
    test_text = text

    print("\n" + "="*50)
    print("TEST QUERY:")
    print(test_text.strip())
    print("="*50)

    inputs = tokenizer(
        test_text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding="max_length"
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        # Softmax over the class dimension turns logits into probabilities.
        probabilities = F.softmax(logits, dim=1)

        predicted_class_id = torch.argmax(probabilities, dim=1).item()
        confidence = probabilities[0][predicted_class_id].item()

    # Fall back to the numeric id when no mapping entry exists.
    predicted_label = id2label.get(predicted_class_id, str(predicted_class_id))

    print("\nHASIL PREDIKSI:")
    print(f"Label ID   : {predicted_class_id}")
    print(f"Kategori   : {predicted_label}")
    print(f"Confidence : {confidence:.4f} ({confidence*100:.2f}%)")

# Run the classifier smoke test when this cell/script is executed directly.
if __name__ == "__main__":
    test_inference()

Sedang meload model dari Hugging Face: iskandarmrp/distilbert-lora-paper-topic-classification...
Label mapping ditemukan (20 kelas).


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model sukses dimuat di cpu!

TEST QUERY:
This study investigates the impact of fiscal policy on economic growth in developing nations 
    during periods of high inflation. Using a panel data approach covering 50 countries from 2000 to 2020, 
    we analyze the relationship between government spending, tax revenues, and GDP per capita. 
    Our results indicate that while fiscal stimulus can boost short-term growth, excessive public debt 
    negatively correlates with long-term economic stability.

HASIL PREDIKSI:
Label ID   : 3
Kategori   : econ
Confidence : 0.4891 (48.91%)


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import snapshot_download
import os
import shutil

# Hugging Face Hub id of the generation model and the folder it is mirrored to.
MODEL_ID = "Alan43/related_works_generation_model"
LOCAL_DIR = "./model_local_final"

torch.cuda.empty_cache()

if not torch.cuda.is_available():
    print("ERROR: Tidak ada GPU NVIDIA yang terdeteksi! Script ini butuh GPU.")
    # exit() is the interactive-shell helper and is not a reliable way to
    # abort a notebook cell; raising SystemExit stops execution right here and
    # reports a non-zero status when run as a script.
    raise SystemExit(1)

print(f"GPU Terdeteksi: {torch.cuda.get_device_name(0)}")
print(f"   VRAM Tersedia: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

# Best-effort refresh of the local mirror: a failure (e.g. no network) is
# reported but not fatal, because the model is loaded from LOCAL_DIR below and
# a previously downloaded copy may already be there.
try:
    snapshot_download(repo_id=MODEL_ID, local_dir=LOCAL_DIR, local_dir_use_symlinks=False, resume_download=True)
except Exception as e:
    print(f"Peringatan: download gagal ({e}). Mencoba memakai file lokal di '{LOCAL_DIR}'.")

print("\n--- Memuat Model ke GPU (Full VRAM) ---")

# 4-bit NF4 quantisation with double quantisation, fp16 compute and CPU
# offload disabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_enable_fp32_cpu_offload=False
)

try:
    tokenizer = AutoTokenizer.from_pretrained(LOCAL_DIR)
    # Reuse EOS as the padding token when the tokenizer defines none.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    # NOTE(review): trust_remote_code=True lets code shipped with the model
    # repository run locally -- confirm it is actually required for this model.
    model = AutoModelForCausalLM.from_pretrained(
        LOCAL_DIR,
        quantization_config=bnb_config,
        device_map="cuda:0",
        trust_remote_code=True,
        local_files_only=True
    )

    print("Model berhasil masuk ke VRAM GPU!")

except Exception as e:
    print(f"\nGAGAL LOAD (Kemungkinan VRAM Kurang): {e}")
    print("Solusi: Gunakan script sebelumnya dengan CPU Offload.")
    # Stop here: everything below needs `tokenizer` and `model`.
    raise SystemExit(1)

def generate_related_work(input_text, max_new_tokens=300, temperature=0.1, top_p=0.9, repetition_penalty=1.2):
    """Generate a 'Related Work' paragraph for ``input_text``.

    Wraps the input in a system/user/assistant header prompt, samples a
    continuation from the module-level ``model`` and returns only the newly
    generated text.

    Args:
        input_text: The current abstract followed by its references (marked
            with ``@cite_n``), as described in the system message.
        max_new_tokens: Upper bound on generated tokens. Output is cut off at
            this limit, so raise it if paragraphs end mid-sentence.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.
        repetition_penalty: Repetition penalty passed to ``model.generate``.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped.
    """
    system_msg = "You are an academic writing assistant. Write a 'Related Work' section based on the provided text. The input contains the Current Abstract followed by References (marked with @cite_n). Synthesize these references and highlight the novelty of the Current Abstract."

    # NOTE(review): the prompt already starts with <|begin_of_text|>; if the
    # tokenizer also prepends a BOS token it will be duplicated -- confirm
    # against the format used during fine-tuning before changing it.
    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{system_msg}<|eot_id|><|start_header_id|>user<|end_header_id|>

{input_text}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

    # Follow the model's own device instead of assuming a literal "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            pad_token_id=tokenizer.eos_token_id
        )

    # generate() returns prompt + continuation; drop the prompt tokens.
    prompt_length = inputs.input_ids.shape[1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

# Interactive loop: read text, generate a Related Work paragraph, repeat until
# the user types "exit"/"quit" or closes/interrupts the input prompt.
if __name__ == "__main__":
    print("\n=== RUNNING ON GPU ===")
    while True:
        try:
            text = input("\nMasukkan Text:\n")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt ends the session cleanly.
            break

        # Strip before comparing so "exit " or " quit" are still recognised.
        command = text.strip()
        if command.lower() in ('exit', 'quit'):
            break
        if not command:
            continue

        try:
            print("\nGenerating (Cepat)...")
            print(generate_related_work(text))
        except Exception as e:
            # Keep the session alive after a failed generation.
            print(f"Error: {e}")

  from .autonotebook import tqdm as notebook_tqdm


✅ GPU Terdeteksi: NVIDIA GeForce RTX 3060 Laptop GPU
   VRAM Tersedia: 6.00 GB


For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.
Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 1381.78it/s]



--- Memuat Model ke GPU (Full VRAM) ---


Loading checkpoint shards: 100%|██████████| 2/2 [00:08<00:00,  4.24s/it]


✅ Model berhasil masuk ke VRAM GPU!

=== RUNNING ON GPU ===

Generating (Cepat)...
The work presented in @cite_13 is one such example where they have used Twitter data collected over 3 months period using hashtags related to #BlackLivesMatter movement. Their findings show that there exists a strong correlation between racial slurs and other forms of offensive words like swearwords, insults etc. This shows how important it is to consider context while classifying tweets into different categories. In addition, they also found that most of the users who use racist terms do so repeatedly which makes them easy targets for detection algorithms. Another interesting finding was that people tend to be more tolerant towards racism when compared to sexism. It can be attributed to the fact that women face discrimination at all levels whereas men don't suffer much because of their gender. Hence, we need to take care about what kind of training examples our models see during development phase otherw