In [None]:
import os
from huggingface_hub import HfApi, create_repo
from dotenv import load_dotenv

# Load variables (notably HF_TOKEN) from a local .env file into the environment.
load_dotenv()

# Hugging Face access token; None when the variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
USERNAME = "iskandarmrp"

# The repo id is derived from the repo name so the two cannot drift apart.
# (Previously MODEL_REPO_NAME was unused and did not match the repo that was
# actually targeted.)
MODEL_REPO_NAME = "distilbert-lora-paper-topic-classification"
FULL_MODEL_ID = f"{USERNAME}/{MODEL_REPO_NAME}"

# Local folder holding the LoRA adapter to upload, and the Hub repo type.
MODEL_PATH = "./hf_finetune_results_lora_baru/distilbert/lora_model"
REPO_TYPE = "model"

# fnmatch-style patterns, matched against each file's path relative to the
# uploaded folder. A bare directory name such as "__pycache__" only matches a
# file with exactly that path, so directory contents need explicit globs
# ("dir/*" at the root, "*/dir/*" when nested).
ADAPTER_IGNORE_PATTERNS = [
    ".git",
    ".git/*",
    "*/.git/*",
    "__pycache__",
    "__pycache__/*",
    "*/__pycache__/*",
    "checkpoint-*",
]


def upload_model() -> None:
    """Upload the LoRA adapter folder at MODEL_PATH to the FULL_MODEL_ID repo.

    Reads the module-level HF_TOKEN, FULL_MODEL_ID, MODEL_PATH and REPO_TYPE.
    Progress and errors are reported with print(); nothing is returned.

    Returns early (without contacting the Hub) when HF_TOKEN is empty or
    MODEL_PATH does not exist. A failure while creating the repo is printed
    and the upload is still attempted; a failure during the upload itself is
    printed and swallowed.
    """
    if not HF_TOKEN:
        print("Error: HF_TOKEN tidak ditemukan di .env")
        return

    print(f"Mempersiapkan upload model ke: {FULL_MODEL_ID}")

    api = HfApi(token=HF_TOKEN)

    if not os.path.exists(MODEL_PATH):
        print(f"Error: Folder model tidak ditemukan di '{MODEL_PATH}'")
        print("   Pastikan Anda sudah menjalankan training dan folder tersebut ada.")
        return

    # Best effort: exist_ok=True makes this a no-op for an existing repo, and
    # any other error is only reported so the upload below can still be tried.
    try:
        create_repo(
            repo_id=FULL_MODEL_ID,
            token=HF_TOKEN,
            repo_type=REPO_TYPE,
            exist_ok=True,
            private=False,
        )
        print("Repo Model siap.")
    except Exception as e:
        print(f"Info Repo: {e}")

    print(f"Mengupload adapter LoRA dari '{MODEL_PATH}'...")

    try:
        api.upload_folder(
            folder_path=MODEL_PATH,
            repo_id=FULL_MODEL_ID,
            repo_type=REPO_TYPE,
            path_in_repo=".",
            ignore_patterns=ADAPTER_IGNORE_PATTERNS,
        )
        print("Upload Model BERHASIL!")
        print(f"Lihat model Anda di sini: https://huggingface.co/{FULL_MODEL_ID}")

    except Exception as e:
        print(f"Upload Gagal: {e}")

# Entry point: run the upload when this cell/script executes as the main module.
if __name__ == "__main__":
    upload_model()

  from .autonotebook import tqdm as notebook_tqdm


Mempersiapkan upload model ke: iskandarmrp/distilbert-lora-paper-topic-classification
Repo Model siap.
Mengupload adapter LoRA dari './hf_finetune_results_lora_baru/distilbert/lora_model'...


adapter_model.safetensors: 100%|██████████| 3.61M/3.61M [00:02<00:00, 1.41MB/s]


Upload Model BERHASIL!
Lihat model Anda di sini: https://huggingface.co/iskandarmrp/distilbert-lora-paper-topic-classification


In [1]:
import os
from huggingface_hub import HfApi, create_repo
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Access token for the Hugging Face Hub (None when the variable is unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Destination repository, written as "<username>/<repo name>".
USERNAME = "iskandarmrp"
MODEL_REPO_NAME = "llama-3.2-1b-related-works-generation"
FULL_MODEL_ID = "/".join((USERNAME, MODEL_REPO_NAME))

# Local folder with the saved model, and the kind of Hub repo to target.
MODEL_PATH = "summarization/related_works_generation_model"
REPO_TYPE = "model"

# fnmatch-style patterns, matched against each file's path relative to the
# uploaded folder. A bare directory name such as "__pycache__" only matches a
# file with exactly that path, so directory contents need explicit globs
# ("dir/*" at the root, "*/dir/*" when nested).
MODEL_IGNORE_PATTERNS = [
    ".git",
    ".git/*",
    "*/.git/*",
    "__pycache__",
    "__pycache__/*",
    "*/__pycache__/*",
    "checkpoint-*",
    "*.ipynb_checkpoints",
    "*.ipynb_checkpoints/*",
]


def upload_model() -> None:
    """Upload the model folder at MODEL_PATH to the FULL_MODEL_ID repo.

    Reads the module-level HF_TOKEN, FULL_MODEL_ID, MODEL_PATH and REPO_TYPE.
    Progress and errors are reported with print(); nothing is returned. On
    success a short inference snippet for the uploaded repo is printed.

    Returns early (without contacting the Hub) when HF_TOKEN is empty or
    MODEL_PATH does not exist. A failure while creating the repo is printed
    and the upload is still attempted; a failure during the upload itself is
    printed and swallowed.
    """
    if not HF_TOKEN:
        print("Error: HF_TOKEN tidak ditemukan di .env")
        return

    print(f"Mempersiapkan upload model ke: {FULL_MODEL_ID}")

    api = HfApi(token=HF_TOKEN)

    if not os.path.exists(MODEL_PATH):
        print(f"Error: Folder model tidak ditemukan di '{MODEL_PATH}'")
        print("   Pastikan proses 'save_pretrained' di kode training sudah selesai.")
        return

    # Best effort: exist_ok=True makes this a no-op for an existing repo, and
    # any other error is only reported so the upload below can still be tried.
    try:
        create_repo(
            repo_id=FULL_MODEL_ID,
            token=HF_TOKEN,
            repo_type=REPO_TYPE,
            exist_ok=True,
            private=False,
        )
        print("Repo Model siap.")
    except Exception as e:
        print(f"Info Repo: {e}")

    print(f"Mengupload model dari '{MODEL_PATH}'...")

    try:
        api.upload_folder(
            folder_path=MODEL_PATH,
            repo_id=FULL_MODEL_ID,
            repo_type=REPO_TYPE,
            path_in_repo=".",
            ignore_patterns=MODEL_IGNORE_PATTERNS,
        )
        print("Upload Model BERHASIL!")
        print(f"Lihat model Anda di sini: https://huggingface.co/{FULL_MODEL_ID}")

        # Usage hint for loading the uploaded model.
        print("\nCARA PAKAI (INFERENCE):")
        print("from transformers import AutoTokenizer, AutoModelForCausalLM")
        print(f"tokenizer = AutoTokenizer.from_pretrained('{FULL_MODEL_ID}')")
        print(f"model = AutoModelForCausalLM.from_pretrained('{FULL_MODEL_ID}', device_map='auto')")

    except Exception as e:
        print(f"Upload Gagal: {e}")

# Entry point: run the upload when this cell/script executes as the main module.
if __name__ == "__main__":
    upload_model()

  from .autonotebook import tqdm as notebook_tqdm


Mempersiapkan upload model ke: iskandarmrp/llama-3.2-1b-related-works-generation
Repo Model siap.
Mengupload model dari 'summarization/related_works_generation_model'...


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
tokenizer.json: 100%|██████████| 17.2M/17.2M [00:06<00:00, 2.77MB/s]
model.safetensors: 100%|██████████| 1.55G/1.55G [03:18<00:00, 7.83MB/s]
Upload 2 LFS files: 100%|██████████| 2/2 [03:19<00:00, 99.52s/it] 


Upload Model BERHASIL!
Lihat model Anda di sini: https://huggingface.co/iskandarmrp/llama-3.2-1b-related-works-generation

CARA PAKAI (INFERENCE):
from transformers import AutoTokenizer, AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained('iskandarmrp/llama-3.2-1b-related-works-generation')
model = AutoModelForCausalLM.from_pretrained('iskandarmrp/llama-3.2-1b-related-works-generation', device_map='auto')


In [1]:
import os
from huggingface_hub import HfApi, create_repo
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Access token for the Hugging Face Hub (None when the variable is unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Destination dataset repository on the Hub.
REPO_TYPE = "dataset"
REPO_ID = "iskandarmrp/nlp-papermatch-dataset"

# (local folder, folder inside the repo) pairs, uploaded in this order.
uploads = list(
    {
        "./train_raw_dataset": "raw_data/train",
        "./test_raw_dataset": "raw_data/test",
        "./topic_classification/train_dataset": "topic_classification/train",
        "./topic_classification/test_dataset": "topic_classification/test",
        "./processed_multixscience_data": "processed_multixscience_data",
        "./chroma_db": "chroma_db",
    }.items()
)

# fnmatch-style patterns, matched against each file's path relative to the
# uploaded folder. Bare names such as "__pycache__" or ".DS_Store" only match
# at the root of that folder, so nested occurrences and directory contents
# need explicit globs.
DATASET_IGNORE_PATTERNS = [
    ".git",
    ".git/*",
    "*/.git/*",
    "__pycache__",
    "__pycache__/*",
    "*/__pycache__/*",
    ".DS_Store",
    "*/.DS_Store",
    "*.lock",
]


def upload_datasets() -> None:
    """Upload every existing local folder listed in `uploads` to REPO_ID.

    Reads the module-level HF_TOKEN, REPO_ID, REPO_TYPE and `uploads`, a list
    of (local folder, folder inside the repo) pairs. Progress and errors are
    reported with print(); nothing is returned.

    Returns early when HF_TOKEN or REPO_ID is empty. A failure while creating
    the repo is printed and the uploads are still attempted. Local folders
    that do not exist are skipped, and a failed upload of one folder is
    printed without stopping the remaining ones.
    """
    if not HF_TOKEN or not REPO_ID:
        print("Error: Pastikan HF_TOKEN dan REPO_ID ada")
        return

    api = HfApi(token=HF_TOKEN)

    # Best effort: exist_ok=True makes this a no-op for an existing repo, and
    # any other error is only reported so the uploads below can still be tried.
    try:
        create_repo(
            repo_id=REPO_ID,
            token=HF_TOKEN,
            repo_type=REPO_TYPE,
            exist_ok=True,
            private=False,
        )
        print(f"Repo '{REPO_ID}' siap.")
    except Exception as e:
        print(f"Info Repo: {e}")

    for local_path, target_path in uploads:
        if not os.path.exists(local_path):
            print(f"Folder lokal tidak ditemukan: {local_path} (Dilewati)")
            continue

        print(f"\nMengupload '{local_path}' ke folder '{target_path}'...")

        # Each folder is uploaded independently so one failure does not
        # prevent the others from being pushed.
        try:
            api.upload_folder(
                folder_path=local_path,
                repo_id=REPO_ID,
                repo_type=REPO_TYPE,
                path_in_repo=target_path,
                ignore_patterns=DATASET_IGNORE_PATTERNS,
            )
            print(f"Sukses upload {target_path}!")
        except Exception as e:
            print(f"Gagal upload {target_path}: {e}")

    print(f"\nSemua proses selesai! Cek di: https://huggingface.co/datasets/{REPO_ID}/tree/main")

# Entry point: run the uploads when this cell/script executes as the main module.
if __name__ == "__main__":
    upload_datasets()

  from .autonotebook import tqdm as notebook_tqdm


Repo 'iskandarmrp/nlp-papermatch-dataset' siap.

Mengupload './train_raw_dataset' ke folder 'raw_data/train'...


Upload 0 LFS files: 0it [00:00, ?it/s]
No files have been modified since last commit. Skipping to prevent empty commit.


Sukses upload raw_data/train!

Mengupload './test_raw_dataset' ke folder 'raw_data/test'...


Upload 0 LFS files: 0it [00:00, ?it/s]
No files have been modified since last commit. Skipping to prevent empty commit.


Sukses upload raw_data/test!

Mengupload './topic_classification/train_dataset' ke folder 'topic_classification/train'...


Upload 0 LFS files: 0it [00:00, ?it/s]
No files have been modified since last commit. Skipping to prevent empty commit.


Sukses upload topic_classification/train!

Mengupload './topic_classification/test_dataset' ke folder 'topic_classification/test'...


Upload 0 LFS files: 0it [00:00, ?it/s]
No files have been modified since last commit. Skipping to prevent empty commit.


Sukses upload topic_classification/test!

Mengupload './processed_multixscience_data' ke folder 'processed_multixscience_data'...


Upload 0 LFS files: 0it [00:00, ?it/s]
No files have been modified since last commit. Skipping to prevent empty commit.


Sukses upload processed_multixscience_data!

Mengupload './chroma_db' ke folder 'chroma_db'...


data_level0.bin:   0%|          | 0.00/144M [00:00<?, ?B/s]
[A


[A[A[A

[A[A



data_level0.bin:   0%|          | 164k/144M [00:00<01:32, 1.55MB/s]
[A



[A[A[A[A

data_level0.bin:   0%|          | 639k/144M [00:00<00:49, 2.92MB/s]



[A[A[A[A
[A



data_level0.bin:   1%|          | 934k/144M [00:00<02:23, 999kB/s] 
data_level0.bin:   1%|          | 1.11M/144M [00:00<02:10, 1.10MB/s]
data_level0.bin:   1%|          | 1.43M/144M [00:01<01:41, 1.41MB/s]
data_level0.bin:   1%|          | 1.74M/144M [00:01<01:21, 1.74MB/s]
header.bin: 100%|██████████| 100/100 [00:01<00:00, 82.7B/s]
data_level0.bin:   1%|▏         | 2.15M/144M [00:01<01:05, 2.18MB/s]
data_level0.bin:   2%|▏         | 2.51M/144M [00:01<00:56, 2.50MB/s]
data_level0.bin:   2%|▏         | 3.18M/144M [00:01<00:55, 2.52MB/s]
[A
length.bin: 100%|██████████| 344k/344k [00:01<00:00, 181kB/s] 8MB/s]
link_lists.bin: 100%|██████████| 735k/735k [00:02<00:00, 353kB/s] s]
index_metadata.pickle: 100%|██████████| 7.91M/7.9

Sukses upload chroma_db!

Semua proses selesai! Cek di: https://huggingface.co/datasets/iskandarmrp/nlp-papermatch-dataset/tree/main
