# Emoji Recognition VLM LoRA 학습

RunPod Pod을 생성하여 Qwen3-VL bf16 LoRA 학습을 실행합니다.
학습 완료 후 LoRA 어댑터를 HF Hub에 업로드하고 Pod이 자가 종료됩니다.

| GPU | VRAM | ~$/hr | 용도 |
|-----|------|-------|------|
| RTX 4090 | 24GB | 0.39 | 2B 모델 QLoRA |
| **L40S** | **48GB** | **0.74** | **2B 모델 bf16 LoRA (기본)** |
| A100 80GB | 80GB | 1.64 | 대형 모델 |

In [1]:
import os, sys
from dotenv import load_dotenv

load_dotenv()
sys.path.insert(0, os.path.abspath(".."))

from utils.runpod_client import GPUType, pods, delete
from utils.emoji_vlm_train_client import launch_training_pod

print("학습 클라이언트 로드 완료")

학습 클라이언트 로드 완료


In [2]:
# 학습 파라미터 정의
params = dict(
    hf_dataset_repo="adwel94/vision-emoji-recognition-v1",
    hf_output_repo="adwel94/vision-emoji-recognition-lora",
    hf_output_branch="main",
    model_id="Qwen/Qwen3-VL-2B-Thinking",
    lora_r=16,
    lora_alpha=32,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    max_seq_length=8192,
    gpu_type=GPUType.NVIDIA_L40S,
    volume=100,
    image_name="adwel94/emoji-vlm-train:latest",
    prefect_api_url=os.getenv("PREFECT_API_URL", ""),
    prefect_api_key=os.getenv("PREFECT_API_KEY", ""),
    safari_webhook_url=os.getenv("SAFARI_WEBHOOK_URL", ""),
    wandb_project="emoji-vlm-train",
    wandb_entity=os.getenv("WANDB_ENTITY", ""),
    wandb_api_key=os.getenv("WANDB_API_KEY", ""),
)

# 출력용 마스킹 로직
display_params = params.copy()
mask_keys = ["prefect_api_key", "wandb_api_key", "safari_webhook_url"]
for k in mask_keys:
    if k in display_params and display_params[k]:
        val = str(display_params[k])
        display_params[k] = val[:4] + "*" * (len(val) - 4) if len(val) > 4 else "********"

import json
display_params

{'hf_dataset_repo': 'adwel94/vision-emoji-recognition-v1',
 'hf_output_repo': 'adwel94/vision-emoji-recognition-lora',
 'hf_output_branch': 'main',
 'model_id': 'Qwen/Qwen3-VL-2B-Thinking',
 'lora_r': 16,
 'lora_alpha': 32,
 'num_train_epochs': 3,
 'per_device_train_batch_size': 2,
 'gradient_accumulation_steps': 4,
 'learning_rate': 0.0002,
 'max_seq_length': 8192,
 'gpu_type': <GPUType.NVIDIA_L40S: 'NVIDIA_L40S'>,
 'volume': 100,
 'image_name': 'adwel94/emoji-vlm-train:latest',
 'prefect_api_url': 'https://api.prefect.cloud/api/accounts/b3cddc38-67d9-4d19-ab31-2ffdec1720a2/workspaces/ada670b3-8386-4c17-b572-d35611f1a427',
 'prefect_api_key': 'pnu_************************************',
 'safari_webhook_url': 'http*********************************************************************************************************************',
 'wandb_project': 'emoji-vlm-train',
 'wandb_entity': 'adwel94-personal',
 'wandb_api_key': 'wand***********************************************************

In [3]:
# 학습 Pod 생성
pod_id = launch_training_pod(**params)
print(f"학습 Pod 생성 완료: {pod_id}")

학습 Pod 생성 완료: qtvkpwtg3l1aff


In [4]:
# Pod 상태 모니터링
import pandas as pd

pod_list = pods()
if pod_list:
    df = pd.DataFrame(pod_list)
    display(df)
else:
    print("실행 중인 Pod이 없습니다.")

Unnamed: 0,consumerUserId,containerDiskInGb,costPerHr,createdAt,desiredStatus,env,gpuCount,id,imageName,lastStartedAt,...,machine,machineId,memoryInGb,name,ports,publicIp,templateId,vcpuCount,volumeInGb,volumeMountPath
0,user_2y1XhaHEL6PQMIk2nezLVZ6pYGB,30,0.79,2026-02-27 04:13:23.442 +0000 UTC,RUNNING,"{'BF16': 'True', 'GRADIENT_ACCUMULATION_STEPS'...",1,qtvkpwtg3l1aff,adwel94/emoji-vlm-train:latest,2026-02-27 04:13:23.441 +0000 UTC,...,{},y82jhq4ll6g3,251,emoji-vlm-train,"[8888/http, 22/tcp]",,,24,100,/workspace


In [5]:
# 긴급 정지
# pod_id = "POD_ID"
# result = delete(pod_id)
# print(f"삭제 결과: {result}")