In [None]:
# prompt: mount Google Drive

from google.colab import drive
# Mount the user's Google Drive at /content/drive; the CSV paths used later
# in this notebook live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install peft

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.13.0->peft)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.13.0->peft)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.13.0->peft)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting

In [None]:
# ===============================================
# 0) 라이브러리 임포트 & 시드 고정
# ===============================================
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import random

# ===============================================
# 1) CFG 세팅
# ===============================================
class CFG:
    """Hyperparameters and run settings read by the rest of the notebook."""

    # --- reproducibility / evaluation protocol ---
    seed = 42
    n_fold = 7  # number of stratified cross-validation folds

    # --- backbone and tokenization ---
    model_name = "monologg/koelectra-base-discriminator"
    max_len = 256  # token length each sample is padded / truncated to

    # --- batching ---
    batch_size = 32
    val_batch_size = 64  # used for validation and test inference

    # --- optimization ---
    epochs = 3
    learning_rate = 2e-5
    weight_decay = 0.01
    warmup_ratio = 0.1  # fraction of scheduler steps spent in warmup

# Single settings object shared by every later section.
cfg = CFG()

# Prefer the GPU when CUDA is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and pin cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    # cuDNN flags: request deterministic kernels and turn benchmarking off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Apply the same seed to the stdlib, NumPy, torch CPU and torch CUDA generators.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Seed every RNG with the configured seed before the data is split and any
# model is created further down.
set_seed(cfg.seed)

# ===============================================
# 2) 데이터 로딩
# ===============================================
# Read the three CSV files from the mounted Drive folder.
train = pd.read_csv('/content/drive/MyDrive/2025AI/train.csv')
test = pd.read_csv('/content/drive/MyDrive/2025AI/test.csv')
sample_submission = pd.read_csv('/content/drive/MyDrive/2025AI/sample_submission.csv')

# The test file names its body column `paragraph_text`; rename it so both
# frames expose `full_text`.
test = test.rename(columns={'paragraph_text': 'full_text'})

# Model input = title + one space + body, with missing values treated as ''.
for frame in (train, test):
    frame['text'] = frame['title'].fillna('') + ' ' + frame['full_text'].fillna('')

# Plain Python lists are what the Dataset and the fold splitter consume below.
X, y = train['text'].tolist(), train['generated'].tolist()
test_texts = test['text'].tolist()

# ===============================================
# 3) Dataset
# ===============================================
class TextDataset(Dataset):
    """Map-style dataset that tokenizes one text per access.

    Each item is a dict of tensors produced by ``tokenizer`` with the leading
    batch dimension squeezed away, plus a ``labels`` tensor of dtype
    ``torch.long`` when labels were supplied.

    Args:
        texts: Sized, indexable collection of input strings.
        labels: Optional collection of integer class labels aligned with
            ``texts``; pass ``None`` for unlabeled (inference) data.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            padding='max_length', max_length=max_len, return_tensors="pt")``
            that returns a mapping of tensors.
        max_len: Sequence length forwarded to the tokenizer.

    Raises:
        ValueError: If ``tokenizer`` is ``None``, or if ``labels`` is given
            but its length differs from ``texts``.
    """

    def __init__(self, texts, labels=None, tokenizer=None, max_len=256):
        # Fail at construction time instead of with an opaque
        # "'NoneType' object is not callable" on the first batch.
        if tokenizer is None:
            raise ValueError("TextDataset requires a tokenizer, got None")
        # A mismatch would otherwise surface as an IndexError mid-epoch (too
        # few labels) or be silently ignored (too many labels).
        if labels is not None and len(labels) != len(texts):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(texts)} and {len(labels)}"
            )
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize ``texts[idx]`` and return it as a dict of tensors."""
        inputs = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors="pt"
        )
        # return_tensors="pt" yields a leading batch dim of 1; drop it so the
        # DataLoader can stack items into (batch, ...) itself.
        item = {k: v.squeeze(0) for k, v in inputs.items()}
        if self.labels is not None:
            item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# Load the tokenizer for the checkpoint named in cfg.model_name; this one
# instance is passed to every TextDataset created below.
tokenizer = AutoTokenizer.from_pretrained(cfg.model_name)

# ===============================================
# 4) Stratified K-Fold CV (number of folds = cfg.n_fold)
# ===============================================
# Stratified, shuffled splits with a fixed random_state so the folds are
# reproducible across runs.
skf = StratifiedKFold(n_splits=cfg.n_fold, shuffle=True, random_state=cfg.seed)
fold_auc_scores = []  # one validation ROC-AUC per fold

for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    print(f"\n📌 Fold {fold+1} 시작!")

    X_train = [X[i] for i in train_idx]
    y_train = [y[i] for i in train_idx]
    X_val = [X[i] for i in val_idx]
    y_val = [y[i] for i in val_idx]

    train_ds = TextDataset(X_train, y_train, tokenizer, max_len=cfg.max_len)
    val_ds = TextDataset(X_val, y_val, tokenizer, max_len=cfg.max_len)
    # Built once per fold; with shuffle=True a new permutation is drawn each
    # time the loader is iterated, i.e. once per epoch.
    train_loader = DataLoader(train_ds, batch_size=cfg.batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=cfg.val_batch_size)

    # Every fold starts again from the pretrained checkpoint.
    model = AutoModelForSequenceClassification.from_pretrained(
        cfg.model_name, num_labels=2
    ).to(device)

    optimizer = AdamW(model.parameters(), lr=cfg.learning_rate, weight_decay=cfg.weight_decay)

    # The scheduler is stepped once per batch, so its length must equal the
    # real number of batches. len(train_loader) counts the partial final
    # batch; floor-dividing the dataset size does not, which ended the
    # schedule early and ran the last optimizer steps at a learning rate of 0.
    total_steps = len(train_loader) * cfg.epochs
    warmup_steps = int(total_steps * cfg.warmup_ratio)

    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps
    )

    model.train()
    for epoch in range(cfg.epochs):
        print(f"  Epoch {epoch+1}")
        for i, batch in enumerate(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            # The batch includes 'labels', so the model returns the loss itself.
            outputs = model(**batch)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            if i % 100 == 0:
                print(f"    Step {i}/{len(train_loader)} | Loss: {loss.item():.4f}")

    # Score the held-out fold with the probability of class index 1.
    model.eval()
    val_probs = []
    with torch.no_grad():
        for batch in val_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**batch).logits
            probs = F.softmax(logits, dim=1)[:, 1].detach().cpu().numpy()
            val_probs.extend(probs)

    fold_auc = roc_auc_score(y_val, val_probs)
    fold_auc_scores.append(fold_auc)
    print(f"✅ Fold {fold+1} ROC-AUC: {fold_auc:.5f}")

# ===============================================
# 5) Fold별 평균
# ===============================================
# Summarize cross-validation: per-fold scores and their unweighted mean.
mean_auc = np.mean(fold_auc_scores)
print("\n📊 각 Fold ROC-AUC:", ["{:.5f}".format(score) for score in fold_auc_scores])
# Report the number of folds actually evaluated; the label used to be a
# hard-coded "5-Fold" even though the split is driven by cfg.n_fold (7).
print(f"📈 {len(fold_auc_scores)}-Fold 평균 ROC-AUC: {mean_auc:.5f}")

# ===============================================
# 6) 전체 데이터로 풀 학습 후 test 예측
# ===============================================
print("\n🚀 전체 학습 후 Test 예측!")

# Train on every labeled row; no validation split is held out in this stage.
full_train_ds = TextDataset(X, y, tokenizer, max_len=cfg.max_len)
full_train_loader = DataLoader(full_train_ds, batch_size=cfg.batch_size, shuffle=True)

# The test loader is not shuffled, so predictions come out in test-row order.
test_ds = TextDataset(test_texts, None, tokenizer, max_len=cfg.max_len)
test_loader = DataLoader(test_ds, batch_size=cfg.val_batch_size)

# Start again from the pretrained checkpoint; the fold models are not reused.
model = AutoModelForSequenceClassification.from_pretrained(
    cfg.model_name, num_labels=2
).to(device)

optimizer = AdamW(model.parameters(), lr=cfg.learning_rate, weight_decay=cfg.weight_decay)
# The scheduler is stepped once per batch, so size the schedule from the real
# batch count. len(full_train_loader) includes the partial final batch;
# floor-dividing the dataset size undercounted and left the last optimizer
# steps running at a learning rate of 0.
total_steps = len(full_train_loader) * cfg.epochs
warmup_steps = int(total_steps * cfg.warmup_ratio)
scheduler = get_linear_schedule_with_warmup(optimizer, warmup_steps, total_steps)

model.train()
for epoch in range(cfg.epochs):
    print(f"  Full Epoch {epoch+1}")
    for i, batch in enumerate(full_train_loader):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        if i % 100 == 0:
            print(f"    Step {i}/{len(full_train_loader)} | Loss: {loss.item():.4f}")

# Predict the probability of class index 1 for every test row.
model.eval()
test_probs = []
with torch.no_grad():
    for batch in test_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        logits = model(**batch).logits
        probs = F.softmax(logits, dim=1)[:, 1].detach().cpu().numpy()
        test_probs.extend(probs)

# NOTE(review): assumes sample_submission rows are in the same order as
# test.csv rows — confirm against the competition files.
sample_submission['generated'] = test_probs
sample_submission.to_csv('/content/drive/MyDrive/2025AI/KoELECTRA_CFG_FULL_KFold7.csv', index=False)
print("\n✅ 최종 제출 파일 저장 완료!")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/51.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/467 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]


📌 Fold 1 시작!


pytorch_model.bin:   0%|          | 0.00/443M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  Epoch 1
    Step 0/2603 | Loss: 0.6670


model.safetensors:   0%|          | 0.00/443M [00:00<?, ?B/s]

    Step 100/2603 | Loss: 0.4439
    Step 200/2603 | Loss: 0.3972
    Step 300/2603 | Loss: 0.1837
    Step 400/2603 | Loss: 0.1458
    Step 500/2603 | Loss: 0.1415
    Step 600/2603 | Loss: 0.1295
    Step 700/2603 | Loss: 0.0433
    Step 800/2603 | Loss: 0.0527
    Step 900/2603 | Loss: 0.1443
    Step 1000/2603 | Loss: 0.0371
    Step 1100/2603 | Loss: 0.3398
    Step 1200/2603 | Loss: 0.1180
    Step 1300/2603 | Loss: 0.2668
    Step 1400/2603 | Loss: 0.0475
    Step 1500/2603 | Loss: 0.2381
    Step 1600/2603 | Loss: 0.1454
    Step 1700/2603 | Loss: 0.1403
    Step 1800/2603 | Loss: 0.2370
    Step 1900/2603 | Loss: 0.1605
    Step 2000/2603 | Loss: 0.0419
    Step 2100/2603 | Loss: 0.2985
    Step 2200/2603 | Loss: 0.1765
    Step 2300/2603 | Loss: 0.0222
    Step 2400/2603 | Loss: 0.3351
    Step 2500/2603 | Loss: 0.3343
    Step 2600/2603 | Loss: 0.1375
  Epoch 2
    Step 0/2603 | Loss: 0.0271
    Step 100/2603 | Loss: 0.1608
    Step 200/2603 | Loss: 0.1633
    Step 300/2603 

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  Epoch 1
    Step 0/2603 | Loss: 0.6909
    Step 100/2603 | Loss: 0.4472
    Step 200/2603 | Loss: 0.5309
    Step 300/2603 | Loss: 0.1708
    Step 400/2603 | Loss: 0.0409
    Step 500/2603 | Loss: 0.2560
    Step 600/2603 | Loss: 0.1360
    Step 700/2603 | Loss: 0.0416
    Step 800/2603 | Loss: 0.3332
    Step 900/2603 | Loss: 0.0456
    Step 1000/2603 | Loss: 0.4661
    Step 1100/2603 | Loss: 0.0369
    Step 1200/2603 | Loss: 0.3145
    Step 1300/2603 | Loss: 0.1455
    Step 1400/2603 | Loss: 0.1585
    Step 1500/2603 | Loss: 0.0258
    Step 1600/2603 | Loss: 0.1343
    Step 1700/2603 | Loss: 0.0439
    Step 1800/2603 | Loss: 0.1520
    Step 1900/2603 | Loss: 0.0257
    Step 2000/2603 | Loss: 0.2508
    Step 2100/2603 | Loss: 0.1407
    Step 2200/2603 | Loss: 0.2760
    Step 2300/2603 | Loss: 0.2211
    Step 2400/2603 | Loss: 0.3248
    Step 2500/2603 | Loss: 0.2098
    Step 2600/2603 | Loss: 0.0279
  Epoch 2
    Step 0/2603 | Loss: 0.2373
    Step 100/2603 | Loss: 0.1689
    Step 2

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  Epoch 1
    Step 0/2603 | Loss: 0.8071
    Step 100/2603 | Loss: 0.5094
    Step 200/2603 | Loss: 0.3833
    Step 300/2603 | Loss: 0.3697
    Step 400/2603 | Loss: 0.0488
    Step 500/2603 | Loss: 0.0894
    Step 600/2603 | Loss: 0.0722
    Step 700/2603 | Loss: 0.1397
    Step 800/2603 | Loss: 0.2255
    Step 900/2603 | Loss: 0.1537
    Step 1000/2603 | Loss: 0.0331
    Step 1100/2603 | Loss: 0.0301
    Step 1200/2603 | Loss: 0.3276
    Step 1300/2603 | Loss: 0.1479
    Step 1400/2603 | Loss: 0.2053
    Step 1500/2603 | Loss: 0.2346
    Step 1600/2603 | Loss: 0.0282
    Step 1700/2603 | Loss: 0.2396
    Step 1800/2603 | Loss: 0.1198
    Step 1900/2603 | Loss: 0.2766
    Step 2000/2603 | Loss: 0.2405
    Step 2100/2603 | Loss: 0.1575
    Step 2200/2603 | Loss: 0.1832
    Step 2300/2603 | Loss: 0.0481
    Step 2400/2603 | Loss: 0.1782
    Step 2500/2603 | Loss: 0.0242
    Step 2600/2603 | Loss: 0.0852
  Epoch 2
    Step 0/2603 | Loss: 0.1249
    Step 100/2603 | Loss: 0.0350
    Step 2

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  Epoch 1
    Step 0/2603 | Loss: 0.7387
    Step 100/2603 | Loss: 0.4830
    Step 200/2603 | Loss: 0.1525
    Step 300/2603 | Loss: 0.1330
    Step 400/2603 | Loss: 0.0438
    Step 500/2603 | Loss: 0.1385
    Step 600/2603 | Loss: 0.0623
    Step 700/2603 | Loss: 0.2351
    Step 800/2603 | Loss: 0.1556
    Step 900/2603 | Loss: 0.1498
    Step 1000/2603 | Loss: 0.1451
    Step 1100/2603 | Loss: 0.1365
    Step 1200/2603 | Loss: 0.2087
    Step 1300/2603 | Loss: 0.1500
    Step 1400/2603 | Loss: 0.1140
    Step 1500/2603 | Loss: 0.1323
    Step 1600/2603 | Loss: 0.0366
    Step 1700/2603 | Loss: 0.1409
    Step 1800/2603 | Loss: 0.2705
    Step 1900/2603 | Loss: 0.1615
    Step 2000/2603 | Loss: 0.0278
    Step 2100/2603 | Loss: 0.1981
    Step 2200/2603 | Loss: 0.0483
    Step 2300/2603 | Loss: 0.1809
    Step 2400/2603 | Loss: 0.1527
    Step 2500/2603 | Loss: 0.1163
    Step 2600/2603 | Loss: 0.1442
  Epoch 2
    Step 0/2603 | Loss: 0.1234
    Step 100/2603 | Loss: 0.2062
    Step 2

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  Epoch 1
    Step 0/2603 | Loss: 0.6420
    Step 100/2603 | Loss: 0.3647
    Step 200/2603 | Loss: 0.0839
    Step 300/2603 | Loss: 0.0870
    Step 400/2603 | Loss: 0.3382
    Step 500/2603 | Loss: 0.0821
    Step 600/2603 | Loss: 0.0900
    Step 700/2603 | Loss: 0.0382
    Step 800/2603 | Loss: 0.1454
    Step 900/2603 | Loss: 0.2493
    Step 1000/2603 | Loss: 0.0419
    Step 1100/2603 | Loss: 0.1597
    Step 1200/2603 | Loss: 0.4517
    Step 1300/2603 | Loss: 0.5471
    Step 1400/2603 | Loss: 0.0672
    Step 1500/2603 | Loss: 0.2095
    Step 1600/2603 | Loss: 0.1962
    Step 1700/2603 | Loss: 0.1305
    Step 1800/2603 | Loss: 0.0766
    Step 1900/2603 | Loss: 0.2067
    Step 2000/2603 | Loss: 0.0226
    Step 2100/2603 | Loss: 0.1176
    Step 2200/2603 | Loss: 0.1392
    Step 2300/2603 | Loss: 0.2021
    Step 2400/2603 | Loss: 0.2653
    Step 2500/2603 | Loss: 0.2768
    Step 2600/2603 | Loss: 0.1829
  Epoch 2
    Step 0/2603 | Loss: 0.0329
    Step 100/2603 | Loss: 0.1541
    Step 2

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  Epoch 1
    Step 0/2603 | Loss: 0.7254
    Step 100/2603 | Loss: 0.3762
    Step 200/2603 | Loss: 0.3044
    Step 300/2603 | Loss: 0.4178
    Step 400/2603 | Loss: 0.4296
    Step 500/2603 | Loss: 0.4558
    Step 600/2603 | Loss: 0.2414
    Step 700/2603 | Loss: 0.1480
    Step 800/2603 | Loss: 0.1375
    Step 900/2603 | Loss: 0.1677
    Step 1000/2603 | Loss: 0.2321
    Step 1100/2603 | Loss: 0.2434
    Step 1200/2603 | Loss: 0.2437
    Step 1300/2603 | Loss: 0.2431
    Step 1400/2603 | Loss: 0.0263
    Step 1500/2603 | Loss: 0.1253
    Step 1600/2603 | Loss: 0.1332
    Step 1700/2603 | Loss: 0.1186
    Step 1800/2603 | Loss: 0.1464
    Step 1900/2603 | Loss: 0.0518
    Step 2000/2603 | Loss: 0.0235
    Step 2100/2603 | Loss: 0.2872
    Step 2200/2603 | Loss: 0.1096
    Step 2300/2603 | Loss: 0.1444
    Step 2400/2603 | Loss: 0.2940
    Step 2500/2603 | Loss: 0.0820
    Step 2600/2603 | Loss: 0.0955
  Epoch 2
    Step 0/2603 | Loss: 0.1355
    Step 100/2603 | Loss: 0.2092
    Step 2

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  Epoch 1
    Step 0/2603 | Loss: 0.7291
    Step 100/2603 | Loss: 0.4424
    Step 200/2603 | Loss: 0.1141
    Step 300/2603 | Loss: 0.1513
    Step 400/2603 | Loss: 0.0357
    Step 500/2603 | Loss: 0.1349
    Step 600/2603 | Loss: 0.2355
    Step 700/2603 | Loss: 0.1373
    Step 800/2603 | Loss: 0.2545
    Step 900/2603 | Loss: 0.1384
    Step 1000/2603 | Loss: 0.2496
    Step 1100/2603 | Loss: 0.2344
    Step 1200/2603 | Loss: 0.1394
    Step 1300/2603 | Loss: 0.0494
    Step 1400/2603 | Loss: 0.2279
    Step 1500/2603 | Loss: 0.1621
    Step 1600/2603 | Loss: 0.2536
    Step 1700/2603 | Loss: 0.2823
    Step 1800/2603 | Loss: 0.2494
    Step 1900/2603 | Loss: 0.0189
    Step 2000/2603 | Loss: 0.0236
    Step 2100/2603 | Loss: 0.1825
    Step 2200/2603 | Loss: 0.1553
    Step 2300/2603 | Loss: 0.1042
    Step 2400/2603 | Loss: 0.2649
    Step 2500/2603 | Loss: 0.1550
    Step 2600/2603 | Loss: 0.1002
  Epoch 2
    Step 0/2603 | Loss: 0.1468
    Step 100/2603 | Loss: 0.0202
    Step 2

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  Full Epoch 1
    Step 0/3037 | Loss: 0.6533
    Step 100/3037 | Loss: 0.4469
    Step 200/3037 | Loss: 0.2413
    Step 300/3037 | Loss: 0.2766
    Step 400/3037 | Loss: 0.1481
    Step 500/3037 | Loss: 0.2319
    Step 600/3037 | Loss: 0.2606
    Step 700/3037 | Loss: 0.1445
    Step 800/3037 | Loss: 0.1433
    Step 900/3037 | Loss: 0.0346
    Step 1000/3037 | Loss: 0.0452
    Step 1100/3037 | Loss: 0.0250
    Step 1200/3037 | Loss: 0.0336
    Step 1300/3037 | Loss: 0.1579
    Step 1400/3037 | Loss: 0.0269
    Step 1500/3037 | Loss: 0.0482
    Step 1600/3037 | Loss: 0.0411
    Step 1700/3037 | Loss: 0.1022
    Step 1800/3037 | Loss: 0.3446
    Step 1900/3037 | Loss: 0.2040
    Step 2000/3037 | Loss: 0.0391
    Step 2100/3037 | Loss: 0.1021
    Step 2200/3037 | Loss: 0.0587
    Step 2300/3037 | Loss: 0.1626
    Step 2400/3037 | Loss: 0.0435
    Step 2500/3037 | Loss: 0.1528
    Step 2600/3037 | Loss: 0.1707
    Step 2700/3037 | Loss: 0.1670
    Step 2800/3037 | Loss: 0.1157
    Step 29