# ConvNeXt V2 Feature Extraction


## 1. 환경 설정 및 데이터 준비

In [None]:
# Install the timm library (IPython shell escape; -q keeps pip output quiet)
!pip install -q timm

# Import the core libraries; Google Drive is mounted first at /content/drive
from google.colab import drive
drive.mount('/content/drive')

import torch, torch.nn as nn, torch.optim as optim, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.transforms as transforms
import numpy as np, pandas as pd, os, zipfile, random, timm
from sklearn.model_selection import StratifiedKFold
from tqdm.auto import tqdm

# Environment setup: fix the random seeds and pick the compute device.
SEED = 42


def set_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed handed to every random number generator.
    """
    # Same seeding order as before: Python, then NumPy, then PyTorch (CPU).
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    # Request deterministic cuDNN kernels and turn off cuDNN benchmarking.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(SEED)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load the data: unpack the competition archive from Drive into a fresh
# local directory and read the NumPy arrays from it.
zip_file_path = '/content/drive/MyDrive/2025-ai-challenge.zip'
data_dir = '/content/dataset'

# Start from an empty directory so files from an earlier extraction cannot linger.
if os.path.exists(data_dir):
    import shutil
    shutil.rmtree(data_dir)
os.makedirs(data_dir)

with zipfile.ZipFile(zip_file_path, 'r') as archive:
    archive.extractall(data_dir)

# The arrays are read directly from the extraction directory.
full_data_path = data_dir
train_data = np.load(os.path.join(full_data_path, 'trainset.npy'))
train_labels = np.load(os.path.join(full_data_path, 'trainlabel.npy'))
test_data = np.load(os.path.join(full_data_path, 'testset.npy'))

# Dataset class
class ImageDataset(Dataset):
    """Map-style dataset over an indexable image collection with optional labels.

    Args:
        data: Indexable collection of images; ``data[idx]`` is one sample.
        labels: Optional indexable collection of labels aligned with ``data``.
            When given, each label is returned as a ``torch.long`` tensor.
        transform: Optional callable applied to each image before it is returned.
    """

    def __init__(self, data, labels=None, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``image`` when unlabeled, otherwise ``(image, label)``."""
        sample = self.data[idx]
        if self.transform:
            sample = self.transform(sample)
        if self.labels is None:
            return sample
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample, target

Mounted at /content/drive
Using device: cuda


## 2. 1단계: 특징 추출 (ConvNeXt V2)
사전 학습된 ConvNeXt V2 모델(timm의 `convnextv2_tiny.fcmae_ft_in22k_in1k`)을 사용하여 모든 이미지를 특징 벡터로 변환하고 Google Drive에 저장합니다.

In [None]:
def extract_features(data, labels=None, model_name='convnextv2_tiny.fcmae_ft_in22k_in1k'):
    """Run a pretrained timm model over ``data`` and collect its output vectors.

    The model is created with ``num_classes=0`` (per the timm documentation
    this drops the classifier head so the forward pass returns pooled
    features), moved to the module-level ``device`` and put in eval mode.

    Args:
        data: Indexable collection of images accepted by ``ToPILImage``.
        labels: Optional labels aligned with ``data``. When given they are
            passed through the loader and returned alongside the features.
        model_name: timm model identifier to load with pretrained weights.

    Returns:
        ``features`` as one concatenated NumPy array, or
        ``(features, labels)`` when ``labels`` is not ``None``.
    """
    has_labels = labels is not None

    # 1. Load the pretrained model used purely as a feature extractor.
    backbone = timm.create_model(model_name, pretrained=True, num_classes=0)
    backbone = backbone.to(device)
    backbone.eval()

    # Report the extractor's parameter count once per session; the flag is
    # stored in the module globals so later calls stay silent.
    if 'extractor_params_printed' not in globals():
        total_params = 0
        for param in backbone.parameters():
            total_params += param.numel()
        print("--- Parameter Check Stage 1: Feature Extractor ---")
        print(f"Total parameters of {model_name} (for extraction only): {total_params:,}")
        print("NOTE: This model is DISCARDED after feature extraction and is NOT part of the final submission.")
        print("-" * 50)
        globals()['extractor_params_printed'] = True

    # 2. Preprocessing taken from the model's own config: resize to its
    #    input resolution and normalize with its mean/std.
    cfg = backbone.default_cfg
    preprocess = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(cfg['input_size'][1:]),
        transforms.ToTensor(),
        transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
    ])

    # shuffle=False keeps the output rows in the same order as ``data``.
    loader = DataLoader(
        ImageDataset(data, labels=labels, transform=preprocess),
        batch_size=64,
        shuffle=False,
        num_workers=2,
    )

    # 3. Run the extraction without tracking gradients.
    feature_chunks = []
    label_chunks = []
    with torch.no_grad():
        for batch in tqdm(loader, desc=f"Extracting with {model_name}"):
            if has_labels:
                images = batch[0]
            else:
                images = batch
            outputs = backbone(images.to(device))
            feature_chunks.append(outputs.cpu().numpy())
            if has_labels:
                label_chunks.append(batch[1].cpu().numpy())

    features = np.concatenate(feature_chunks)
    if has_labels:
        return features, np.concatenate(label_chunks)
    return features

# Run feature extraction and cache the results on Drive.
train_features_path = '/content/drive/MyDrive/train_features_convnext.npy'
train_feature_labels_path = '/content/drive/MyDrive/train_feature_labels_convnext.npy'
test_features_path = '/content/drive/MyDrive/test_features_convnext.npy'

# Check every cached file, not just the train features: if an earlier run was
# interrupted after the first save, the remaining files would otherwise never
# be produced and the later np.load calls would fail.
train_cached = os.path.exists(train_features_path) and os.path.exists(train_feature_labels_path)
test_cached = os.path.exists(test_features_path)

if train_cached and test_cached:
    print("ConvNeXt feature files already exist. Skipping extraction.")
else:
    # Only the missing part is (re)computed.
    if not train_cached:
        print("Extracting features from training data...")
        train_features, train_feature_labels = extract_features(train_data, train_labels)
        np.save(train_features_path, train_features)
        np.save(train_feature_labels_path, train_feature_labels)

    if not test_cached:
        print("\nExtracting features from test data...")
        test_features = extract_features(test_data)
        np.save(test_features_path, test_features)
    print("\nFeature extraction complete!")

Extracting features from training data...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/115M [00:00<?, ?B/s]

--- Parameter Check Stage 1: Feature Extractor ---
Total parameters of convnextv2_tiny.fcmae_ft_in22k_in1k (for extraction only): 27,866,496
NOTE: This model is DISCARDED after feature extraction and is NOT part of the final submission.
--------------------------------------------------


Extracting with convnextv2_tiny.fcmae_ft_in22k_in1k:   0%|          | 0/782 [00:00<?, ?it/s]


Extracting features from test data...


Extracting with convnextv2_tiny.fcmae_ft_in22k_in1k:   0%|          | 0/157 [00:00<?, ?it/s]


Feature extraction complete!


## 3. 2단계: 초경량 모델 훈련

In [None]:
# Reload the cached feature arrays from Drive (train features, train labels,
# test features, in that order).
train_features, train_feature_labels, test_features = (
    np.load(cached_path)
    for cached_path in (
        '/content/drive/MyDrive/train_features_convnext.npy',
        '/content/drive/MyDrive/train_feature_labels_convnext.npy',
        '/content/drive/MyDrive/test_features_convnext.npy',
    )
)
print(f"Loaded ConvNeXt train features: {train_features.shape}")

class FeatureDataset(Dataset):
    """Map-style dataset over precomputed feature vectors with optional labels.

    Args:
        features: Indexable collection of feature vectors.
        labels: Optional indexable collection of labels aligned with ``features``.
    """

    def __init__(self, features, labels=None):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        """Return a float tensor, plus a ``torch.long`` label when labels exist."""
        vector = torch.tensor(self.features[idx], dtype=torch.float)
        if self.labels is None:
            return vector
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return vector, target

# Lightweight MLP classifier sized for the ConvNeXt features
class SimpleMLP(nn.Module):
    """Two-layer MLP: Linear -> ReLU -> Dropout(0.5) -> Linear.

    Args:
        input_size: Length of each input feature vector.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=768, num_classes=100):
        super().__init__()
        hidden_size = 114
        # 768 -> 114 -> 100 gives 99,166 parameters with the default sizes.
        self.layers = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, x):
        """Return the class logits for the batch ``x``."""
        logits = self.layers(x)
        return logits

# Check the final model's parameter count against the 100k limit
temp_model = SimpleMLP()

# Count only the trainable parameters.
final_model_params = 0
for param in temp_model.parameters():
    if param.requires_grad:
        final_model_params += param.numel()

print("--- Parameter Check Stage 2: Final Model ---")
print(f"Total parameters of the final submitted model (SimpleMLP): {final_model_params:,}")
print(
    f"SUCCESS: The total parameter count ({final_model_params:,}) is under the 100k limit."
    if final_model_params < 100000
    else "WARNING: Parameter limit exceeded!"
)
# Stop the notebook here if the limit is broken.
assert final_model_params < 100000, "PARAMETER RULE VIOLATED!"
print("-" * 50)
del temp_model

Loaded ConvNeXt train features: (50000, 768)
--- Parameter Check Stage 2: Final Model ---
Total parameters of the final submitted model (SimpleMLP): 99,166
SUCCESS: The total parameter count (99,166) is under the 100k limit.
--------------------------------------------------


In [None]:
# 4. K-Fold cross-validation
N_SPLITS = 5
EPOCHS = 100
BATCH_SIZE = 256
LEARNING_RATE = 1e-3

skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
feature_dataset = FeatureDataset(train_features, train_feature_labels)

for fold, (train_idx, val_idx) in enumerate(skf.split(train_features, train_feature_labels)):
    print(f'=============== [MLP on ConvNeXt] FOLD {fold+1}/{N_SPLITS} ===============')

    # Each fold starts with a fresh model, optimizer, LR schedule and loss.
    model = SimpleMLP().to(device)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

    # Load the checkpoint, if one exists, so an interrupted fold can resume.
    start_epoch = 0
    best_val_accuracy = 0.0
    checkpoint_path = f'/content/drive/MyDrive/checkpoint_mlp_convnext_fold_{fold}.pth'
    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        best_val_accuracy = checkpoint['best_val_accuracy']
        print(f'Resuming from Fold {fold+1}, Epoch {start_epoch}...')
        # A checkpoint at the final epoch means this fold is already done.
        if start_epoch >= EPOCHS:
            print(f"Fold {fold+1} completed. Skipping...")
            continue

    train_subset = Subset(feature_dataset, train_idx)
    val_subset = Subset(feature_dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False)
    model_save_path = f'/content/drive/MyDrive/mlp_model_convnext_fold_{fold}.pth'

    for epoch in tqdm(range(start_epoch, EPOCHS), desc=f"MLP Fold {fold+1}"):
        # One training pass over this fold's training split.
        model.train()
        for features, labels in train_loader:
            features = features.to(device)
            labels = labels.to(device)
            outputs = model(features)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        scheduler.step()

        # Validate on every 10th epoch and keep the best-scoring weights.
        if (epoch + 1) % 10 == 0:
            model.eval()
            val_corrects = 0
            with torch.no_grad():
                for features, labels in val_loader:
                    features = features.to(device)
                    labels = labels.to(device)
                    outputs = model(features)
                    _, preds = torch.max(outputs, 1)
                    val_corrects += (preds == labels.data).sum()
            epoch_val_acc = val_corrects.double() / len(val_subset)
            if epoch_val_acc > best_val_accuracy:
                best_val_accuracy = epoch_val_acc
                torch.save(model.state_dict(), model_save_path)

        # Write a resumable checkpoint after every epoch.
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_val_accuracy': best_val_accuracy,
            },
            checkpoint_path,
        )
    print(f'Fold {fold+1} Finished. Best Val Acc: {best_val_accuracy:.4f}')



MLP Fold 1:   0%|          | 0/100 [00:00<?, ?it/s]

Fold 1 Finished. Best Val Acc: 0.8277


MLP Fold 2:   0%|          | 0/100 [00:00<?, ?it/s]

Fold 2 Finished. Best Val Acc: 0.8353


MLP Fold 3:   0%|          | 0/100 [00:00<?, ?it/s]

Fold 3 Finished. Best Val Acc: 0.8340


MLP Fold 4:   0%|          | 0/100 [00:00<?, ?it/s]

Fold 4 Finished. Best Val Acc: 0.8343


MLP Fold 5:   0%|          | 0/100 [00:00<?, ?it/s]

Fold 5 Finished. Best Val Acc: 0.8319


## 5. 최종 앙상블 추론 및 제출

In [None]:
# Load the best saved weights of every fold for the ensemble.
final_models = []
for fold in range(N_SPLITS):
    model_path = f'/content/drive/MyDrive/mlp_model_convnext_fold_{fold}.pth'
    model = SimpleMLP(input_size=768).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    final_models.append(model)
print(f'{len(final_models)} final MLP models loaded for ensemble.')

# shuffle=False keeps predictions in the same order as the test features.
test_loader_final = DataLoader(FeatureDataset(test_features), batch_size=BATCH_SIZE, shuffle=False)
all_predictions = []
with torch.no_grad():
    for features in tqdm(test_loader_final, desc="Final Predicting with MLP"):
        features = features.to(device)
        # Soft voting: add up each fold's class probabilities. The accumulator
        # takes its shape from the model outputs, so no class count is
        # hard-coded here; the models are summed in list order.
        ensemble_outputs = sum(F.softmax(fold_model(features), dim=1) for fold_model in final_models)
        _, preds = torch.max(ensemble_outputs, 1)
        all_predictions.extend(preds.cpu().numpy())

# Fill the sample submission's 'label' column and write the result to Drive.
sample_submission_path = os.path.join(full_data_path, 'sample_submission.csv')
submission_df = pd.read_csv(sample_submission_path)
submission_df['label'] = all_predictions
submission_file_path = '/content/drive/MyDrive/submission_convnext.csv'
submission_df.to_csv(submission_file_path, index=False)
print(f"Final submission file created at: {submission_file_path}")

5 final MLP models loaded for ensemble.


Final Predicting with MLP:   0%|          | 0/40 [00:00<?, ?it/s]

Final submission file created at: /content/drive/MyDrive/submission_convnext.csv
