In [5]:
# data 준비

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from transformers import BertTokenizer, BertModel
import random
import numpy as np
from collections import deque
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

# ---------- 설정 ---------- #
BATCH_SIZE = 64
EPOCHS = 1   # 빠른 테스트용, 실제는 더 크게
GAMMA = 0.9
LR = 1e-3
MEMORY_SIZE = 1000
EPSILON = 0.1

# GPU 사용 확인
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------- 데이터셋 로드 ---------- #
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# 예시로 CIFAR10 사용, 다른 데이터셋에 맞게 변경 가능
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)


  from .autonotebook import tqdm as notebook_tqdm
  torch.utils._pytree._register_pytree_node(


In [33]:
# 모델 준비
# ---------- 모델 후보 ---------- #
def prepare_image_model(name):
    if name == "resnet18":
        model = models.resnet18(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 10)
    elif name == "googlenet":
        model = models.googlenet(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 10)
    elif name == "mobilenet":
        model = models.mobilenet_v2(pretrained=True)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, 10)
    return model

def prepare_text_model(name):
    if name == "bert":
        model = BertModel.from_pretrained('bert-base-uncased')
    return model, tokenizer

model_list_img = ["resnet18", "googlenet", "mobilenet"]
model_list_txt = ["bert"]


In [8]:
#상태 벡터
# ---------- 상태 벡터 ---------- #
def extract_state(dataset):
    num_classes = len(set(dataset.targets))
    num_samples = len(dataset)
    return torch.tensor([num_samples / 10000, num_classes / 10]).float().to(device)


In [10]:
#DQN model
# ---------- DQN 네트워크 ---------- #
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(DQN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, action_dim)
        )

    def forward(self, x):
        return self.net(x)


In [36]:
def evaluate_model(model, dataloader, is_image_model=True, tokenizer=None):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for data in dataloader:
            if is_image_model:
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
            else:
                texts, labels = data
                inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True).to(device)
                labels = labels.to(device)
                outputs = model(**inputs).last_hidden_state[:, 0, :]

            # 간단한 accuracy 계산용 임시 로직
            pred = torch.argmax(outputs, dim=1) if is_image_model else torch.zeros_like(labels)  # ← 텍스트 모델용 정확도 임시 처리
            correct += (pred == labels).sum().item()
            total += labels.size(0)

    return correct / total if total > 0 else 0


In [37]:
#강화학습
# ---------- 강화학습 루프 ---------- #
state_dim = 2  # 상태 벡터의 크기
action_dim_img = len(model_list_img)
action_dim_txt = len(model_list_txt)

dqn_img = DQN(state_dim, action_dim_img).to(device)
dqn_txt = DQN(state_dim, action_dim_txt).to(device)

optimizer_img = optim.Adam(dqn_img.parameters(), lr=LR)
optimizer_txt = optim.Adam(dqn_txt.parameters(), lr=LR)

loss_fn = nn.MSELoss()

memory_img = deque(maxlen=MEMORY_SIZE)
memory_txt = deque(maxlen=MEMORY_SIZE)

# DQN 훈련 루프
for episode in range(10):  # 에피소드 반복
    state = extract_state(trainset)  # 상태 벡터
    
    # 이미지 모델 선택
    if random.random() < EPSILON:
        action_img = random.randint(0, action_dim_img - 1)
    else:
        with torch.no_grad():
            q_values = dqn_img(state)
            action_img = torch.argmax(q_values).item()

    # 텍스트 모델 선택
    if random.random() < EPSILON:
        action_txt = random.randint(0, action_dim_txt - 1)
    else:
        with torch.no_grad():
            q_values = dqn_txt(state)
            action_txt = torch.argmax(q_values).item()

    # 모델 준비
    img_model = prepare_image_model(model_list_img[action_img])
    txt_model, tokenizer = prepare_text_model(model_list_txt[action_txt])

    

    # 모델 평가
    img_accuracy = evaluate_model(img_model, trainloader, is_image_model=True)
    txt_accuracy = evaluate_model(txt_model, trainloader, is_image_model=False, tokenizer=tokenizer)

    reward = (img_accuracy + txt_accuracy) / 2  # 두 모델 성능 평균

    # 상태 업데이트 및 메모리 저장
    next_state = state.clone()

    memory_img.append((state, action_img, reward, next_state))
    memory_txt.append((state, action_txt, reward, next_state))

    # 이미지 모델 경험 리플레이 학습
    if len(memory_img) >= BATCH_SIZE:
        batch = random.sample(memory_img, BATCH_SIZE)
        batch_states, batch_actions, batch_rewards, batch_next_states = zip(*batch)

        batch_states = torch.stack(batch_states).to(device)
        batch_actions = torch.tensor(batch_actions).to(device)
        batch_rewards = torch.tensor(batch_rewards).to(device)

        q_values = dqn_img(batch_states).gather(1, batch_actions.unsqueeze(1)).squeeze()
        loss = loss_fn(q_values, batch_rewards)

        optimizer_img.zero_grad()
        loss.backward()
        optimizer_img.step()

    # 텍스트 모델 경험 리플레이 학습
    if len(memory_txt) >= BATCH_SIZE:
        batch = random.sample(memory_txt, BATCH_SIZE)
        batch_states, batch_actions, batch_rewards, batch_next_states = zip(*batch)


        batch_states = torch.stack(batch_states).to(device)
        batch_actions = torch.tensor(batch_actions).to(device)
        batch_rewards = torch.tensor(batch_rewards).to(device)

        q_values = dqn_txt(batch_states).gather(1, batch_actions.unsqueeze(1)).squeeze()
        loss = loss_fn(q_values, batch_rewards)

        optimizer_txt.zero_grad()
        loss.backward()
        optimizer_txt.step()

    print(f"[에피소드 {episode}] 이미지 정확도: {img_accuracy:.4f}, 텍스트 정확도: {txt_accuracy:.4f}, 보상: {reward:.4f}")

print("훈련 완료")




ValueError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).

In [40]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from transformers import BertTokenizer, BertModel
import random
import numpy as np
from collections import deque
from torch.utils.data import Dataset, DataLoader

# 설정
BATCH_SIZE = 8  # 작은 배치로 테스트
EPOCHS = 1
GAMMA = 0.9
LR = 1e-3
MEMORY_SIZE = 1000
EPSILON = 0.1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 데이터셋 로드 (CIFAR10)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)

# 이미지 모델 후보
def prepare_image_model(name):
    if name == "resnet18":
        model = models.resnet18(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 10)
    elif name == "googlenet":
        model = models.googlenet(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 10)
    elif name == "mobilenet":
        model = models.mobilenet_v2(pretrained=True)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, 10)
    return model.to(device).eval()

# 텍스트 모델 후보 (샘플용 토크나이저만 반환)
def prepare_text_model(name):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased').to(device).eval()
    return model, tokenizer

# 상태 벡터 추출
def extract_state(dataset):
    num_classes = len(set(dataset.targets))
    num_samples = len(dataset)
    return torch.tensor([num_samples / 10000, num_classes / 10]).float().to(device)

# DQN 정의
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(DQN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, action_dim)
        )
    def forward(self, x):
        return self.net(x)

# 평가 함수 (텍스트 모델은 더미 정확도 반환)
def evaluate_model(model, dataloader, is_image_model=True, tokenizer=None):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in dataloader:
            if is_image_model:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                pred = outputs.argmax(dim=1)
                correct += (pred == labels).sum().item()
                total += labels.size(0)
            else:
                # 텍스트 모델 더미 정확도
                return 0.5  # 임의 값
    return correct / total if total > 0 else 0

# DQN 학습 설정
model_list_img = ["resnet18", "googlenet", "mobilenet"]
model_list_txt = ["bert"]
state_dim = 2
action_dim_img = len(model_list_img)
action_dim_txt = len(model_list_txt)

dqn_img = DQN(state_dim, action_dim_img).to(device)
dqn_txt = DQN(state_dim, action_dim_txt).to(device)
optimizer_img = optim.Adam(dqn_img.parameters(), lr=LR)
optimizer_txt = optim.Adam(dqn_txt.parameters(), lr=LR)
loss_fn = nn.MSELoss()
memory_img = deque(maxlen=MEMORY_SIZE)
memory_txt = deque(maxlen=MEMORY_SIZE)

# 훈련 루프
for episode in range(3):
    state = extract_state(trainset)

    # 모델 선택 (Epsilon-greedy)
    with torch.no_grad():
        q_values_img = dqn_img(state)
    action_img = torch.argmax(q_values_img).item() if random.random() > EPSILON else random.randint(0, action_dim_img - 1)

    with torch.no_grad():
        q_values_txt = dqn_txt(state)
    action_txt = torch.argmax(q_values_txt).item() if random.random() > EPSILON else random.randint(0, action_dim_txt - 1)

    # 모델 로딩 및 평가
    img_model = prepare_image_model(model_list_img[action_img])
    txt_model, tokenizer = prepare_text_model(model_list_txt[action_txt])
    img_accuracy = evaluate_model(img_model, trainloader, is_image_model=True)
    txt_accuracy = evaluate_model(txt_model, trainloader, is_image_model=False, tokenizer=tokenizer)

    reward = (img_accuracy + txt_accuracy) / 2
    next_state = state.clone()
    memory_img.append((state, action_img, reward, next_state))
    memory_txt.append((state, action_txt, reward, next_state))

    # 학습 (이미지 DQN)
    if len(memory_img) >= BATCH_SIZE:
        batch = random.sample(memory_img, BATCH_SIZE)
        batch_states, batch_actions, batch_rewards, _ = zip(*batch)
        batch_states = torch.stack(batch_states).to(device)
        batch_actions = torch.tensor(batch_actions).to(device)
        batch_rewards = torch.tensor(batch_rewards).to(device)
        q_values = dqn_img(batch_states).gather(1, batch_actions.unsqueeze(1)).squeeze()
        loss = loss_fn(q_values, batch_rewards)
        optimizer_img.zero_grad()
        loss.backward()
        optimizer_img.step()

    # 학습 (텍스트 DQN)
    if len(memory_txt) >= BATCH_SIZE:
        batch = random.sample(memory_txt, BATCH_SIZE)
        batch_states, batch_actions, batch_rewards, _ = zip(*batch)
        batch_states = torch.stack(batch_states).to(device)
        batch_actions = torch.tensor(batch_actions).to(device)
        batch_rewards = torch.tensor(batch_rewards).to(device)
        q_values = dqn_txt(batch_states).gather(1, batch_actions.unsqueeze(1)).squeeze()
        loss = loss_fn(q_values, batch_rewards)
        optimizer_txt.zero_grad()
        loss.backward()
        optimizer_txt.step()

    print(f"[에피소드 {episode}] 이미지 정확도: {img_accuracy:.4f}, 텍스트 정확도(더미): {txt_accuracy:.4f}, 보상: {reward:.4f}")

print("훈련 완료")





[에피소드 0] 이미지 정확도: 0.0634, 텍스트 정확도(더미): 0.5000, 보상: 0.2817
[에피소드 1] 이미지 정확도: 0.0855, 텍스트 정확도(더미): 0.5000, 보상: 0.2927


KeyboardInterrupt: 

In [50]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import Caltech101
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score
from collections import deque
import numpy as np
import random
from torchvision.datasets import CIFAR10
# ---------- 설정 ---------- #
BATCH_SIZE = 32
EPOCHS = 1  # 빠른 테스트용
GAMMA = 0.95
LR = 1e-4
MEMORY_SIZE = 1000
EPSILON = 0.2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------- 이미지 데이터셋 (Caltech101) ---------- #
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])


image_dataset = CIFAR10(root="./data", train=True, download=True, transform=transform)
image_loader = DataLoader(image_dataset, batch_size=BATCH_SIZE, shuffle=True)

# ---------- 텍스트 데이터셋 (AG News) ---------- #
ag_dataset = load_dataset("ag_news")
tokenizer_text = AutoTokenizer.from_pretrained("bert-base-uncased")

class AGNewsDataset(Dataset):
    def __init__(self, split):
        self.texts = ag_dataset[split]["text"]
        self.labels = ag_dataset[split]["label"]

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.texts[idx], self.labels[idx]

text_dataset = AGNewsDataset("train")
text_loader = DataLoader(text_dataset, batch_size=BATCH_SIZE, shuffle=True)

# ---------- 상태 벡터 ---------- #
def extract_state(dataset, data_type="image"):
    if data_type == "image":
        num_classes = len(dataset.classes)
        num_samples = len(dataset)
    else:
        num_classes = 4  # AG News는 고정
        num_samples = len(dataset)
    return torch.tensor([num_samples / 10000, num_classes / 10]).float().to(device)

# ---------- 모델 로딩 ---------- #
def load_text_model(name="bert-base-uncased"):
    model = AutoModelForSequenceClassification.from_pretrained(name, num_labels=4).to(device)
    tokenizer = AutoTokenizer.from_pretrained(name)
    return model, tokenizer

def load_image_model(name):
    if name == "resnet18":
        model = torchvision.models.resnet18(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 102)
    elif name == "mobilenet":
        model = torchvision.models.mobilenet_v2(pretrained=True)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, 102)
    elif name == "densenet":
        model = torchvision.models.densenet121(pretrained=True)
        model.classifier = nn.Linear(model.classifier.in_features, 102)
    return model.to(device)

# ---------- 평가 함수 ---------- #
def evaluate_image_model(model, dataloader):
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            output = model(x)
            pred = torch.argmax(output, dim=1)
            preds.extend(pred.cpu().numpy())
            labels.extend(y.cpu().numpy())
    return accuracy_score(labels, preds)

def evaluate_text_model(model, dataloader, tokenizer):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for texts, labels in dataloader:
            inputs = tokenizer(list(texts), return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
            labels = labels.to(device)
            output = model(**inputs)
            pred = torch.argmax(output.logits, dim=1)
            correct += (pred == labels).sum().item()
            total += len(labels)
    return correct / total

# ---------- DQN 정의 ---------- #
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, action_dim)
        )
    def forward(self, x):
        return self.net(x)

dqn_img = DQN(state_dim=2, action_dim=3).to(device)
dqn_txt = DQN(state_dim=2, action_dim=1).to(device)
optimizer_img = optim.Adam(dqn_img.parameters(), lr=LR)
optimizer_txt = optim.Adam(dqn_txt.parameters(), lr=LR)
loss_fn = nn.MSELoss()
memory_img, memory_txt = deque(maxlen=MEMORY_SIZE), deque(maxlen=MEMORY_SIZE)

# ---------- 학습 루프 ---------- #
for episode in range(5):
    state_img = extract_state(image_dataset, "image")
    state_txt = extract_state(text_dataset, "text")

    # 이미지 모델 선택
    action_img = random.randint(0, 2) if random.random() < EPSILON else torch.argmax(dqn_img(state_img)).item()
    # 텍스트 모델은 1개 고정
    action_txt = 0

    img_model = load_image_model(["resnet18", "mobilenet", "densenet"][action_img])
    txt_model, tokenizer = load_text_model("bert-base-uncased")

    # 성능 평가
    img_acc = evaluate_image_model(img_model, image_loader)
    txt_acc = evaluate_text_model(txt_model, text_loader, tokenizer)
    reward = (img_acc + txt_acc) / 2

    memory_img.append((state_img, action_img, reward, state_img.clone()))
    memory_txt.append((state_txt, action_txt, reward, state_txt.clone()))

    # 경험 리플레이 - 이미지
    if len(memory_img) >= BATCH_SIZE:
        batch = random.sample(memory_img, BATCH_SIZE)
        states, actions, rewards, next_states = zip(*batch)
        states = torch.stack(states)
        actions = torch.tensor(actions).unsqueeze(1).to(device)
        rewards = torch.tensor(rewards).to(device)

        q_vals = dqn_img(states).gather(1, actions).squeeze()
        loss = loss_fn(q_vals, rewards)
        optimizer_img.zero_grad()
        loss.backward()
        optimizer_img.step()

    # 경험 리플레이 - 텍스트
    if len(memory_txt) >= BATCH_SIZE:
        batch = random.sample(memory_txt, BATCH_SIZE)
        states, actions, rewards, next_states = zip(*batch)
        states = torch.stack(states)
        actions = torch.tensor(actions).unsqueeze(1).to(device)
        rewards = torch.tensor(rewards).to(device)

        q_vals = dqn_txt(states).gather(1, actions).squeeze()
        loss = loss_fn(q_vals, rewards)
        optimizer_txt.zero_grad()
        loss.backward()
        optimizer_txt.step()

    print(f"[EP {episode}] 이미지 정확도: {img_acc:.4f}, 텍스트 정확도: {txt_acc:.4f}, 보상: {reward:.4f}")
print("✅ 훈련 완료")


Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to C:\Users\bokyung/.cache\torch\hub\checkpoints\densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:02<00:00, 11.7MB/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[EP 0] 이미지 정확도: 0.0010, 텍스트 정확도: 0.2513, 보상: 0.1262


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[EP 1] 이미지 정확도: 0.0036, 텍스트 정확도: 0.3053, 보상: 0.1545


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[EP 2] 이미지 정확도: 0.0047, 텍스트 정확도: 0.2534, 보상: 0.1290


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[EP 3] 이미지 정확도: 0.0001, 텍스트 정확도: 0.2469, 보상: 0.1235


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[EP 4] 이미지 정확도: 0.0083, 텍스트 정확도: 0.2404, 보상: 0.1244
✅ 훈련 완료


In [52]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

from torch.utils.data import DataLoader, Subset
from transformers import BertTokenizer, BertForSequenceClassification, DistilBertForSequenceClassification, RobertaForSequenceClassification
from transformers import AdamW
from sklearn.model_selection import train_test_split

import numpy as np
import random

# -------------------------------
# 1. 환경 설정
# -------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# -------------------------------
# 2. 모델 후보
# -------------------------------

IMAGE_MODELS = {
    "resnet18": torchvision.models.resnet18,
    "mobilenet": torchvision.models.mobilenet_v2,
    "densenet": torchvision.models.densenet121,
}

TEXT_MODELS = {
    "bert-base-uncased": BertForSequenceClassification,
    "distilbert-base-uncased": DistilBertForSequenceClassification,
    "roberta-base": RobertaForSequenceClassification,
}

# -------------------------------
# 3. DQN (선택기)
# -------------------------------

class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 32)
        self.fc2 = nn.Linear(32, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# -------------------------------
# 4. 간단한 이미지 데이터셋 로딩 (CIFAR10)
# -------------------------------

def get_image_dataset():
    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor()
    ])
    train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
    return train_set, test_set

# -------------------------------
# 5. 데이터 특성 추출 함수
# -------------------------------

def extract_image_state(dataset):
    loader = DataLoader(dataset, batch_size=64, shuffle=True)
    images, labels = next(iter(loader))
    mean = images.mean().item()
    std = images.std().item()
    num_classes = len(set(labels.numpy()))
    return torch.tensor([mean, std, num_classes], dtype=torch.float32)

# -------------------------------
# 6. 모델 학습 및 평가 함수
# -------------------------------

def train_and_evaluate_image_model(model_fn, train_set, test_set, epochs=1):
    model = model_fn(pretrained=False, num_classes=10).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

    model.train()
    for epoch in range(epochs):
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

    # 평가
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            _, predicted = torch.max(output.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    return accuracy

# -------------------------------
# 7. 메인 루프: 모델 선택기 훈련
# -------------------------------

def train_model_selector():
    train_set, test_set = get_image_dataset()

    dqn = DQN(input_dim=3, output_dim=len(IMAGE_MODELS)).to(device)
    optimizer = optim.Adam(dqn.parameters(), lr=0.01)
    criterion = nn.MSELoss()

    for episode in range(10):
        state = extract_image_state(train_set).to(device)

        # DQN이 선택한 모델
        q_values = dqn(state)
        action = torch.argmax(q_values).item()
        selected_model_key = list(IMAGE_MODELS.keys())[action]
        model_fn = IMAGE_MODELS[selected_model_key]

        # 선택된 모델로 학습 및 평가
        reward = train_and_evaluate_image_model(model_fn, train_set, test_set, epochs=1)

        # Q 업데이트
        target = q_values.clone().detach()
        target[action] = reward

        # 학습
        output = dqn(state)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"Episode {episode + 1}: Selected={selected_model_key}, Accuracy={reward:.4f}")

    print("모델 선택기 학습 완료.")

train_model_selector()




Episode 1: Selected=resnet18, Accuracy=0.5843
Episode 2: Selected=resnet18, Accuracy=0.4973
Episode 3: Selected=densenet, Accuracy=0.6301
Episode 4: Selected=densenet, Accuracy=0.6242
Episode 5: Selected=densenet, Accuracy=0.6016
Episode 6: Selected=mobilenet, Accuracy=0.5062
Episode 7: Selected=densenet, Accuracy=0.5819
Episode 8: Selected=densenet, Accuracy=0.5933
Episode 9: Selected=densenet, Accuracy=0.6084
Episode 10: Selected=densenet, Accuracy=0.6667
모델 선택기 학습 완료.


In [54]:
# Adaptive Model Selector using DQN (Text Version)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from transformers import BertTokenizer, BertForSequenceClassification, \
                         DistilBertTokenizer, DistilBertForSequenceClassification, \
                         RobertaTokenizer, RobertaForSequenceClassification
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
import numpy as np
import random
from datasets import load_dataset

# AG News 데이터 불러오기 (train만 사용)
dataset = load_dataset("ag_news")

# 텍스트 및 라벨 추출
raw_texts = dataset['train']['text']
raw_labels = dataset['train']['label']  # 0~3: World, Sports, Business, Sci/Tech

# 이진 분류용으로 라벨 변경 (예: World/Sports vs Business/SciTech)
texts = raw_texts[:1000]  # 빠른 테스트를 위해 1000개만 사용
labels = [1 if label in [0, 1] else 0 for label in raw_labels[:1000]]


# 모델 후보
TEXT_MODELS = {
    "bert": (BertTokenizer.from_pretrained("bert-base-uncased"), BertForSequenceClassification),
    "distilbert": (DistilBertTokenizer.from_pretrained("distilbert-base-uncased"), DistilBertForSequenceClassification),
    "roberta": (RobertaTokenizer.from_pretrained("roberta-base"), RobertaForSequenceClassification),
}

# DQN 정의
class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 32)
        self.fc2 = nn.Linear(32, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return self.fc2(x)


# 커스텀 Dataset
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}, torch.tensor(self.labels[idx])

# 특성 추출
def extract_text_state(texts):
    lengths = [len(t.split()) for t in texts]
    mean_len = np.mean(lengths)
    std_len = np.std(lengths)
    vocab_size = len(set(word for text in texts for word in text.lower().split()))
    return torch.tensor([mean_len, std_len, vocab_size], dtype=torch.float32)

# 모델 학습 및 평가
def train_and_evaluate_text_model(tokenizer, model_cls, train_texts, train_labels, test_texts, test_labels, epochs=1):
    model = model_cls.from_pretrained(tokenizer.name_or_path, num_labels=2).to(device)
    optimizer = optim.Adam(model.parameters(), lr=2e-5)

    train_encodings = tokenizer(train_texts, truncation=True, padding=True, return_tensors="pt")
    test_encodings = tokenizer(test_texts, truncation=True, padding=True, return_tensors="pt")

    train_dataset = TextDataset(train_encodings, train_labels)
    test_dataset = TextDataset(test_encodings, test_labels)

    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=4)

    model.train()
    for epoch in range(epochs):
        for batch in train_loader:
            inputs, labels = batch
            inputs = {k: v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)
            outputs = model(**inputs, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels = batch
            inputs = {k: v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)
            outputs = model(**inputs)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    return correct / total

# DQN 훈련 루프
def train_text_model_selector():
    train_texts, test_texts, train_labels, test_labels = train_test_split(texts, labels, test_size=0.3)

    dqn = DQN(input_dim=3, output_dim=len(TEXT_MODELS)).to(device)
    optimizer = optim.Adam(dqn.parameters(), lr=0.01)
    criterion = nn.MSELoss()

    for episode in range(5):
        state = extract_text_state(train_texts).to(device)

        q_values = dqn(state)
        action = torch.argmax(q_values).item()
        selected_key = list(TEXT_MODELS.keys())[action]
        tokenizer, model_cls = TEXT_MODELS[selected_key]

        reward = train_and_evaluate_text_model(tokenizer, model_cls, train_texts, train_labels, test_texts, test_labels)

        target = q_values.clone().detach()
        target[action] = reward

        output = dqn(state)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"Episode {episode+1}: Selected={selected_key}, Accuracy={reward:.4f}")

    print("텍스트 모델 선택기 학습 완료.")

train_text_model_selector()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}, torch.tensor(self.labels[idx])


Episode 1: Selected=bert, Accuracy=0.9333


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Episode 2: Selected=bert, Accuracy=0.9600


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Episode 3: Selected=roberta, Accuracy=0.9033


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Episode 4: Selected=distilbert, Accuracy=0.9533


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Episode 5: Selected=distilbert, Accuracy=0.9400
텍스트 모델 선택기 학습 완료.


In [55]:
from torchvision.datasets import STL10
from torchvision import transforms
from torch.utils.data import DataLoader

# STL10 데이터셋 로드 및 전처리
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

stl_train = STL10(root='./data', split='train', download=True, transform=transform)
stl_test = STL10(root='./data', split='test', download=True, transform=transform)

train_loader = DataLoader(stl_train, batch_size=64, shuffle=True)
test_loader = DataLoader(stl_test, batch_size=64, shuffle=False)

# 이미지 상태 벡터 생성 함수 호출
first_batch = next(iter(train_loader))[0]
image_state = extract_image_state(first_batch)

# 모델 선택기 훈련
train_image_model_selector(train_loader, test_loader, image_state)


100%|██████████| 2.64G/2.64G [09:54<00:00, 4.44MB/s]  


ValueError: too many values to unpack (expected 2)

In [None]:
from datasets import load_dataset

# Yelp 데이터셋 로딩 (샘플 제한 가능)
dataset = load_dataset("yelp_polarity")
texts = dataset['train']['text'][:2000]
labels = dataset['train']['label'][:2000]

# 상태 벡터 추출
state = extract_text_state(texts)

# 텍스트 모델 선택기 훈련
train_text_model_selector(texts, labels)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertForSequenceClassification, \
                         DistilBertTokenizer, DistilBertForSequenceClassification, \
                         RobertaTokenizer, RobertaForSequenceClassification
from torch.utils.data import DataLoader, Dataset
import numpy as np
import random
import torchvision.models as models
from sklearn.model_selection import train_test_split
from torchvision import transforms
from PIL import Image
from datasets import load_dataset


# 환경 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# 텍스트 모델 후보
TEXT_MODELS = {
    "bert": (BertTokenizer.from_pretrained("bert-base-uncased"), BertForSequenceClassification),
    "distilbert": (DistilBertTokenizer.from_pretrained("distilbert-base-uncased"), DistilBertForSequenceClassification),
    "roberta": (RobertaTokenizer.from_pretrained("roberta-base"), RobertaForSequenceClassification),
}

# 이미지 모델 후보
IMAGE_MODELS = {
    "resnet18": models.resnet18(pretrained=True),
    "vgg16": models.vgg16(pretrained=True),
}

# DQN 정의 (모델 선택기)
class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 32)
        self.fc2 = nn.Linear(32, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        return self.fc2(x)


# AG News 데이터 불러오기 (train만 사용)
dataset = load_dataset("ag_news")

# 텍스트 및 라벨 추출
raw_texts = dataset['train']['text']
raw_labels = dataset['train']['label']  # 0~3: World, Sports, Business, Sci/Tech

# 이진 분류용으로 라벨 변경 (예: World/Sports vs Business/SciTech)
texts = raw_texts[:1000]  # 빠른 테스트를 위해 1000개만 사용
labels = [1 if label in [0, 1] else 0 for label in raw_labels[:1000]]

# 이미지 데이터 예시 (단순화된 예시)
image_paths = ["image1.jpg", "image2.jpg"]  # 실제 이미지 경로를 입력하세요
image_labels = [0, 1]  # 예시 라벨

# 텍스트 데이터셋 정의
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}, torch.tensor(self.labels[idx])

# 이미지 데이터셋 정의
class ImageDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx])
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(self.labels[idx])

# 이미지 데이터에 대한 전처리
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# 텍스트 모델 학습 및 평가
def train_and_evaluate_text_model(tokenizer, model_cls, train_texts, train_labels, test_texts, test_labels, epochs=1):
    model = model_cls.from_pretrained(tokenizer.name_or_path, num_labels=2).to(device)
    optimizer = optim.Adam(model.parameters(), lr=2e-5)

    train_encodings = tokenizer(train_texts, truncation=True, padding=True, return_tensors="pt")
    test_encodings = tokenizer(test_texts, truncation=True, padding=True, return_tensors="pt")

    train_dataset = TextDataset(train_encodings, train_labels)
    test_dataset = TextDataset(test_encodings, test_labels)

    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=4)

    model.train()
    for epoch in range(epochs):
        for batch in train_loader:
            inputs, labels = batch
            inputs = {k: v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)
            outputs = model(**inputs, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels = batch
            inputs = {k: v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)
            outputs = model(**inputs)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    return correct / total

# 이미지 모델 학습 및 평가
def train_and_evaluate_image_model(model, train_images, train_labels, test_images, test_labels, epochs=1):
    model = model.to(device)
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()

    train_dataset = ImageDataset(train_images, train_labels, transform=transform)
    test_dataset = ImageDataset(test_images, test_labels, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=4)

    for epoch in range(epochs):
        for batch in train_loader:
            images, labels = batch
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in test_loader:
            images, labels = batch
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    return correct / total

# 데이터 유형에 맞는 모델 선택
def select_model(data_type, data, train_labels, test_labels):
    if data_type == 'text':
        # 텍스트 데이터에 적합한 모델을 선택
        tokenizer, model_cls = TEXT_MODELS["bert"]
        accuracy = train_and_evaluate_text_model(tokenizer, model_cls, data, train_labels, test_labels)
    elif data_type == 'image':
        # 이미지 데이터에 적합한 모델을 선택
        model = IMAGE_MODELS["resnet18"]
        accuracy = train_and_evaluate_image_model(model, data, train_labels, test_labels)
    else:
        raise ValueError("Unknown data type")

    return accuracy

# 텍스트 모델 선택기 학습
def train_text_model_selector():
    train_texts, test_texts, train_labels, test_labels = train_test_split(texts, labels, test_size=0.3)
    
    dqn = DQN(input_dim=3, output_dim=len(TEXT_MODELS)).to(device)
    optimizer = optim.Adam(dqn.parameters(), lr=0.01)
    criterion = nn.MSELoss()

    for episode in range(5):
        state = torch.tensor([1.0, 0.5, 10.0], dtype=torch.float32).to(device)  # 예시 상태
        q_values = dqn(state)
        action = torch.argmax(q_values).item()
        selected_key = list(TEXT_MODELS.keys())[action]
        tokenizer, model_cls = TEXT_MODELS[selected_key]

        reward = train_and_evaluate_text_model(tokenizer, model_cls, train_texts, train_labels, test_texts, test_labels)

        target = q_values.clone().detach()
        target[action] = reward

        output = dqn(state)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"Episode {episode+1}: Selected={selected_key}, Accuracy={reward:.4f}")

    print("텍스트 모델 선택기 학습 완료.")

# 예시 실행
train_text_model_selector()

# 이미지 모델 선택기 사용 예시
image_accuracy = select_model('image', image_paths, image_labels, image_labels)
print("이미지 모델 정확도:", image_accuracy)


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\bokyung/.cache\torch\hub\checkpoints\vgg16-397923af.pth
100%|██████████| 528M/528M [00:48<00:00, 11.5MB/s] 
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}, torch.tensor(self.labels[idx])


Episode 1: Selected=distilbert, Accuracy=0.9433


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Episode 2: Selected=distilbert, Accuracy=0.9167


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Episode 3: Selected=distilbert, Accuracy=0.9467


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
