In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  # Arrange GPU devices starting from 0
os.environ["CUDA_VISIBLE_DEVICES"]= "5"

In [2]:
!export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

In [3]:
import pandas as pd

data = pd.read_csv("final_wikiart_data.csv")
data.head()

Unnamed: 0,filename,artist,genre,description,phash,width,height,genre_count,subset,artist_ko,title,year
0,Abstract_Expressionism/aaron-siskind_acolman-1...,aaron siskind,['Abstract Expressionism'],acolman-1-1955,bebbeb018a7d80a8,1922,1382,1,train,아론 시스킨드,acolman-1,1955.0
1,Abstract_Expressionism/aaron-siskind_chicago-6...,aaron siskind,['Abstract Expressionism'],chicago-6-1961,d7d0781be51fc00e,1382,1746,1,train,아론 시스킨드,chicago-6,1961.0
2,Abstract_Expressionism/aaron-siskind_glouceste...,aaron siskind,['Abstract Expressionism'],gloucester-16a-1944,9f846e5a6c639325,1382,1857,1,train,아론 시스킨드,gloucester-16a,1944.0
3,Abstract_Expressionism/aaron-siskind_jerome-ar...,aaron siskind,['Abstract Expressionism'],jerome-arizona-1949,a5d691f85ac5e4d0,1382,1849,1,train,아론 시스킨드,jerome-arizona,1949.0
4,Abstract_Expressionism/aaron-siskind_kentucky-...,aaron siskind,['Abstract Expressionism'],kentucky-4-1951,880df359e6b11db1,1382,1625,1,train,아론 시스킨드,kentucky-4,1951.0


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import faiss
import numpy as np
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from torch.cuda.amp import autocast

In [5]:
class SEBlock(nn.Module):
    def __init__(self, in_channels, reduce_ratio=4):
        super(SEBlock, self).__init__()
        reduced_channels = in_channels // reduce_ratio
        self.fc1 = nn.Conv2d(in_channels, reduced_channels, kernel_size=1)
        self.fc2 = nn.Conv2d(reduced_channels, in_channels, kernel_size=1)

    def forward(self, x):
        se = F.adaptive_avg_pool2d(x, 1)
        se = torch.relu(self.fc1(se))
        se = torch.sigmoid(self.fc2(se))
        return x * se


class MBConv(nn.Module):
    def __init__(self, in_channels, out_channels, expansion, stride, se_ratio=0.25):
        super(MBConv, self).__init__()
        self.use_residual = stride == 1 and in_channels == out_channels
        mid_channels = in_channels * expansion

        self.expand_conv = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.deptwise_conv = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1, groups=mid_channels, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        self.se = SEBlock(mid_channels, reduce_ratio=int(1 / se_ratio))
        self.project_conv = nn.Conv2d(mid_channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.act = nn.SiLU()

    def forward(self, x):
        residual = x
        x = self.act(self.bn1(self.expand_conv(x)))
        x = self.act(self.bn2(self.deptwise_conv(x)))
        x = self.se(x)
        x = self.bn3(self.project_conv(x))
        if self.use_residual:
            x += residual
        return x


class FusedMBConv(nn.Module):
    def __init__(self, in_channels, out_channels, expansion, stride):
        super(FusedMBConv, self).__init__()
        self.use_residual = stride == 1 and in_channels == out_channels
        mid_channels = in_channels * expansion

        self.expand_conv = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=stride, padding=1, bias=False) if expansion != 1 else None
        self.bn1 = nn.BatchNorm2d(mid_channels if expansion != 1 else in_channels)
        self.project_conv = nn.Conv2d(mid_channels if expansion != 1 else in_channels, out_channels, kernel_size=1 if expansion != 1 else 3, stride=1, padding=1 if expansion == 1 else 0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.act = nn.SiLU()

    def forward(self, x):
        residual = x
        if self.expand_conv:
            x = self.act(self.bn1(self.expand_conv(x)))
        else:
            x = self.act(self.bn1(x))
        x = self.bn2(self.project_conv(x))
        if self.use_residual:
            x += residual
        return x


class EfficientNetV2(nn.Module):
    def __init__(self, num_classes=27):
        super(EfficientNetV2, self).__init__()

        self.stem = nn.Sequential(
            nn.Conv2d(3, 24, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(24),
            nn.SiLU()
        )
        self.block_config = [
            (FusedMBConv, 2, 24, 24, 1, 1),
            (FusedMBConv, 4, 24, 48, 4, 2),
            (FusedMBConv, 4, 48, 64, 4, 2),
            (MBConv, 6, 64, 128, 4, 2),
            (MBConv, 9, 128, 160, 6, 1),
            (MBConv, 15, 160, 256, 6, 2)
        ]

        layers = []
        for block, repeats, in_channels, out_channels, expansion, stride in self.block_config:
            for i in range(repeats):
                if i == 0:
                    layers.append(block(in_channels, out_channels, expansion, stride))
                else:
                    layers.append(block(out_channels, out_channels, expansion, 1))
        self.blocks = nn.Sequential(*layers)

        self.head = nn.Sequential(
            nn.Conv2d(256, 1280, kernel_size=1, bias=False),
            nn.BatchNorm2d(1280),
            nn.SiLU(),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1280, num_classes)
        )

    def forward(self, x):
        x = self.stem(x)
        x = self.blocks(x)
        x = self.head(x)
        return x

In [6]:
def load_model(path='path', device='cuda'):
    model = EfficientNetV2(num_classes=27)
    model.load_state_dict(torch.load(path, map_location=device, weights_only=True))
    model.eval()
    return model

In [7]:
def load_and_transform_image(artwork_path, transform, device):
    if not os.path.exists(artwork_path):
        return np.zeros((512,))
    img = Image.open(artwork_path).convert('RGB')
    img_tensor = transform(img).unsqueeze(0)  # CPU에서 변환
    return img_tensor  # GPU로 옮기지 않음

def compute_gallery_vector_batch(artworks, model, device, num_workers=4, batch_size=4):
    vectors = []
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    imgs = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        img_futures = [executor.submit(load_and_transform_image, os.path.join(artwork), transform, device) for artwork in artworks]
        
        for future in tqdm(img_futures, desc="Loading and transforming images"):
            img_tensor = future.result()
            if isinstance(img_tensor, torch.Tensor):
                imgs.append(img_tensor)

    if len(imgs) > 0:
        for i in tqdm(range(0, len(imgs), batch_size), desc="Loading Transfer vectors"):
            img_batch = torch.cat(imgs[i:i+batch_size], dim=0).to(device)  # Move batch to the device (GPU/CPU)
            with torch.no_grad():
                model = model.to(device)  # Ensure the model is on the correct device (GPU/CPU)
                embedding_vectors = model(img_batch).to(device).numpy()  # Move the output back to the CPU
                vectors.extend([embedding.flatten() for embedding in embedding_vectors])

            torch.cuda.empty_cache()  # Clear memory after each batch

    if vectors:
        save_np = np.array(vectors)
        np.save('save_np', save_np)
        return save_np
    else:
        return None


In [8]:
# def load_and_transform_image(artwork_path, transform, device):
#     if not os.path.exists(artwork_path):
#         # 이미지가 없으면 빈 벡터를 반환
#         return np.zeros((512,))
#     img = Image.open(artwork_path).convert('RGB')
#     img_tensor = transform(img).unsqueeze(0).to(device)
#     return img_tensor

# def compute_gallery_vector_batch(artworks, model, device, num_workers=4, batch_size=8):
#     vectors = []
#     transform = transforms.Compose([
#         transforms.Resize((224, 224)),
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#     ])

#     imgs = []
#     with ThreadPoolExecutor(max_workers=num_workers) as executor:
#         img_futures = [executor.submit(load_and_transform_image, os.path.join(artwork), transform, device) for artwork in artworks]
        
#         for future in tqdm(img_futures, desc="Loading and transforming images"):
#             img_tensor = future.result()
#             if isinstance(img_tensor, torch.Tensor):
#                 imgs.append(img_tensor)

#     if len(imgs) > 0:
#         for i in range(0, len(imgs), batch_size):
#             img_batch = torch.cat(imgs[i:i+batch_size], dim=0)
#             with torch.no_grad():
#                 with autocast():  # Mixed Precision 적용
#                     embedding_vectors = model(img_batch).to(device).cpu().numpy()
#                     vectors.extend([embedding.flatten() for embedding in embedding_vectors])
#             torch.cuda.empty_cache()  # 메모리 비우기

#     if vectors:
#         save_np = np.array(vectors)
#         np.save('save_np', save_np)
#         return save_np
#     else:
#         return None

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = load_model(r'best_model(EfficientNetV2).pth', device)

  return self.fget.__get__(instance, owner)()


In [10]:
user_gallery_vector = compute_gallery_vector_batch(list(data['filename']), model, 'cuda')

Loading and transforming images: 100%|██████████| 80042/80042 [15:27<00:00, 86.26it/s] 
Loading Transfer vectors:   0%|          | 0/19837 [00:00<?, ?it/s]


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [None]:
np_load = np.load('save_np.npy')
np_load