In [1]:
import gc
import os
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cv2

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary

from tqdm.auto import tqdm

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torchvision.models as models

from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Hyper-parameters shared by the data pipeline, the models and the training loop.
CFG = dict(
    IMG_SIZE=64,            # size passed to the Resize transform
    EPOCHS=100,             # number of passes over the training loader
    G_LEARNING_RATE=5e-4,   # Adam learning rate for the generator
    D_LEARNING_RATE=5e-4,   # Adam learning rate for the discriminator
    BATCH_SIZE=32,          # DataLoader batch size
    SEED=2023,              # value handed to seed_everything
    LATENT_SIZE=256,        # channel count of the generator's noise input
)

In [4]:
def clear_mem() -> None:
    """Run a garbage-collection pass, then call ``torch.cuda.empty_cache()``."""
    gc.collect()
    torch.cuda.empty_cache()

In [5]:
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: integer applied to every random number generator seeded below;
            its string form is also written to ``PYTHONHASHSEED``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Feed the same seed to every library-level generator.
    for seed_fn in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed)

    # Deterministic cuDNN kernels; disable the benchmark auto-tuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Fix every RNG seed up front so runs are repeatable.
seed_everything(seed=CFG['SEED'])

In [6]:
# Dataset root, relative to this notebook.
data_path = '../../data'
train_df = pd.read_csv(os.path.join(data_path, 'train.csv'))

# Manual overrides of the `artist` value for two rows, addressed by index label.
# NOTE(review): presumably these correct bad labels in the raw CSV — confirm.
train_df.at[3896, 'artist'] = 'Titian'
train_df.at[3986, 'artist'] = 'Alfred Sisley'

In [7]:
# Label encoding: replace artist names with integer class ids.
# `le` is kept so ids can be mapped back to names later.
le = preprocessing.LabelEncoder().fit(train_df['artist'].values)
train_df['artist'] = le.transform(train_df['artist'].values)

In [8]:
# Order rows by image id, then preview the first five.
train_df = train_df.sort_values(by='id')
train_df.head(5)

Unnamed: 0,id,img_path,artist
0,0,./train/0000.jpg,9
1,1,./train/0001.jpg,48
2,2,./train/0002.jpg,7
3,3,./train/0003.jpg,10
4,4,./train/0004.jpg,24


In [9]:
# Class balance check: number of rows per encoded artist id.
train_df['artist'].value_counts()

artist
48    629
10    489
33    303
38    233
0     220
35    220
15    204
42    181
46    173
30    173
1     165
36    142
43    137
2     132
4     132
21    121
45    120
32    118
24    115
29    101
44     99
37     97
28     91
40     85
16     84
9      81
27     76
3      74
41     73
18     72
19     69
13     65
5      64
26     64
11     62
23     61
47     60
39     59
7      59
22     52
8      50
12     44
49     44
20     42
31     34
34     33
6      32
17     30
14     26
25     21
Name: count, dtype: int64

In [10]:
def get_data(df, infer=False):
    """Resolve image paths against ``data_path`` and optionally return labels.

    Args:
        df: DataFrame with an ``img_path`` column and, unless ``infer`` is
            truthy, an ``artist`` column.
        infer: when truthy, only the array of paths is returned.

    Returns:
        The array of joined paths when ``infer`` is truthy, otherwise a
        ``(paths, labels)`` tuple of arrays.
    """
    full_paths = df['img_path'].apply(lambda rel: os.path.join(data_path, rel)).values
    return full_paths if infer else (full_paths, df['artist'].values)

In [11]:
# Training split: absolute-ish image paths plus their encoded artist labels.
train_img_paths, train_labels = get_data(train_df, infer=False)

In [12]:
from torchvision.transforms import ToTensor


class CustomDataset(Dataset):
    """Dataset that reads images from disk with OpenCV and optionally transforms them.

    Args:
        img_paths: indexable collection of image file paths.
        labels: indexable collection of labels aligned with ``img_paths``, or
            ``None`` for unlabeled data.
        transforms: optional callable applied to each RGB image array.
    """

    def __init__(self, img_paths, labels, transforms=None):
        self.img_paths = img_paths
        self.labels = labels
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label)``, or just ``image`` when no labels were given.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file at ``index``.
        """
        img_path = self.img_paths[index]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR

        # Test the instance attribute. The bare name `transforms` is a
        # notebook-level global (the torchvision module once it is imported),
        # so checking it would call `None` whenever no transform was supplied.
        if self.transforms is not None:
            image = self.transforms(image)

        if self.labels is not None:
            return image, self.labels[index]
        return image

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.img_paths)

In [13]:
from torchvision import transforms
# Preprocessing for real images: convert to a tensor, resize, centre-crop to a
# square, then normalise with mean 0.5 / std 0.5 per channel.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(CFG['IMG_SIZE']),
    # Crop to the configured size rather than a hard-coded 64 so that Resize
    # and CenterCrop cannot drift apart when CFG['IMG_SIZE'] is changed.
    transforms.CenterCrop(CFG['IMG_SIZE']),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [14]:
def seed_worker(worker_id):
    """``worker_init_fn`` hook: seed NumPy and ``random`` from torch's initial seed.

    Args:
        worker_id: worker index passed in by the DataLoader (unused).
    """
    derived_seed = torch.initial_seed() % (1 << 32)  # reduce modulo 2**32
    random.seed(derived_seed)
    np.random.seed(derived_seed)


# Generator that is handed to the DataLoader, seeded with 0.
g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x1be3ec75c90>

In [15]:
# Wrap the labelled training images and serve them in shuffled mini-batches.
train_dataset = CustomDataset(
    img_paths=train_img_paths,
    labels=train_labels,
    transforms=train_transform,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=True,
    num_workers=0,
    worker_init_fn=seed_worker,
    generator=g,
)

# 모델 정의

In [16]:
# GAN generator definition
class Generator(torch.nn.Module):
    """Transposed-convolution generator.

    Maps a latent tensor of shape (N, CFG['LATENT_SIZE'], 1, 1) to an image
    tensor of shape (N, 3, 64, 64) whose values are bounded by the final Tanh.
    """

    def __init__(self):
        super().__init__()
        widths = (512, 256, 128, 64)

        # Input is Z: expand the 1x1 latent to a 4x4 map with widths[0] channels.
        layers = [
            nn.ConvTranspose2d(CFG['LATENT_SIZE'], widths[0], kernel_size=4, bias=False),
            nn.BatchNorm2d(widths[0]),
            nn.ReLU(),
        ]

        # Each stage doubles the spatial size (4 -> 8 -> 16 -> 32).
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.extend([
                nn.ConvTranspose2d(in_ch, out_ch, kernel_size=4, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
            ])

        # Last upsampling step to 3 channels at 64x64, squashed by Tanh.
        layers.extend([
            nn.ConvTranspose2d(widths[-1], 3, kernel_size=4, stride=2, padding=1, bias=False),
            nn.Tanh(),
        ])

        self.gen = nn.Sequential(*layers)

    def forward(self, x):
        """Run the latent batch ``x`` through the layer stack."""
        return self.gen(x)

In [17]:
# Print the layer-by-layer summary of the generator. The trailing semicolon
# stops the notebook from echoing the call's return value, which otherwise
# repeats the same table a second time.
summary(Generator(), (CFG['LATENT_SIZE'], 1, 1));

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 3, 64, 64]           --
|    └─ConvTranspose2d: 2-1              [-1, 512, 4, 4]           2,097,152
|    └─BatchNorm2d: 2-2                  [-1, 512, 4, 4]           1,024
|    └─ReLU: 2-3                         [-1, 512, 4, 4]           --
|    └─ConvTranspose2d: 2-4              [-1, 256, 8, 8]           2,097,152
|    └─BatchNorm2d: 2-5                  [-1, 256, 8, 8]           512
|    └─ReLU: 2-6                         [-1, 256, 8, 8]           --
|    └─ConvTranspose2d: 2-7              [-1, 128, 16, 16]         524,288
|    └─BatchNorm2d: 2-8                  [-1, 128, 16, 16]         256
|    └─ReLU: 2-9                         [-1, 128, 16, 16]         --
|    └─ConvTranspose2d: 2-10             [-1, 64, 32, 32]          131,072
|    └─BatchNorm2d: 2-11                 [-1, 64, 32, 32]          128
|    └─ReLU: 2-12                        [-1, 64, 32, 3

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 3, 64, 64]           --
|    └─ConvTranspose2d: 2-1              [-1, 512, 4, 4]           2,097,152
|    └─BatchNorm2d: 2-2                  [-1, 512, 4, 4]           1,024
|    └─ReLU: 2-3                         [-1, 512, 4, 4]           --
|    └─ConvTranspose2d: 2-4              [-1, 256, 8, 8]           2,097,152
|    └─BatchNorm2d: 2-5                  [-1, 256, 8, 8]           512
|    └─ReLU: 2-6                         [-1, 256, 8, 8]           --
|    └─ConvTranspose2d: 2-7              [-1, 128, 16, 16]         524,288
|    └─BatchNorm2d: 2-8                  [-1, 128, 16, 16]         256
|    └─ReLU: 2-9                         [-1, 128, 16, 16]         --
|    └─ConvTranspose2d: 2-10             [-1, 64, 32, 32]          131,072
|    └─BatchNorm2d: 2-11                 [-1, 64, 32, 32]          128
|    └─ReLU: 2-12                        [-1, 64, 32, 3

In [18]:
# GAN discriminator definition
class Discriminator(nn.Module):
    """Strided-convolution discriminator.

    Maps an image tensor of shape (N, 3, 64, 64) to a tensor of shape
    (N, 1, 1, 1) holding the Sigmoid output for each image.
    """

    def __init__(self):
        super().__init__()
        widths = (3, 64, 128, 256, 512)

        layers = []
        # Each stage halves the spatial size (64 -> 32 -> 16 -> 8 -> 4).
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.extend([
                nn.Conv2d(in_ch, out_ch, kernel_size=4, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.LeakyReLU(0.2),
            ])

        # Collapse the 4x4 map to one value per image and squash it with Sigmoid.
        layers.extend([
            nn.Conv2d(widths[-1], 1, kernel_size=4),
            nn.Sigmoid(),
        ])

        self.disc = nn.Sequential(*layers)

    def forward(self, x):
        """Score the image batch ``x``."""
        return self.disc(x)

In [19]:
# Print the layer-by-layer summary of the discriminator. The trailing semicolon
# stops the notebook from echoing the call's return value, which otherwise
# repeats the same table a second time.
summary(Discriminator(), (3, 64, 64));

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 1, 1, 1]             --
|    └─Conv2d: 2-1                       [-1, 64, 32, 32]          3,072
|    └─BatchNorm2d: 2-2                  [-1, 64, 32, 32]          128
|    └─LeakyReLU: 2-3                    [-1, 64, 32, 32]          --
|    └─Conv2d: 2-4                       [-1, 128, 16, 16]         131,072
|    └─BatchNorm2d: 2-5                  [-1, 128, 16, 16]         256
|    └─LeakyReLU: 2-6                    [-1, 128, 16, 16]         --
|    └─Conv2d: 2-7                       [-1, 256, 8, 8]           524,288
|    └─BatchNorm2d: 2-8                  [-1, 256, 8, 8]           512
|    └─LeakyReLU: 2-9                    [-1, 256, 8, 8]           --
|    └─Conv2d: 2-10                      [-1, 512, 4, 4]           2,097,152
|    └─BatchNorm2d: 2-11                 [-1, 512, 4, 4]           1,024
|    └─LeakyReLU: 2-12                   [-1, 512, 4, 4]   

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 1, 1, 1]             --
|    └─Conv2d: 2-1                       [-1, 64, 32, 32]          3,072
|    └─BatchNorm2d: 2-2                  [-1, 64, 32, 32]          128
|    └─LeakyReLU: 2-3                    [-1, 64, 32, 32]          --
|    └─Conv2d: 2-4                       [-1, 128, 16, 16]         131,072
|    └─BatchNorm2d: 2-5                  [-1, 128, 16, 16]         256
|    └─LeakyReLU: 2-6                    [-1, 128, 16, 16]         --
|    └─Conv2d: 2-7                       [-1, 256, 8, 8]           524,288
|    └─BatchNorm2d: 2-8                  [-1, 256, 8, 8]           512
|    └─LeakyReLU: 2-9                    [-1, 256, 8, 8]           --
|    └─Conv2d: 2-10                      [-1, 512, 4, 4]           2,097,152
|    └─BatchNorm2d: 2-11                 [-1, 512, 4, 4]           1,024
|    └─LeakyReLU: 2-12                   [-1, 512, 4, 4]   

In [20]:
# Weight initialisation
def weights_init(m):
    """Re-initialise conv and batch-norm parameters in place.

    Intended to be passed to ``Module.apply``. Modules whose class name
    contains 'Conv' get weights drawn from N(0, 0.02); modules whose class name
    contains 'BatchNorm' get weights drawn from N(1, 0.02) and a zero bias.
    Every other module is left untouched.

    Args:
        m: the module visited by ``apply``.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:  # convolution / transposed convolution
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:  # batch norm
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

# 모델 훈련

In [21]:
from torch.optim.adam import Adam

# Build the generator on the target device and apply the custom weight init.
G = Generator().to(device).apply(weights_init)

# Build the discriminator the same way.
D = Discriminator().to(device).apply(weights_init)

# One Adam optimiser per network, each with its own learning rate from CFG.
G_optim = Adam(
    G.parameters(),
    lr=CFG['G_LEARNING_RATE'],
    betas=(0.5, 0.999),
)
D_optim = Adam(
    D.parameters(),
    lr=CFG['D_LEARNING_RATE'],
    betas=(0.5, 0.999),
)

In [1]:
# GAN training loop: one discriminator update followed by one generator update
# per mini-batch. After every epoch both networks are checkpointed whenever the
# summed mean loss improves.
# NOTE(review): D_loss + G_loss is not a reliable proxy for sample quality in
# adversarial training; consider also saving periodic checkpoints.
min_loss = 9999
criterion = nn.BCELoss()  # built once instead of three times per iteration

# Make sure both networks are in training mode (e.g. after a sampling cell).
G.train()
D.train()

for epoch in range(CFG['EPOCHS']):
    iterator = tqdm(enumerate(train_loader, 0), total=len(train_loader))
    d_loss_list = []
    g_loss_list = []

    for i, data in iterator:
        D_optim.zero_grad()
        real_images = data[0].to(device)
        batch_size = real_images.shape[0]
        label = torch.ones(batch_size, dtype=torch.float32, device=device)        # target for "real"
        label_fake = torch.zeros(batch_size, dtype=torch.float32, device=device)  # target for "fake"

        # Discriminator on real images.
        # view(-1) flattens (N, 1, 1, 1) to (N,). Unlike squeeze() it keeps the
        # batch dimension when N == 1, so prediction and target shapes always match.
        real = D(real_images)
        Dloss_real = criterion(real.view(-1), label)
        Dloss_real.backward()

        # Discriminator on fake images.
        # noise: (N, LATENT_SIZE, 1, 1)
        noise = torch.randn(batch_size, CFG['LATENT_SIZE'], 1, 1, device=device)
        # fake: (N, 3, 64, 64)
        fake = G(noise)
        output = D(fake.detach())  # detach: the generator is not trained in this step
        Dloss_fake = criterion(output.view(-1), label_fake)
        Dloss_fake.backward()

        # Total discriminator loss (logging only; the gradients were already
        # accumulated by the two backward() calls above).
        Dloss = Dloss_real + Dloss_fake
        D_optim.step()

        # Generator step: try to make the updated discriminator output "real".
        G_optim.zero_grad()
        output = D(fake)
        Gloss = criterion(output.view(-1), label)
        Gloss.backward()
        G_optim.step()

        d_current_loss = Dloss.item()
        g_current_loss = Gloss.item()
        d_loss_list.append(d_current_loss)
        g_loss_list.append(g_current_loss)  # recorded exactly once per iteration
        iterator.set_description(f'epoch:{epoch+1} iteration:{i+1} D_loss:{d_current_loss:.4f} G_loss:{g_current_loss:.4f}')

    d_loss_value = np.mean(d_loss_list)
    g_loss_value = np.mean(g_loss_list)
    total_loss = d_loss_value+g_loss_value
    if total_loss < min_loss:
        # New best epoch: remember the loss and overwrite both checkpoints.
        min_loss = total_loss
        print(f'**epoch : {epoch+1}, total_loss : {total_loss:.4f}, d_loss : {d_loss_value:.4f}, g_loss : {g_loss_value:.4f}')
        torch.save(G.state_dict(), os.path.join(data_path, 'Generator.pth'))
        torch.save(D.state_dict(), os.path.join(data_path, 'Discriminator.pth'))
    else:
        print(f'epoch : {epoch+1}, total_loss : {total_loss:.4f}, d_loss : {d_loss_value:.4f}, g_loss : {g_loss_value:.4f}')
    clear_mem()

NameError: name 'CFG' is not defined

In [None]:
# Sample one image from the saved generator checkpoint.
G.load_state_dict(torch.load(os.path.join(data_path, 'Generator.pth'), map_location=device))

# Switch to eval mode for sampling so BatchNorm uses its running statistics
# instead of the statistics of this single-sample batch (and does not update
# them); the previous mode is restored afterwards.
was_training = G.training
G.eval()

with torch.no_grad():
    feature_vector = torch.randn(1, CFG['LATENT_SIZE'], 1, 1).to(device)
    pred = G(feature_vector).squeeze(0)  # (3, 64, 64)
    # The generator ends in Tanh, so values lie in [-1, 1]. imshow expects
    # float RGB data in [0, 1] and would clip everything below 0, so rescale.
    pred = (pred * 0.5 + 0.5).clamp(0, 1)
    pred = pred.permute(1, 2, 0).cpu().numpy()  # CHW -> HWC for matplotlib

G.train(was_training)

plt.imshow(pred)
plt.title('preditected image')
plt.grid(False)
plt.show()

In [None]:
# Sanity-check one batch of images. Show its shape and value range instead of
# dumping every raw pixel value into the notebook output.
sample_images = next(iter(train_loader))[0]
tuple(sample_images.shape), sample_images.min().item(), sample_images.max().item()