In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, utils
import random
from torchvision.transforms import Resize, ToTensor, Normalize
from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler, WeightedRandomSampler

In [2]:
def seed_everything(seed):
    """
    Pin every random number generator used in this notebook so that repeated
    training runs under the same conditions give the same results.

    Args:
        seed: integer seed value
    """
    # Python's built-in RNG; the original author notes that torchvision
    # transforms draw from the `random` library, hence it is seeded too.
    random.seed(seed)
    # NumPy's global RNG.
    np.random.seed(seed)

    # PyTorch RNGs: the default generator and the current CUDA device.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # torch.cuda.manual_seed_all(seed)  # if use multi-GPU

    # cuDNN: disable the auto-tuner and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Single seed value for the notebook; applied immediately to all RNGs.
seed = 42
seed_everything(seed)

## Dataset

In [3]:
def search(dirname, result):
    """
    Recursively collect the paths of all files below `dirname`.

    Files that have an extension are appended to `result` (in place); files
    without an extension are printed instead. Hidden sub-directories (name
    starting with '.') are skipped. The number of entries of each visited
    directory is printed.

    Args:
        dirname: directory to scan.
        result: list that receives the joined file paths.

    Returns:
        None. A directory that cannot be listed because of a PermissionError
        is reported with a printed 'error' and otherwise ignored.
    """
    try:
        filenames = os.listdir(dirname)
    except PermissionError:
        print('error')
        return

    print(f'file 개수 : {len(filenames)}')
    for filename in filenames:
        full_filename = os.path.join(dirname, filename)
        if os.path.isdir(full_filename):
            # Check the entry name, not the joined path: a path such as
            # './img_data/x' always starts with '.', which used to make the
            # function skip every sub-directory, while an absolute root let
            # hidden directories through.
            if filename.startswith('.'):
                continue
            search(full_filename, result)
        elif os.path.splitext(full_filename)[-1]:  # has an extension
            result.append(full_filename)
        else:
            print(full_filename)

# One path list per image category; each `search` call fills its list in place.
illust_all_path, scenery_all_path, vector_all_path = [], [], []
search("./img_data/illustrations", illust_all_path)
search("./img_data/scenery", scenery_all_path)
search("./img_data/vectors", vector_all_path)

file 개수 : 19495
file 개수 : 21958
file 개수 : 14799


In [4]:
len(illust_all_path), len(scenery_all_path), len(vector_all_path)

(19494, 21958, 14799)

In [5]:
vector_all_path[:5]

['./img_data/vectors/phone-6032011__340.png',
 './img_data/vectors/fireplace-160963__340.png',
 './img_data/vectors/lighthouse-5575259__340.png',
 './img_data/vectors/dices-160654__340.png',
 './img_data/vectors/beaker-159176__340.png']

In [6]:
def labeling(dirname, result, prefix):
    """
    Derive a text label from every file name in `dirname`.

    For a name such as 'digital-paper-5262381__340.png' the part before the
    first '.' and before the first '__' is taken, its last '-'-separated
    token is dropped, and the remaining tokens are joined with spaces and
    prefixed with `prefix`. Entries whose name starts with '.' are skipped.

    Args:
        dirname: directory whose entries are labelled (not recursive).
        result: list that receives one label per entry, in listing order.
        prefix: text put in front of every label.

    Returns:
        None. A PermissionError while listing prints 'error'.
    """
    try:
        entries = os.listdir(dirname)
    except PermissionError:
        print('error')
        return

    for entry in entries:
        if entry.startswith('.'):
            continue
        stem = entry.partition(".")[0]
        slug = stem.partition("__")[0]
        words = slug.split("-")[:-1]  # drop the trailing token
        result.append(prefix + ' '.join(words))

In [7]:
# Text prompts per category. Each list is filled in directory-listing order.
# NOTE(review): later cells pair these with the path lists by position, so
# both must list the directories in the same order - confirm.
illust_label, vector_label, scenery_label = [], [], []

dirname = "./img_data/illustrations"
labeling(dirname, illust_label, "an illustration image of ")

dirname = "./img_data/vectors"
labeling(dirname, vector_label, "a vector image of ")

dirname = "./img_data/scenery"
labeling(dirname, scenery_label, "a scenery of ")

In [8]:
len(illust_label), len(scenery_label),len(vector_label)

(19494, 21958, 14799)

In [9]:
illust_label[:5]

['an illustration image of bicycle',
 'an illustration image of cave',
 'an illustration image of watercolour',
 'an illustration image of digital paper',
 'an illustration image of cartoon']

In [10]:
# One row per illustration: file path plus its text label (paired by position).
illust_df = pd.DataFrame(illust_all_path, columns=['path']).assign(label=illust_label)
display(illust_df)

Unnamed: 0,path,label
0,./img_data/illustrations/bicycle-4307184__340.png,an illustration image of bicycle
1,./img_data/illustrations/cave-5523266__340.png,an illustration image of cave
2,./img_data/illustrations/watercolour-4744419__...,an illustration image of watercolour
3,./img_data/illustrations/digital-paper-5262381...,an illustration image of digital paper
4,./img_data/illustrations/cartoon-5852530__340.png,an illustration image of cartoon
...,...,...
19489,./img_data/illustrations/gold-foil-tree-of-lif...,an illustration image of gold foil tree of life
19490,./img_data/illustrations/santa-claus-5785751__...,an illustration image of santa claus
19491,./img_data/illustrations/double-decker-3283422...,an illustration image of double decker
19492,./img_data/illustrations/swing-6143981__340.png,an illustration image of swing


지우기 전 19498 rows × 2 columns

In [11]:
# One row per scenery image: file path plus its text label (paired by position).
scenery_df = pd.DataFrame(scenery_all_path, columns=['path']).assign(label=scenery_label)
scenery_df

Unnamed: 0,path,label
0,./img_data/scenery/lindau-7124493__340.png,a scenery of lindau
1,./img_data/scenery/summer-845468__340.png,a scenery of summer
2,./img_data/scenery/forest-4246141__340.png,a scenery of forest
3,./img_data/scenery/water-549311__480.png,a scenery of water
4,./img_data/scenery/canyon-2420827__340.png,a scenery of canyon
...,...,...
21953,./img_data/scenery/rock-outcrop-1599212__340.png,a scenery of rock outcrop
21954,./img_data/scenery/cayambe-2635372__340.png,a scenery of cayambe
21955,./img_data/scenery/sea-3168282__340.png,a scenery of sea
21956,./img_data/scenery/grass-3239456__340.png,a scenery of grass


지우기 전 22125 rows × 2 columns

In [12]:
# One row per vector image: file path plus its text label (paired by position).
vector_df = pd.DataFrame(vector_all_path, columns=['path']).assign(label=vector_label)
vector_df

Unnamed: 0,path,label
0,./img_data/vectors/phone-6032011__340.png,a vector image of phone
1,./img_data/vectors/fireplace-160963__340.png,a vector image of fireplace
2,./img_data/vectors/lighthouse-5575259__340.png,a vector image of lighthouse
3,./img_data/vectors/dices-160654__340.png,a vector image of dices
4,./img_data/vectors/beaker-159176__340.png,a vector image of beaker
...,...,...
14794,./img_data/vectors/escape-4143232__340.png,a vector image of escape
14795,./img_data/vectors/forest-5616880__340.png,a vector image of forest
14796,./img_data/vectors/dishes-576376__340.png,a vector image of dishes
14797,./img_data/vectors/bear-3544195__480.png,a vector image of bear


지우기 전 14876 rows × 2 columns

In [13]:
# Stack the three category frames (illustrations, vectors, scenery - in that
# order) into one frame with a fresh 0..n-1 index.
df = pd.concat([illust_df, vector_df, scenery_df], ignore_index=True)
df

Unnamed: 0,path,label
0,./img_data/illustrations/bicycle-4307184__340.png,an illustration image of bicycle
1,./img_data/illustrations/cave-5523266__340.png,an illustration image of cave
2,./img_data/illustrations/watercolour-4744419__...,an illustration image of watercolour
3,./img_data/illustrations/digital-paper-5262381...,an illustration image of digital paper
4,./img_data/illustrations/cartoon-5852530__340.png,an illustration image of cartoon
...,...,...
56246,./img_data/scenery/rock-outcrop-1599212__340.png,a scenery of rock outcrop
56247,./img_data/scenery/cayambe-2635372__340.png,a scenery of cayambe
56248,./img_data/scenery/sea-3168282__340.png,a scenery of sea
56249,./img_data/scenery/grass-3239456__340.png,a scenery of grass


### 깨진 이미지 확인

In [14]:
# for index in range(len(df)):
#     try:
#         image = Image.open(df['path'].iloc[index]).convert('RGB')
#         label = df['label'].iloc[index]
#     except Exception as e:
#         print(e)
#         print(f'Image load error : {df["path"].iloc[index]}')

### 깨진 이미지 지우기

In [15]:
# for index in range(len(df)):
#     try:
#         image = Image.open(df['path'].iloc[index]).convert('RGB')
#         label = df['label'].iloc[index]
#     except:
#         print(f'Image load error : {df["path"].iloc[index]}')
#         os.remove(df["path"].iloc[index])

### Custom Dataset

In [16]:
# Per-channel mean/std (the widely used ImageNet statistics), shared by both splits.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# 'train' and 'val' use the same pipeline: resize to 256x256 (aspect ratio is
# not preserved), convert to a tensor, then normalise.
data_transforms = {
    split: transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        normalize,
    ])
    for split in ('train', 'val')
}

In [17]:
class CustomDataset(Dataset):
    """
    Map-style dataset over a DataFrame with 'path' and 'label' columns.

    Each item is `(image, label)`: the image at `path` is opened, converted
    to RGB and passed through `transform` (if one is given); the label is
    returned unchanged.
    """
    def __init__(self, img_paths_label, transform):
        """
        Args:
            img_paths_label: DataFrame providing the 'path' and 'label' columns.
            transform: callable applied to the RGB PIL image, or a falsy value
                to return the PIL image as is.
        """
        self.X = img_paths_label['path']
        self.y = img_paths_label['label']
        self.transform = transform

    def __getitem__(self, index):
        """
        Return `(image, label)` for the row at positional `index`.

        Raises:
            Exception: whatever loading or transforming the image raised. The
                failing path is printed first. Previously the error was
                swallowed and None returned, which only failed later (and less
                clearly) when the batch was collated.
        """
        # Positional lookup: the frame may carry a shuffled index.
        path = self.X.iloc[index]
        try:
            # Context manager closes the file handle as soon as the pixel data
            # has been copied by convert().
            with Image.open(path) as raw:
                image = raw.convert('RGB')
            label = self.y.iloc[index]
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception:
            print(f'Image load error : {path}')
            raise

    def __len__(self):
        """Number of rows in the underlying frame."""
        return len(self.X)

## Transfer Learning

In [18]:
import os
import sys
import argparse
from typing import Optional
from datetime import datetime

import torch
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, Callback
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.utilities.distributed import rank_zero_only

# sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from dalle.models import ImageGPT, Dalle, Rep_Dalle

In [31]:
# Release the cached GPU memory held by PyTorch's allocator.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours, so it was run out of order - confirm it is still needed here.
torch.cuda.empty_cache()

In [19]:
import yaml

# Identifier of the pretrained minDALL-E checkpoint handed to `from_pretrained`.
path_upstream = 'minDALL-E/1.3B'
# Fine-tuning config. The file path itself (not parsed YAML) is what gets
# passed on as `config_downstream`.
config_file = './configs/transfer-imagenet-uncond-gen.yaml'
# config_file = './configs/transfer-imagenet-clscond-gen.yaml'
config_downstream = config_file
# with open(config_file) as f:
#     config_downstream = yaml.load(f,Loader=yaml.FullLoader)

# Root directory for checkpoints and logs of each run.
result_path = './base_result'
# NOTE(review): earlier cells read images from './img_data', not '../img_data';
# confirm which location is intended.
data_dir = '../img_data'

# Number of GPUs; used for the gradient-accumulation maths and the Trainer.
n_gpus = 1

In [20]:
class ImageLogger(Callback):
    """
    Lightning callback that logs input images next to their `stage1` outputs.

    Only the first batch of each of the first five epochs is logged, for both
    the training and the validation loop.
    """
    def __init__(self):
        super().__init__()

    @rank_zero_only
    def log_img(self, pl_module, batch, current_epoch, split="train"):
        """
        Write one grid of originals and one grid of reconstructions to the
        module's logger.

        Args:
            pl_module: module exposing `stage1` and `logger.experiment`.
            batch: `(images, labels)` pair; only the images are used.
            current_epoch: value used as the logger's global step.
            split: suffix of the image tag ("train" or "test").
        """
        with torch.no_grad():
            images, _ = batch
            recons = pl_module.stage1(images)
            images = images.cpu()
            recons = recons.cpu()

            # (x + 1) / 2 maps [-1, 1] onto [0, 1]; assumes the inputs were
            # normalised to [-1, 1] - TODO confirm against the data pipeline.
            grid_org = (torchvision.utils.make_grid(images, nrow=8) + 1.0) / 2.0
            grid_rec = (torchvision.utils.make_grid(recons, nrow=8) + 1.0) / 2.0
            # Only the reconstruction grid is clipped into [0, 1].
            grid_rec = torch.clip(grid_rec, min=0, max=1)

            pl_module.logger.experiment.add_image(f"images_org/{split}", grid_org, global_step=current_epoch)
            pl_module.logger.experiment.add_image(f"images_rec/{split}", grid_rec, global_step=current_epoch)

    # `dataloader_idx` defaults to 0 so the hooks work both with Lightning
    # versions that pass it and with versions that dropped it from
    # `on_train_batch_end`.
    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx=0):
        if batch_idx == 0 and trainer.current_epoch < 5:
            self.log_img(pl_module, batch, current_epoch=trainer.current_epoch, split="train")

    def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx=0):
        if batch_idx == 0 and trainer.current_epoch < 5:
            # Validation images are logged under the "test" tag.
            self.log_img(pl_module, batch, current_epoch=trainer.current_epoch, split="test")

In [21]:
class ImageNetDataModule(pl.LightningDataModule):
    """
    Data module wrapping torchvision's ImageNet train/val splits.

    Images are resized and cropped to `image_resolution` (random crop for
    training, centre crop for validation) and normalised with mean 0.5 and
    std 0.5 per channel.
    """
    def __init__(self,
                 data_dir: Optional[str] = None,
                 image_resolution: int = 256,
                 train_batch_size: int = 2,
                 valid_batch_size: int = 32,
                 num_workers: int = 8):
        super().__init__()

        self.data_dir = data_dir
        self.image_resolution = image_resolution
        self.train_batch_size = train_batch_size
        self.valid_batch_size = valid_batch_size
        self.num_workers = num_workers

        def _pipeline(crop):
            # Both splits share everything except the cropping strategy.
            return transforms.Compose([
                transforms.Resize(image_resolution),
                crop,
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
            ])

        self.train_transform = _pipeline(transforms.RandomCrop(image_resolution))
        self.valid_transform = _pipeline(transforms.CenterCrop(image_resolution))

    def setup(self, stage=None):
        """Instantiate the train and validation ImageNet datasets."""
        splits = (
            ('trainset', 'train', self.train_transform),
            ('validset', 'val', self.valid_transform),
        )
        for attr_name, split_name, pipeline in splits:
            setattr(self, attr_name,
                    torchvision.datasets.ImageNet(root=self.data_dir,
                                                  split=split_name,
                                                  transform=pipeline))

    def train_dataloader(self):
        """Loader over the training set; worker count is pinned to 0."""
        # `self.num_workers` is stored but intentionally not used here.
        loader_kwargs = dict(batch_size=self.train_batch_size,
                             num_workers=0,
                             pin_memory=True)
        return DataLoader(self.trainset, **loader_kwargs)

    def valid_dataloader(self):
        """Loader over the validation set; worker count is pinned to 0."""
        # `self.num_workers` is stored but intentionally not used here.
        loader_kwargs = dict(batch_size=self.valid_batch_size,
                             num_workers=0,
                             pin_memory=True)
        return DataLoader(self.validset, **loader_kwargs)

In [22]:
from sklearn.model_selection import train_test_split

# Shuffle the combined frame and hold out 20% of the rows for validation.
# The split is reproducible through the fixed random_state.
train, valid = train_test_split(df, test_size=0.2,
                               shuffle=True,
                               random_state=42)

In [23]:
class CustomDataModule(pl.LightningDataModule):
    """
    Data module serving the notebook's own image/label frames.

    The splits are read from the notebook-level `train` and `valid`
    DataFrames, so the cell that creates them has to run before `setup()`.
    """
    def __init__(self,
                 data_dir: Optional[str] = None,
                 image_resolution: int = 256,
                 train_batch_size: int = 2,
                 valid_batch_size: int = 32,
                 num_workers: int = 8):
        """
        Args:
            data_dir: stored on the instance; not read by `setup()`.
            image_resolution: target side length of the resized/cropped images.
            train_batch_size: batch size of the training loader.
            valid_batch_size: batch size of the validation loader.
            num_workers: stored on the instance; the loaders pin workers to 0.
        """
        super().__init__()

        self.data_dir = data_dir
        self.image_resolution = image_resolution
        self.train_batch_size = train_batch_size
        self.valid_batch_size = valid_batch_size
        self.num_workers = num_workers

        # ToTensor followed by Normalize(0.5, 0.5) scales pixels to [-1, 1].
        self.train_transform = transforms.Compose(
            [transforms.Resize(image_resolution),
             transforms.RandomCrop(image_resolution),
             transforms.ToTensor(),
             transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]
        )
        self.valid_transform = transforms.Compose(
            [transforms.Resize(image_resolution),
             transforms.CenterCrop(image_resolution),
             transforms.ToTensor(),
             transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]
        )

    def setup(self, stage=None):
        """Build the train/validation datasets from the notebook-level frames."""
        # Use the pipelines built in __init__ rather than a notebook-level
        # transform dict, so `image_resolution` takes effect and pixels are
        # scaled to [-1, 1]. ImageLogger's `(x + 1) / 2` de-normalisation
        # assumes that range; presumably the pretrained stage-1 model does
        # too - confirm.
        self.trainset = CustomDataset(train, self.train_transform)
        self.validset = CustomDataset(valid, self.valid_transform)

    def train_dataloader(self):
        """Loader over the training set; worker count is pinned to 0."""
        return DataLoader(self.trainset,
                          batch_size=self.train_batch_size,
                          num_workers=0,
                          # num_workers=self.num_workers,
                          pin_memory=True)

    # NOTE(review): Lightning's own hook is named `val_dataloader`; this method
    # is only picked up when it is called explicitly, as the notebook does.
    def valid_dataloader(self):
        """Loader over the validation set; worker count is pinned to 0."""
        return DataLoader(self.validset,
                          batch_size=self.valid_batch_size,
                          num_workers=0,
                          # num_workers=self.num_workers,
                          pin_memory=True)

In [24]:
def setup_callbacks(config, args_result_path):
    """
    Create the checkpoint callback, the TensorBoard logger and the image logger
    for one training run.

    Outputs go to `<args_result_path>/<config file stem>/<timestamp>/{ckpt,log}`,
    where the config file stem comes from the notebook-level `config_downstream`
    path and the timestamp is formatted as ddmmYYYY_HHMMSS.

    Args:
        config: object exposing `stage2.use_cls_cond` and
            `experiment.save_ckpt_freq`.
        args_result_path: root directory for all run outputs.

    Returns:
        Tuple `(checkpoint_callback, logger, logger_img)`.
    """
    timestamp = datetime.now().strftime('%d%m%Y_%H%M%S')
    config_stem = os.path.basename(config_downstream).split('.')[0]
    run_dir = os.path.join(args_result_path, config_stem, timestamp)

    # Checkpoint file name depends on whether class conditioning is enabled.
    if config.stage2.use_cls_cond:
        ckpt_name = "custom-clscond-gen-{epoch:02d}"
    else:
        ckpt_name = "custom-uncond-gen-{epoch:02d}"

    return (
        ModelCheckpoint(
            dirpath=os.path.join(run_dir, 'ckpt'),
            filename=ckpt_name,
            every_n_epochs=config.experiment.save_ckpt_freq,
            save_weights_only=True,
            save_last=True,
        ),
        TensorBoardLogger(os.path.join(run_dir, 'log'), name="iGPT"),
        ImageLogger(),
    )

In [25]:
# def setup_callbacks(config, args_result_path):
#     # Setup callbacks
#     now = datetime.now().strftime('%d%m%Y_%H%M%S')
#     result_path = os.path.join(args_result_path,
#                                'config',
#                                now)
#     ckpt_path = os.path.join(result_path, 'ckpt')
#     log_path = os.path.join(result_path, 'log')

#     checkpoint_callback = ModelCheckpoint(
#         dirpath=ckpt_path,
#         filename="customdata-clscond-gen-{epoch:02d}" if config.stage2.use_cls_cond else
#                  "customdata-uncond-gen-{epoch:02d}",
#         every_n_epochs=config.experiment.save_ckpt_freq,
#         save_weights_only=True,
#         save_last=True
#     )
#     logger = TensorBoardLogger(log_path, name="iGPT")
#     logger_img = ImageLogger()
#     return checkpoint_callback, logger, logger_img

In [26]:
# if __name__ == '__main__':
#     pl.seed_everything(seed)

#     # Build iGPT
#     model, config = ImageGPT.from_pretrained(path_upstream, config_downstream)
#     model = Dalle.from_pretrained(path_upstream)
#     config = config_downstream

#     # Setup callbacks
#     # ckpt_callback, logger, logger_img = setup_callbacks(config,result_path)

#     # Build data modules
#     dataset = CustomDataModule(data_dir=data_dir,
#                                  # image_resolution=config.dataset.image_resolution,
#                                  # train_batch_size=config.experiment.local_batch_size,
#                                  # valid_batch_size=config.experiment.valid_batch_size,
#                                  num_workers=16)
#     dataset.setup()
#     train_dataloader = dataset.train_dataloader()
#     valid_dataloader = dataset.valid_dataloader()
#     print(f"len(train_dataset) = {len(dataset.trainset)}")
#     print(f"len(valid_dataset) = {len(dataset.validset)}")

#     # Calculate how many batches are accumulated
#     # assert config.experiment.total_batch_size % (config.experiment.local_batch_size * args.n_gpus) == 0
#     grad_accm_steps = config['experiment']['total_batch_size'] // (config['experiment']['local_batch_size'] * n_gpus)
#     config['optimizer']['max_steps'] = len(dataset.trainset) // config['experiment']['total_batch_size'] * config['experiment']['epochs']

#     # Build trainer
#     trainer = pl.Trainer(max_epochs=config['experiment']['epochs'],
#                          accumulate_grad_batches=grad_accm_steps,
#                          gradient_clip_val=config['optimizer']['grad_clip_norm'],
#                          precision=32, # amp -> 16 아니면 32
#                          # callbacks=[ckpt_callback, logger_img],
#                          accelerator="gpu",
#                          devices=n_gpus,
#                          # strategy="ddp",
#                          # logger=logger
#                         )
#     trainer.fit(model, train_dataloader, valid_dataloader)

In [27]:
# Fine-tuning entry point: build the model, callbacks and data, then train.
if __name__ == '__main__':
    pl.seed_everything(seed)

    # Build the Rep_Dalle model (and its config) from the pretrained
    # checkpoint plus the downstream config file.
    model, config = Rep_Dalle.from_pretrained(path_upstream, config_downstream)

    # Setup callbacks
    ckpt_callback, logger, logger_img = setup_callbacks(config,result_path)

    # Build data modules
    dataset = CustomDataModule(data_dir=data_dir,
                                 image_resolution=config.dataset.image_resolution,
                                 train_batch_size=config.experiment.local_batch_size,
                                 valid_batch_size=config.experiment.valid_batch_size,
                                 num_workers=16)
    # setup() and the loaders are called by hand; the module itself is not
    # handed to the Trainer.
    dataset.setup()
    train_dataloader = dataset.train_dataloader()
    valid_dataloader = dataset.valid_dataloader()
    print(f"len(train_dataset) = {len(dataset.trainset)}")
    print(f"len(valid_dataset) = {len(dataset.validset)}")

    # Calculate how many batches are accumulated: the total batch size must be
    # a whole multiple of (per-GPU batch size * number of GPUs).
    assert config.experiment.total_batch_size % (config.experiment.local_batch_size * n_gpus) == 0
    grad_accm_steps = config.experiment.total_batch_size // (config.experiment.local_batch_size * n_gpus)
    # Steps for the whole run: full total-size batches per epoch times the epoch count.
    config.optimizer.max_steps = len(dataset.trainset) // config.experiment.total_batch_size * config.experiment.epochs
    
    # Build trainer
    trainer = pl.Trainer(max_epochs=config.experiment.epochs,
                         accumulate_grad_batches=grad_accm_steps,
                         gradient_clip_val=config.optimizer.grad_clip_norm,
                         # 16-bit mixed precision when AMP is enabled, else full 32-bit.
                         precision=16 if config.experiment.use_amp else 32,
                         callbacks=[ckpt_callback, logger_img],
                         accelerator="gpu",
                         devices=n_gpus,
                         # strategy="ddp",
                         logger=logger)
    trainer.fit(model, train_dataloader, valid_dataloader)

Global seed set to 42


/opt/ml/.cache/minDALL-E/1.3B/tokenizer successfully restored..
/opt/ml/.cache/minDALL-E/1.3B/stage1_last.ckpt successfully restored..


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_deprecation(
  rank_zero_deprecation(
Missing logger folder: ./base_result/transfer-imagenet-uncond-gen/29052022_081704/log/iGPT


/opt/ml/.cache/minDALL-E/1.3B/stage2_last.ckpt successfully restored..
len(train_dataset) = 45000
len(valid_dataset) = 11251


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type          | Params
-----------------------------------------
0 | stage1 | VQGAN         | 76.1 M
1 | stage2 | Transformer1d | 1.3 B 
-----------------------------------------
1.3 B     Trainable params
76.1 M    Non-trainable params
1.4 B     Total params
2,734.311 Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


torch.Size([32, 256])
torch.Size([32, 256, 16384])
torch.Size([32, 63, 16384])




torch.Size([32, 256])
torch.Size([32, 256, 16384])
torch.Size([32, 63, 16384])


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [30]:
%rm -rf ~/.local/share/Trash/files/*

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
