In [16]:
import os
import glob
import random
import itertools

In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from PIL import Image
%matplotlib inline

In [3]:
import torch
import torchvision as tv
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import torchvision.transforms.functional as TF
from torchvision.utils import save_image
from torch.utils.data import Dataset, DataLoader

In [4]:
# CUDA stuff
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")


In [5]:
use_cuda

False

# use cycleGAN

## what to use?

for generator, use encoder/transformer/decoder combo

for discriminator, use patchGAN

# making generator

we gotta make residual blocks

In [6]:
# def activation_func(activation):
#     return  nn.ModuleDict([
#         ['relu', nn.ReLU(inplace=True)],
#         ['leaky_relu', nn.LeakyReLU(negative_slope=0.01, inplace=True)],
#         ['selu', nn.SELU(inplace=True)],
#         ['none', nn.Identity()]
#     ])[activation]

In [7]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, activation_fn):
        super(ResidualBlock, self).__init__()
        self.in_channels, self.out_channels, self.activation = in_channels, out_channels, activation
        self.blocks = nn.Identity()
        self.activation_fn = activation_fn
        self.shortcut = nn.Identity()
        
        
        
    def forward(self, x):
        residual = x
        if self.apply_shortcut:
            residual = self.shortcut(x)
        x += residual
        x = self.activate(x)
        return x
    
    @property
    def apply_shortcut(self):
        return self.in_channels != self.out_channels
        
        

In [8]:
class ResNetResidualBlock(ResidualBlock):
    def __init__(self, in_channels, out_channels, activation_fn):
        super(ResNetResidualBlock, self).__init__(in_channels, 
                                                       out_channels, 
                                                       *args,
                                                       **kwargs)
        
        
        
        

In [9]:
class ResNetBlock(nn.Module):
    def __init__(self, input_dim):
        super(ResNetBlock, self).__init__()
        # input and output dim will be the same for our uses
        self.conv1 = nn.Conv2d(input_dim, input_dim, kernel_size=3, padding=1, bias=True)
        self.norm1 = nn.InstanceNorm2d(input_dim)
        self.relu1 = nn.LeakyReLU(negative_slope=0.01, inplace=True)
        
        self.conv2 = nn.Conv2d(input_dim, input_dim, kernel_size=3, padding=1, bias=True)
        self.norm2 = nn.InstanceNorm2d(input_dim)
        
#         self.relu_final = nn.LeakyReLU(negative_slope=0.01, inplace=True)
        
    def forward(self, x):
        x_new = self.conv1(x)
        x_new = self.norm1(x_new)
        x_new = self.relu1(x_new)
        x_new = self.conv2(x_new)
        x_new = self.norm2(x_new)
        out = x + x_new
#         out = self.relu_final(x_new)
        return out

In [10]:
class CycleGenerator(nn.Module):
    def __init__(self):
        super(CycleGenerator, self).__init__()
#         self.activations = nn.ModuleDict({
#         'relu', nn.ReLU(inplace=True),
#         'leaky_relu', nn.LeakyReLU(negative_slope=0.01, inplace=True),
#         'selu', nn.SELU(inplace=True),
#         'none', nn.Identity()})
        
        # do we need this many filter channels 
        # if we're doing a 1 channel image rather than 3 channel?
        
#         #encoder section
#         self.conv1 = nn.Conv2d(in_channels=1, out_channels=64,
#                                kernel_size=(7, 7), padding=0)
#         self.conv2 = nn.Conv2d(64, 128, (3, 3), padding=(1, 1), stride=2)
#         self.conv3 = nn.Conv2d(128, 256, (3, 3), padding=(1, 1), stride=2)
        
#         # in the transformer
#         self.conv4 = nn.Conv2d(1, 128, (3, 3), padding=(1, 1), stride=2)
#         https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/models/networks.py
        model = [nn.ReflectionPad2d(3),
                 nn.Conv2d(in_channels=1, out_channels=64,
                           kernel_size=7, padding=0,
                           bias=True),
                 nn.InstanceNorm2d(64),
                 nn.LeakyReLU(negative_slope=0.01, inplace=True)]
    
        #downsampling layers
        n_downsampling = 2
        for i in range(n_downsampling):
            mult = 2 ** i
            model += [nn.Conv2d(in_channels=64*mult, out_channels=64*mult*2,
                           kernel_size=3, stride=2, padding=1,
                           bias=True),
                      nn.InstanceNorm2d(64*mult*2),
                      nn.LeakyReLU(negative_slope=0.01, inplace=True)]
            
        # resnet blocks layer
        num_resnet_blocks = 6
        for i in range(num_resnet_blocks):
            model += [ResNetBlock(64*mult*2)]
            
        # upsampling layers
        for i in range(n_downsampling):
            mult = 2 ** (n_downsampling - i)
            model += [nn.ConvTranspose2d(in_channels=64*mult, out_channels=int(64*mult/2),
                           kernel_size=3, stride=2, padding=1,
                           bias=True),
                      nn.InstanceNorm2d(int(64*mult/2)),
                      nn.LeakyReLU(negative_slope=0.01, inplace=True)]
            
        self.model = nn.Sequential(*model)
        
            
        
    def forward(self, x):
        return model(x)
        

In [11]:
class CycleDiscriminator(nn.Module):
    def __init__(self):
        super(CycleDiscriminator, self).__init__()
        #https://github.com/aitorzip/PyTorch-CycleGAN/blob/master/models.py
        
        model = [nn.Conv2d(1, 64, kernel_size=4, stride=2, padding=1), nn.LeakyReLU(0.2, True)]
        n_layers = 3
        
        nf_mult = 1
        nf_mult_prev = 1
        for n in range(1, n_layers):
            nf_mult_prev = nf_mult
            nf_mult = min(2 ** n, 8) # 2^n
            model += [
                nn.Conv2d(64 * nf_mult_prev, 64 * nf_mult, stride=2, padding=1, bias=True),
                nn.InstanceNorm2d(64*nf_mult),
                nn.LeakyReLU(0.2, True)
            ]
        nf_mult_prev = nf_mult
        nf_mult = min(2 ** n, 8) # 2^n
        model += [
            nn.Conv2d(64 * nf_mult_prev, 64 * nf_mult, stride=1, padding=1, bias=True),
            nn.InstanceNorm2d(64*nf_mult),
            nn.LeakyReLU(0.2, True)
        ]
        
        model += [nn.Conv2d(64*nf_mult, 1, kernel_size=4, stride=1, padding=1)] # 1 channel prediction map
        self.model = nn.Sequential(*model)
        
    def forward(self, x):
        out = self.model(x)
        return out

In [15]:

# https://github.com/aitorzip/PyTorch-CycleGAN/blob/master/utils.py
class ImageBuffer():
    def __init__(self, max_size=50):
        assert max_size > 0
        self.max_size = max_size
        self.data = []
    
    def push_and_pop(self, data):
        to_return = []
        for elem in data.data:
            elem = torch.unsqueeze(elem, 0)
            if len(self.data) < self.max_size:
                self.data.append(elem)
                to_return.append(elem)
            else:
                # half chance to randomly pick a data pt from history, 
                # otherwise just pick the selected data pt and don't add to history 
                if random.uniform(0, 1) < 0.5:
                    idx = random.randint(0, self.max_size - 1)
                    to_return.append(self.data[i].clone())
                    self.data[i] = elem
                else:
                    to_return.append(elem)
        return torch.cat(to_return)
                    
                

In [47]:
class DocumentsDataset(Dataset):
    def __init__(self, dirty_dir, clean_dir, transform=None, aligned=False):
        super(DocumentsDataset, self).__init__()
        self.dirty_arr = glob.glob(os.path.join(dirty_dir, '*.png'))
        self.clean_arr = glob.glob(os.path.join(clean_dir, '*.png'))
        self.dirty_dir = dirty_dir
        self.clean_dir = clean_dir
        self.transform = transform
        self.aligned = aligned
    
    def __len__(self):
        # could be dirty arr or clean arr, shouldn't matter
        return max(len(self.dirty_arr), len(self.clean_arr))
    
    def __getitem__(self, index):
        """Generate one sample of data, based on dirty data"""
        
        # grab a random index for each. the modulus allows overflow, and makes it unaligned if overflow
        idx_dirty = index % len(self.dirty_arr)
        if self.aligned:
            idx_clean = index % len(self.clean_arr)
        else:
            idx_clean = random.randint(0, len(self.clean_arr) - 1)
        
        dirty_img_name = os.path.basename(self.dirty_arr[idx_dirty])
        clean_img_name = os.path.basename(self.clean_arr[idx_clean])
        dirty_path = os.path.join(self.dirty_dir, dirty_img_name)
        clean_path = os.path.join(self.clean_dir, clean_img_name)
        transformed_dirty = Image.open(dirty_path)
        transformed_clean = Image.open(clean_path)
        
        if self.transform:
            # notice how with each transform, they are each independent.
            # this allows the random crop and flips to be different with each img
            # because cyclegan is meant for unpaired it won't matter
            transformed_dirty = self.transform(transformed_dirty)
            transformed_clean = self.transform(transformed_clean)
            
        # because the image is single channel, we need to unsqueeze so it shows that single channel.
        transformed_dirty = torch.unsqueeze(transformed_dirty, dim=0)
        transformed_clean = torch.unsqueeze(transformed_clean, dim=0)
        
        return {'dirty': transformed_dirty, 'clean': transformed_clean}

In [43]:
arr = Image.open('./data/train/101.png')

In [45]:
np.array(arr, dtype=np.float32).shape

(420, 540)

In [23]:
test_arr = glob.glob(os.path.join('./data/train', '*.png'))
test_arr


['./data/train\\101.png',
 './data/train\\102.png',
 './data/train\\104.png',
 './data/train\\105.png',
 './data/train\\107.png',
 './data/train\\108.png',
 './data/train\\11.png',
 './data/train\\110.png',
 './data/train\\111.png',
 './data/train\\113.png',
 './data/train\\114.png',
 './data/train\\116.png',
 './data/train\\117.png',
 './data/train\\119.png',
 './data/train\\12.png',
 './data/train\\120.png',
 './data/train\\122.png',
 './data/train\\123.png',
 './data/train\\125.png',
 './data/train\\126.png',
 './data/train\\128.png',
 './data/train\\129.png',
 './data/train\\131.png',
 './data/train\\132.png',
 './data/train\\134.png',
 './data/train\\135.png',
 './data/train\\137.png',
 './data/train\\138.png',
 './data/train\\14.png',
 './data/train\\140.png',
 './data/train\\141.png',
 './data/train\\143.png',
 './data/train\\144.png',
 './data/train\\146.png',
 './data/train\\147.png',
 './data/train\\149.png',
 './data/train\\15.png',
 './data/train\\150.png',
 './data/train\\

In [24]:
os.path.abspath(test_arr[2])

'C:\\Users\\adaml\\Documents\\denoising-dirty-documents\\data\\train\\104.png'

In [46]:
trans = transforms.Compose([
    transforms.Resize((int(img_h*1.12),int(img_w*1.12)), Image.BICUBIC), # make it bigger so random crop is more random
    transforms.RandomCrop((img_h, img_w)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(0.5, 0.5) # mean, std of each channel. can provice a tuple for 3 dim images
])
training_set = DocumentsDataset(dirty_dir='./data/train/', clean_dir='./data/train_cleaned/', transform=trans)
training_generator = DataLoader(training_set, **params)

NameError: name 'img_h' is not defined

In [None]:
# TODO: unsqueeze the resulting image

In [17]:
def custom_lr(n_epochs, start_epoch, decay_epoch):
    '''
    returns a function that calcs learning rate for given epoch.
    n_epochs (int) -> number of planned epochs
    start_epoch -> the epoch number that the optimizer starts on
    decay_epoch -> the epoch number to start decaying at
    '''
    return lambda epoch: 1.0 - max(0, epoch + start_epoch - decay_epoch)/(n_epochs - decay_epoch)

In [14]:
for i in range(1, 4):
    print(i)

1
2
3


In [None]:
# hyperparams
n_epochs = 0
decay_epoch = 0
lr = 0
batch_size = 0
# if resuming change this
starting_epoch = 0



In [18]:
class CycleGAN(nn.Module):
    def __init__(self):
        '''
        init the cycle gan. add the loss functions
        add the models.
        add img buffer
        '''
        
        #start with models
        generator_clean = CycleGenerator() # dirty to clean
        discriminator_clean = CycleDiscriminator() # clean is fake/real
        
        generator_dirty = CycleGenerator() # clean to dirty
        discriminator_clean = CycleDiscriminator() # dirty is fake/real
        
        # turn on cuda
        if use_cuda:
            generator_clean.cuda()
            discriminator_clean.cuda()
            generator_dirty.cuda()
            discriminator_dirty.cuda()
        
        # init weights for stuff, apparently kaiming is better for conv nets?
        generator_clean.apply(nn.init.kaiming_uniform)
        discriminator_clean.apply(nn.init.kaiming_uniform)
        generator_dirty.apply(nn.init.kaiming_uniform)
        discriminator_dirty.apply(nn.init.kaiming_uniform)
        
        # loss fns
        # also can add identity loss mentioned, but it is mainly for color channels. it is l1 loss as well
        loss_gan = nn.MSELoss()
        loss_cycle = nn.L1Loss()
        
        # optimizers
        # use default recommended values for first and second momentums on gradient accumulation
        opt_generators = optim.Adam(itertools.chain(generator_clean.parameters(),
                                                   generator_dirty.parameters()),
                                   lr=lr, betas=(0.5, 0.999))
        opt_d_clean = optim.Adam(discriminator_clean.parameters(), lr=lr, betas=(0.5, 0.999))
        opt_d_dirty = optim.Adam(discriminator_dirty.parameters(), lr=lr, betas=(0.5, 0.999))
        
        scheduler_g = optim.lr_scheduler.LambdaLR(opt_generators, lr_lambda=custom_lr(n_epochs, starting_epoch, decay_epoch))
        scheduler_d_clean = optim.lr_scheduler.LambdaLR(opt_d_clean, lr_lambda=custom_lr(n_epochs, starting_epoch, decay_epoch))
        scheduler_d_dirty = optim.lr_scheduler.LambdaLR(opt_d_dirty, lr_lambda=custom_lr(n_epochs, starting_epoch, decay_epoch))
        
        Tensor = torch.cuda.FloatTensor if opt.cuda else torch.Tensor
        
        
        
        # image buffers for gans
        fake_img_buffer_clean = ImageBuffer()
        fake_img_buffer_dirty = ImageBuffer()
        
        
        
        
        