In [1]:
!git clone https://github.com/thedon58/DS5660_project.git
!pip install scikit-image

Cloning into 'DS5660_project'...
remote: Enumerating objects: 14914, done.[K
remote: Counting objects: 100% (43/43), done.[K
remote: Compressing objects: 100% (40/40), done.[K
remote: Total 14914 (delta 11), reused 7 (delta 0), pack-reused 14871[K
Receiving objects: 100% (14914/14914), 357.71 MiB | 58.08 MiB/s, done.
Resolving deltas: 100% (15/15), done.
Updating files: 100% (14579/14579), done.
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting scikit-image
  Downloading scikit_image-0.19.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.0 MB)
[K     |████████████████████████████████| 14.0 MB 4.3 MB/s eta 0:00:01
[?25hCollecting PyWavelets>=1.1.1
  Downloading PyWavelets-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.9 MB)
[K     |████████████████████████████████| 6.9 MB 135.1 MB/s eta 0:00:01
Collecting tifffile>=2019.7.26
  Downloading tifffile-2022.10.10-py3-none-any.whl (210 kB)
[K     |██████████████████████

In [2]:
# For plotting
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# For conversion
from skimage.color import lab2rgb, rgb2lab, rgb2gray
from skimage import io
# For everything
import torch
import torch.nn as nn
import torch.nn.functional as F
# For our model
import torchvision.models as models
from torchvision import datasets, transforms
# For utilities
import os, shutil, time

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
class GrayscaleImageFolder(datasets.ImageFolder):
  """ImageFolder variant that yields colorization training triples.

  Each item is ``(gray, ab, target)`` where:
    * ``gray``   -- float tensor with a leading channel axis, from ``rgb2gray``
    * ``ab``     -- float tensor holding channels 1..2 of the Lab image,
                    rescaled with ``(x + 128) / 255`` and moved channel-first
    * ``target`` -- the class index from the folder structure (after
                    ``target_transform`` if one is set)
  """
  def __getitem__(self, index):
    path, target = self.imgs[index]
    img = self.loader(path)
    # Only the transform itself is optional. Previously the whole conversion sat
    # inside this `if`, so a dataset built without a transform raised
    # UnboundLocalError at the return statement.
    if self.transform is not None:
      img = self.transform(img)
    # NOTE(review): assumes the (transformed) image converts to an HxWx3 RGB
    # array, e.g. a PIL image -- a transform ending in ToTensor would yield CHW.
    img_rgb = np.asarray(img)
    img_lab = rgb2lab(img_rgb)
    img_lab = (img_lab + 128) / 255
    # Keep only the two chroma channels and move them to channel-first layout.
    img_ab = img_lab[:, :, 1:3]
    img_ab = torch.from_numpy(img_ab.transpose((2, 0, 1))).float()
    # Grayscale network input with an explicit single channel axis.
    img_gray = rgb2gray(img_rgb)
    img_gray = torch.from_numpy(img_gray).unsqueeze(0).float()
    if self.target_transform is not None:
      target = self.target_transform(target)
    return img_gray, img_ab, target

In [4]:
# True when a CUDA device is available. Read as a module-level flag by
# train(), validate() and the cell that moves the model/criterion to the GPU.
use_gpu = torch.cuda.is_available()

In [5]:
# Training data: random crop to 224x224 plus random horizontal flip, shuffled.
train_transforms = transforms.Compose([
  transforms.RandomResizedCrop(224),
  transforms.RandomHorizontalFlip(),
])
train_imagefolder = GrayscaleImageFolder(
  'DS5660_project/sport_photos/train',
  transform=train_transforms,
)
train_loader = torch.utils.data.DataLoader(
  train_imagefolder,
  batch_size=64,
  shuffle=True,
)

# Validation data: deterministic resize + center crop, fixed order.
val_transforms = transforms.Compose([
  transforms.Resize(256),
  transforms.CenterCrop(224),
])
val_imagefolder = GrayscaleImageFolder(
  'DS5660_project/sport_photos/valid',
  transform=val_transforms,
)
val_loader = torch.utils.data.DataLoader(
  val_imagefolder,
  batch_size=64,
  shuffle=False,
)

In [6]:
class ColorizationNet(nn.Module):
  """Predicts two color channels from a single-channel grayscale image.

  The encoder is the first six child modules of a ResNet-18 whose first
  convolution has been collapsed to one input channel; the decoder is a stack
  of 3x3 convolutions with three 2x upsampling stages ending in 2 channels.
  """
  def __init__(self, input_size=128):
    # `input_size` is accepted for interface compatibility but is not used.
    super().__init__()
    midlevel_channels = 128

    # --- Encoder: truncated ResNet-18 adapted to grayscale input ---
    backbone = models.resnet18(num_classes=365)
    # Sum the first conv's kernels over the input-channel axis so the layer
    # accepts a single-channel image.
    gray_kernel = backbone.conv1.weight.sum(dim=1).unsqueeze(1)
    backbone.conv1.weight = nn.Parameter(gray_kernel)
    encoder_layers = list(backbone.children())[:6]
    self.midlevel_resnet = nn.Sequential(*encoder_layers)

    # --- Decoder: conv / batch-norm / ReLU blocks with 2x upsampling ---
    def conv3x3(in_channels, out_channels):
      return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)

    # Layers are created in the same order as they appear in the Sequential so
    # parameter initialization and state_dict indices are unchanged.
    decoder_layers = [
      conv3x3(midlevel_channels, 128),
      nn.BatchNorm2d(128),
      nn.ReLU(),
      nn.Upsample(scale_factor=2),
      conv3x3(128, 64),
      nn.BatchNorm2d(64),
      nn.ReLU(),
      conv3x3(64, 64),
      nn.BatchNorm2d(64),
      nn.ReLU(),
      nn.Upsample(scale_factor=2),
      conv3x3(64, 32),
      nn.BatchNorm2d(32),
      nn.ReLU(),
      conv3x3(32, 2),
      nn.Upsample(scale_factor=2),
    ]
    self.upsample = nn.Sequential(*decoder_layers)

  def forward(self, input):
    """Encode the grayscale batch, then decode it into two output channels."""
    return self.upsample(self.midlevel_resnet(input))

In [7]:
# Instantiate the colorization network with its default constructor arguments.
model = ColorizationNet()

In [8]:
# Mean squared error; train()/validate() apply it to predicted vs. target ab channels.
criterion = nn.MSELoss()
# Adam over every model parameter, learning rate 1e-2, no weight decay.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=0.0)


In [9]:
class AverageMeter(object):
  """Tracks the latest value and the weighted running average of a metric.

  A handy class from the PyTorch ImageNet tutorial.

  Attributes:
    val:   the most recent value passed to ``update``
    sum:   weighted sum of all values seen since the last ``reset``
    count: total weight (e.g. number of samples) seen since the last ``reset``
    avg:   ``sum / count``, or 0 while ``count`` is 0
  """
  def __init__(self):
    self.reset()

  def reset(self):
    """Clear all statistics back to zero."""
    self.val, self.avg, self.sum, self.count = 0, 0, 0, 0

  def update(self, val, n=1):
    """Record ``val`` as the newest value, weighted by ``n`` observations."""
    self.val = val
    self.sum += val * n
    self.count += n
    # Guard against a zero total weight (e.g. update(val, n=0) on a fresh
    # meter), which previously raised ZeroDivisionError.
    self.avg = self.sum / self.count if self.count else 0

def to_rgb(grayscale_input, ab_input, save_path=None, save_name=None):
  """Rebuild an RGB image from grayscale + ab tensors and optionally save it.

  Args:
    grayscale_input: CPU tensor holding the grayscale channel, channel-first.
    ab_input: CPU tensor holding the two ab channels, channel-first, in the
      ``(x + 128) / 255`` scaling used by GrayscaleImageFolder.
    save_path: dict of the form {'grayscale': '/path/', 'colorized': '/path/'}.
      Each value is concatenated directly with ``save_name``, so it should end
      with a path separator.
    save_name: file name used for both saved images.

  Nothing is written unless both ``save_path`` and ``save_name`` are given.
  The function returns None.
  """
  # No plt.clf() here: plt.imsave writes the array directly and does not use
  # the current pyplot figure, while clf() created a stray empty figure on
  # every call.
  # torch.cat allocates a new tensor, so the in-place rescaling below does not
  # modify the caller's inputs.
  lab_image = torch.cat((grayscale_input, ab_input), 0).numpy() # combine channels
  lab_image = lab_image.transpose((1, 2, 0))  # channel-first -> channel-last
  # Rescale for lab2rgb: channel 0 is multiplied by 100, channels 1..2 undo the
  # (x + 128) / 255 normalization.
  lab_image[:, :, 0:1] = lab_image[:, :, 0:1] * 100
  lab_image[:, :, 1:3] = lab_image[:, :, 1:3] * 255 - 128
  color_image = lab2rgb(lab_image.astype(np.float64))
  grayscale_input = grayscale_input.squeeze().numpy()
  if save_path is not None and save_name is not None:
    plt.imsave(arr=grayscale_input, fname='{}{}'.format(save_path['grayscale'], save_name), cmap='gray')
    plt.imsave(arr=color_image, fname='{}{}'.format(save_path['colorized'], save_name))

In [10]:
def validate(val_loader, model, criterion, save_images, epoch):
  """Evaluate ``model`` on ``val_loader`` and return the average loss.

  Args:
    val_loader: iterable yielding ``(input_gray, input_ab, target)`` batches.
    model: network mapping a grayscale batch to predicted ab channels.
    criterion: loss comparing predicted and ground-truth ab channels.
    save_images: when truthy, up to 10 images from the first batch are written
      under 'outputs/gray/' and 'outputs/color/' via ``to_rgb``.
    epoch: epoch index, used only in the saved file names.

  Returns:
    The sample-weighted average loss over the whole loader.

  The model is left in eval mode on return.
  """
  model.eval()

  # Prepare value counters and timers
  batch_time, data_time, losses = AverageMeter(), AverageMeter(), AverageMeter()

  # Output locations are the same for every image, so build the dict once.
  save_path = {'grayscale': 'outputs/gray/', 'colorized': 'outputs/color/'}

  end = time.time()
  already_saved_images = False
  # Disable gradient tracking here rather than relying on every caller to wrap
  # the call in torch.no_grad(); nesting inside a caller's no_grad is harmless.
  with torch.no_grad():
    for i, (input_gray, input_ab, target) in enumerate(val_loader):
      data_time.update(time.time() - end)

      # Use GPU
      if use_gpu: input_gray, input_ab, target = input_gray.cuda(), input_ab.cuda(), target.cuda()

      # Run model and record loss
      output_ab = model(input_gray)
      loss = criterion(output_ab, input_ab)
      losses.update(loss.item(), input_gray.size(0))

      # Save images to file (first batch only)
      if save_images and not already_saved_images:
        already_saved_images = True
        for j in range(min(len(output_ab), 10)): # save at most 10 images
          save_name = 'img-{}-epoch-{}.jpg'.format(i * val_loader.batch_size + j, epoch)
          to_rgb(input_gray[j].cpu(), ab_input=output_ab[j].detach().cpu(), save_path=save_path, save_name=save_name)

      # Record time to do forward passes and save images
      batch_time.update(time.time() - end)
      end = time.time()

      # Print validation loss -- in the code below, val refers to both value and validation
      if i % 25 == 0:
        print('Validate: [{0}/{1}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
               i, len(val_loader), batch_time=batch_time, loss=losses))

  print('Finished validation.')
  return losses.avg


In [11]:
def train(train_loader, model, criterion, optimizer, epoch):
  """Run one optimization pass over ``train_loader``, logging every 25 batches.

  Each batch is ``(input_gray, input_ab, target)``; the model predicts ab
  channels from ``input_gray`` and ``criterion`` scores them against
  ``input_ab``. Returns None.
  """
  print('Starting training epoch {}'.format(epoch))
  model.train()

  # Running statistics: full step time, data-loading time, and loss
  batch_time = AverageMeter()
  data_time = AverageMeter()
  losses = AverageMeter()

  log_template = ('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t')

  tick = time.time()
  for step, batch in enumerate(train_loader):
    input_gray, input_ab, target = batch

    # Move the batch to the GPU when one is available
    if use_gpu:
      input_gray = input_gray.cuda()
      input_ab = input_ab.cuda()
      target = target.cuda()

    # Time spent fetching (and transferring) this batch
    data_time.update(time.time() - tick)

    # Forward pass and loss bookkeeping
    output_ab = model(input_gray)
    loss = criterion(output_ab, input_ab)
    losses.update(loss.item(), input_gray.size(0))

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Time for the complete step, then restart the clock
    batch_time.update(time.time() - tick)
    tick = time.time()

    # Report the running loss every 25 steps ("val" here means value)
    if step % 25 != 0:
      continue
    print(log_template.format(
      epoch, step, len(train_loader),
      batch_time=batch_time, data_time=data_time, loss=losses))

  print('Finished training epoch {}'.format(epoch))

In [12]:
# Move model and loss function to GPU (no-op when CUDA is unavailable).
# NOTE(review): the optimizer was built from model.parameters() in an earlier
# cell, before this move -- presumably fine because .cuda() moves parameters in
# place, but confirm against the PyTorch optimizer docs.
if use_gpu: 
  criterion = criterion.cuda()
  model = model.cuda()

In [13]:
# Create the output and checkpoint directories (already-existing ones are fine)
for _dir in ('outputs/color', 'outputs/gray', 'checkpoints'):
  os.makedirs(_dir, exist_ok=True)

# Run parameters
save_images = True   # write sample colorizations during validation
best_losses = 1e10   # lowest validation loss seen so far (starts very high)
epochs = 100         # number of training epochs

In [14]:
# Train model: one training pass followed by one validation pass per epoch.
for epoch in range(epochs):
  # Train for one epoch, then validate
  train(train_loader, model, criterion, optimizer, epoch)
  # Validation needs no gradients
  with torch.no_grad():
    losses = validate(val_loader, model, criterion, save_images, epoch)
  # Save a checkpoint whenever the validation loss improves on the best so far.
  # Each improvement is written to a new file (the name carries epoch+1 and the
  # loss), so earlier checkpoints are kept rather than replaced.
  if losses < best_losses:
    best_losses = losses
    torch.save(model.state_dict(), 'checkpoints/model-epoch-{}-losses-{:.3f}.pth'.format(epoch+1,losses))

    

Starting training epoch 0
Epoch: [0][0/213]	Time 2.828 (2.828)	Data 0.834 (0.834)	Loss 0.4222 (0.4222)	
Epoch: [0][25/213]	Time 0.711 (0.780)	Data 0.691 (0.682)	Loss 0.0296 (0.2626)	
Epoch: [0][50/213]	Time 0.619 (0.727)	Data 0.598 (0.666)	Loss 0.0066 (0.1395)	
Epoch: [0][75/213]	Time 0.630 (0.702)	Data 0.608 (0.655)	Loss 0.0046 (0.0953)	
Epoch: [0][100/213]	Time 0.659 (0.687)	Data 0.638 (0.646)	Loss 0.0049 (0.0729)	
Epoch: [0][125/213]	Time 0.603 (0.672)	Data 0.583 (0.635)	Loss 0.0043 (0.0593)	
Epoch: [0][150/213]	Time 0.598 (0.663)	Data 0.578 (0.628)	Loss 0.0044 (0.0502)	
Epoch: [0][175/213]	Time 0.584 (0.654)	Data 0.563 (0.621)	Loss 0.0038 (0.0437)	
Epoch: [0][200/213]	Time 0.598 (0.648)	Data 0.576 (0.617)	Loss 0.0046 (0.0388)	
Finished training epoch 0
Validate: [0/8]	Time 0.740 (0.740)	Loss 0.0048 (0.0048)	
Finished validation.
Starting training epoch 1
Epoch: [1][0/213]	Time 0.600 (0.600)	Data 0.572 (0.572)	Loss 0.0037 (0.0037)	
Epoch: [1][25/213]	Time 0.598 (0.592)	Data 0.576 (0

  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)


Validate: [0/8]	Time 0.930 (0.930)	Loss 0.0048 (0.0048)	
Finished validation.
Starting training epoch 4
Epoch: [4][0/213]	Time 0.806 (0.806)	Data 0.784 (0.784)	Loss 0.0042 (0.0042)	
Epoch: [4][25/213]	Time 0.579 (0.718)	Data 0.558 (0.697)	Loss 0.0042 (0.0043)	
Epoch: [4][50/213]	Time 0.575 (0.652)	Data 0.554 (0.630)	Loss 0.0039 (0.0041)	
Epoch: [4][75/213]	Time 0.580 (0.628)	Data 0.559 (0.607)	Loss 0.0046 (0.0041)	
Epoch: [4][100/213]	Time 0.587 (0.616)	Data 0.566 (0.595)	Loss 0.0045 (0.0042)	
Epoch: [4][125/213]	Time 0.588 (0.630)	Data 0.566 (0.608)	Loss 0.0042 (0.0042)	
Epoch: [4][150/213]	Time 0.596 (0.624)	Data 0.574 (0.602)	Loss 0.0044 (0.0042)	
Epoch: [4][175/213]	Time 0.594 (0.619)	Data 0.572 (0.598)	Loss 0.0034 (0.0042)	
Epoch: [4][200/213]	Time 0.609 (0.616)	Data 0.588 (0.595)	Loss 0.0035 (0.0042)	
Finished training epoch 4
Validate: [0/8]	Time 0.754 (0.754)	Loss 0.0042 (0.0042)	
Finished validation.
Starting training epoch 5
Epoch: [5][0/213]	Time 0.602 (0.602)	Data 0.582 (0.

<Figure size 432x288 with 0 Axes>