In [1]:
from math import log10
import matplotlib.pyplot as plt
import numpy as np

import pandas as pd
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from dataset.data_loader_YCbCr_resize import *
from utils.pytorch_ssim import *

from VDSR_model import Net

# Seed PyTorch's global RNG so repeated runs start from the same random state.
torch.manual_seed(1)

# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first `.to(device)` call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [21]:
# VDSR parameters
# ---------------
# Every tunable value for the run lives in this cell; the cells below read
# these names as globals.

# Data loading
batch_size: int = 10      # samples per mini-batch handed to the DataLoader
threads: int = 4          # DataLoader worker processes

# Optimisation schedule
epochs: int = 2           # number of passes over the training set
lr: float = 0.1           # initial learning rate
step_size: int = 10       # epochs between learning-rate decays (see train())
clip: float = 0.4         # max gradient norm passed to clip_grad_norm_

# Model / naming
upscale_factor: int = 4   # used when naming output files (e.g. checkpoints)

In [22]:
# Root directory that contains both image sets. It defaults to the location
# used when this notebook was written; set the VDSR_DATA_ROOT environment
# variable (e.g. VDSR_DATA_ROOT=../dataset) to run against a copy elsewhere
# without editing the notebook.
data_root = os.environ.get('VDSR_DATA_ROOT', '/media/angelo/DATEN/Datasets/Experiment_Masters')

# Network inputs and reference targets, respectively.
# NOTE(review): the directory names suggest 56 px inputs and 224 px references
# (matching upscale_factor = 4) -- confirm against DatasetSuperRes.
img_path_low = os.path.join(data_root, '300W-3D-crap-56', 'train')
img_path_ref = os.path.join(data_root, '300W-3D-low-res-224', 'train')

train_set = DatasetSuperRes(img_path_low, img_path_ref)
training_data_loader = DataLoader(dataset=train_set, num_workers=threads, batch_size=batch_size, shuffle=True)

In [23]:
print('===> Building model')

# Instantiate the network, then move its parameters to the selected device.
model = Net()
model = model.to(device)

# Squared error summed (not averaged) over every element of the batch, so the
# loss magnitude scales with batch size and image size.
criterion = nn.MSELoss(reduction='sum')

# SGD with momentum and L2 weight decay. The learning rate set here is only
# the starting value; train() overwrites it at the start of each epoch.
sgd_options = dict(lr=lr, momentum=0.9, weight_decay=1e-4)
optimizer = optim.SGD(model.parameters(), **sgd_options)

===> Building model


In [24]:
# Directories for metric tables and model checkpoints (relative to the
# notebook's working directory).
out_path = 'results/'
out_model_path = 'models/'

# exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(out_path, exist_ok=True)
os.makedirs(out_model_path, exist_ok=True)

# Per-epoch training metrics; train() appends one value to each list per epoch.
results = {'avg_loss': [], 'psnr': [], 'ssim': []}

In [25]:
def train(epoch):
    """Run one training epoch and append its metrics to ``results``.

    Reads the notebook globals ``model``, ``optimizer``, ``criterion``,
    ``training_data_loader``, ``device``, ``lr``, ``step_size``, ``clip``,
    ``epochs``, ``results``, ``out_path`` and ``upscale_factor``.

    Args:
        epoch: 1-based index of the epoch being run. It drives the
            learning-rate schedule and the checkpoint cadence.

    Raises:
        ValueError: if ``training_data_loader`` yields no batches.

    Recorded metrics (one entry per call):
        avg_loss: mean of ``criterion``'s per-batch loss over the epoch.
        psnr:     ``10 * log10(1 / MSE)`` where MSE is the per-element mean
                  squared error averaged over the epoch's batches. It is
                  computed independently of ``criterion`` so it stays valid
                  when the training loss uses ``reduction='sum'``.
                  NOTE(review): a peak value of 1 is assumed, i.e. images
                  scaled to [0, 1] -- confirm against the dataset class.
        ssim:     SSIM of the *last* batch of the epoch only.
    """
    num_batches = len(training_data_loader)
    if num_batches == 0:
        raise ValueError("training_data_loader is empty; nothing to train on")

    # Sets the learning rate to the initial LR decayed by 10 every
    # `step_size` epochs, for every parameter group of the optimizer.
    updated_lr = lr * (0.1 ** ((epoch - 1) // step_size))
    for param_group in optimizer.param_groups:
        param_group['lr'] = updated_lr

    epoch_loss = 0.0  # running sum of the training criterion
    epoch_mse = 0.0   # running sum of per-element MSE, used for PSNR only

    model.train()
    for iteration, batch in enumerate(training_data_loader, 1):
        input_, target = batch[0].to(device), batch[1].to(device)

        optimizer.zero_grad()
        upsampled_img = model(input_)
        loss = criterion(upsampled_img, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        batch_loss = loss.item()
        epoch_loss += batch_loss
        # Metric bookkeeping must not extend the autograd graph.
        with torch.no_grad():
            epoch_mse += nn.functional.mse_loss(upsampled_img, target).item()

        print("===> Epoch[{}]({}/{}): Loss: {:.4f}".format(epoch, iteration, num_batches, batch_loss))

    avg_loss_batch = epoch_loss / num_batches
    avg_mse = epoch_mse / num_batches
    # A perfect reconstruction has zero error; report infinite PSNR instead
    # of dividing by zero.
    psnr_epoch = 10 * log10(1 / avg_mse) if avg_mse > 0 else float('inf')
    with torch.no_grad():
        ssim_epoch = ssim(upsampled_img, target).item()

    results['psnr'].append(psnr_epoch)
    results['ssim'].append(ssim_epoch)
    results['avg_loss'].append(avg_loss_batch)

    print("===> Epoch {} Complete: Avg. Loss: {:.4f} / PSNR: {:.4f} / SSIM {:.4f}".format(epoch,
                                                                                          avg_loss_batch,
                                                                                          psnr_epoch,
                                                                                          ssim_epoch))

    # Persist metrics and a checkpoint halfway through and at the end of the
    # run. max(1, ...) keeps the modulo valid when epochs < 2.
    checkpoint_every = max(1, epochs // 2)
    if epoch % checkpoint_every == 0:
        data_frame = pd.DataFrame(
            data={'Avg. Loss': results['avg_loss'], 'PSNR': results['psnr'], 'SSIM': results['ssim']},
            index=range(1, epoch + 1))
        data_frame.to_csv(out_path + 'VDSR_x' + str(upscale_factor) + '_train_results.csv', index_label='Epoch')

        checkpoint(epoch)
    
def checkpoint(epoch):
    """Write the current model to ``out_model_path`` and report where it went.

    The file is named ``VDSR_x<upscale_factor>_epoch_<epoch>.pth``. The whole
    module object is pickled (not just its ``state_dict``), so loading it
    later requires the model's class to be importable.

    Args:
        epoch: epoch number embedded in the checkpoint file name.
    """
    # Make sure the target directory exists even if the setup cell that
    # creates it was skipped.
    os.makedirs(out_model_path, exist_ok=True)
    path = os.path.join(out_model_path, "VDSR_x{}_epoch_{}.pth".format(upscale_factor, epoch))
    # Actually persist the model; without this call the message below would
    # report a checkpoint that was never written.
    torch.save(model, path)
    print("Checkpoint saved to {}".format(path))

In [26]:
# Epochs are numbered from 1 so that train()'s learning-rate schedule and
# checkpoint cadence line up with the human-readable epoch count.
epoch_numbers = range(1, epochs + 1)
for epoch in epoch_numbers:
    train(epoch)

===> Epoch[1](1/310): Loss: 416199.4688
===> Epoch[1](2/310): Loss: 270663.4062
===> Epoch[1](3/310): Loss: 132235.8438
===> Epoch[1](4/310): Loss: 77049.5547
===> Epoch[1](5/310): Loss: 64108.6602
===> Epoch[1](6/310): Loss: 59953.4258
===> Epoch[1](7/310): Loss: 48483.0547
===> Epoch[1](8/310): Loss: 57674.8984
===> Epoch[1](9/310): Loss: 48636.2656
===> Epoch[1](10/310): Loss: 77854.6406
===> Epoch[1](11/310): Loss: 95081.3984
===> Epoch[1](12/310): Loss: 53693.8594
===> Epoch[1](13/310): Loss: 45096.7109
===> Epoch[1](14/310): Loss: 41135.4219
===> Epoch[1](15/310): Loss: 36764.3203
===> Epoch[1](16/310): Loss: 51968.3516
===> Epoch[1](17/310): Loss: 46609.6680
===> Epoch[1](18/310): Loss: 47453.5234
===> Epoch[1](19/310): Loss: 69859.1562
===> Epoch[1](20/310): Loss: 88587.9297
===> Epoch[1](21/310): Loss: 54836.4688
===> Epoch[1](22/310): Loss: 51607.6953
===> Epoch[1](23/310): Loss: 46296.2148
===> Epoch[1](24/310): Loss: 53541.6094
===> Epoch[1](25/310): Loss: 47617.6992
===> E

Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in 

KeyboardInterrupt: 