In [4]:
# Load all the libraries required

import os
import cv2
import sys
import time
import glob
import shutil
import pickle
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import tqdm.notebook as tq
from tqdm import trange
import matplotlib.pyplot as plt
from torch.nn.utils import clip_grad_value_
from sklearn.model_selection import train_test_split
# Fix the PyTorch RNG seed so that weight initialisation is repeatable between runs.
torch.manual_seed(0)
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [6]:
# Functions and Modules, utilities and DataLoaders
# DataLoader is for loading training dataset
# Testloader is used for loading test files, while predicting or denoising individual files

def printDiagram(model, data, location):
    """Draw the computation graph of ``model`` and save it to ``location``.

    Args:
        model: network handed to ``hiddenlayer.build_graph``.
        data: example input handed to ``hiddenlayer.build_graph`` alongside the model.
        location: path passed to ``graph.save``.

    NOTE(review): the graph is always saved with ``format='png'``, whatever
    extension ``location`` carries - confirm the resulting file name.
    """
    # hiddenlayer is only needed for this helper, so it is imported on demand.
    import hiddenlayer as hl

    # Strip 'Constant' nodes so the rendered graph is less cluttered.
    pruneConstants = hl.transforms.Prune('Constant')
    diagram = hl.build_graph(model, data, transforms=[pruneConstants])
    diagram.theme = hl.graph.THEMES['blue'].copy()
    diagram.save(location, format='png')

def updateColor(image):
    """Return ``image`` converted from OpenCV's BGR channel order to RGB."""
    rgbImage = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return rgbImage

def reformatTensor2Image(tensor):
    """Turn a 3-D tensor into a NumPy array with its first axis moved last.

    The axes are reordered (0, 1, 2) -> (1, 2, 0), i.e. a channels-first image
    becomes channels-last, and the result is moved to the CPU and detached
    from the autograd graph before conversion.
    """
    channelsLast = tensor.permute(1, 2, 0)
    return channelsLast.cpu().detach().numpy()

class DataLoader():
    """In-memory store of paired input / ground-truth video frames for training.

    ``cacheX[v][t]`` and ``cacheY[v][t]`` hold frame ``t`` of video ``v``
    (input and ground truth respectively). Batches are served as temporal
    windows of ``2 * WINDOW_RADIUS + 1`` input frames centred on one target
    frame.
    """

    WINDOW_RADIUS = 5         # frames taken on each side of the target frame
    FRAME_SIZE = (640, 360)   # (width, height) handed to cv2.resize
    FRAMES_PER_VIDEO = 100    # only videos with exactly this many frames are loaded

    def __init__(self, device):
        """Create an empty loader whose batches are placed on ``device``."""
        self.device = device
        self.cacheX = []
        self.cacheY = []
        self.cacheInfo = ''

    def _readFrames(self, pattern):
        """Read, resize and colour-convert every image matching ``pattern``.

        glob returns paths in arbitrary order, so they are sorted by name: this
        keeps frames in temporal order and keeps input and ground-truth lists
        aligned frame by frame (assumes zero-padded, same-named files in both
        folders - TODO confirm for new datasets).
        """
        return [
            updateColor(cv2.resize(cv2.imread(path), self.FRAME_SIZE))
            for path in sorted(glob.glob(pattern))
        ]

    def loadVideo(self, dataDirectory, dataInfo, cached=0):
        """Fill the frame caches, either from pickles or from the image folders.

        Args:
            dataDirectory: prefix prepended to each ``dirname`` entry.
            dataInfo: DataFrame with ``frames`` and ``dirname`` columns.
            cached: when 1, load ``x.pickle`` / ``y.pickle`` from the working
                directory instead of reading images.
        """
        if cached == 1:
            # NOTE(security): pickle.load can execute arbitrary code - only load
            # cache files that you created yourself.
            with open('x.pickle', 'rb') as fp:
                self.cacheX = pickle.load(fp)
            with open('y.pickle', 'rb') as fp:
                self.cacheY = pickle.load(fp)
            return

        for i in tq.tqdm(dataInfo.index):
            if dataInfo['frames'][i] != self.FRAMES_PER_VIDEO:
                continue
            location = dataDirectory + dataInfo['dirname'][i]
            self.cacheX.append(self._readFrames(location + '/input/*.jpg'))
            self.cacheY.append(self._readFrames(location + '/GT/*.jpg'))

    def loadUnit(self, index, offset):
        """Return the input window around frame ``offset`` of video ``index``.

        Returns:
            (X, Y): ``X`` is a list of ``2 * WINDOW_RADIUS + 1`` input frames;
            near either end of the video the edge frame is repeated to keep the
            length constant. ``Y`` is the ground-truth frame at ``offset``.
        """
        frames = self.cacheX[index]
        radius = self.WINDOW_RADIUS
        lastFrame = len(frames) - 1

        lb = max(0, offset - radius)
        rb = min(offset + radius, lastFrame)
        padleft = max(0, radius - offset)
        padright = max(0, radius - (lastFrame - offset))

        X = [frames[lb]] * padleft + frames[lb: rb + 1] + [frames[rb]] * padright
        Y = self.cacheY[index][offset]
        return X, Y

    def loadBatch(self, xrange, offset):
        """Build one batch from videos ``xrange[0] .. xrange[1] - 1`` at ``offset``.

        Pixel values are scaled by 1/255 and moved to ``self.device``.

        Returns:
            (tensorX, tensorY): with frames stored as (height, width, channels)
            arrays, as ``loadVideo`` produces, ``tensorX`` is
            (batch, channels, window, height, width) and ``tensorY`` is
            (batch, channels, height, width).
        """
        X, Y = [], []
        for i in range(xrange[0], xrange[1]):
            tempX, tempY = self.loadUnit(i, offset)
            X.append(tempX)
            Y.append(tempY)
        X = np.array(X).astype('float32') / 255
        Y = np.array(Y).astype('float32') / 255
        # Use the device given to the constructor rather than a notebook global.
        tensorX = torch.tensor(X).to(self.device)
        tensorY = torch.tensor(Y).to(self.device)
        tensorX = tensorX.permute((0, 4, 1, 2, 3))
        tensorY = tensorY.permute((0, 3, 1, 2))
        return tensorX, tensorY
    
class TestLoader():
    """Loads every ``*.jpg`` frame in one folder and serves inference windows."""

    def __init__(self, location, device):
        """Read, resize to 640x360 and RGB-convert all frames under ``location``.

        glob returns paths in arbitrary order, so they are sorted by name to
        keep the frames in temporal order (assumes zero-padded file names -
        TODO confirm for inputs produced by other tools).
        """
        self.device = device
        self.cacheInfo = ''
        self.cacheX = [
            cv2.cvtColor(cv2.resize(cv2.imread(path), (640, 360)), cv2.COLOR_BGR2RGB)
            for path in sorted(glob.glob(location + '/*.jpg'))
        ]
        self.counter = len(self.cacheX)

    def getCountOfFrames(self):
        """Return the number of frames that were loaded."""
        return self.counter

    def loadUnit(self, offset):
        """Return the 11-frame window centred on frame ``offset`` as a tensor.

        Near either end of the clip the edge frame is repeated so the window
        always holds 11 frames. Pixel values are scaled by 1/255.

        Returns:
            ``False`` when ``offset`` is outside ``[0, counter)``; otherwise a
            tensor on ``self.device`` with a leading batch axis of size 1. With
            frames stored as (height, width, channels) arrays the shape is
            (1, channels, 11, height, width).
        """
        if offset >= self.counter or offset < 0:
            return False
        lastFrame = len(self.cacheX) - 1
        lb = max(0, offset - 5)
        rb = min(offset + 5, lastFrame)
        padleft = max(0, 5 - offset)
        padright = max(0, 5 - (lastFrame - offset))

        X = [self.cacheX[lb]] * padleft + self.cacheX[lb: rb + 1] + [self.cacheX[rb]] * padright
        X = np.array(X).astype('float32') / 255

        # Use the device given to the constructor rather than a notebook global.
        tensorX = torch.tensor(X).to(self.device)
        tensorX = tensorX.permute((3, 0, 1, 2))
        return torch.unsqueeze(tensorX, 0)
    

In [8]:
#  Neural Network Architecture

class AutoEncoder4(nn.Module):
    """Four-stage 2-D convolutional encoder/decoder with additive skip connections.

    The encoder applies conv + batch-norm + ReLU four times, max-pooling by
    (2, 2) after stage one and by (3, 2) after stage two. The decoder mirrors
    it with transposed convolutions, adds the matching encoder activation
    before each ReLU, and upsamples by the same factors. The input itself is
    added to the last decoder stage, so the output has the input's shape.

    For the skip additions to line up, the input height must be divisible by 6
    and the width by 4.

    Args:
        channels: five channel counts; ``channels[0]`` is the input/output
            width and ``channels[1:]`` the widths of the four encoder stages.
    """

    def __init__(self, channels=(7, 8, 16, 24, 30)):
        super(AutoEncoder4, self).__init__()
        self.mpool = nn.MaxPool2d((2, 2))
        self.upsamp = nn.Upsample(scale_factor=(2, 2))
        self.mpoolodd = nn.MaxPool2d((3, 2))
        self.upsampodd = nn.Upsample(scale_factor=(3, 2))

        # self.sigmoid is not used by forward(); it is kept so the attribute
        # remains available to existing code.
        self.sigmoid, self.relu = nn.Sigmoid(), nn.ReLU()

        # Layer names and construction order are kept unchanged so saved
        # checkpoints and seeded initialisation behave exactly as before.
        self.conv1 = nn.Conv2d(in_channels=channels[0], out_channels=channels[1], kernel_size=(3, 3), padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=channels[1])
        self.conv2 = nn.Conv2d(in_channels=channels[1], out_channels=channels[2], kernel_size=(3, 3), padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=channels[2])
        self.conv3 = nn.Conv2d(in_channels=channels[2], out_channels=channels[3], kernel_size=(3, 3), padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=channels[3])
        self.conv4 = nn.Conv2d(in_channels=channels[3], out_channels=channels[4], kernel_size=(3, 3), padding=1)
        self.bn4 = nn.BatchNorm2d(num_features=channels[4])

        self.deconv4 = nn.ConvTranspose2d(in_channels=channels[4], out_channels=channels[3], kernel_size=(3, 3), padding=1)
        self.rbn4 = nn.BatchNorm2d(num_features=channels[3])
        self.deconv3 = nn.ConvTranspose2d(in_channels=channels[3], out_channels=channels[2], kernel_size=(3, 3), padding=1)
        self.rbn3 = nn.BatchNorm2d(num_features=channels[2])
        self.deconv2 = nn.ConvTranspose2d(in_channels=channels[2], out_channels=channels[1], kernel_size=(3, 3), padding=1)
        self.rbn2 = nn.BatchNorm2d(num_features=channels[1])
        self.deconv1 = nn.ConvTranspose2d(in_channels=channels[1], out_channels=channels[0], kernel_size=(3, 3), padding=1)
        self.rbn1 = nn.BatchNorm2d(num_features=channels[0])

        # Xavier-initialise every (transposed) convolution weight.
        for layer in (self.conv1, self.conv2, self.conv3, self.conv4,
                      self.deconv1, self.deconv2, self.deconv3, self.deconv4):
            torch.nn.init.xavier_uniform_(layer.weight)

    def forward(self, in_batch):
        """Encode and decode ``in_batch``; the result has the same shape."""
        # Encoder
        encoded = self.relu(self.bn1(self.conv1(in_batch)))

        encoded2 = self.mpool(encoded)
        encoded2 = self.relu(self.bn2(self.conv2(encoded2)))

        encoded3 = self.mpoolodd(encoded2)
        encoded3 = self.relu(self.bn3(self.conv3(encoded3)))

        encoded4 = self.relu(self.bn4(self.conv4(encoded3)))

        # Decoder: each stage adds the encoder activation of matching size.
        decoded4 = self.relu(self.rbn4(self.deconv4(encoded4)) + encoded3)
        decoded4 = self.upsampodd(decoded4)

        decoded3 = self.relu(self.rbn3(self.deconv3(decoded4)) + encoded2)
        decoded3 = self.upsamp(decoded3)

        # A second ReLU used to follow this line; ReLU is idempotent, so
        # dropping it leaves the output unchanged.
        decoded2 = self.relu(self.rbn2(self.deconv2(decoded3)) + encoded)

        return self.relu(self.rbn1(self.deconv1(decoded2)) + in_batch)

class DenoiserStackConv(nn.Module):
    """Video denoiser: a 3-D convolutional front end around an ``AutoEncoder4`` core.

    Two Conv3d + MaxPool3d stages (kernel 3 with no padding on the frame axis,
    pooling by 2 on every axis) shrink the frame axis and quarter the spatial
    size. With 11 input frames the frame axis shrinks 11 -> 9 -> 4 -> 2 -> 1,
    which leaves one 8-channel feature map per clip for the 2-D autoencoder.
    The result is upsampled twice, and the centre-frame features of each 3-D
    stage are added back as skip connections. The output goes through a
    sigmoid.

    Expected input: ``(batch, 3, 11, H, W)``. For the skip additions to line up
    ``H`` must be divisible by 24 and ``W`` by 16 (360 x 640 satisfies both).

    Args:
        channels: not used by this class; accepted only so existing calls that
            pass it keep working.
    """

    def __init__(self, channels=(7, 8, 16, 24, 30)):
        super(DenoiserStackConv, self).__init__()

        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()
        self.mpool3d = nn.MaxPool3d((2, 2, 2))

        # Not used by forward(); kept so the attributes remain available.
        self.globalmpool1 = nn.MaxPool3d((11, 1, 1))
        self.globalmpool2 = nn.MaxPool3d((4, 1, 1))

        self.upsample = nn.Upsample(scale_factor=(2, 2))

        self.conv3d1 = nn.Conv3d(3, 6, (3, 3, 3), padding=(0, 1, 1))
        self.conv3d2 = nn.Conv3d(6, 8, (3, 3, 3), padding=(0, 1, 1))

        self.parNet = AutoEncoder4([8, 16, 30, 36, 64])

        self.conv1 = nn.Conv2d(in_channels=8, out_channels=6, kernel_size=(3, 3), padding=1)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=3, kernel_size=(3, 3), padding=1)

        for layer in (self.conv3d1, self.conv3d2, self.conv1, self.conv2):
            torch.nn.init.xavier_uniform_(layer.weight)

    def forward(self, ip):
        """Denoise the centre frame of each clip in ``ip``.

        Returns a ``(batch, 3, H, W)`` tensor with values in (0, 1) for a
        ``(batch, 3, 11, H, W)`` input.
        """
        feat1 = self.relu(self.conv3d1(ip))     # full spatial resolution
        feat2 = self.mpool3d(feat1)             # half spatial resolution

        out = self.mpool3d(self.relu(self.conv3d2(feat2)))
        # Drop the frame axis (size 1 for 11-frame input). Sizes are read from
        # the tensor instead of the former hard-coded (8, 90, 160), which gives
        # the same result for 360x640 input and also allows other resolutions.
        out = out.view(-1, out.shape[1], out.shape[3], out.shape[4])

        out = self.parNet(out)

        # Upsample and add the centre-frame features of the matching 3-D stage.
        out = self.upsample(out)
        out = self.relu(self.conv1(out) + feat2[:, :, feat2.shape[2] // 2, :, :])

        out = self.upsample(out) + feat1[:, :, feat1.shape[2] // 2, :, :]
        out = self.conv2(out)

        return self.sigmoid(out)

In [11]:
# Scripts to train, predict and a pipeline to convert original video to denoised video

def _videoBatches(videoRange, batchSize):
    """Yield (start, stop) video-index pairs covering ``videoRange`` in steps of ``batchSize``."""
    first, last = videoRange
    for start in range(first, last, batchSize):
        yield start, min(start + batchSize, last)


def train(model, dataset, optimizer=None, lossFunction=None, numEpochs=10, val_split=0.05, savePath='',
          framesPerVideo=100, trainRange=(0, 45), valRange=(45, 50), batchSize=5):
    """Train ``model`` and report the mean training / validation loss per epoch.

    For every frame offset, the videos in ``trainRange`` are fed in groups of
    ``batchSize``. After each epoch a checkpoint is written under ``savePath``
    and the videos in ``valRange`` are evaluated without gradient tracking.

    Args:
        model: network to optimise.
        dataset: object providing ``loadBatch((start, stop), offset)`` that
            returns an ``(inputs, targets)`` pair.
        optimizer: optimiser already bound to ``model``'s parameters (required).
        lossFunction: callable ``lossFunction(prediction, target)`` (required).
        numEpochs: number of passes over the training videos.
        val_split: currently unused; kept for backward compatibility.
        savePath: directory for checkpoints; created if missing. An empty
            string means the current working directory.
        framesPerVideo: number of frame offsets visited per video.
        trainRange: ``(start, stop)`` indices of the training videos.
        valRange: ``(start, stop)`` indices of the validation videos.
        batchSize: number of videos per batch.

    Returns:
        (trainLosses, valLosses): two lists with one mean loss per epoch.

    Raises:
        ValueError: if ``optimizer`` or ``lossFunction`` is missing.
    """
    if optimizer is None or lossFunction is None:
        raise ValueError('Missing Parameters')

    if savePath:
        os.makedirs(savePath, exist_ok=True)

    trainLosses, valLosses = [], []
    for epoch in range(numEpochs):

        model.train()
        videoLosses = []
        for offset in tq.tqdm(range(framesPerVideo)):
            for videoSpan in _videoBatches(trainRange, batchSize):
                xTrain, yTrain = dataset.loadBatch(videoSpan, offset)
                optimizer.zero_grad()
                yhat = model(xTrain)
                currloss = lossFunction(yhat, yTrain)
                videoLosses.append(currloss.item())

                currloss.backward()
                torch.nn.utils.clip_grad_value_(model.parameters(), 1)
                optimizer.step()

        epochTrainLoss = np.mean(videoLosses)
        # NOTE(review): checkpoint numbering starts at 10 - presumably this
        # continues an earlier 10-epoch run; confirm before changing.
        checkpointName = 'save_' + str(10 + epoch) + '_' + str(np.round(epochTrainLoss, decimals=4)) + '.pt'
        torch.save(model.state_dict(), os.path.join(savePath, checkpointName))
        trainLosses.append(epochTrainLoss)

        model.eval()
        videoLosses = []
        with torch.no_grad():
            for offset in tq.tqdm(range(framesPerVideo)):
                for videoSpan in _videoBatches(valRange, batchSize):
                    xVal, yVal = dataset.loadBatch(videoSpan, offset)
                    # Score the held-out batch, not the last training batch.
                    yhat = model(xVal)
                    videoLosses.append(lossFunction(yhat, yVal).item())
        valLosses.append(np.mean(videoLosses))
        print('Epoch ', str(epoch), ' completed : TrainLoss/ValLoss',
              np.round(trainLosses[-1], 6), '/', np.round(valLosses[-1], 6))

    return trainLosses, valLosses

def predict(model, inputLocation, outputLocation=None, outputSize=(1280, 720)):
    """Denoise every frame found in ``inputLocation`` and optionally save the results.

    Args:
        model: network applied to each frame window, in eval mode and without
            gradient tracking.
        inputLocation: folder whose ``*.jpg`` frames are loaded by ``TestLoader``.
        outputLocation: folder for the denoised frames, created if missing.
            When ``None`` nothing is written to disk.
        outputSize: ``(width, height)`` the saved frames are resized to.

    Returns:
        List with one channels-last float NumPy array per input frame.
    """
    if outputLocation is not None:
        # exist_ok covers the "folder already there" case the old bare except
        # was hiding, while real filesystem errors are now reported.
        os.makedirs(outputLocation, exist_ok=True)

    tobj = TestLoader(inputLocation, device)
    frameCount = tobj.getCountOfFrames()
    output = []

    model.eval()
    start = time.time()
    with torch.no_grad():
        for i in range(frameCount):
            output.append(reformatTensor2Image(torch.squeeze(model(tobj.loadUnit(i)))))
    end = time.time()

    if outputLocation is not None:
        for i, frame in enumerate(output):
            bgrFrame = cv2.resize(cv2.cvtColor(frame * 255, cv2.COLOR_RGB2BGR), outputSize)
            # JPEG stores 8-bit pixels: make the rounding / clipping explicit
            # instead of relying on imwrite's implicit conversion.
            bgrFrame = np.clip(np.rint(bgrFrame), 0, 255).astype(np.uint8)
            cv2.imwrite(os.path.join(outputLocation, str(i).zfill(5) + '.jpg'), bgrFrame)
    print("Denoising Complete, Time Taken : ", end-start)
    return output

def pipeLinePredictor(model, inputLocation, outputLocation):
    """Denoise the video at ``inputLocation`` and write the result to ``outputLocation``.

    The video is split into frames, each frame is denoised with ``predict``,
    and the frames are assembled back into a video. The two scratch folders
    are removed afterwards, even when one of the stages raises.

    NOTE(review): ``getFrames`` and ``makeVideo`` are not defined in the
    visible part of this notebook - confirm they exist before running.
    """
    frameDir, denoisedDir = 'temporaryFrames', 'temporaryOutFrames'
    try:
        getFrames(inputLocation, frameDir)
        predict(model, frameDir, denoisedDir)
        makeVideo(denoisedDir, outputLocation)
        print("Generating Video. Cleaning Files")
    finally:
        # Always clean up so a failed run does not leave stale frames that a
        # later run would pick up.
        for scratchDir in (frameDir, denoisedDir):
            if os.path.isdir(scratchDir):
                shutil.rmtree(scratchDir)

In [12]:
# Load Information of the directories stored in the csv files

# Dataset roots; both keep their trailing slash because later cells build
# paths by plain string concatenation.
dataDirectory = 'DeepVideoDeblurring_Dataset/quantitative_datasets/'
testDirectory = 'DeepVideoDeblurring_Dataset/qualitative_datasets/'

# Each root holds an Info.csv whose first column is used as the index.
dataInfo = pd.read_csv(f'{dataDirectory}Info.csv', index_col=0)
testInfo = pd.read_csv(f'{testDirectory}Info.csv', index_col=0)

In [13]:
# load the dataset in the memory,
# Currently the loader is set to load all videos in 360p resolution in RGB color
# Alter the Dataloader as required

dobj = DataLoader(device)
# cached=1 reads the frames from x.pickle / y.pickle instead of the image folders.
dobj.loadVideo(dataDirectory, dataInfo, cached=1)

In [5]:
# Initialise the model and load a saved weight
model = DenoiserStackConv().to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine too.
model.load_state_dict(torch.load('con3dWeight.pt', map_location=device))

<All keys matched successfully>

In [14]:
# Load a batch to test if the model is working

x, y = dobj.loadBatch((0, 5), 0)
# Smoke test only: eval mode keeps the BatchNorm running statistics of the
# loaded weights untouched, and no_grad avoids holding an autograd graph.
model.eval()
with torch.no_grad():
    ypred = model(x)
ypred.shape

In [23]:
# Initialise the loss functions and optimizers

# Mean-squared error between the predicted and the ground-truth frame.
lossfn = torch.nn.MSELoss()

# Three interchangeable optimisers over the same parameters; pass one to train().
optSGD = torch.optim.SGD(params=model.parameters(), lr=1e-4, weight_decay=0.1)
optRMS = torch.optim.RMSprop(params=model.parameters(), lr=1e-4)
optAdam = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train, change optimizer and epochs as required, can write your own training script
# Alter the training script above to adjust the validation and training sizes.

# Five epochs of SGD; train() saves one checkpoint per epoch under 'stackConv'.
history = train(
    model, dobj,
    numEpochs=5,
    optimizer=optSGD,
    lossFunction=lossfn,
    savePath='stackConv',
)

In [None]:
# Predict and save the denoised frames for the qualitative set of the Deep Video Deblurring Dataset

for dirName in tq.tqdm(testInfo['dirname']):
    # Frames are read from <scene>/input and written next to it in <scene>/conv3d.
    sceneRoot = testDirectory + dirName
    predict(model, sceneRoot + '/input', sceneRoot + '/conv3d')

In [None]:
# Render the network graph, traced with the sample batch `x` loaded earlier.
printDiagram(model=model, data=x, location='stackConv.jpg')

In [None]:
# Convert a noisy input video into a denoised output video
pipeLinePredictor(model=model, inputLocation='input.mp4', outputLocation='out.mp4')