## Project 2: Black and White Colorization

### Section 1: Loading and modifying dataset

#### Helper functions

In [1]:
import cv2
import os
import glob
import torch
import numpy as np
from matplotlib import pyplot as plt
import torchvision.transforms as T

# Make float32 the default dtype for newly created floating-point tensors.
# torch.set_default_tensor_type is deprecated in recent PyTorch releases;
# torch.set_default_dtype is the supported way to set the default dtype.
torch.set_default_dtype(torch.float32)
 
def loadImgsToList():
    """Load every face image from disk as an RGB array.

    Reads all files matching ``./AllImages/face_images/*.jpg`` in sorted
    filename order.

    Returns:
        A ``(images, count)`` tuple: the list of arrays returned by
        ``cv2.imread`` after BGR -> RGB conversion, and the number of images
        read. Capture it as ``images, count = loadImgsToList()``.
    """
    pattern = "./AllImages/face_images/*.jpg"

    images = []
    for path in sorted(glob.glob(pattern)):
        bgr = cv2.imread(path)  # cv2 decodes into BGR channel order
        images.append(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

    return images, len(images)
   
def createTensor(data, numImages):
    """Stack a list of image arrays into one tensor, in the order given.

    Args:
        data: sequence of numpy arrays, each broadcastable to (128, 128, 3).
        numImages: number of slots to allocate along the first dimension.

    Returns:
        A tensor of shape (numImages, 128, 128, 3), i.e. (image, height,
        width, channel), in the default dtype. Slots beyond ``len(data)``
        stay zero.
    """
    stacked = torch.zeros((numImages, 128, 128, 3))  # pre-sized holder

    for idx, image in enumerate(data):
        # Assignment casts the numpy data to the holder's dtype.
        stacked[idx] = torch.from_numpy(image)

    return stacked

def shuffleTensor(tempTensor, numImages):
    """Return the first ``numImages`` entries of ``tempTensor`` in random order.

    Args:
        tempTensor: tensor whose first dimension indexes images; any
            per-image shape is accepted.
        numImages: how many leading entries to shuffle (normally
            ``len(tempTensor)``).

    Returns:
        A new tensor with ``numImages`` entries where entry ``k`` is
        ``tempTensor[perm[k]]`` for a random permutation ``perm`` drawn from
        torch's global RNG. The input is left unmodified and its dtype is
        preserved.
    """
    # A permutation of [0, numImages), e.g. 0, 1, 2, 3 can become 0, 3, 1, 2.
    perm = torch.randperm(numImages)

    # Indexing with the permutation gathers the rows into a fresh tensor
    # without hard-coding the per-image shape.
    return tempTensor[perm]

def randomCrop(inputTensor):
    """Take a random square crop of an image and scale it back to 128x128.

    Args:
        inputTensor: image tensor laid out as (height, width, channels).

    Returns:
        A (128, 128, channels) tensor holding the resized crop.
    """
    # np.random.randint excludes the upper bound, so the crop side is 70..114.
    # The range is arbitrary and can be tuned.
    side = np.random.randint(70, 115)

    # The torchvision transforms expect channels first, so move them up front.
    channelsFirst = inputTensor.permute(2, 0, 1)

    cropped = T.RandomCrop(size=(side, side))(channelsFirst)
    restored = T.Resize(size=(128, 128))(cropped)

    # Undo the permute so the result is (height, width, channels) again.
    return restored.permute(1, 2, 0)

def horizontalFlip(inputTensor):
    """Return a left-right mirrored copy of the tensor (reverses dimension 1)."""
    mirrored = inputTensor.fliplr()
    return mirrored

def scaleRGBValues(inputTensor):
    """Darken an image by one random factor drawn uniformly from [0.6, 1).

    The same factor is applied to every element; a new tensor is returned.
    """
    factor = np.random.uniform(.6, 1)
    scaled = inputTensor * factor
    return scaled

def populateAndScaleDataset(data, numImages, scaleFactor, saveTensor):
    """Build an augmented dataset with ``scaleFactor`` variants per source image.

    For every source image the first variant is the untouched original. Each
    further variant independently gets, with probability 1/2 each, a
    horizontal flip, a random crop (resized back to 128x128) and a random
    brightness scaling, so several transformations can stack on one image.
    When none of the three fires the variant is a duplicate of the original.

    Args:
        data: list of RGB image arrays, as consumed by ``createTensor``.
        numImages: number of source images.
        scaleFactor: number of output images produced per source image.
        saveTensor: when true, also write every output image to
            ``./AllImages/augmented/rgb_imageNNNNNN.jpg``.

    Returns:
        Tensor of shape (scaleFactor * numImages, 128, 128, 3). The variants
        of source image ``i`` occupy the consecutive slots starting at
        ``i * scaleFactor``.
    """
    # The tensor is deliberately left unshuffled here so that output slots
    # (and saved file numbers) follow the source image order.
    myTensor = createTensor(data, numImages)

    newTensor = torch.zeros((scaleFactor * numImages, 128, 128, 3))

    count = 0  # next free slot in newTensor
    for i in range(numImages):
        for j in range(scaleFactor):
            tempTensor = myTensor[i]  # start from the original

            if j > 0:  # slot 0 of each group keeps the original untouched
                # Each draw happens regardless of the previous outcome so the
                # random stream is consumed in a fixed order.
                if np.random.uniform(0, 1) >= .5:
                    tempTensor = horizontalFlip(tempTensor)

                if np.random.uniform(0, 1) >= .5:
                    tempTensor = randomCrop(tempTensor)

                if np.random.uniform(0, 1) >= .5:
                    tempTensor = scaleRGBValues(tempTensor)

            newTensor[count] = tempTensor
            count += 1

    if saveTensor:
        for i in range(len(newTensor)):
            # Six-digit, zero-padded running index.
            filename = f"./AllImages/augmented/rgb_image{i:06d}.jpg"

            # cv2.cvtColor returns the converted image rather than converting
            # in place; the result must be kept or the file is written with
            # red and blue swapped.
            img = cv2.cvtColor(newTensor[i].numpy(), cv2.COLOR_RGB2BGR)
            s = cv2.imwrite(filename, img)

            if not s:
                print(s)  # print failure

    return newTensor

def convertToLab(mytensor, saveTensor):
    """Convert a stack of RGB images (values on a 0-255 scale) to Lab.

    Args:
        mytensor: tensor of RGB images indexed along the first dimension.
        saveTensor: when true, also write each image's L, a and b planes to
            ``./AllImages/L``, ``./AllImages/a`` and ``./AllImages/b``.

    Returns:
        A tensor with the same shape as ``mytensor`` holding the Lab images.
    """
    labTensor = torch.zeros(mytensor.shape)

    for idx, rgb in enumerate(mytensor):
        # Scale to [0, 1] before handing the image to cv2 for conversion.
        lab = cv2.cvtColor(rgb.numpy() / 255.0, cv2.COLOR_RGB2Lab)
        labTensor[idx] = torch.from_numpy(lab)

        if not saveTensor:
            continue

        suffix = str(idx).zfill(6)  # six-digit running index for filenames
        L, a, b = cv2.split(lab)

        # NOTE(review): the planes are written exactly as cvtColor produced
        # them; confirm imwrite's handling of these value ranges is intended.
        L_s = cv2.imwrite("./AllImages/L/L_image" + suffix + ".jpg", L)
        a_s = cv2.imwrite("./AllImages/a/a_image" + suffix + ".jpg", a)
        b_s = cv2.imwrite("./AllImages/b/b_image" + suffix + ".jpg", b)

        if not (L_s and a_s and b_s):
            print(L_s, a_s, b_s)  # report which write(s) failed

    return labTensor

def displayImage(data):
    """Show an image with matplotlib, dividing its values by 255 first."""
    normalized = data / 255
    plt.imshow(normalized)
    plt.show()

#### Call data initialization functions

In [2]:
#Load the raw face images, then build the augmented dataset (sf variants per source image, original included)
data, numImgs = loadImgsToList()
sf = 10 #augmentation factor: the resulting tensor holds sf * numImgs images

tensor = populateAndScaleDataset(data, numImgs, sf, False) #False: do not write the augmented images to disk

In [3]:
dataset = convertToLab(tensor, False) #convert RGB to the Lab color space; False skips writing the L/a/b image files (pass True here and above to save files)

In [4]:
#dataset = shuffleTensor(dataset, numImgs*sf) #Shuffle after saving to preserve order in files (cataloging purposes)

### Regressor Section

In [102]:
import torch.nn as nn
import torch.nn.functional as F

def getMeanChrominance(a, b):
    """Return the mean of channel 0 of the ``a`` and ``b`` planes of one image.

    Args:
        a: tensor indexed as (row, column, channel); only channel 0 is read.
        b: tensor laid out like ``a``. It is read over the same row/column
            window as ``a`` and must be at least that large.

    Returns:
        ``(meanA, meanB)`` as Python floats.

    Raises:
        ZeroDivisionError: if ``a`` has no rows or no columns.
        IndexError: if ``b`` has fewer rows or columns than ``a``.
    """
    rows = a.shape[0]
    cols = a.shape[1] if rows else 0
    num = rows * cols

    aChannel = a[:, :, 0]
    # Average b over the window defined by a's extent.
    bChannel = b[:rows, :cols, 0]
    if bChannel.shape != aChannel.shape:
        raise IndexError("b has fewer rows or columns than a")

    # Accumulate in float64 so the sum does not lose precision on large
    # images; dividing a Python float by zero raises ZeroDivisionError.
    aTotal = aChannel.sum(dtype=torch.float64).item()
    bTotal = bChannel.sum(dtype=torch.float64).item()
    return aTotal / num, bTotal / num

def splitChannelsAsTensors(dataset):
    """Split a channels-last Lab dataset into separate L, a and b tensors.

    Args:
        dataset: tensor of shape (N, H, W, C) with C >= 3, where channels
            0, 1 and 2 hold L, a and b respectively.

    Returns:
        ``(L, a, b)``, each a new tensor of shape (N, H, W, 1) in the default
        dtype. ``L`` is divided by 100; ``a`` and ``b`` are copied unchanged.

    Raises:
        ValueError: if ``dataset`` is not 4-D or has fewer than 3 channels.
    """
    if dataset.dim() != 4 or dataset.shape[-1] < 3:
        raise ValueError(
            "expected a (N, H, W, C>=3) tensor, got shape "
            + str(tuple(dataset.shape))
        )

    dtype = torch.get_default_dtype()

    # Slicing with k:k+1 keeps the trailing channel axis, so no reshape is
    # needed and the image size is not hard-coded.
    L = (dataset[..., 0:1] / 100).to(dtype).contiguous()
    # copy=True guarantees the outputs never alias the input dataset.
    a = dataset[..., 1:2].to(dtype, copy=True).contiguous()
    b = dataset[..., 2:3].to(dtype, copy=True).contiguous()

    return L, a, b

def getAverageFromTensor(tensor):
    """Return 100 times the mean of all elements of ``tensor``.

    Args:
        tensor: any tensor; it is detached first, so tensors that require
            grad are accepted.

    Returns:
        ``100 * sum(elements) / number_of_elements`` as a Python float.

    Raises:
        ZeroDivisionError: if the tensor has no elements.
    """
    flat = torch.flatten(tensor.detach())
    num = flat.numel()

    # Reduce in float64 for accuracy instead of adding elements one by one.
    total = flat.sum(dtype=torch.float64).item()

    avg = (100 * total) / num
    return avg

def runNetwork(dataset):
    """Run the first image of ``dataset`` through a freshly built regressor.

    Prints the mean of the first image's a and b planes, then the scaled
    average of the network output for that image's L plane. The network is
    constructed here with its initial weights; no training happens in this
    function.

    Args:
        dataset: Lab image tensor of shape (N, H, W, 3), as accepted by
            ``splitChannelsAsTensors``.

    Returns:
        None.
    """
    # Seven stride-2 convolutions; each one halves the spatial size, so a
    # 128x128 input is reduced to 1x1.
    net = nn.Sequential(
            nn.Conv2d(1, 3, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(3, 3, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(3, 3, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(3, 3, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(3, 3, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(3, 3, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(3, 3, 3, stride=2, padding=1),
        )

    dataL, dataA, dataB = splitChannelsAsTensors(dataset)

    meanA, meanB = getMeanChrominance(dataA[0], dataB[0])
    print("Mean Chrominance from A = ", meanA)
    print("Mean Chrominance from B = ", meanB)

    # Give every image its own (batch=1, channel=1, H, W) layout so that
    # dataL[i] can be fed to the network directly. Sizes are taken from the
    # data rather than assuming a fixed number of images.
    numImages, height, width = dataL.shape[:3]
    dataL = dataL.reshape(numImages, 1, 1, height, width)

    result = net(dataL[0])
    print("Predicted mean chrominance = ", getAverageFromTensor(result))

In [101]:
#NOTE(review): `net` and `dataL` are not defined at module level in the code shown (they are locals of runNetwork); this cell presumably relied on earlier notebook state - confirm before re-running
print(net)



print(dataL.shape)

test = net(dataL[0]) #forward pass on the first entry of dataL

print(test.shape)
print(test*100)

print(getAverageFromTensor(test*100)) #note: getAverageFromTensor multiplies by 100 again internally

Sequential(
  (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (3): ReLU()
  (4): Conv2d(64, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (5): ReLU()
  (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (7): ReLU()
  (8): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (9): ReLU()
  (10): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (11): ReLU()
  (12): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
11.730525016784668
9.50047779083252
torch.Size([7500, 128, 128, 1])
torch.Size([1, 64, 1, 1])
tensor([[[[ 4.0801]],

         [[ 2.2900]],

         [[ 2.2606]],

         [[-1.0132]],

         [[ 2.2332]],

         [[ 2.2591]],

         [[ 3.4208]],

         [[ 3.0341]],

         [[ 1.7964]],

         [[ 0.0516]],

         [[ 4.4976]],

         [[ 1.2477]],

         [[ 2.1847]