# Description

This preprocessing file works in two sections. First, it normalizes the pictures into 256x256 images and generates the random pixel clicks for the foreground and background. Second, it saves the generated foreground and background density map pictures in the foreground and background folders. Additionally, I save the foreground and background pixel clicks into pickle files that can be easily downloaded.

In [58]:
import glob
import os
import pickle
import random

import cv2
import numpy as np

# Normalize Car Images

In [59]:
# SAVING CAR IMAGES

# glob.glob() returns its matches in arbitrary, filesystem-dependent order.
# Later cells pair cars[i] with masks[i] purely by position, so the paths are
# sorted to make that pairing deterministic.
cars = sorted(glob.glob('Car Images/cars/*.png'))

print(len(cars))

197


In [60]:
# Read every car image listed in `cars` and stack the results into one array.
image_cars = np.array([cv2.imread(carPath) for carPath in cars])

print(image_cars.shape)
print(image_cars[0].shape)

(197, 480, 640, 3)
(480, 640, 3)


In [61]:
# Target size shared by every normalized car image.
width, height = 256, 256
dim = (width, height)

# Resize each car image to `dim` using area interpolation.
resized_cars = [
    cv2.resize(carImage, dim, interpolation=cv2.INTER_AREA)
    for carImage in image_cars
]

In [62]:
# Sanity check: show the overall shape of the list of resized car images.
print(np.shape(resized_cars))

(197, 256, 256, 3)


In [63]:
# The channels-first conversion of the car images is intentionally disabled
# in this cell, so the images keep their layout here. Only the shapes of the
# whole collection and of the first two images are printed.
print(np.shape(resized_cars))
print(resized_cars[0].shape)
print(resized_cars[1].shape)

(197, 256, 256, 3)
(256, 256, 3)
(256, 256, 3)


In [64]:
# Show two sample file names with the leading folder prefix
# ('Car Images/cars/', 16 characters) cut off.
print(cars[0][len('Car Images/cars/'):])
print(cars[10][len('Car Images/cars/'):])

1.png
108.png


In [65]:
# SAVING RESIZED CAR IMAGES INTO FOLDER
# Each resized image is written under its original file name.
outputDir = './normalizedCarImages/'

# Create the target folder up front so the writes below have somewhere to go.
os.makedirs(outputDir, exist_ok=True)

for carPath, resizedCar in zip(cars, resized_cars):
    # basename() strips the folder part regardless of its length, instead of
    # relying on a hard-coded 16-character prefix.
    carLoc = outputDir + os.path.basename(carPath)
    # Surface a failed write instead of silently dropping the image.
    if not cv2.imwrite(carLoc, resizedCar):
        raise OSError("could not write image: " + carLoc)

# Generate Foreground and Background Clicks

## Density Map Subsection

This is the Create Density Map function. It takes the number of foreground clicks, the number of background clicks, the mask, and the density size (a scale factor applied to the distances). As a side note, this method assumes a 3-channel input mask image in (channel, r, c) layout; the mask channels are all assumed to be the same, which is why only the first channel (index 0) is read. The channel index is basically always zero.

In [66]:
def _sampleClicks(region, clickCount, label):
    """Pick clickCount random (row, col) positions where region is True.

    Positions are drawn uniformly and with replacement from the True pixels
    of the 2-D boolean array `region`. Returns an integer array of shape
    (2, clickCount): row indices first, column indices second.
    """
    if clickCount < 1:
        raise ValueError(
            "need at least one %s click, got %r" % (label, clickCount)
        )
    candidates = np.argwhere(region)
    if len(candidates) == 0:
        # Rejection sampling would never terminate here, so fail loudly.
        raise ValueError("mask contains no %s pixels to click on" % label)

    rows, cols = [], []
    for _ in range(clickCount):
        r, c = candidates[random.randrange(len(candidates))]
        rows.append(int(r))
        cols.append(int(c))
    return np.array([rows, cols])


def _nearestClickDistance(shape, clicks):
    """Return, per pixel of a (rows, cols) grid, the distance to the nearest click."""
    rowIdx = np.arange(shape[0])[:, None, None]
    colIdx = np.arange(shape[1])[None, :, None]
    # Broadcasts to (rows, cols, clicks): squared distance to every click.
    squared = (rowIdx - clicks[0]) ** 2 + (colIdx - clicks[1]) ** 2
    return np.sqrt(squared.min(axis=2))


def createDensity(foreClick, backClick, mask, densitySize, verbose=True):
    """Generate random clicks on a mask and the matching distance maps.

    The mask is expected as a 3-channel array/list in (channel, row, col)
    layout whose channels all hold the same values, so only channel 0 is
    read. Pixels with a value > 0 are foreground, all others are background.

    Parameters:
        foreClick: number of random foreground clicks (at least 1).
        backClick: number of random background clicks (at least 1).
        mask: the (channel, row, col) mask described above. It does not
            need to be square.
        densitySize: factor every distance is multiplied by.
        verbose: when True (the default) the chosen click positions are
            printed.

    Returns:
        (foreDensMap, backDensMap, foreground, background). The two maps are
        float arrays of shape (rows, cols) holding, per pixel, the Euclidean
        distance to the nearest foreground / background click times
        densitySize. `foreground` and `background` are integer arrays of
        shape (2, clicks): row indices first, column indices second.

    Raises:
        ValueError: if a click count is below 1, or if the mask has no
            foreground (or no background) pixel to click on.
    """
    region = np.asarray(mask)[0] > 0

    # Foreground and background pixel locations
    foreground = _sampleClicks(region, foreClick, "foreground")
    background = _sampleClicks(~region, backClick, "background")

    if verbose:
        print(foreground)
        print(background)

    # Density Maps
    foreDensMap = _nearestClickDistance(region.shape, foreground) * densitySize
    backDensMap = _nearestClickDistance(region.shape, background) * densitySize

    return foreDensMap, backDensMap, foreground, background

## Total Mask Pixel Count

This method returns the number of pixels in the activated mask divided by the total number of pixels in the mask image. It assumes a 3-channel input whose channels are all the same. It is meant to be used in the density map function to create density maps proportional to the total pixel area of the mask.

ACTUALLY, THIS IS NOT GOOD BECAUSE IN PRACTICE YOU DO NOT KNOW THE TOTAL SIZE OF THE MASK. BETTER TRAINING ACCURACY BUT LOWER TESTING ACCURACY FOR SURE.

In [67]:
def maskPixelFraction(mask):
    """Return the fraction of mask pixels that are active (value > 0).

    The mask is expected as a 3-channel array/list in (channel, row, col)
    layout whose channels all hold the same values, so only channel 0 is
    inspected. The mask does not need to be square.

    Returns:
        Number of pixels > 0 divided by the total number of pixels in the
        channel, as a float.
    """
    region = np.asarray(mask)[0] > 0
    return np.count_nonzero(region) / region.size
    

## Mask Normalization Subsection

This section basically reads in the masks and normalizes them.

In [68]:
# glob.glob() returns its matches in arbitrary, filesystem-dependent order.
# Later cells pair masks[i] with cars[i] purely by position, so the paths are
# sorted to make that pairing deterministic.
masks = sorted(glob.glob('Car Images/masks/*.png'))

print(len(masks))

197


In [69]:
# Read every mask image listed in `masks` and stack the results into one array.
image_masks = np.array([cv2.imread(maskPath) for maskPath in masks])

print(image_masks.shape)
print(image_masks[0].shape)

(197, 480, 640, 3)
(480, 640, 3)


In [70]:
# Target size shared by every normalized mask.
width, height = 256, 256
dim = (width, height)

# Resize each mask to `dim` using area interpolation.
resized_masks = [
    cv2.resize(maskImage, dim, interpolation=cv2.INTER_AREA)
    for maskImage in image_masks
]

In [71]:
# Sanity check: show the overall shape of the list of resized masks.
print(np.shape(resized_masks))
# Disabled debugging output: inspect a corner region of the last raw mask.
#print(np.shape(image_masks[196, 300:, 300:]))
#print(image_masks[196, 300:, 300:])

(197, 256, 256, 3)


In [72]:
# Move the last axis of every mask to the front (channels-first layout),
# updating the existing list in place.
resized_masks[:] = [np.moveaxis(maskImage, -1, 0) for maskImage in resized_masks]

print(np.shape(resized_masks))
print(resized_masks[0].shape)
print(resized_masks[1].shape)

(197, 3, 256, 256)
(3, 256, 256)
(3, 256, 256)


In [73]:
# Sanity check: show the Python type of a single resized mask.
print(type(resized_masks[0]))

<class 'numpy.ndarray'>


In [74]:
# Saving the MASKS array to be used later into PICKLE

file_nameMasks = "Masks.pkl"

# The with-block closes the file even if pickle.dump raises.
with open(file_nameMasks, "wb") as open_file:
    pickle.dump(resized_masks, open_file)

## Foreground and Background- Density Map and Click Array Generation Subsection

This section creates the randomly generated foreground density maps, background density maps, foreground clicks, and background clicks.

In [75]:
# Sanity check: number of masks that will be processed below.
print(len(resized_masks))

197


In [None]:
# Per-mask results of createDensity, kept in the same order as resized_masks.
foregroundArray, backgroundArray = [], []
foregroundClicks, backgroundClicks = [], []

# Number of foreground (x) and background (y) clicks for the first mask,
# each drawn from 1 to 3.
x, y = random.randint(1, 3), random.randint(1, 3)
print(len(resized_masks))
for i, currentMask in enumerate(resized_masks):
    print("iteration: ", i + 1)
    a, b, c, d = createDensity(x, y, currentMask, 2)
    foregroundArray.append(a)
    backgroundArray.append(b)
    foregroundClicks.append(c)
    backgroundClicks.append(d)
    # Draw fresh click counts for the next mask.
    x, y = random.randint(1, 3), random.randint(1, 3)

In [None]:
# Saving the density maps into the foreground and background folders respectively

# Create both target folders up front so the writes below have somewhere to go.
for outputDir in ('./foreground', './background'):
    os.makedirs(outputDir, exist_ok=True)

# NOTE(review): the density maps are float arrays whose values can exceed
# 255 - confirm that cv2.imwrite stores them in the PNG files as intended.
for maskPath, carPath, foreMap, backMap in zip(
    masks, cars, foregroundArray, backgroundArray
):
    # basename() strips the folder part regardless of its length, instead of
    # relying on hard-coded 17- and 16-character prefixes.
    fg = './foreground/fg' + os.path.basename(maskPath)
    bg = './background/bg' + os.path.basename(carPath)
    # Surface a failed write instead of silently dropping the map.
    if not cv2.imwrite(fg, foreMap):
        raise OSError("could not write image: " + fg)
    if not cv2.imwrite(bg, backMap):
        raise OSError("could not write image: " + bg)


In [None]:
# Saving the foreground and background click array to be used later into PICKLE

file_name1 = "fg_randomclicks.pkl"
file_name2 = "bg_randomclicks.pkl"

# Each with-block closes its file even if pickle.dump raises.
with open(file_name1, "wb") as open_file:
    pickle.dump(foregroundClicks, open_file)

with open(file_name2, "wb") as open_file:
    pickle.dump(backgroundClicks, open_file)

# Saving the UNET Array (3 car dimensions + fg & bg dimensions)

In [None]:
# Move the last axis of every car image to the front (channels-first layout),
# updating the existing list in place.
resized_cars[:] = [np.moveaxis(carImage, -1, 0) for carImage in resized_cars]

print(type(resized_cars))
print(np.shape(resized_cars))
# NOTE: everything below relies on the masks and cars having been read in
# the same order.

In [None]:
# Sanity check: show the overall shape of the list of foreground density maps.
print(np.shape(foregroundArray))

In [None]:
# Insert a new axis of length 1 at position 1 of the stacked foreground maps.
foregroundExpand = np.asarray(foregroundArray)[:, np.newaxis]

In [None]:
# Sanity check: shape of the foreground maps after adding the extra axis.
print(foregroundExpand.shape)

In [None]:
# Insert a new axis of length 1 at position 1 of the stacked background maps.
backgroundExpand = np.asarray(backgroundArray)[:, np.newaxis]

In [None]:
# Sanity check: shape of the background maps after adding the extra axis.
print(backgroundExpand.shape)

In [None]:
# Join the car images with the foreground and background maps along axis 1,
# in that order.
unetArray = np.concatenate(
    [resized_cars, foregroundExpand, backgroundExpand],
    axis=1,
)

In [None]:
# Sanity check: shape of the combined array.
print(unetArray.shape)

In [None]:
# Inspect the first entry of the combined array.
c = unetArray[0]
print(c.shape)

In [None]:
# Saving the UNET array to be used later into PICKLE

file_nameUnet = "UnetArray.pkl"

# The with-block closes the file even if pickle.dump raises.
with open(file_nameUnet, "wb") as open_file:
    pickle.dump(unetArray, open_file)