# LIBRARIES

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing import image
from imageio import imread
from skimage.transform import resize

# DIFFERENT SIZED OBJECTS

In [None]:
# Load the sprite WITH its alpha channel. `load_img` defaults to
# color_mode='rgb', which discards transparency; the background generator in
# this file masks on channel index 3, so four channels are required.
# Every other consumer slices `[:, :, :3]` and is unaffected by the extra channel.
ch = image.load_img('charmander-tight.png', color_mode='rgba')
POKE_DIM      = 200 # Side length (pixels) of the square canvas the sprite is pasted onto
ch            = np.array(ch)
CH_H, CH_W, _ = ch.shape  # Native sprite height / width in pixels

# Quick visual sanity check of the loaded sprite
plt.imshow(ch)
plt.show()

In [None]:
def pokemon_generator_with_resize(batch_size=64):
    """Endlessly yield batches of black canvases with a randomly rescaled sprite.

    Reads the module-level ``ch`` (sprite array), ``CH_H``/``CH_W`` (its native
    size) and ``POKE_DIM`` (canvas side length).

    Args:
        batch_size: Number of images per yielded batch.

    Yields:
        ``(X, Y)`` where ``X`` has shape ``(batch_size, POKE_DIM, POKE_DIM, 3)``
        and is divided by 255, and ``Y`` has shape ``(batch_size, 4)`` holding
        ``(row0, col0, height, width)`` of the sprite, each divided by
        ``POKE_DIM``.
    """
    while True:
        # Each epoch will have 50 batches
        for _ in range(50):
            X = np.zeros(shape=(batch_size, POKE_DIM, POKE_DIM, 3))
            Y = np.zeros(shape=(batch_size, 4))

            for i in range(batch_size):
                # Resize the sprite - make it bigger or smaller
                scale = 0.5 + np.random.random()  # [0.5 -> 1.5)
                new_height = int(CH_H * scale)
                new_width = int(CH_W * scale)
                # preserve_range keeps pixel values in 0 -> 255; uint8 matches
                # the dtype used by the background generator in this file
                # (np.uint is a platform-sized unsigned integer).
                obj = resize(image=ch, output_shape=(new_height, new_width),
                             preserve_range=True).astype(np.uint8)

                # Choose the top-left corner so the sprite fits on the canvas
                row0 = np.random.randint(POKE_DIM - new_height)
                col0 = np.random.randint(POKE_DIM - new_width)
                row1 = row0 + new_height
                col1 = col0 + new_width

                # Image: paste the colour channels only (drop alpha if present)
                X[i, row0:row1, col0:col1, :] = obj[:, :, :3]

                # Targets, normalised by the canvas size
                Y[i, 0] = row0 / POKE_DIM           # Top left (row)
                Y[i, 1] = col0 / POKE_DIM           # Top left (col)
                Y[i, 2] = (row1 - row0) / POKE_DIM  # Height
                Y[i, 3] = (col1 - col0) / POKE_DIM  # Width

            yield X / 255., Y

In [None]:
def make_model2():
    """Build and compile a VGG16-based bounding-box regressor.

    The ImageNet-weighted VGG16 convolutional base (no classifier head) takes
    ``POKE_DIM x POKE_DIM x 3`` inputs; its flattened output feeds a 4-unit
    sigmoid layer. The model is compiled with binary cross-entropy and Adam
    at a learning rate of 1e-4.

    Returns:
        The compiled Keras ``Model``.
    """
    backbone = tf.keras.applications.VGG16(
        input_shape=[POKE_DIM, POKE_DIM, 3],
        include_top=False,
        weights='imagenet',
    )
    flat_features = Flatten()(backbone.output)
    box_output = Dense(units=4, activation='sigmoid')(flat_features)

    regressor = Model(backbone.input, box_output)
    regressor.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.0001),
    )
    return regressor

# Build the VGG16-based regressor and train it on the resized-sprite generator:
# 5 epochs of 50 steps each (the generator's default batch size is 64).
model = make_model2()
model.fit(x=pokemon_generator_with_resize(), steps_per_epoch=50, epochs=5)

In [None]:
from matplotlib.patches import Rectangle

# Make predictions with resize
def make_predictions_with_resize():
    """Predict and plot the box of a randomly rescaled sprite on a black canvas.

    Reads the module-level ``model``, ``ch``, ``CH_H``, ``CH_W`` and
    ``POKE_DIM``. Prints the predicted box corners ``(row0, col0, row1, col1)``
    in pixels and the binary cross-entropy between the true and predicted
    normalised box, then shows the canvas with the predicted box in red.
    """
    scale = 0.5 + np.random.random()  # [0.5 -> 1.5)
    new_height = int(CH_H * scale)
    new_width = int(CH_W * scale)
    obj = resize(image=ch, output_shape=(new_height, new_width),
                 preserve_range=True).astype(np.uint8)  # Keep 0 -> 255

    # Generate a random image: black canvas with the sprite pasted somewhere
    x = np.zeros(shape=(POKE_DIM, POKE_DIM, 3))
    row0 = np.random.randint(POKE_DIM - new_height)
    col0 = np.random.randint(POKE_DIM - new_width)
    row1 = row0 + new_height
    col1 = col0 + new_width
    x[row0:row1, col0:col1, :] = obj[:, :, :3]

    # Predict on a batch of one, scaled the same way as the training batches
    X = np.expand_dims(x, 0) / 255.
    p = model.predict(X)[0]

    # True target: (top-left row, top-left col, height, width), normalised
    y = np.zeros(4)
    y[0] = row0 / POKE_DIM
    y[1] = col0 / POKE_DIM
    y[2] = (row1 - row0) / POKE_DIM
    y[3] = (col1 - col0) / POKE_DIM

    # Clip before taking logs so a saturated sigmoid (exactly 0 or 1) cannot
    # turn the reported loss into nan/inf.
    eps = 1e-7
    p_safe = np.clip(np.asarray(p, dtype=np.float64), eps, 1 - eps)
    loss = -np.mean(y * np.log(p_safe) + (1 - y) * np.log(1 - p_safe))

    # Predicted box corners in pixels
    row0 = int(p[0] * POKE_DIM)
    col0 = int(p[1] * POKE_DIM)
    row1 = int(row0 + p[2] * POKE_DIM)
    col1 = int(col0 + p[3] * POKE_DIM)

    print(f'Pred: {(row0, col0, row1, col1)}')
    print(f'Loss: {loss}')

    fig, ax = plt.subplots(1)
    ax.imshow(x.astype(np.uint8))
    # Rectangle takes (x, y) of the top-left corner, then width, then height
    rect = Rectangle((p[1] * POKE_DIM, p[0] * POKE_DIM), p[3] * POKE_DIM, p[2] * POKE_DIM,
                     linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
    plt.show()

In [None]:
# Run one random prediction with the currently trained `model` and plot it
make_predictions_with_resize()

# FLIPPED OBJECTS

In [None]:
# Re-declare the canvas size and sprite dimensions for this section.
# `ch` is not loaded here: it must already exist from an earlier cell.
POKE_DIM      = 200 # Side length (pixels) of the square canvas
ch            = np.array(ch)
CH_H, CH_W, _ = ch.shape  # Native sprite height / width in pixels

def pokemon_generator_flip(batch_size=64):
    """Endlessly yield batches of black canvases with a possibly mirrored sprite.

    The sprite ``ch`` is pasted at its native size at a random position and is
    flipped left-to-right with probability 0.5.

    Args:
        batch_size: Number of images per yielded batch.

    Yields:
        ``(images / 255., targets)`` where ``images`` has shape
        ``(batch_size, POKE_DIM, POKE_DIM, 3)`` and each row of ``targets`` is
        ``(row0, col0, height, width)`` divided by ``POKE_DIM``.
    """
    batches_per_epoch = 50  # Each epoch will have 50 batches

    while True:
        for _ in range(batches_per_epoch):
            images = np.zeros(shape=(batch_size, POKE_DIM, POKE_DIM, 3))
            targets = np.zeros(shape=(batch_size, 4))

            for idx in range(batch_size):
                # Top-left corner, chosen so the sprite stays on the canvas
                top = np.random.randint(POKE_DIM - CH_H)
                left = np.random.randint(POKE_DIM - CH_W)
                bottom = top + CH_H
                right = left + CH_W

                # Mirror horizontally half of the time
                sprite = np.fliplr(ch) if np.random.random() < 0.5 else ch

                # Paste the colour channels only
                images[idx, top:bottom, left:right, :] = sprite[:, :, :3]

                # Normalised box: top-left row, top-left col, height, width
                targets[idx, 0] = top / POKE_DIM
                targets[idx, 1] = left / POKE_DIM
                targets[idx, 2] = (bottom - top) / POKE_DIM
                targets[idx, 3] = (right - left) / POKE_DIM

            yield images / 255., targets

In [None]:
def make_model():
    """Build and compile a VGG16-based bounding-box regressor.

    An ImageNet-weighted VGG16 base without its top layers receives
    ``POKE_DIM x POKE_DIM x 3`` images; the flattened features go through a
    4-unit sigmoid ``Dense`` layer. Compiled with binary cross-entropy loss
    and the Adam optimizer (learning rate 1e-4).

    Returns:
        The compiled Keras ``Model``.
    """
    base = tf.keras.applications.VGG16(
        input_shape=[POKE_DIM, POKE_DIM, 3],
        include_top=False,
        weights='imagenet',
    )
    head = Dense(units=4, activation='sigmoid')(Flatten()(base.output))

    net = Model(base.input, head)
    net.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0001))
    return net

# Build a fresh regressor (rebinding the global `model`) and train it on the
# flip generator: 5 epochs of 50 steps each.
model = make_model()
model.fit(x=pokemon_generator_flip(), steps_per_epoch=50, epochs=5)

In [None]:
from matplotlib.patches import Rectangle

# Make predictions
def make_predictions_flip():
    """Predict and plot the box of a possibly mirrored sprite on a black canvas.

    Reads the module-level ``model``, ``ch``, ``CH_H``, ``CH_W`` and
    ``POKE_DIM``. Prints the predicted box corners ``(row0, col0, row1, col1)``
    in pixels and the binary cross-entropy between the true and predicted
    normalised box, then shows the canvas with the predicted box in red.
    """
    # Generate a random image: black canvas with the sprite pasted somewhere
    x = np.zeros(shape=(POKE_DIM, POKE_DIM, 3))
    row0 = np.random.randint(POKE_DIM - CH_H)
    col0 = np.random.randint(POKE_DIM - CH_W)
    row1 = row0 + CH_H
    col1 = col0 + CH_W

    # Maybe flip left-to-right (probability 0.5)
    obj = np.fliplr(ch) if np.random.random() < 0.5 else ch

    x[row0:row1, col0:col1, :] = obj[:, :, :3]

    # Predict on a batch of one, scaled the same way as the training batches
    X = np.expand_dims(x, 0) / 255.
    p = model.predict(X)[0]

    # True target: (top-left row, top-left col, height, width), normalised
    y = np.zeros(4)
    y[0] = row0 / POKE_DIM
    y[1] = col0 / POKE_DIM
    y[2] = (row1 - row0) / POKE_DIM
    y[3] = (col1 - col0) / POKE_DIM

    # Clip before taking logs so a saturated sigmoid (exactly 0 or 1) cannot
    # turn the reported loss into nan/inf.
    eps = 1e-7
    p_safe = np.clip(np.asarray(p, dtype=np.float64), eps, 1 - eps)
    loss = -np.mean(y * np.log(p_safe) + (1 - y) * np.log(1 - p_safe))

    # Predicted box corners in pixels
    row0 = int(p[0] * POKE_DIM)
    col0 = int(p[1] * POKE_DIM)
    row1 = int(row0 + p[2] * POKE_DIM)
    col1 = int(col0 + p[3] * POKE_DIM)

    print(f'Pred: {(row0, col0, row1, col1)}')
    print(f'Loss: {loss}')

    fig, ax = plt.subplots(1)
    ax.imshow(x.astype(np.uint8))
    # Rectangle takes (x, y) of the top-left corner, then width, then height
    rect = Rectangle((p[1] * POKE_DIM, p[0] * POKE_DIM), p[3] * POKE_DIM, p[2] * POKE_DIM,
                     linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
    plt.show()

In [None]:
# Run one random flip prediction with the currently trained `model` and plot it
make_predictions_flip()

# ADD ACTUAL BACKGROUND

In [None]:
# !wget -nc https://lazyprogrammer.me/course_files/backgrounds.zip
# !unzip -n backgrounds.zip

In [None]:
from glob import glob
# Background images, loaded once into memory as arrays
backgrounds = []

# NOTE(review): absolute, machine-specific path; adjust it to wherever
# backgrounds.zip was extracted.
background_files = glob(r'C:\Users\Doan Trong Hieu\Downloads\IMPORTANT\SPECIALIZATION\Artificial_Intelligence\COMPUTER VISION\CODING_COMPUTER_VISION\UDM_Lazy_Programmer_Inc_De_Le_Ad_Co_Vi\backgrounds\*.jpg')
for f in background_files:
    # They may not all be the same size, so keep a list of separate arrays
    # instead of stacking them into one array
    bg = np.array(image.load_img(f))
    backgrounds.append(bg)

In [None]:
# Preview one of the loaded backgrounds (requires at least 4 matched files)
plt.imshow(backgrounds[3])

In [None]:
def pokemon_generator_bg(batch_size=64):
    """Endlessly yield batches of the sprite composited onto random backgrounds.

    For every sample a random ``POKE_DIM x POKE_DIM`` crop of a random entry of
    the module-level ``backgrounds`` list is taken, the sprite ``ch`` is
    rescaled by a factor in [0.5, 1.5), flipped left-to-right with probability
    0.5, and pasted at a random position. Pixels where the sprite's alpha
    channel is 0 keep the background.

    Args:
        batch_size: Number of images per yielded batch.

    Yields:
        ``(X / 255., Y)`` where ``X`` has shape
        ``(batch_size, POKE_DIM, POKE_DIM, 3)`` and each row of ``Y`` is
        ``(row0, col0, height, width)`` divided by ``POKE_DIM``.

    Raises:
        ValueError: If ``ch`` has no alpha channel (fewer than 4 channels).
    """
    # The transparency mask below reads channel index 3; fail with a clear
    # message instead of an IndexError if the sprite was loaded as RGB.
    if ch.ndim != 3 or ch.shape[2] < 4:
        raise ValueError(
            'pokemon_generator_bg needs an RGBA sprite (4 channels); '
            f'got array of shape {ch.shape}'
        )

    while True:
        # Each epoch will have 50 batches
        for _ in range(50):
            X = np.zeros(shape=(batch_size, POKE_DIM, POKE_DIM, 3))
            Y = np.zeros(shape=(batch_size, 4))

            for i in range(batch_size):
                # Select a random background and a random crop of it
                bg_idx = np.random.choice(len(backgrounds))
                bg = backgrounds[bg_idx]
                bg_h, bg_w, _ = bg.shape
                # randint's upper bound is exclusive; +1 makes the last valid
                # offset reachable and lets a background that is exactly
                # POKE_DIM in size work instead of raising.
                rnd_h = np.random.randint(bg_h - POKE_DIM + 1)
                rnd_w = np.random.randint(bg_w - POKE_DIM + 1)
                X[i] = bg[rnd_h:rnd_h + POKE_DIM, rnd_w:rnd_w + POKE_DIM]

                # Resize - bigger or smaller
                scale = 0.5 + np.random.random()  # [0.5 -> 1.5)
                new_height = int(CH_H * scale)
                new_width = int(CH_W * scale)
                obj = resize(image=ch, output_shape=(new_height, new_width),
                             preserve_range=True).astype(np.uint8)  # Keep 0 -> 255

                # Maybe flip
                if np.random.random() < 0.5:
                    obj = np.fliplr(obj)

                # Choose a random location to store the object
                row0 = np.random.randint(POKE_DIM - new_height)
                col0 = np.random.randint(POKE_DIM - new_width)
                row1 = row0 + new_height
                col1 = col0 + new_width

                # Can't just assign obj to a slice of X: the transparent parts
                # would overwrite the background. Select per pixel instead of
                # adding, so transparent pixels keep the background even if
                # their hidden RGB values are non-zero.
                transparent = (obj[:, :, 3] == 0)[:, :, np.newaxis]  # (h, w, 1)
                bg_slice = X[i, row0:row1, col0:col1, :]             # (h, w, 3)
                X[i, row0:row1, col0:col1, :] = np.where(
                    transparent, bg_slice, obj[:, :, :3])

                # Targets, normalised by the canvas size
                Y[i, 0] = row0 / POKE_DIM           # Top left (row)
                Y[i, 1] = col0 / POKE_DIM           # Top left (col)
                Y[i, 2] = (row1 - row0) / POKE_DIM  # Height
                Y[i, 3] = (col1 - col0) / POKE_DIM  # Width

            yield X / 255., Y

In [None]:
# Pull a single batch from the background generator and preview one sample
xx = None
yy = None
for x, y in pokemon_generator_bg():
    xx, yy = x, y
    break  # The generator is infinite; one batch is enough
plt.imshow(xx[5]);

In [None]:
# Build a fresh regressor (rebinding the global `model`) and train it on the
# background generator: 5 epochs of 50 steps each.
model = make_model2()
model.fit(pokemon_generator_bg(), steps_per_epoch=50, epochs=5)

In [None]:
from matplotlib.patches import Rectangle

# Make predictions
def make_predictions_flip():
    """Predict and plot the sprite's box on a sample drawn like the training data.

    This definition replaces the earlier function of the same name (the name is
    kept because the following cell calls it). The model in this section is
    trained on ``pokemon_generator_bg``, so the test image is drawn from that
    same generator (random background crop, random scale, random flip) rather
    than from a black canvas with a fixed-size sprite.

    Prints the predicted box corners ``(row0, col0, row1, col1)`` in pixels and
    the binary cross-entropy between the true and predicted normalised box,
    then shows the image with the predicted box in red.
    """
    # One sample: X is (1, POKE_DIM, POKE_DIM, 3) already divided by 255,
    # Y is (1, 4) holding the normalised (row0, col0, height, width)
    X, Y = next(pokemon_generator_bg(batch_size=1))
    x, y = X[0], Y[0]

    # Predict
    p = model.predict(X)[0]

    # Clip before taking logs so a saturated sigmoid (exactly 0 or 1) cannot
    # turn the reported loss into nan/inf.
    eps = 1e-7
    p_safe = np.clip(np.asarray(p, dtype=np.float64), eps, 1 - eps)
    loss = -np.mean(y * np.log(p_safe) + (1 - y) * np.log(1 - p_safe))

    # Predicted box corners in pixels
    row0 = int(p[0] * POKE_DIM)
    col0 = int(p[1] * POKE_DIM)
    row1 = int(row0 + p[2] * POKE_DIM)
    col1 = int(col0 + p[3] * POKE_DIM)

    print(f'Pred: {(row0, col0, row1, col1)}')
    print(f'Loss: {loss}')

    fig, ax = plt.subplots(1)
    # x is a float image; clip to [0, 1] so imshow displays it without warnings
    ax.imshow(np.clip(x, 0., 1.))
    # Rectangle takes (x, y) of the top-left corner, then width, then height
    rect = Rectangle((p[1] * POKE_DIM, p[0] * POKE_DIM), p[3] * POKE_DIM, p[2] * POKE_DIM,
                     linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
    plt.show()

In [None]:
# Calls the most recent definition of make_predictions_flip (the one in this
# section) with the model trained on the background generator
make_predictions_flip()