In [5]:
%matplotlib inline

import numpy as np
import cv2
import matplotlib.pyplot as plt
import transformations as t
from mpl_toolkits.mplot3d import Axes3D
import os
import threading

def show_cv2(img):
    """Display a three-channel OpenCV image inline with matplotlib.

    The channels of ``img`` are unpacked as blue, green, red and merged back
    in red, green, blue order before the image is handed to ``plt.imshow``.
    """
    blue, green, red = cv2.split(img)
    rgb = cv2.merge([red, green, blue])

    plt.figure(figsize=(15, 15))
    plt.imshow(rgb)
    plt.show()

def plot_histogram(img):
    """Plot a 256-bin intensity histogram for each of the three image channels.

    Channels 0, 1 and 2 are drawn in blue, green and red respectively, all on
    the same axes, and the figure is shown once every curve has been added.
    """
    channel_colors = ('b', 'g', 'r')
    for channel_index in range(len(channel_colors)):
        counts = cv2.calcHist([img], [channel_index], None, [256], [0, 256])
        plt.plot(counts, color=channel_colors[channel_index])
        plt.xlim([0, 256])
    plt.show()
    
def add_grayscale_noise(img, std_dev):
    """Add zero-mean Gaussian noise that is identical across colour channels.

    One noise value is drawn per pixel and added to every channel of that
    pixel, so the perturbation shifts brightness without shifting hue.

    Args:
        img: Image array of shape (H, W) or (H, W, C) with values in 0..255.
        std_dev: Standard deviation of the noise, in intensity levels.

    Returns:
        A new ``uint8`` array with the same shape as ``img``.
    """
    # Truncate the noise to whole intensity levels (as an integer cast would)
    # but keep it in floating point: an int8 cast cannot represent samples
    # outside [-128, 127], which corrupts the noise once std_dev gets large.
    noise = np.trunc(np.random.normal(0, std_dev, size=img.shape[:2]))
    if img.ndim == 3:
        # Give the single noise plane a channel axis so it broadcasts over
        # every channel; a 2-D image needs no extra axis.
        noise = noise[:, :, np.newaxis]
    noisy = np.clip(img.astype(np.float64) + noise, 0, 255)
    return noisy.astype(np.uint8)

def add_colored_noise(img, std_dev):
    """Add independent zero-mean Gaussian noise to every element of ``img``.

    Each channel of each pixel receives its own noise sample, so the result
    varies in colour as well as brightness.

    Args:
        img: Image array with values in 0..255.
        std_dev: Standard deviation of the noise, in intensity levels.

    Returns:
        A new ``uint8`` array with the same shape as ``img``.
    """
    # Truncate the noise to whole intensity levels (as an integer cast would)
    # but keep it in floating point: an int8 cast cannot represent samples
    # outside [-128, 127], which corrupts the noise once std_dev gets large.
    noise = np.trunc(np.random.normal(0, std_dev, size=img.shape))
    noisy = np.clip(img.astype(np.float64) + noise, 0, 255)
    return noisy.astype(np.uint8)


In [6]:
def listfiles(path):
    """Return the path of every file found under ``path``, including subdirectories.

    Each entry is the containing directory (as reported by ``os.walk``) joined
    with the file name; entries appear in ``os.walk`` traversal order.
    """
    found = []
    for dirpath, _dirnames, filenames in os.walk(path):
        found.extend(os.path.join(dirpath, name) for name in filenames)
    return found
# Collect every file under the tag folder and the background-scene folder.
tag_urls, scene_urls = (listfiles(folder) for folder in ("./tags", "./places"))

# Sanity check: show the first path found in each collection.
print(tag_urls[0], scene_urls[0])

./tags/tag36_11_00376.png ./places/Places365_val_00001162.jpg


In [7]:
# Camera intrinsics and other shared constants.
# TODO: capitalize these variable names (needs a find-and-replace across the notebook).

# Alternative intrinsics, kept for reference:
#camera_intrinsics = [1.1998640834468974e+03, 0, 640.0 / 2, 0, 1.1998640834468974e+03, 480.0 / 2, 0, 0, 1]
camera_intrinsics = np.array([
    [1473.967474, 0.000000, 569.329599],
    [0.000000, 1473.916904, 379.610277],
    [0.000000, 0.000000, 1.000000],
])

# fx/fy are read from the diagonal, cx/cy from the last column of the matrix.
fx, fy = camera_intrinsics[0, 0], camera_intrinsics[1, 1]
cx, cy = camera_intrinsics[0, 2], camera_intrinsics[1, 2]

# Edge length of the tag in world units; gen_sample scales the tag's 3D corners by it.
tag_size = 0.1

print(camera_intrinsics)

[[1.47396747e+03 0.00000000e+00 5.69329599e+02]
 [0.00000000e+00 1.47391690e+03 3.79610277e+02]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00]]


In [15]:
def _read_resized(path, size):
    """Read the image at ``path`` and resize it to ``size`` (width, height), nearest-neighbour."""
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports an unreadable/non-image file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        raise IOError("Could not read image file: {}".format(path))
    return cv2.resize(img, size, interpolation=cv2.INTER_NEAREST)


def gen_sample(plot=False, low_res=False):
    """Generate one synthetic training sample: a scene with a tag warped into it.

    A random tag image is given a random 3D pose in front of the camera,
    projected with ``camera_intrinsics`` and composited onto either a random
    scene photo or (about 1 time in 20) a flat noisy background. Random
    lighting, noise and blur are applied along the way.

    Reads the module-level ``scene_urls``, ``tag_urls``, ``tag_size``,
    ``camera_intrinsics``, ``fx``, ``fy``, ``cx`` and ``cy``, and draws from
    the global ``np.random`` state.

    Args:
        plot: If true, display the intermediate and final images.
        low_res: If true, return 640x360 images and halve the box values.

    Returns:
        Tuple ``(img_scene, img_scene_with_tag, center_x, center_y, width, height)``:
        the scene without the tag (it may contain a blank tag-shaped patch),
        the scene with the tag, the mean of the projected corners, and the
        corner extents padded by 5 pixels, all in pixels of the returned images.

    Raises:
        IOError: If the chosen scene or tag file cannot be read as an image.
    """
    scene_w, scene_h = 1280, 720

    #img_scene = np.random.randint(0, 255, size=(768, 1024, 3)).astype(np.uint8)
    # Occasionally use a flat synthetic background instead of a photo
    # (original note: eliminate tag borders sometimes to prevent overfitting).
    white_background = bool(np.random.randint(20) == 1)

    if not white_background:
        img_scene = _read_resized(np.random.choice(scene_urls), (scene_w, scene_h))

    img_tag = _read_resized(np.random.choice(tag_urls), (200, 200))

    world_corners = np.array([[-1, -1], [1, -1], [-1, 1], [1, 1]]) * tag_size / 2.0
    # Corner points are (x, y), so scale by (width, height); ``shape`` is
    # ordered (rows, cols) and would swap the axes for a non-square tag.
    tag_h, tag_w = img_tag.shape[:2]
    pixel_corners = np.array([[0, 0], [1, 0], [0, 1], [1, 1]]) * np.array([tag_w, tag_h])

    # Add a Z component and homogeneous coordinate
    corners_3d = np.hstack((world_corners, np.array([[0, 1]] * 4)))

    angle_lim = np.pi / 2 * 0.7
    done = False
    while not done:
        # Apply a random rotation about a random axis lying in the tag plane
        #rotation_matrix = t.rotation_matrix(np.random.uniform(-angle_lim, angle_lim), [1, 0, 0])
        #rotation_matrix = rotation_matrix @ t.rotation_matrix(np.random.uniform(-angle_lim, angle_lim), [0, 1, 0])
        rand_vector = t.random_vector(3)
        rand_vector[2] = 0
        rand_vector /= np.linalg.norm(rand_vector)
        rotation_matrix = t.rotation_matrix(np.random.uniform(-angle_lim, angle_lim), rand_vector)
        #rotation_matrix = rotation_matrix @ t.rotation_matrix(np.random.uniform(-np.pi, np.pi), [0, 0, 1])
        corners_3d_rotated = (rotation_matrix @ corners_3d.T).T

        # Translate our corners to a random 3D point within our camera view
        #z = np.random.triangular(0.2, 0.2, 5)
        z = np.random.uniform(0.2, 3.5)
        x = np.random.uniform(-1, 1) * z / fx * cx
        y = np.random.uniform(-1, 1) * z / fy * cy
        translation = np.array([x, y, z])
        translation_matrix = t.translation_matrix(translation)
        corners_3d_transformed = (translation_matrix @ corners_3d_rotated.T).T

        # Project into 2D image space
        projected_transformed = camera_intrinsics @ corners_3d_transformed.T[:3]
        projected_transformed /= projected_transformed[2]
        projected_transformed = np.vstack((projected_transformed[0], projected_transformed[1]))

        # Reject views that have tag corners chopped off
        done = True
        for (px, py) in projected_transformed.T:
            if px < 0 or py < 0 or px > scene_w or py > scene_h:
                done = False

    center_x, center_y = np.mean(projected_transformed, axis=1)
    width = np.max(projected_transformed[0]) - np.min(projected_transformed[0])
    height = np.max(projected_transformed[1]) - np.min(projected_transformed[1])

    # Compute a homography from tag pixels to the projected corners
    H = cv2.findHomography(pixel_corners, projected_transformed.T)[0]

    # Random lighting condition: compress the dynamic range and add a
    # brightness offset with a small random per-channel component
    dynamic_range = np.random.uniform(0.4, 1.0)
    color_shift = (1.0 - dynamic_range) * np.random.uniform(0, 255) + np.random.normal(0.0, 10, size=3)

    img_tag_lighting = img_tag * dynamic_range
    img_tag_lighting += color_shift
    img_tag_lighting = np.clip(img_tag_lighting, 0.0, 255.0).astype(np.uint8)

    # A blank (all-white) tag under the same lighting
    img_tag_blank = np.zeros(img_tag.shape) + 255.0 * dynamic_range
    img_tag_blank += color_shift
    img_tag_blank = np.clip(img_tag_blank, 0.0, 255.0).astype(np.uint8)

    if white_background:
        img_scene = np.clip(np.zeros((scene_h, scene_w, 3)) + 255.0 * dynamic_range + color_shift, 0.0, 255.0).astype(np.uint8)
        img_scene = add_colored_noise(img_scene, 4)
        img_scene = add_grayscale_noise(img_scene, 10)

    # Some noise
    img_tag_filtered = cv2.GaussianBlur(img_tag_lighting, (3, 3), 0)
    img_tag_filtered = add_colored_noise(img_tag_filtered, 4)
    img_tag_filtered = add_grayscale_noise(img_tag_filtered, 10)
    img_tag_blank = cv2.GaussianBlur(img_tag_blank, (3, 3), 0)
    img_tag_blank = add_colored_noise(img_tag_blank, 4)
    img_tag_blank = add_grayscale_noise(img_tag_blank, 10)

    # Overlay warped image onto a copy so the tag-free scene stays available
    img_scene_with_tag = np.array(img_scene)
    cv2.warpPerspective(img_tag_filtered, H, dsize=img_scene.shape[:2][::-1], dst=img_scene_with_tag, borderMode=cv2.BORDER_TRANSPARENT)

    if np.random.randint(3) == 1:
        # put a blank rectangle on the "to tag" scene for some extra overfitting protection
        cv2.warpPerspective(img_tag_blank, H, dsize=img_scene.shape[:2][::-1], dst=img_scene, borderMode=cv2.BORDER_TRANSPARENT)
    blur_size = np.random.randint(4) * 2 + 1
    img_scene_with_tag = cv2.GaussianBlur(img_scene_with_tag, (blur_size, blur_size), 0)
    img_scene = cv2.GaussianBlur(img_scene, (blur_size, blur_size), 0)

    # Plot?
    if plot:
        # Bias our tag's color towards the image average color.
        # NOTE(review): this runs after the tag has already been composited,
        # so it only changes the preview below, never the returned images.
        # It is kept under ``plot`` to skip a full-image mean per sample.
        img_tag_filtered = (img_tag_filtered * 49.0 / 50.0 + (np.mean(img_scene, axis=(0, 1)) / 50.0).astype(np.int16)).astype(np.uint8)

        show_cv2(img_tag_filtered)
        show_cv2(img_tag_blank)

        # Visualize the scene without and with the tag, then at low resolution
        show_cv2(img_scene)
        show_cv2(img_scene_with_tag)
        show_cv2(cv2.resize(img_scene_with_tag, (640, 360), interpolation=cv2.INTER_NEAREST))

    # Pad the box extents by 5 pixels in total
    width += 5
    height += 5
    if low_res:
        half_size = (scene_w // 2, scene_h // 2)
        img_scene = cv2.resize(img_scene, half_size, interpolation=cv2.INTER_NEAREST)
        img_scene_with_tag = cv2.resize(img_scene_with_tag, half_size, interpolation=cv2.INTER_NEAREST)
        center_x /= 2
        width /= 2
        center_y /= 2
        height /= 2
    return img_scene, img_scene_with_tag, center_x, center_y, width, height

# gen_sample(False, True)
# pass

In [34]:
# Generate the training set: one JPEG per sample plus a CSV of bounding boxes

labels_path = "generated_data_labels.csv"
output_dir = "generated_data"

# cv2.imwrite reports failure only through its return value, so make sure the
# target folder exists before the loop starts.
os.makedirs(output_dir, exist_ok=True)

# newline="" stops the explicit "\r\n" terminators from being translated again
# on platforms that rewrite "\n".
with open(labels_path, "w", newline="") as fd:
    fd.write("filename,width,height,class,xmin,ymin,xmax,ymax\r\n")

for count in range(10000):
    img_scene, img_scene_with_tag, center_x, center_y, bbox_width, bbox_height = gen_sample()

    # train_labels.csv
    # columns as filename,width,height,class,xmin,ymin,xmax,ymax

    filename = "{}/sample_{}.jpg".format(output_dir, count)
    width = img_scene_with_tag.shape[1]
    height = img_scene_with_tag.shape[0]
    class_label = "tag"
    # gen_sample pads the box, which can push it slightly past the image
    # edge; clamp so every label stays inside the image.
    min_x = max(0, int(center_x - bbox_width / 2))
    min_y = max(0, int(center_y - bbox_height / 2))
    max_x = min(width, int(center_x + bbox_width / 2))
    max_y = min(height, int(center_y + bbox_height / 2))

    # cv2.rectangle(img_scene_with_tag, (min_x, min_y), (max_x, max_y), (0, 0, 255), thickness=2)
    # show_cv2(img_scene_with_tag)

    # Write the image first so an interrupted run never leaves a label row
    # that points at a missing file.
    if not cv2.imwrite(filename, img_scene_with_tag, [int(cv2.IMWRITE_JPEG_QUALITY), 90]):
        raise IOError("Failed to write image: {}".format(filename))

    elements = [filename, width, height, class_label, min_x, min_y, max_x, max_y]
    row = ",".join(str(x) for x in elements)
    # Append one row at a time so labels written so far survive an interrupt.
    with open(labels_path, "a", newline="") as fd:
        fd.write(row + "\r\n")

KeyboardInterrupt: 