## Mount GoogleDrive

In [1]:
# Colab's Drive helper; `drive` is used again in the last cell to unmount.
from google.colab import drive
# Mount Google Drive under /content/gdrive; the dataset folder configured in the
# parameters cell lives below this mount point.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


## Parameters

In [2]:
# Dataset
dataset_folder_path = '/content/gdrive/MyDrive/deephomography/dataset/'
rho = 32
patch_size = 128
image_size = (320,240)

# Create required folder(s)
!mkdir -p {dataset_folder_path}

## Download and extract COCO 2014 dataset

In [3]:
!wget http://images.cocodataset.org/zips/train2014.zip
!wget http://images.cocodataset.org/zips/val2014.zip
!wget http://images.cocodataset.org/zips/test2014.zip

!echo "Extracting train2014.zip..."
!unzip -q train2014.zip

!echo "Extracting val2014.zip..."
!unzip -q val2014.zip

!echo "Extracting test2014.zip..."
!unzip -q test2014.zip

!echo "Deleting downloaded .zip files..."
!rm -f train2014.zip val2014.zip test2014.zip

--2022-07-10 15:11:32--  http://images.cocodataset.org/zips/train2014.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.217.91.124
Connecting to images.cocodataset.org (images.cocodataset.org)|52.217.91.124|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13510573713 (13G) [application/zip]
Saving to: ‘train2014.zip’


2022-07-10 15:16:39 (42.0 MB/s) - ‘train2014.zip’ saved [13510573713/13510573713]

--2022-07-10 15:16:39--  http://images.cocodataset.org/zips/val2014.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 3.5.11.188
Connecting to images.cocodataset.org (images.cocodataset.org)|3.5.11.188|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6645013297 (6.2G) [application/zip]
Saving to: ‘val2014.zip’


2022-07-10 15:19:14 (41.1 MB/s) - ‘val2014.zip’ saved [6645013297/6645013297]

--2022-07-10 15:19:14--  http://images.cocodataset.org/zips/test2014.zip
Resolving images.cocodataset.org (images.cocodata

## Data generation

In [4]:
import os
import cv2
import random
import numpy as np
from tqdm import tqdm

# Allowed range for a patch's top-left corner, as [(low_x, low_y), (high_x, high_y)].
# Leave these bounds untouched: they keep a rho-wide margin between the patch and
# the image border, which is what prevents bordering artifacts.
tl_point_limits = [
    (rho, rho),
    (image_size[0] - rho - patch_size, image_size[1] - rho - patch_size),
]

# Function to generate a single sample
def generate_sample(image_path):
    """Generate one homography-estimation sample from an image file.

    The image is read as grayscale and resized to ``image_size``. A random
    ``patch_size`` x ``patch_size`` patch is chosen, each of its four corners is
    displaced by a random integer offset in [-rho, rho], and the homography
    mapping the original corners onto the displaced ones is computed. The
    inverse of that homography is applied to the whole image, and the patch at
    the same pixel location is cut out of the warped result.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A tuple ``(patch, perturbed_patch, patch_points, perturbation)``:
        the patch from the resized image, the patch from the warped image,
        the (4, 2) array of patch corners as (x, y) in the order top-left,
        top-right, bottom-right, bottom-left, and the (4, 2) array of corner
        offsets that were added to them.

    Raises:
        OSError: If the file cannot be read or decoded as an image.
    """
    # Read image file (as grayscale)
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports failure by returning None instead of raising; stop here
    # with the offending path rather than with an opaque error inside cv2.resize
    if image is None:
        raise OSError(f'Could not read image file: {image_path}')

    # Resize image to image_size
    image = cv2.resize(image, image_size, interpolation=cv2.INTER_CUBIC)

    # Select a random patch (points are (x, y); the upper limit is exclusive)
    tl_point = np.random.randint(tl_point_limits[0], tl_point_limits[1], size=2)
    tr_point = tl_point + (patch_size,0)
    br_point = tl_point + (patch_size,patch_size)
    bl_point = tl_point + (0,patch_size)
    patch_points = np.array([tl_point, tr_point, br_point, bl_point])

    # Extract patch (rows are indexed by y, columns by x)
    patch = image[tl_point[1]:br_point[1], tl_point[0]:br_point[0]]

    # Create a random perturbation (or H_fp)
    perturbation = np.random.randint(-rho, rho+1, size=(4,2))
    perturbed_patch_points = patch_points + perturbation

    # Find corresponding homography and its inverse
    H = cv2.getPerspectiveTransform(patch_points.astype(np.float32), perturbed_patch_points.astype(np.float32))
    H_inv = np.linalg.inv(H)

    # Apply inverse homography to image
    warped_image = cv2.warpPerspective(image, H_inv, image_size)

    # Extract perturbed patch (same pixel window as the original patch)
    perturbed_patch = warped_image[tl_point[1]:br_point[1], tl_point[0]:br_point[0]]

    # Return generated sample
    return patch, perturbed_patch, patch_points, perturbation

def generate_samples_from_folder(folder_path, num_samples=0):
    """Generate homography samples for the images in a folder and save them to disk.

    Samples are written to a sibling folder named ``<folder_path>homography``
    (e.g. ``train2014`` -> ``train2014homography``), one file per sample, named
    by an 8-digit running index. Each file is written with ``np.save`` and holds
    the tuple returned by ``generate_sample`` as an object array, so loading it
    back requires ``np.load(..., allow_pickle=True)``.

    Args:
        folder_path: Folder containing the source images. A trailing path
            separator is tolerated.
        num_samples: Number of samples to generate. If it exceeds the number of
            files, files are drawn with replacement; if it is positive and not
            larger than the number of files, that many distinct files are
            drawn; otherwise (0 or negative) every file is used exactly once.

    Returns:
        None.
    """
    # Create target folder
    # Strip trailing separators first: 'train2014/' must still map to the sibling
    # 'train2014homography', not to a 'homography' folder nested inside the source
    # (which would then show up in the listing below as an input).
    separators = os.sep + (os.altsep or '')
    source_name = folder_path.rstrip(separators) or folder_path
    target_folder_path = source_name + 'homography'
    os.makedirs(target_folder_path, exist_ok=True)

    print('Generating ' + target_folder_path + '...')

    # List folder contents: regular files only (sub-folders cannot be decoded as
    # images), sorted because os.listdir returns entries in arbitrary order
    file_list = sorted(
        name for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )
    if num_samples > len(file_list):
        file_list = random.choices(file_list, k=num_samples)
    elif num_samples > 0:
        file_list = random.sample(file_list, k=num_samples)

    # Nothing to process (empty folder and no explicit sample count requested)
    if not file_list:
        print('No input files found in ' + folder_path)
        return

    # Process files
    for count,file in enumerate(tqdm(file_list)):
        file_path = os.path.join(folder_path, file)
        target_file_path = os.path.join(target_folder_path, f'{count:08d}')

        # Save generated sample
        np.save(target_file_path, np.array((generate_sample(file_path)), dtype=object))

## Process folders and generate new data for homography estimation

In [5]:
generate_samples_from_folder(folder_path='train2014', num_samples=500000)
generate_samples_from_folder(folder_path='val2014')
generate_samples_from_folder(folder_path='test2014', num_samples=5000)

!echo "Deleting dataset folders..."
!rm -dr train2014 val2014 test2014

Generating train2014homography...


100%|██████████| 500000/500000 [1:39:58<00:00, 83.35it/s]


Generating val2014homography...


100%|██████████| 40504/40504 [08:03<00:00, 83.73it/s]


Generating test2014homography...


100%|██████████| 5000/5000 [01:05<00:00, 76.87it/s]


Deleting dataset folders...


## Tarball generated datasets

In [6]:
!echo "Tarballing train2014homography..."
!tar -cf train2014homography.tar train2014homography

!echo "Tarballing val2014homography..."
!tar -cf val2014homography.tar val2014homography

!echo "Tarballing test2014homography..."
!tar -cf test2014homography.tar test2014homography

!echo "Deleting generated dataset folders..."
!rm -dr train2014homography val2014homography test2014homography

Tarballing train2014homography...
Tarballing val2014homography...
Tarballing test2014homography...
Deleting generated dataset folders...


## Move tarballs to GoogleDrive

In [7]:
!echo "Moving train2014homography.tar..."
!mv train2014homography.tar {dataset_folder_path}

!echo "Moving val2014homography.tar..."
!mv val2014homography.tar {dataset_folder_path}

!echo "Moving test2014homography.tar..."
!mv test2014homography.tar {dataset_folder_path}

Moving train2014homography.tar...
Moving val2014homography.tar...
Moving test2014homography.tar...


## Unmount GoogleDrive

In [8]:
# Flush and unmount the Drive mounted in the first cell, now that the tarballs
# have been moved into the dataset folder.
drive.flush_and_unmount()