# Dependencies

In [1]:
from sys import path
path.append('src/')

import cv2
import numpy as np
import matplotlib.pyplot as plt
import pickle as pkl
from os.path import basename, splitext
from glob import glob
from src.utils import load
from src.mrk_file import MRKFile

%matplotlib inline

In [2]:
IMAGE_SIZE = (224, 224)

In [3]:
def load_image_and_mrk_files(image_folder, mrk_folder):
    image_files = glob(image_folder)
    mrk_files = glob(mrk_folder)
    assert(len(image_files) == len(mrk_files))

    file_name = lambda f: splitext(basename(f))[0]    
    for img, mrk in zip(image_files, mrk_files):
        assert(file_name(img) == file_name(mrk))
    return image_files, mrk_files

def load_mrk_objects(list_mrk_files):
    return np.array([MRKFile(mrk_file) for mrk_file in list_mrk_files])

# Data Loading 

In [None]:
train_image_files, train_mrk_files = load_image_and_mrk_files('data/train/images/*', 'data/train/ground_truth/*')

print(len(train_image_files))
print(len(train_mrk_files))

In [None]:
val_image_files, val_mrk_files = load_image_and_mrk_files('data/val/**/images/*', 'data/val/**/ground_truth/*')

print(len(val_image_files))
print(len(val_mrk_files))

In [None]:
x_train = load.images_from_list_files(train_image_files, output_size=IMAGE_SIZE, interpolation=cv2.INTER_AREA)
train_mrks = load_mrk_objects(train_mrk_files)

print(x_train.shape, x_train.dtype)
pkl.dump((train_image_files, x_train, train_mrks), open('data/train_dump.pkl', 'wb'), protocol=-1)

In [None]:
x_val = load.images_from_list_files(val_image_files, output_size=IMAGE_SIZE, interpolation=cv2.INTER_AREA)
val_mrks = load_mrk_objects(val_mrk_files)

print(x_val.shape, x_val.dtype)
pkl.dump((val_image_files, x_val, val_mrks), open('data/val_dump.pkl', 'wb'), protocol=-1)

# Autoencoder 

In [4]:
train_image_files, _ = load_image_and_mrk_files('data/train/images/*', 'data/train/ground_truth/*')
val_image_files, _ = load_image_and_mrk_files('data/val/**/images/*', 'data/val/**/ground_truth/*')

print(len(train_image_files))
print(len(val_image_files))

5211
571


In [5]:
x_train = load.images_from_list_files(train_image_files, output_size=(232, 232), interpolation=cv2.INTER_AREA)
x_val = load.images_from_list_files(val_image_files, output_size=(232, 232), interpolation=cv2.INTER_AREA)

print(x_train.shape, x_train.dtype)
print(x_val.shape, x_val.shape)

5211 of 5211
571 of 571
(5211, 232, 232, 3) float32
(571, 232, 232, 3) (571, 232, 232, 3)


In [6]:
pkl.dump((x_train, x_val), open('data/autoencoder_samples.pkl', 'wb'), protocol=-1)