<h1>"Louis, I think this is the beginning of a beautiful friendship."</h1>

In [1]:
import numpy as np
import pandas as pd
import glob, os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from os import listdir
from os.path import isfile, join
from PIL import Image
from sklearn.preprocessing import StandardScaler
import matplotlib.colors as mcolors

In [2]:
# Load the raw images and their segmentation annotations, split them into
# train/validation/test parts, and standardize the training RGB channels.
RAW_DIR = 'data/raw_images/'
ANNOT_DIR = 'data/class_color/'


def list_image_files(base_dir):
    """Return every file found under base_dir as a sorted list of paths relative to base_dir.

    os.walk descends into subdirectories, so the relative path (not just the
    bare file name) is kept; for a flat directory this is the bare file name.
    Sorting makes the order deterministic, because os.walk gives no ordering
    guarantee.
    """
    relative_paths = []
    for root, dirs, files in os.walk(base_dir):
        for filename in files:
            relative_paths.append(os.path.relpath(os.path.join(root, filename), base_dir))
    return sorted(relative_paths)


def load_image(path):
    """Read one image file into a numpy array and close the file afterwards."""
    with Image.open(path) as img:
        return np.array(img)


# Reading data
print("Reading data...")
data_filenames = list_image_files(RAW_DIR)
data = [load_image(os.path.join(RAW_DIR, filename)) for filename in data_filenames]
print("Number of raw images: \t", end="")
print(len(data))

print("\nReading annotated images of segmentation...")
# NOTE(review): pairing data[i] with annot[i] assumes that raw and annotated
# files sort into the same order by name -- confirm against the dataset layout.
annot_filenames = list_image_files(ANNOT_DIR)
annot = [load_image(os.path.join(ANNOT_DIR, filename)) for filename in annot_filenames]
print("Number of annotated images: \t", end="")
print(len(annot))
if len(data) == len(annot):
    print("\nAll raw images are annotated.\n")
else:
    # The positional split below silently mis-pairs samples in this case.
    print("\nWarning: number of raw images and annotated images differ.\n")

# Splitting data into train-validation-test parts with ratios 70-20-10
print("Splitting data into training data, validation data, test data")
nb_samples = len(data_filenames)
# Splitting ratios:
valid_split = 0.2
test_split = 0.1
train_split = 0.7
print("The ratios are: ")
print("\t train:\t", train_split )
print("\t validation::\t",valid_split )
print("\t test:\t",test_split)

# Splitting: the boundaries are computed once so all six slices agree.
train_end = int(nb_samples*(1-valid_split-test_split))
valid_end = int(nb_samples*(1-test_split))
data_train = np.array(data[0:train_end])
annot_train = np.array(annot[0:train_end])
data_valid = data[train_end:valid_end]
annot_valid = annot[train_end:valid_end]
data_test = data[valid_end:]
annot_test = annot[valid_end:]

# Separation of axes (RGB channels).
# The stride-3 slicing assumes every image is stored as interleaved
# 3-channel pixels (e.g. H x W x 3) -- TODO confirm no RGBA/grayscale inputs.
red_train = []
green_train = []
blue_train = []
for img in data_train:
    image = np.array(img.ravel(), dtype='float64')
    red_train.append(image[0::3])
    green_train.append(image[1::3])
    blue_train.append(image[2::3])


# Standardizing: one mean/std per colour channel, computed over all training
# pixels. The scaler is fitted on a single-column view, so the transform must
# receive the same single-column layout (scikit-learn checks the feature
# count); the result is reshaped back to (n_images, n_pixels).
scaler = StandardScaler()

red_pixels = np.asarray(red_train)
red_std = scaler.fit_transform(red_pixels.reshape(-1, 1)).reshape(red_pixels.shape)

green_pixels = np.asarray(green_train)
green_std = scaler.fit_transform(green_pixels.reshape(-1, 1)).reshape(green_pixels.shape)

# After this last fit, `scaler` holds the blue-channel statistics.
blue_pixels = np.asarray(blue_train)
blue_std = scaler.fit_transform(blue_pixels.reshape(-1, 1)).reshape(blue_pixels.shape)

print("\nNumber of training samples:\t", len(data_train))
print("Number of validation samples:\t", len(data_valid))
print("Number of test samples:\t", len(data_test))

Reading data...
Number of raw images: 	100

Reading annotated images of segmentation...
Number of annotated images: 	100

All raw images are annotated.

Splitting data into training data, validation data, test data
The ratios are: 
	 train:	 0.7
	 validation::	 0.2
	 test:	 0.1

Standardized data:
Red:
[[ 0.70883338  0.75526445  0.77074147 ... -1.0865011  -1.11745515
  -1.13293217]
 [-0.99363897 -1.009116   -1.05554706 ... -0.82339174 -0.88529982
  -0.93173089]
 [-1.78296707 -1.78296707 -1.79844409 ... -0.73052961 -0.68409855
  -0.6531445 ]
 ...
 [-0.52932833 -0.49837429 -0.46742025 ...  1.45173041  1.45173041
   1.45173041]
 [-0.48289727 -0.48289727 -0.48289727 ...  1.54459254  1.54459254
   1.54459254]
 [-0.54480535 -0.54480535 -0.54480535 ...  1.62197764  1.60650062
   1.5910236 ]]

Green:
[[ 7.46722452e-01  7.90557744e-01  8.05169507e-01 ... -1.16741860e+00
  -1.19664213e+00 -1.21125390e+00]
 [-1.07974802e+00 -1.09435979e+00 -1.13819508e+00 ... -1.06513626e+00
  -1.12358331e+00 -1.

In [None]:
import tensorflow as tf
import cv2
import json
from keras.backend.tensorflow_backend import set_session
from keras.utils.np_utils import to_categorical
from keras.applications import imagenet_utils
from tensorflow.python.client import device_lib
# Build the session config with allow_growth enabled in one expression
# (per the TensorFlow docs this lets GPU memory be allocated incrementally),
# then register the resulting session as the one Keras uses.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
set_session(tf.Session(config=config))
# Device check: print the devices TensorFlow reports for this machine.
print(device_lib.list_local_devices())

  from ._conv import register_converters as _register_converters


In [None]:
def preprocess_input(x):
    """Run Keras' ImageNet preprocessing on ``x`` in ``'tf'`` mode and return the result.

    NOTE(review): per the Keras documentation, 'tf' mode scales pixel values
    to the range [-1, 1] -- confirm for the installed Keras version.
    """
    preprocessed = imagenet_utils.preprocess_input(x, mode='tf')
    return preprocessed

In [None]:
def data_generator(images, labels, batch_size=32, dim=(1024, 2048), n_classes=34, shuffle=True):
    """Endlessly yield (X, y) batches for semantic-segmentation training.

    Parameters
    ----------
    images, labels : sequences indexed by integer position; each entry is
        handed to ``imageio.imread``. ``images[i]`` is paired with ``labels[i]``.
    batch_size : number of samples per yielded batch.
    dim : target size; it is reversed before being passed to ``cv2.resize``,
        so it is presumably (height, width) -- verify against callers.
    n_classes : number of classes passed to ``to_categorical``.
    shuffle : when truthy, the sample order is reshuffled before every pass.

    Yields
    ------
    tuple ``(preprocess_input(X), to_categorical(y_semseg, num_classes=n_classes))``
    where ``X`` has shape ``(batch_size, *dim, 3)`` and ``y_semseg`` has shape
    ``(batch_size, *dim)``.

    Raises
    ------
    ValueError
        If fewer than ``batch_size`` samples are supplied. Without this check
        the loop below would spin forever without yielding a single batch.

    Only full batches are produced: the ``len(images) % batch_size`` samples
    left over in each pass are skipped for that pass.
    """
    # Initialization
    data_size = len(images)
    nbatches = data_size // batch_size
    if nbatches < 1:
        raise ValueError(
            "data_generator needs at least batch_size=%d samples, got %d"
            % (batch_size, data_size)
        )
    indices = np.arange(data_size)
    # cv2.resize expects the size in reversed order relative to `dim`.
    target_size = dim[1::-1]

    # Data generation
    while True:
        if shuffle:
            np.random.shuffle(indices)
        for index in range(nbatches):
            batch_indices = indices[index*batch_size:(index+1)*batch_size]

            X = np.empty((batch_size, *dim, 3))
            y_semseg = np.empty((batch_size, *dim), dtype=int)

            for i, ID in enumerate(batch_indices):
                # NOTE(review): `imageio` is not imported in the cells visible
                # here -- confirm it is imported before this generator is used.
                image = cv2.resize(np.array(imageio.imread(images[ID]), dtype=np.uint8), target_size)
                # Nearest-neighbour interpolation so resized labels only
                # contain values that were present in the original label image.
                label = cv2.resize(imageio.imread(labels[ID]), target_size, interpolation=cv2.INTER_NEAREST)
                X[i,] = image
                y_semseg[i] = label
            yield (preprocess_input(X), to_categorical(y_semseg, num_classes=n_classes))