# Building an Image Generator

This notebook goes over how to create a data generator that reads in a section of a dataset and trains the model sequentially. 

TensorFlow wasn't loading properly on my Windows computer, so I followed this link to create a new environment to run tensorflow things: https://medium.com/@mengjiunchiou/how-to-set-keras-with-tensorflow-with-conda-virtual-environment-on-ubuntu-5d56d22e3dc7

In [1]:
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
import matplotlib
matplotlib.use("Agg")
 
# import the necessary packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from skimage import io, filters, measure
from scipy import ndimage
from keras.models import Sequential
import cv2
from glob import glob
from sklearn.model_selection import train_test_split
from PIL import Image # for conversion to grayscale

Using TensorFlow backend.


In [3]:
# Data have been pre-processed already. The raw images were converted
# to grayscale and were split into 25 smaller images. Labeled images
# were created from the original images and are a series of 0 (no 
# person) and 255 (a person). The labeled images were also split
# into 25 pieces so that they would match the original images. 

# Find location of image files and labeled images
data = glob('data/raw/resized/with_people/splits/*hsv*.png')
labels = glob('data/processed/dots/with_people/splits/*.png')

# Split into the training and testing data
train_X, test_X = train_test_split(data, test_size=0.25, random_state=33)
train_Y, test_Y = train_test_split(labels, test_size=0.25, random_state=33)

In [4]:
# Create function to make a data generator for the model
def gather_images(images, labels, batch_size=10): 
    """ Takes the original and labeled images, combines them into np """
    """ arrays, and passes to model"""
    while 1: 
        for offset in range(0, len(images), batch_size): 
            X = [] # empty list for training data
            Y = [] # empty list for labels 
            for img in images[offset:offset+batch_size]: # for each image in the list
                img_temp = cv2.imread(img)
                #img_temp = Image.open(img).convert('LA')
                #img_temp = cv2.cvtColor(img_temp, cv2.COLOR_BGR2HSV)
                img_flatten = np.array(img_temp) # create np array
                X.append(img_flatten) # and add to list for X
            for lab in labels[offset:offset+batch_size]: # for each label in the list
                label_temp = io.imread(lab, as_gray=True)
                labels_temp = measure.label(label_temp)
                label_flatten = labels_temp.max() # create np array
                Y.append(label_flatten) # and add to list for y
            yield (np.array(X), np.array(Y).reshape(len(Y),1)) # yield X and y for the model

## Batch Size = 30, Conv # 1 = 5x5 with 3x3 pooling, Conv # 2 = 5x5 with 3x3 pooling

In [6]:
batch_size = 30

# Set up Convolutional Network
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (5,5), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(16, (5,5), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1, activation='linear'),
])

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mean_squared_error', 'mean_absolute_error'])

model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch = len(train_X)//batch_size, epochs=10)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x244f5669a58>

## Batch Size = 30, Conv # 1 = 5x5 with 3x3 pooling, Conv # 2 = 5x5 with 3x3 pooling, Conv # 3 = 5x5 with 3x3 pooling

In [5]:
batch_size = 30

# Set up Convolutional Network
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (5,5), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(16, (5,5), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(16, (5,5), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1, activation='linear'),
])

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mean_squared_error', 'mean_absolute_error'])

model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch = len(train_X)//batch_size, epochs=10)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x2a71adabac8>

## Batch Size = 50, Conv # 1 = 5x5 with 3x3 pooling, Conv # 2 = 5x5 with 3x3 pooling, Conv # 3 = 5x5 with 3x3 pooling

In [6]:
batch_size = 50

# Set up Convolutional Network
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (5,5), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(16, (5,5), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(16, (5,5), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1, activation='linear'),
])

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mean_squared_error', 'mean_absolute_error'])

model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch = len(train_X)//batch_size, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x2a71ce77390>

## Batch Size = 50, Conv # 1 = 5x5 with 3x3 pooling, Conv # 2 = 5x5 with 3x3 pooling

In [7]:
batch_size = 50

# Set up Convolutional Network
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (5,5), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(16, (5,5), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1, activation='linear'),
])

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mean_squared_error', 'mean_absolute_error'])

model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch = len(train_X)//batch_size, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x244879e7748>

## Batch Size = 30, Conv # 1 = 10x10 with 5x5 pooling, Conv # 2 = 5x5 with 3x3 pooling

In [8]:
batch_size = 30

# Set up Convolutional Network
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (10,10), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(5,5)),
  tf.keras.layers.Conv2D(16, (5,5), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(3,3)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1, activation='linear'),
])

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mean_squared_error', 'mean_absolute_error'])

model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch = len(train_X)//batch_size, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x24488a30240>

## Batch Size = 30, Conv # 1 = 10x10 with 5x5 pooling, Conv # 2 = 10x10 with 5x5 pooling

In [9]:
batch_size = 30

# Set up Convolutional Network
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (10,10), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(5,5)),
  tf.keras.layers.Conv2D(16, (10,10), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(5,5)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1, activation='linear'),
])

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mean_squared_error', 'mean_absolute_error'])

model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch = len(train_X)//batch_size, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x24487b2e358>

## Batch Size = 50, Conv # 1 = 10x10 with 5x5 pooling, Conv # 2 = 10x10 with 5x5 pooling

In [10]:
batch_size = 50

# Set up Convolutional Network
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (10,10), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(5,5)),
  tf.keras.layers.Conv2D(16, (10,10), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(5,5)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1, activation='linear'),
])

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mean_squared_error', 'mean_absolute_error'])

model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch = len(train_X)//batch_size, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x24487b92908>

## Batch Size = 30, Conv # 1 = 10x10 with 5x5 pooling, Conv # 2 = 10x10 with 5x5 pooling, Conv # 3 = 5x5 with 2x2 pooling

In [11]:
batch_size = 30

# Set up Convolutional Network
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (10,10), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(5,5)),
  tf.keras.layers.Conv2D(16, (10,10), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(5,5)),
  tf.keras.layers.Conv2D(32, (5,5), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1, activation='linear'),
])

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mean_squared_error', 'mean_absolute_error'])

model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch = len(train_X)//batch_size, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x24487c28908>

In [143]:
predictions = model.predict_generator(gather_images(test_X, test_Y, batch_size=batch_size), len(test_X)//batch_size)
print(predictions[0:10])

[[0.06639786]
 [0.597787  ]
 [0.05902122]
 [0.04196331]
 [0.04343889]
 [0.25613928]
 [0.04196331]
 [0.12044369]
 [0.04196331]
 [0.04298855]]


In [125]:
test_labels = next(gather_images(test_X, test_Y, batch_size=len(test_X)))[1]
print(test_labels[0:10])

[[0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]]
