# Building an Image Generator

This notebook shows how to build a data generator that reads a dataset from disk one batch at a time, so the model can be trained without loading every image into memory at once. 

TensorFlow wasn't loading properly on my Windows computer, so I followed this guide to create a new conda environment for running TensorFlow and Keras: https://medium.com/@mengjiunchiou/how-to-set-keras-with-tensorflow-with-conda-virtual-environment-on-ubuntu-5d56d22e3dc7

In [50]:
import matplotlib
matplotlib.use("Agg")
 
# import the necessary packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from skimage import io, filters, measure
from scipy import ndimage
from keras.models import Sequential
import cv2
from glob import glob
from sklearn.model_selection import train_test_split
from PIL import Image # for conversion to grayscale

In [126]:
# Data have been pre-processed already. The raw images were converted
# to grayscale and were split into 25 smaller images. Labeled images
# were created from the original images and are a series of 0 (no 
# person) and 255 (a person). The labeled images were also split
# into 25 pieces so that they would match the original images. 

# Find location of image files and labeled images. glob() returns paths in
# arbitrary (filesystem-dependent) order, so both lists are sorted to make
# the run reproducible.
# NOTE(review): images and labels are paired by position, which assumes the
# two sets of file names sort into the same order -- confirm.
data = sorted(glob('data/raw/resized/with_people/splits/*full_color*.png'))
labels = sorted(glob('data/processed/dots/with_people/splits/*.png'))

# Split into the training and testing data. Both lists go through ONE call so
# every image stays with its label, and a mismatch in the number of files
# raises immediately instead of silently mis-pairing samples.
train_X, test_X, train_Y, test_Y = train_test_split(
    data, labels, test_size=0.25, random_state=33)

In [113]:
# Create function to make a data generator for the model
def gather_images(images, labels, batch_size=10):
    """Yield (X, Y) batches of images and per-image region counts, forever.

    Each pass walks ``images`` and ``labels`` in step, ``batch_size`` files at
    a time, and starts over when the lists are exhausted. The generator never
    terminates on its own; the caller bounds it with a step count.

    Args:
        images: sequence of image file paths, read with ``cv2.imread``.
        labels: sequence of label-image file paths, paired with ``images``
            by position.
        batch_size: maximum number of files per batch. The last batch of a
            pass is smaller when the list length is not a multiple of it.

    Yields:
        Tuple ``(X, Y)``. ``X`` is ``np.array`` of the decoded images in the
        batch. ``Y`` has shape ``(len(batch), 1)`` and holds, for each label
        image, the largest region id returned by ``skimage.measure.label``
        (presumably the number of people marked in that image).

    Raises:
        ValueError: if ``batch_size`` is not positive, the two lists differ
            in length, or ``images`` is empty.
        IOError: if ``cv2.imread`` cannot read one of the image files.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if len(images) != len(labels):
        raise ValueError(
            "images and labels must have the same length (got %d and %d)"
            % (len(images), len(labels)))
    if len(images) == 0:
        # Without this guard the loop below would spin forever without
        # ever yielding a batch.
        raise ValueError("images must not be empty")

    while True:
        for offset in range(0, len(images), batch_size):
            batch_images = images[offset:offset + batch_size]
            batch_labels = labels[offset:offset + batch_size]
            X = []  # decoded images for this batch
            Y = []  # one count per label image
            for img_path, lab_path in zip(batch_images, batch_labels):
                img_temp = cv2.imread(img_path)
                if img_temp is None:
                    # cv2.imread signals failure by returning None, not by raising.
                    raise IOError("could not read image file: %s" % img_path)
                # Alternative colour treatments used by other attempts in this
                # notebook (swap in for / apply after the cv2.imread call):
                #img_temp = Image.open(img_path).convert('LA')
                #img_temp = cv2.cvtColor(img_temp, cv2.COLOR_BGR2HSV)
                X.append(np.array(img_temp))

                label_temp = io.imread(lab_path, as_gray=True)
                # measure.label numbers the connected regions; the largest
                # id is used as the target value for this image.
                Y.append(measure.label(label_temp).max())
            yield (np.array(X), np.array(Y).reshape(len(Y), 1))

# Attempt 1: Full Images - GrayScale
This uses full-size images that have people in them, converted to grayscale. 

In [106]:
batch_size = 10

# Set up Convolutional Network: four Conv2D + MaxPooling2D stages, then two
# dense layers with dropout and a single linear output (regression on the
# per-image count).
# NOTE(review): input_shape has 2 channels, which matches the commented-out
# Image.open(...).convert('LA') loader in gather_images rather than the
# cv2.imread call that is currently active -- confirm before re-running.
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (2,2), padding="same",input_shape=(540, 960, 2), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(16, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1),
])

# 'accuracy' is a classification metric; on a real-valued regression output
# it is not meaningful, so only the error metrics are tracked.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_squared_error', 'mean_absolute_error'])

# Round the step count up so one epoch is exactly one pass over the training
# files (flooring skips the partial batch and gives 0 steps for tiny datasets).
train_steps = (len(train_X) + batch_size - 1) // batch_size
model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch=train_steps, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x1f259d8bfd0>

In [107]:
# Round the step count up so the final partial batch is predicted too;
# flooring leaves the last len(test_X) % batch_size test images without a
# prediction, so predictions would not line up with the full label array.
test_steps = (len(test_X) + batch_size - 1) // batch_size
predictions = model.predict_generator(
    gather_images(test_X, test_Y, batch_size=batch_size), test_steps)
print(predictions[0:10])

[[4.4597096]
 [4.0453725]
 [2.5865219]
 [1.876172 ]
 [2.7817848]
 [4.3727183]
 [3.9000027]
 [2.5050535]
 [3.3840609]
 [3.5734978]]


In [108]:
# Collect the true labels batch by batch instead of requesting one batch of
# len(test_X): the result is the same array, but the whole test image set is
# never held in memory at once.
label_batches = gather_images(test_X, test_Y, batch_size=batch_size)
n_label_batches = (len(test_X) + batch_size - 1) // batch_size
test_labels = np.concatenate(
    [next(label_batches)[1] for _ in range(n_label_batches)])
print(test_labels[0:10])

[[7]
 [5]
 [1]
 [1]
 [2]
 [9]
 [3]
 [2]
 [3]
 [3]]


# Attempt 2: Full Images - HSV
This uses full size images that have people in them, converted to HSV.

In [110]:
batch_size = 10

# Set up Convolutional Network: four Conv2D + MaxPooling2D stages, then two
# dense layers with dropout and a single linear output (regression on the
# per-image count).
# NOTE(review): this attempt presumably ran with the cv2.COLOR_BGR2HSV
# conversion in gather_images enabled and with full-size (540x960) image
# paths in train_X -- confirm before re-running.
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (2,2), padding="same",input_shape=(540, 960, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(16, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1),
])

# 'accuracy' is a classification metric; on a real-valued regression output
# it is not meaningful, so only the error metrics are tracked.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_squared_error', 'mean_absolute_error'])

# Round the step count up so one epoch is exactly one pass over the training
# files (flooring skips the partial batch and gives 0 steps for tiny datasets).
train_steps = (len(train_X) + batch_size - 1) // batch_size
model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch=train_steps, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x1f25ad63fd0>

In [111]:
# Round the step count up so the final partial batch is predicted too;
# flooring leaves the last len(test_X) % batch_size test images without a
# prediction, so predictions would not line up with the full label array.
test_steps = (len(test_X) + batch_size - 1) // batch_size
predictions = model.predict_generator(
    gather_images(test_X, test_Y, batch_size=batch_size), test_steps)
print(predictions[0:10])

[[3.0930457]
 [2.5246828]
 [1.4346604]
 [1.3098843]
 [1.7063771]
 [1.902762 ]
 [3.913823 ]
 [1.0183108]
 [1.3632728]
 [3.1236968]]


In [112]:
# Collect the true labels batch by batch instead of requesting one batch of
# len(test_X): the result is the same array, but the whole test image set is
# never held in memory at once.
label_batches = gather_images(test_X, test_Y, batch_size=batch_size)
n_label_batches = (len(test_X) + batch_size - 1) // batch_size
test_labels = np.concatenate(
    [next(label_batches)[1] for _ in range(n_label_batches)])
print(test_labels[0:10])

[[7]
 [5]
 [1]
 [1]
 [2]
 [9]
 [3]
 [2]
 [3]
 [3]]


# Attempt 3: Full Images - No Color Change
This uses full size images that have people in them.

In [114]:
batch_size = 10

# Set up Convolutional Network: four Conv2D + MaxPooling2D stages, then two
# dense layers with dropout and a single linear output (regression on the
# per-image count).
# NOTE(review): the 540x960 input presumably requires train_X to hold
# full-size image paths rather than the 'splits' files -- confirm before
# re-running.
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (2,2), padding="same",input_shape=(540, 960, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(16, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1),
])

# 'accuracy' is a classification metric; on a real-valued regression output
# it is not meaningful, so only the error metrics are tracked.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_squared_error', 'mean_absolute_error'])

# Round the step count up so one epoch is exactly one pass over the training
# files (flooring skips the partial batch and gives 0 steps for tiny datasets).
train_steps = (len(train_X) + batch_size - 1) // batch_size
model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch=train_steps, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x1f25c490240>

In [115]:
# Round the step count up so the final partial batch is predicted too;
# flooring leaves the last len(test_X) % batch_size test images without a
# prediction, so predictions would not line up with the full label array.
test_steps = (len(test_X) + batch_size - 1) // batch_size
predictions = model.predict_generator(
    gather_images(test_X, test_Y, batch_size=batch_size), test_steps)
print(predictions[0:10])

[[1.3958495 ]
 [8.321123  ]
 [2.3532782 ]
 [1.035324  ]
 [0.70101446]
 [2.332126  ]
 [4.5625787 ]
 [0.87305564]
 [1.6255327 ]
 [2.5387604 ]]


In [116]:
# Collect the true labels batch by batch instead of requesting one batch of
# len(test_X): the result is the same array, but the whole test image set is
# never held in memory at once.
label_batches = gather_images(test_X, test_Y, batch_size=batch_size)
n_label_batches = (len(test_X) + batch_size - 1) // batch_size
test_labels = np.concatenate(
    [next(label_batches)[1] for _ in range(n_label_batches)])
print(test_labels[0:10])

[[7]
 [5]
 [1]
 [1]
 [2]
 [9]
 [3]
 [2]
 [3]
 [3]]


# Attempt 4: Split Images - Grayscale
This uses split images (from full images with people in them) that have been converted to grayscale.

In [119]:
batch_size = 10

# Set up Convolutional Network: four Conv2D + MaxPooling2D stages, then two
# dense layers with dropout and a single linear output (regression on the
# per-image count). Input is one 108x192 split image.
# NOTE(review): the section title says grayscale but input_shape has 3
# channels -- confirm which loader in gather_images was active for this run.
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (2,2), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(16, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1),
])

# 'accuracy' is a classification metric; on a real-valued regression output
# it is not meaningful, so only the error metrics are tracked.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_squared_error', 'mean_absolute_error'])

# Round the step count up so one epoch is exactly one pass over the training
# files (flooring skips the partial batch and gives 0 steps for tiny datasets).
train_steps = (len(train_X) + batch_size - 1) // batch_size
model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch=train_steps, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x1f23f893128>

In [120]:
# Round the step count up so the final partial batch is predicted too;
# flooring leaves the last len(test_X) % batch_size test images without a
# prediction, so predictions would not line up with the full label array.
test_steps = (len(test_X) + batch_size - 1) // batch_size
predictions = model.predict_generator(
    gather_images(test_X, test_Y, batch_size=batch_size), test_steps)
print(predictions[0:10])

[[0.1062379 ]
 [0.12491779]
 [0.16854063]
 [0.10197994]
 [0.10313762]
 [0.11242331]
 [0.23321387]
 [0.10273636]
 [0.10166924]
 [0.10310157]]


In [121]:
# Collect the true labels batch by batch instead of requesting one batch of
# len(test_X): the result is the same array, but the whole test image set is
# never held in memory at once.
label_batches = gather_images(test_X, test_Y, batch_size=batch_size)
n_label_batches = (len(test_X) + batch_size - 1) // batch_size
test_labels = np.concatenate(
    [next(label_batches)[1] for _ in range(n_label_batches)])
print(test_labels[0:10])

[[0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]]


# Attempt 5: Split Images - HSV
This uses split images (from full images with people in them) that have been converted to HSV.

In [123]:
batch_size = 10

# Set up Convolutional Network: four Conv2D + MaxPooling2D stages, then two
# dense layers with dropout and a single linear output (regression on the
# per-image count). Input is one 108x192 split image.
# NOTE(review): this attempt presumably ran with the cv2.COLOR_BGR2HSV
# conversion in gather_images enabled -- confirm before re-running.
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (2,2), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(16, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1),
])

# 'accuracy' is a classification metric; on a real-valued regression output
# it is not meaningful, so only the error metrics are tracked.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_squared_error', 'mean_absolute_error'])

# Round the step count up so one epoch is exactly one pass over the training
# files (flooring skips the partial batch and gives 0 steps for tiny datasets).
train_steps = (len(train_X) + batch_size - 1) // batch_size
model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch=train_steps, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x1f259090cf8>

In [124]:
# Round the step count up so the final partial batch is predicted too;
# flooring leaves the last len(test_X) % batch_size test images without a
# prediction, so predictions would not line up with the full label array.
test_steps = (len(test_X) + batch_size - 1) // batch_size
predictions = model.predict_generator(
    gather_images(test_X, test_Y, batch_size=batch_size), test_steps)
print(predictions[0:10])

[[0.0312355 ]
 [0.22261013]
 [0.09479196]
 [0.05141732]
 [0.05010214]
 [0.04356905]
 [0.19408457]
 [0.03841535]
 [0.06442752]
 [0.09745529]]


In [125]:
# Collect the true labels batch by batch instead of requesting one batch of
# len(test_X): the result is the same array, but the whole test image set is
# never held in memory at once.
label_batches = gather_images(test_X, test_Y, batch_size=batch_size)
n_label_batches = (len(test_X) + batch_size - 1) // batch_size
test_labels = np.concatenate(
    [next(label_batches)[1] for _ in range(n_label_batches)])
print(test_labels[0:10])

[[0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]]


# Attempt 6: Split Images - No Color Change
This uses split images (from full images with people in them). 

In [127]:
batch_size = 10

# Set up Convolutional Network: four Conv2D + MaxPooling2D stages, then two
# dense layers with dropout and a single linear output (regression on the
# per-image count). Input is one 108x192 split image with 3 colour channels.
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(8, (2,2), padding="same",input_shape=(108, 192, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(16, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
  tf.keras.layers.Conv2D(32, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Conv2D(64, (2,2), padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(4,4)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1),
])

# 'accuracy' is a classification metric; on a real-valued regression output
# it is not meaningful, so only the error metrics are tracked.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_squared_error', 'mean_absolute_error'])

# Round the step count up so one epoch is exactly one pass over the training
# files (flooring skips the partial batch and gives 0 steps for tiny datasets).
train_steps = (len(train_X) + batch_size - 1) // batch_size
model.fit_generator(gather_images(train_X, train_Y, batch_size=batch_size),
                    steps_per_epoch=train_steps, epochs=10)

Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<tensorflow.python.keras._impl.keras.callbacks.History at 0x1f25bac0eb8>

In [128]:
# Round the step count up so the final partial batch is predicted too;
# flooring leaves the last len(test_X) % batch_size test images without a
# prediction, so predictions would not line up with the full label array.
test_steps = (len(test_X) + batch_size - 1) // batch_size
predictions = model.predict_generator(
    gather_images(test_X, test_Y, batch_size=batch_size), test_steps)
print(predictions[0:10])

[[0.13789415]
 [0.25354084]
 [0.13789415]
 [0.13789415]
 [0.13789415]
 [0.1382464 ]
 [0.19708386]
 [0.1383607 ]
 [0.13789415]
 [0.13789415]]


In [129]:
# Collect the true labels batch by batch instead of requesting one batch of
# len(test_X): the result is the same array, but the whole test image set is
# never held in memory at once.
label_batches = gather_images(test_X, test_Y, batch_size=batch_size)
n_label_batches = (len(test_X) + batch_size - 1) // batch_size
test_labels = np.concatenate(
    [next(label_batches)[1] for _ in range(n_label_batches)])
print(test_labels[0:10])

[[0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]]
