In [17]:
import os
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
import seaborn as sns
from umap import UMAP
from ddrop import DropConnect

from utils import unpickle, load_cifar_data
from visual_utils import plot_images, plot_pca_results, plot_umap_results

from skimage.transform import resize

In [16]:
!pip install scikit-image

Collecting scikit-image
  Using cached https://files.pythonhosted.org/packages/ab/e8/8c0c9d26ff80dfdf51c209c8c823269adaea1d26d52c43216685bfb590a4/scikit_image-0.14.2-cp36-cp36m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl
Collecting PyWavelets>=0.4.0 (from scikit-image)
  Using cached https://files.pythonhosted.org/packages/03/9b/6623e4197d459529602e02e52a4a1e277b9113c562bcfaf8b64b2c38408c/PyWavelets-1.0.1-cp36-cp36m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl
Collecting pillow>=4.3.0 (from scikit-image)
  Using cached https://files.pythonhosted.org/packages/81/96/05a5c9ba4a75ed330234780e4ae30018bbf77c847982ff6a16b8b291a0da/Pillow-5.4.1-cp36-cp36m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl
Collecting cloudpickle>=0.2.1 (from scikit-image)
  Downloading https://files.pythonhosted.org/packages/47/d5/efa7cacef5d3bdcd71d7053a698fb

In [2]:
import tensorflow as tf
from keras.applications.resnet50 import ResNet50
from keras.models import Model, Sequential
from keras.layers import Flatten, Input, Dense, Dropout
from keras.utils import Sequence
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback
from keras.applications.resnet50 import preprocess_input


In [5]:
X_train, Y_train, X_val, Y_val, X_test, Y_test, label_names = load_cifar_data()

In [3]:
class DataPreprocessor(Sequence):
    'Generates data for Keras'
    def __init__(self, X_data, Y_data, batch_size=32, dim=(224, 224), shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.Y_data = Y_data
        self.X_data = X_data
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.Y_data) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Generate data
        X = self.__data_generation(self.X_data[indexes], self.dim)
        y = self.Y_data[indexes]

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.Y_data))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch, dim):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        batch_resized = [cv2.resize(image, dim, interpolation=cv2.INTER_AREA) for image in batch]
        return np.array(batch_resized)

In [6]:
def resize_images(batch_data, target_size, preserve_range=True):
    resized_batch = np.array(
        [resize(batch_data[i], target_size, preserve_range=preserve_range)
         for i in range(batch_data.shape[0])]
    ).astype('float32')
    return resized_batch


def data_generator(
    X, y, batch_size, target_size=(224, 224, 3), 
    preprocessor=preprocess_input, shuffle=False):
    
    start = 0
    end = start + batch_size
    n = X.shape[0]
    if shuffle:
        X, y = np.random.shuffle(X, y)
    while True:
        X_batch = X[start : end]
        y_batch = y [start : end]
        
        X_resized = resize_images(X_batch, target_size)
        X_preprocessed = preprocessor(X_resized)

        start += batch_size
        end   += batch_size
        if start >= n:
            start = 0
            end = batch_size
            if shuffle:
                X, y = np.random.shuffle(X, y)
        yield (X_preprocessed, y_batch)
        
def resnet_base_flatten():
    model = ResNet50(include_top=False, weights='imagenet', input_tensor=Input(shape=(224,224,3)))
    top_model = Sequential()
    top_model.add(Flatten(input_shape=model.output_shape[1:]))
    return Model(model.input, top_model(model.output))

X_trial = X_train[:256]
Y_trial = Y_train[:256]
X_trial_val = X_val[:256]
Y_trial_val = Y_val[:256]

In [7]:
trial = DataPreprocessor(X_trial, Y_trial)

In [13]:
trial[3][0].shape

(32, 224, 224, 3)

In [19]:
for x_batch, y_batch in data_generator(X_trial, Y_trial, 32):
    print(x_batch.shape)

(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 

KeyboardInterrupt: 