In [1]:
import pandas as pd
import numpy as np
import data_generator as gen
from models import cnn
import h5py, pickle

from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import optimizers

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

Using TensorFlow backend.


## Configure the Data
Batch size of 16 to keep memory usage down  
365 classes  
each "image" is 365x365 with 3 "channels" [Trips, Spend, Basket]

In [2]:
bs = 16
n_classes = 365
dim = (365,365)
n_channels = 3
shuffle = True

### Define Data Generator Objects
This will allow us to train our CNN in very small batches and not read the entire data set into memory all at once

In [None]:
dg = gen.DataGenerator(
    '/home/nneuenschwander/customer_image/data/customer_image_brand365_train-val_scaled.hdf5', 
    dim=dim, 
    train_val='train',    
    n_channels=n_channels,
    n_classes=n_classes,
    batch_size=bs,
    shuffle=shuffle,
)

vdg = gen.DataGenerator(
    '/home/nneuenschwander/customer_image/data/customer_image_brand365_train-val_scaled.hdf5', 
    dim=dim, 
    train_val='validation',
    n_channels=n_channels,
    n_classes=n_classes,
    batch_size=bs,
    shuffle=shuffle,
)

## Import model object and start training

In [3]:
from keras import optimizers
model = cnn.get_cnn(dim,n_channels,n_classes)
model.compile(
    optimizers.rmsprop(lr=0.0001, decay=1e-6),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
# checkpointer = ModelCheckpoint(filepath='data/brand365-bestModel.hdf5', verbose=1, save_best_only=True)
earlystopper = EarlyStopping(monitor='val_acc', patience=1000, verbose=1)

history = model.fit_generator(    
    generator=dg,
    validation_data=vdg,
    epochs=3,
#     callbacks=[checkpointer, earlystopper]
)
# model.save_weights('/home/nneuenschwander/customer_image/brand365_image_weights.h5')

W0814 15:16:08.699174 140494284666688 deprecation_wrapper.py:119] From /home/nneuenschwander/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0814 15:16:08.999161 140494284666688 deprecation_wrapper.py:119] From /home/nneuenschwander/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0814 15:16:09.130935 140494284666688 deprecation_wrapper.py:119] From /home/nneuenschwander/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0814 15:16:09.255667 140494284666688 deprecation_wrapper.py:119] From /home/nneuenschwander/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please us

## Load our weights back into model predict the test data

In [6]:
model.load_weights('/home/nneuenschwander/customer_image/models/brand365-bestModel.hdf5')
model.get_weights()

[array([[[[-5.30199669e-02, -1.74659062e-02,  2.38458738e-02,
           -1.23290857e-02, -5.13316393e-02, -6.48824871e-02,
           -6.71199262e-02, -4.37228903e-02, -3.51970531e-02,
            1.13868512e-01,  7.39126652e-03,  1.05716147e-01,
           -1.28297195e-01, -6.96728304e-02,  2.81012785e-02,
            8.80028903e-02, -7.96040073e-02, -1.19422702e-02,
            7.85363615e-02, -3.10006738e-03, -7.45344758e-02,
            8.00734311e-02, -2.15989677e-03,  4.36717831e-02,
           -9.66186076e-02, -1.47533277e-02,  2.66428720e-02,
           -5.13153113e-02, -9.49329212e-02, -8.61192718e-02,
            8.68094787e-02, -1.19509295e-01],
          [ 5.48096374e-02, -7.30190426e-02, -8.18222836e-02,
           -8.74961093e-02,  7.09555373e-02,  7.59255514e-02,
           -1.27271013e-02,  4.88203913e-02,  7.24151358e-02,
            2.48947255e-02, -7.36069726e-03, -1.23777062e-01,
           -9.77181420e-02, -8.71447548e-02, -7.07367212e-02,
           -3.16553377e-

## Predict test data in batches and save in results file

In [None]:
total_test_custs = 100354 # I know this because I read the file in and counted them
start_idx = 0

with h5py.File('/home/nneuenschwander/customer_image/data/customer_image_brand365_test_scaled.hdf5', 'r') as read:
#     with h5py.File('/home/nneuenschwander/customer_image/data/customer_image_brand365_test_results.hdf5', 'w') as write:
        
        write.create_dataset('y_pred', (total_test_custs,n_classes), np.float64, compression='gzip')
        write.create_dataset('y', (total_test_custs,n_classes), np.int8, compression='gzip')
        write.create_dataset('idx', (total_test_custs,), np.int64, compression='gzip')
        
        batches = [ b[0] for b in read.items() ]
        for b in batches:
            print(b)
            X = read[b]['X']
            y = read[b]['y']
            idx = read[b]['idx']
           
            assert X.shape[0] == y.shape[0] == idx.shape[0]
            y_pred = model.predict(X)

            X_shape = X.shape[0]
            end_idx = start_idx + X_shape
            (start_idx,end_idx)
            
            write['y_pred'][start_idx:end_idx,:] = model.predict(X)
            write['y'][start_idx:end_idx,:] = y[:,:]
            write['idx'][start_idx:end_idx] = idx[:]
            
            start_idx += X_shape


