# Merging ResNet, VGG and V3 models

In [1]:
%matplotlib inline

from __future__ import division, print_function

import os, json
from glob import glob
import numpy as np
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom


from keras.preprocessing import image
from keras.models import Model, load_model
from keras.optimizers import Adam
import keras.preprocessing.image as image
from keras.utils.data_utils import get_file

 
from keras.models import Sequential, load_model

from keras import models 
from keras.layers.normalization import BatchNormalization


from sklearn import metrics
from sklearn import linear_model


import numpy as np

import matplotlib.pyplot as plt

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [5]:
# Restore the three previously trained classifiers from their saved files.
# NOTE(review): these are absolute, machine-specific paths.
ResNet = load_model(filepath='/home/ubuntu/INV-RES2.model')
VGG = load_model(
    filepath='/home/ubuntu/courses/deeplearning1/nbs/data/invasive/invasive_vgg.model'
)
V3 = load_model(filepath='/home/ubuntu/INV-V31.model')

## Merging the three models using scikit-learn

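I can't just merge a Sequential and a functional model, so merging them directly would require turning VGG into a functional model first. Instead, the cells below combine the models' predictions with a scikit-learn linear regression.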

In [6]:
def make_xy(model, path, target_size=(224, 224), batch_size=32):
    """Build stacked-ensemble features and labels from a directory of images.

    Every model predicts over all images under ``path`` (streamed unshuffled)
    and column 0 of its prediction array is kept as that model's feature
    column. The running shape of the feature stack is printed after each
    model, as a progress indicator.

    Parameters
    ----------
    model : sequence
        Despite the singular name (kept for backward compatibility) this is a
        sequence of models. Each must provide
        ``predict_generator(generator, n_samples)`` returning a 2-D array.
    path : str
        Directory passed to ``flow_from_directory``.
    target_size : tuple of int, optional
        Image size requested from the generator. Defaults to ``(224, 224)``.
    batch_size : int, optional
        Generator batch size. Defaults to ``32``.

    Returns
    -------
    X : ndarray, shape (n_images, n_models)
        One column per model. Always 2-D, including for a single model, so it
        can be handed directly to a scikit-learn estimator.
    classes
        The ``classes`` attribute of the directory generator.

    Raises
    ------
    ValueError
        If ``model`` is empty.
    """
    if len(model) == 0:
        raise ValueError("make_xy needs at least one model to build features from")

    # The generator configuration is identical for every model, so build it once.
    gen = image.ImageDataGenerator()

    X = None
    for net in model:
        # A fresh, unshuffled iterator per model so each one starts at the
        # first image and sees the files in the same order.
        batches = gen.flow_from_directory(path,
                                          target_size=target_size,
                                          batch_size=batch_size,
                                          shuffle=False)

        preds = net.predict_generator(batches, batches.nb_sample)

        # Keep only column 0. The original note says this relies on how the
        # classes are numbered -- presumably column 0 is the 'inv' class;
        # TODO confirm against the models' training setup.
        is_inv = np.asarray(preds)[:, 0]

        X = is_inv if X is None else np.vstack((X, is_inv))
        print(X.shape)

    # atleast_2d keeps the single-model case as an (n_images, 1) matrix
    # instead of collapsing to a 1-D vector.
    return np.atleast_2d(X).T, batches.classes

In [7]:
def ensemble_linreg(X, Y):
    """Fit a linear regression of Y on X and return the fitted estimator."""
    # fit() hands back the estimator itself, so construction and fitting chain.
    return linear_model.LinearRegression().fit(X, Y)

In [8]:
# Training features: one column of predictions per model; labels come from
# the directory generator.
X_train, Y_train = make_xy(model=[ResNet, VGG, V3], path="data/invasive/train")

Found 2095 images belonging to 2 classes.
(2095,)
Found 2095 images belonging to 2 classes.
(2, 2095)
Found 2095 images belonging to 2 classes.
(3, 2095)


In [9]:
# Persist the training features and labels so the slow prediction pass
# does not have to be repeated.
np.save(file="data/invasive/X_train2.npy", arr=X_train)
np.save(file="data/invasive/Y_train2.npy", arr=Y_train)

Make the model, trained on X_train and Y_train

In [7]:
# Fit the stacking regressor on the per-model training predictions.
ens_linreg = ensemble_linreg(X=X_train, Y=Y_train)

### Checking the strength of the model

In [10]:
# Validation features and labels, built the same way as the training set.
X_val, Y_val = make_xy(model=[ResNet, VGG, V3], path="data/invasive/valid")

Found 200 images belonging to 2 classes.
(200,)
Found 200 images belonging to 2 classes.
(2, 200)
Found 200 images belonging to 2 classes.
(3, 200)


In [15]:
# Persist the validation features and labels alongside the training ones.
np.save(file="data/invasive/X_valid2.npy", arr=X_val)
np.save(file="data/invasive/Y_valid2.npy", arr=Y_val)

In [9]:
# Continuous ensemble outputs for the validation images.
Y_val_pred = ens_linreg.predict(X=X_val)

In [10]:
# Binarise the regression outputs in place with a 0.5 cut-off:
# anything below becomes 0, anything at or above becomes 1.
np.putmask(Y_val_pred, Y_val_pred < 0.5, 0)
np.putmask(Y_val_pred, Y_val_pred >= 0.5, 1)

In [11]:
# F1 of the thresholded ensemble predictions against the validation labels.
metrics.f1_score(y_true=Y_val, y_pred=Y_val_pred)

0.97560975609756095

## Submitting to Kaggle

In [11]:
# Plain generator (no augmentation arguments) used for both streams below.
test_gen = image.ImageDataGenerator()

# Unshuffled pass over the test directory, used here only to obtain the file
# names that are later paired with the predictions.
test_batches = test_gen.flow_from_directory(
    "data/invasive/Test",
    target_size=(224, 224),
    batch_size=8,
    shuffle=False,
)
filenames = test_batches.filenames

# Validation stream, created to inspect the class-name -> index mapping.
valid_batches = test_gen.flow_from_directory(
    "data/invasive/valid",
    target_size=(224, 224),
    batch_size=8,
    shuffle=False,
)

valid_batches.class_indices

Found 1531 images belonging to 1 classes.
Found 200 images belonging to 2 classes.


{'inv': 0, 'non_inv': 1}

In [12]:
# Test-set features; the second return value is the generator's class array.
X_test, test_classes = make_xy(model=[ResNet, VGG, V3], path="data/invasive/Test")

Found 1531 images belonging to 1 classes.
(1531,)
Found 1531 images belonging to 1 classes.
(2, 1531)
Found 1531 images belonging to 1 classes.
(3, 1531)


In [13]:
# Persist the test-set feature matrix.
np.save(file="data/invasive/X_test2.npy", arr=X_test)

In [14]:
# Persist the test file names next to the features they correspond to.
np.save(file="data/invasive/filenames_test2.npy", arr=filenames)

In [15]:
# Continuous ensemble outputs for the test images.
test_preds = ens_linreg.predict(X=X_test)

In [16]:
# Display the raw ensemble outputs for the test set. These are linear
# regression values, not clipped probabilities, so some fall slightly
# outside [0, 1] (the first value shown is negative).
test_preds

array([-0.00569677,  0.97170637,  0.96196031, ...,  0.97553732,
        0.97495138,  0.97276451])

In [17]:
# Parse each numeric image id from the file's base name (the text before the
# first dot) instead of assuming a fixed 5-character directory prefix, so the
# parsing no longer depends on the length of the sub-folder name.
ids = [int(os.path.basename(f).split('.')[0]) for f in filenames]

# Pair every id with its ensemble prediction: column 0 = id, column 1 = score.
combined = np.column_stack((ids, test_preds))

In [18]:
# Write the submission file. comments='' is required: by default np.savetxt
# prefixes the header with '# ', which would turn the CSV header row into
# '# name,invasive'.
# NOTE(review): valid_batches.class_indices displays {'inv': 0, 'non_inv': 1}
# and the regression targets are the generator's class indices, so these
# scores presumably approach 1 for *non*-invasive images -- confirm whether
# the 'invasive' column should be 1 - test_preds before submitting.
np.savetxt("data/invasive/INV3.csv", combined, fmt="%d,%.5f",
           header='name,invasive', comments='')