# Resnet

## Setup

In this notebook I'm going to try using pretrained networks (or at least their architectures). I think ResNet will be ideal because of its generally good performance and because it behaves like an exponential ensemble of networks of varying depth. My preliminary results suggest that the small size of the images (75x75) and of some of the features (5x5) will make models that are too deep (i.e., that have many pooling layers) ineffective. I don't think ResNet50 will work for 75x75 images, so I'll probably need to build a smaller model with a similar architecture.

In [2]:
from utils import *
from fastai import *

In [6]:
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.applications import resnet50
from keras.layers import GlobalAveragePooling2D
# Use channels-first tensors for the rest of the notebook: the band arrays are stacked on
# axis 1 and the custom model below declares Input(shape=(3,75,75)).
K.set_image_data_format('channels_first')

In [4]:
from vis import *

In [5]:
#Paths
# Root of the dataset in use. The sample subset is active; swap in the commented-out
# line below to point at the full composites directory instead.
path = os.curdir + '/data/processed/composites/sample/'
#path = os.curdir + '/data/processed/composites/'
trn_path = path + 'train/'
val_path = path + 'valid/'
model_path = path + 'models/'
results_path = path + 'results/'
# Create the output directories on first use. makedirs (rather than mkdir) also creates
# any missing parent directories, so a fresh checkout does not raise here.
for out_dir in (model_path, results_path):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

In [3]:
# Mini-batch size intended for the training runs in this notebook.
batch_size=64

In [4]:
# Build batch iterators over the train/valid directories with the get_batches helper
# (defined outside this file — presumably in utils; confirm).
# Shuffling is switched off for validation so the batch order stays fixed.
batches = get_batches(trn_path)
val_batches = get_batches(val_path, shuffle=False)

In [5]:
# Load the train/valid sets through the get_data helper (defined outside this file).
# NOTE(review): trn_data and val_data are reassigned from the stacked band arrays in a later cell.
trn_data = get_data(trn_path)
val_data = get_data(val_path)

In [6]:
# Reshape every flattened band into a 75x75 float32 image. No channel axis is added here
# (no trailing 1 in the reshape) because np.stack below creates it.
band_1_s = np.array([np.array(band).astype(np.float32).reshape(75,75) for band in train.band_1])
band_2_s = np.array([np.array(band).astype(np.float32).reshape(75,75) for band in train.band_2])
# Three channels-first planes per sample: band 1, band 2 and their mean -> (N, 3, 75, 75).
# The mean is sum / 2; dividing by 0.5 would double the sum and put the third channel
# on four times the scale of the actual average.
bands = np.stack([band_1_s, band_2_s, (band_1_s+band_2_s)/2], axis=1)

In [7]:
#this will cause roughly 20% of the training set to be split out for validation
# Boolean mask with one entry per sample: True (drawn with probability 0.8) marks a training sample.
# NOTE(review): no random seed is set in the visible cells, so the split presumably differs between runs.
msk = np.random.rand(len(train.band_1))<0.8

In [8]:
# Apply the mask along the first (sample) axis: True rows go to training, the rest to validation.
trn_data = bands[msk]
val_data = bands[~msk]

In [9]:
def _to_pixel_rows(images):
    """Flatten each image to (3, 75*75) and transpose it to (75*75, 3)."""
    return np.array([np.array(img).reshape(3, 75*75).T for img in images])


# Per-pixel views of both splits: one row per pixel, one column per channel.
trn_data_transpose = _to_pixel_rows(trn_data)
val_data_transpose = _to_pixel_rows(val_data)

In [10]:
# Persist the four split arrays under `path`, each stored under its variable name.
for _array_name, _array in (('val_data', val_data),
                            ('trn_data', trn_data),
                            ('val_data_transpose', val_data_transpose),
                            ('trn_data_transpose', trn_data_transpose)):
    save_array(path + _array_name, _array)

In [11]:
# Reload the four split arrays saved under `path`, in the order they were written.
val_data, trn_data, val_data_transpose, trn_data_transpose = [
    load_array(path + _array_name)
    for _array_name in ('val_data', 'trn_data',
                        'val_data_transpose', 'trn_data_transpose')
]

In [12]:
#band_1 and band_2 share the same labels
# Split the is_iceberg labels with the same mask used for the images so rows stay aligned.
trn_labels = train.is_iceberg[msk]
val_labels = train.is_iceberg[~msk]

In [13]:
# Make sure the model output directory exists before anything is written to it.
model_path = path + 'models/'
if not os.path.exists(model_path):
    os.mkdir(model_path)

## Resnet From Keras

In [7]:
# Instantiate the ResNet50 from keras.applications with its default arguments.
# NOTE(review): per the Keras docs the defaults build an ImageNet classifier (224x224 input,
# 1000 classes) — presumably not what the 75x75 binary task in this notebook needs; confirm
# whether the custom ResNet50() defined in this notebook was intended here.
rn = resnet50.ResNet50()

NameError: name 'ResNet50' is not defined

In [17]:
# Compile for binary classification and train for three epochs, evaluating on the
# held-out split after each epoch. The batch size comes from the notebook-level
# batch_size setting instead of a second hard-coded 64.
rn.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
rn.fit(trn_data, trn_labels, batch_size=batch_size, epochs=3,
       validation_data=(val_data, val_labels))



Train on 1237 samples, validate on 367 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7ff447e80890>

In [15]:
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """A residual block whose shortcut is the input tensor itself.
    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: three integers, the filter counts of the three main-path conv layers
        stage: current stage label, used for generating layer names
        block: string block label ('a', 'b', ...), used for generating layer names
    # Returns
        Output tensor for the block.
    Note that the input is added to the main-path output unchanged, so the last
    filter count presumably has to match the input's channel count.
    """
    n_first, n_middle, n_last = filters
    channel_axis = 3 if K.image_data_format() == 'channels_last' else 1
    suffix = str(stage) + block + '_branch'

    def conv_bn(tensor, n_filters, size, tag, **conv_kwargs):
        # Conv layer followed by batch norm, named 'res<suffix><tag>' / 'bn<suffix><tag>'.
        out = Conv2D(n_filters, size, name='res' + suffix + tag, **conv_kwargs)(tensor)
        return BatchNormalization(axis=channel_axis, name='bn' + suffix + tag)(out)

    # Main path: 1x1 conv -> kernel_size conv ('same' padding) -> 1x1 conv.
    main = conv_bn(input_tensor, n_first, (1, 1), '2a')
    main = Activation('relu')(main)
    main = conv_bn(main, n_middle, kernel_size, '2b', padding='same')
    main = Activation('relu')(main)
    main = conv_bn(main, n_last, (1, 1), '2c')

    # Residual connection: add the untouched input, then apply the final relu.
    merged = add([main, input_tensor])
    return Activation('relu')(merged)


def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """A residual block whose shortcut goes through its own conv layer.
    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: three integers, the filter counts of the three main-path conv layers
        stage: current stage label, used for generating layer names
        block: string block label ('a', 'b', ...), used for generating layer names
        strides: strides of the first main-path conv and of the shortcut conv
    # Returns
        Output tensor for the block.
    Note that the first main-path conv and the shortcut conv share `strides`
    (default (2, 2)), so both branches are downsampled identically before the add.
    """
    n_first, n_middle, n_last = filters
    channel_axis = 3 if K.image_data_format() == 'channels_last' else 1
    suffix = str(stage) + block + '_branch'

    def conv_bn(tensor, n_filters, size, tag, **conv_kwargs):
        # Conv layer followed by batch norm, named 'res<suffix><tag>' / 'bn<suffix><tag>'.
        out = Conv2D(n_filters, size, name='res' + suffix + tag, **conv_kwargs)(tensor)
        return BatchNormalization(axis=channel_axis, name='bn' + suffix + tag)(out)

    # Main path: strided 1x1 conv -> kernel_size conv ('same' padding) -> 1x1 conv.
    main = conv_bn(input_tensor, n_first, (1, 1), '2a', strides=strides)
    main = Activation('relu')(main)
    main = conv_bn(main, n_middle, kernel_size, '2b', padding='same')
    main = Activation('relu')(main)
    main = conv_bn(main, n_last, (1, 1), '2c')

    # Shortcut path: a strided 1x1 conv + batch norm producing n_last channels.
    shortcut = conv_bn(input_tensor, n_last, (1, 1), '1', strides=strides)

    merged = add([main, shortcut])
    return Activation('relu')(merged)


def ResNet50(input_shape=None):
    """Build a shallow ResNet-style binary classifier.

    Despite the name, only the stem and stages 2 and 3 of the ResNet50 layout
    are built, with reduced filter counts. The network ends in global average
    pooling and a single sigmoid unit.
    # Arguments
        input_shape: optional shape of one sample, without the batch axis.
            When None, a 3-channel 75x75 image laid out for the active Keras
            image data format is used: (3, 75, 75) for channels_first,
            (75, 75, 3) for channels_last.
    # Returns
        An uncompiled Keras Model named 'resnet50' with one sigmoid output.
    """
    channels_last = K.image_data_format() == 'channels_last'
    bn_axis = 3 if channels_last else 1
    if input_shape is None:
        # Keep the default input consistent with the data format that bn_axis is derived from.
        input_shape = (75, 75, 3) if channels_last else (3, 75, 75)

    img_input = Input(shape=input_shape)

    # Stem: strided 7x7 conv followed by a strided 3x3 max-pool.
    x = Conv2D(
        64, (7, 7), strides=(2, 2), padding='same', name='conv1')(img_input)
    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Stage 2: stride 1, so no further downsampling here.
    x = conv_block(x, 3, [16, 16, 32], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [16, 16, 32], stage=2, block='b')
    x = identity_block(x, 3, [16, 16, 32], stage=2, block='c')

    # Stage 3: conv_block's default (2, 2) strides halve the spatial size once more.
    x = conv_block(x, 3, [32, 32, 64], stage=3, block='a')
    x = identity_block(x, 3, [32, 32, 64], stage=3, block='b')
    x = identity_block(x, 3, [32, 32, 64], stage=3, block='c')
    x = identity_block(x, 3, [32, 32, 64], stage=3, block='d')

    # Stages 4 (one conv_block + five identity_blocks, filters [256, 256, 1024]) and
    # 5 (one conv_block + two identity_blocks, filters [512, 512, 2048]) of the
    # ResNet50 layout are intentionally left out to keep the network shallow.

    x = GlobalAveragePooling2D(name='avg_pool')(x)
    # A sigmoid keeps the output in (0, 1), which binary_crossentropy expects as a
    # probability. A relu output is unbounded above and can be exactly 0.
    x = Dense(1, activation='sigmoid')(x)

    model = Model(img_input, x, name='resnet50')

    return model