# NASNet-iMaterialist
## TFNW

Train NASNet on the iMaterialist dataset.

@alkari

### Check GPU/CPU memory capacity

In [0]:
# To reset your VM (useful if you can't log in to Google Drive), uncomment and run:
# !kill -9 -1

In [0]:
%%capture
# memory footprint support libraries/code
# Link nvidia-smi into /usr/bin (presumably so GPUtil can find it on PATH — TODO confirm).
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
# Packages imported by the memory report cell that follows (GPUtil, psutil, humanize).
!pip install -q gputil
!pip install -q psutil
!pip install -q humanize


In [1]:
import psutil
import humanize
import os
import GPUtil as GPU

GPUs = GPU.getGPUs()
# Colab exposes at most one GPU and does not guarantee that one is attached,
# so fall back to None instead of raising IndexError on an empty list.
gpu = GPUs[0] if GPUs else None


def printm():
  """Print free host RAM, this process's resident size and GPU memory usage.

  When no GPU was detected, a short notice is printed instead of the GPU line.
  """
  process = psutil.Process(os.getpid())
  print('Gen RAM Free: ' + humanize.naturalsize( psutil.virtual_memory().available ), ' I Proc size: ' + humanize.naturalsize( process.memory_info().rss))
  if gpu is None:
    print('GPU RAM: no GPU detected')
    return
  print('GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB'.format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))


printm()

Gen RAM Free: 12.9 GB  I Proc size: 129.0 MB
GPU RAM Free: 11438MB | Used: 1MB | Util   0% | Total 11439MB


### Access Google Drive storage

In [0]:
# Install the google-drive-ocamlfuse FUSE client from the alessandro-strada PPA.
# NOTE(review): `2>&1 > /dev/null` duplicates stderr onto the current stdout *before*
# stdout is sent to /dev/null, so only stdout is discarded and errors are still shown.
# If silencing both streams was intended, the order should be `> /dev/null 2>&1`.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authenticate the Colab user, then read the application-default credentials.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First headless run: stdin is closed and only the output line containing "URL" is shown.
# NOTE(review): the client id/secret are interpolated into the shell command line.
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Prompt for the verification code without echoing it, then pipe it to a second headless run.
# NOTE(review): the code is interpolated unquoted into a shell command.
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [0]:
# Create the mount point (no error if it already exists) and mount Google Drive on it.
!mkdir -p drive
!google-drive-ocamlfuse drive

### Create folder structure

In [0]:
import os
# Switch to the mounted Drive first so the relative folder names below are created inside it.
os.chdir('/content/drive')
os.makedirs('nasnet', exist_ok=True)  # exist_ok: no error if the folder is already there
# From here on the working directory is <Drive>/nasnet.
os.chdir('nasnet')
# Folder for weight files (the checkpoint path used later starts with 'weights/').
os.makedirs('weights', exist_ok=True)

In [0]:
!pwd
!ls ../iMaterialist

### Download model definition

In [0]:
if not os.path.isfile('nasnet.py'):
  !wget https://raw.githubusercontent.com/alkari/Keras-NASNet/master/nasnet.py
if not os.path.isfile('cutout.py'):
  !wget https://raw.githubusercontent.com/alkari/Keras-NASNet/master/cutout.py

### Imports

In [4]:
from __future__ import print_function
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import CSVLogger
from keras.optimizers import Adam
from nasnet import NASNet, NASNetLarge, preprocess_input
from cutout import cutout
import numpy as np
import h5py


Using TensorFlow backend.


### Parameters

In [0]:
# Checkpoint file, relative to the current working directory.
weights_file = 'weights/NASNet-iMaterialist.h5'

# Learning-rate schedule: scale by sqrt(0.5) with patience 5, never below 0.5e-5.
lr_reducer = ReduceLROnPlateau(
    factor=np.sqrt(0.5),
    patience=5,
    cooldown=0,
    min_lr=0.5e-5,
)

# Per-epoch metrics are written to this CSV file.
csv_logger = CSVLogger('NASNet-iMaterialist.csv')

# Keep only the weights with the highest 'val_predictions_acc' seen so far.
model_checkpoint = ModelCheckpoint(
    weights_file,
    monitor='val_predictions_acc',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

In [0]:
# Training hyper-parameters.
batch_size, nb_classes, nb_epoch = 16, 129, 20
data_augmentation = True

# Input image dimensions: square images with three channels.
img_rows = img_cols = 224
img_channels = 3

### Load Data

In [0]:
def load_dataset(train_file, test_file):
    """Load the train and test splits from two HDF5 files.

    Args:
        train_file: path to an HDF5 file with a 'train' group holding
            'images' and 'labels' datasets.
        test_file: path to an HDF5 file with a 'test' group holding
            'images' and 'labels' datasets.

    Returns:
        ``(train_images, train_labels), (test_images, test_labels)`` where each
        labels array has been reshaped to ``(number_of_labels, 1)``.
    """
    def read_split(path, group):
        # [:] reads the whole dataset before the file is closed.
        with h5py.File(path, 'r') as handle:
            images = handle[group]['images'][:]
            labels = handle[group]['labels'][:]
        return images, labels

    train_images, train_labels = read_split(train_file, 'train')
    test_images, test_labels = read_split(test_file, 'test')

    # Turn each label vector into a single-column array.
    train_labels = train_labels.reshape((len(train_labels), 1))
    test_labels = test_labels.reshape((len(test_labels), 1))

    return (train_images, train_labels), (test_images, test_labels)

(X_train, y_train), (X_test, y_test) = load_dataset('../iMaterialist/train_58.h5','../iMaterialist/test_2.h5')

In [8]:
# Show the shape of every loaded array: train images/labels, then test images/labels.
for array in (X_train, y_train, X_test, y_test):
    print(array.shape)


(1000, 224, 224, 3)
(1000, 1)
(1000, 224, 224, 3)
(1000, 1)


In [0]:
# One-hot encode the integer class labels into nb_classes-wide binary matrices.
Y_train, Y_test = (
    np_utils.to_categorical(labels, nb_classes) for labels in (y_train, y_test)
)

# Cast the images to float32, one array at a time.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')


In [10]:
Y_train.shape

(1000, 129)

In [0]:
# preprocess input
# NOTE(review): the results overwrite X_train / X_test, so re-running this cell
# applies preprocess_input again to arrays that were already preprocessed.
X_train = preprocess_input(X_train)
X_test = preprocess_input(X_test)

In [12]:
# Check memory
!free -m

              total        used        free      shared  buff/cache   available
Mem:          13029        2043        7013         220        3971       10522
Swap:             0           0           0


## Build the model using the auxiliary branch to correctly train NASNet


In [0]:
# NASNet-Large with its classification top and the auxiliary branch enabled;
# weights=None (presumably: no pretrained weights are loaded — confirm in nasnet.py).
model = NASNetLarge(
    (img_rows, img_cols, img_channels),
    dropout=0.5,
    use_auxiliary_branch=True,
    include_top=True,
    weights=None,
    classes=nb_classes,
)
#model.summary()

In [0]:
# Adam with gradient-norm clipping; one cross-entropy loss per model output,
# weighted 1.0 and 0.4 (the 0.4 weight presumably applies to the auxiliary head).
optimizer = Adam(lr=1e-3, clipnorm=5)
model.compile(
    optimizer=optimizer,
    loss=['categorical_crossentropy'] * 2,
    loss_weights=[1.0, 0.4],
    metrics=['accuracy'],
)

## Begin Training

In [15]:
# Train the two-output model (labels are passed once per output), then report
# every metric on the test set.
if not data_augmentation:
    print('Not using data augmentation.')

    model.fit(X_train, [Y_train, Y_train],
              batch_size=batch_size,
              epochs=nb_epoch,
              validation_data=(X_test, [Y_test, Y_test]),
              shuffle=True,
              verbose=1,
              callbacks=[lr_reducer, csv_logger, model_checkpoint])
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        preprocessing_function=cutout)  # randomly apply cutout

    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    # NOTE(review): all of those options are disabled above, so this call is
    # probably redundant — confirm before removing.
    datagen.fit(X_train)

    def image_generator(image_datagenerator, batch_size):
        """Yield (X, [y, y]) batches forever from the given ImageDataGenerator.

        Wraps ``image_datagenerator.flow`` over X_train / Y_train and duplicates
        the label batch so that each of the model's two outputs gets its target.

        Args:
            image_datagenerator: generator object whose ``flow`` produces the batches.
            batch_size: number of samples per batch.
        """
        # Use the generator that was passed in rather than the global `datagen`.
        iterator = image_datagenerator.flow(X_train, Y_train, batch_size=batch_size)

        while True:
            X, y = next(iterator)  # get the next batch
            yield X, [y, y]  # duplicate the labels for each batch

    # Fit the model on the batches generated by datagen.flow().
    # steps_per_epoch uses floor division, so a trailing partial batch is not
    # counted towards an epoch.
    model.fit_generator(image_generator(datagen, batch_size),
                        steps_per_epoch=X_train.shape[0] // batch_size,
                        validation_data=(X_test, [Y_test, Y_test]),
                        epochs=nb_epoch, verbose=1,
                        callbacks=[lr_reducer, csv_logger, model_checkpoint])

# Evaluate on the test set and print each metric next to its name.
scores = model.evaluate(X_test, [Y_test, Y_test], batch_size=batch_size)
for score, metric_name in zip(scores, model.metrics_names):
    print("%s : %0.4f" % (metric_name, score))
    print('\n')

Using real-time data augmentation.
Epoch 1/20
Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20
Epoch 6/20
 6/62 [=>............................] - ETA: 1:57 - loss: 8.4686 - predictions_loss: 5.5516 - aux_predictions_loss: 5.1437 - predictions_acc: 0.0312 - aux_predictions_acc: 0.0000e+00

Epoch 7/20
Epoch 8/20
Epoch 9/20


Epoch 10/20
Epoch 11/20


Epoch 12/20
Epoch 13/20
Epoch 14/20

Epoch 15/20
Epoch 16/20
Epoch 17/20
 8/62 [==>...........................] - ETA: 1:50 - loss: 7.9669 - predictions_loss: 5.4042 - aux_predictions_loss: 4.9539 - predictions_acc: 0.0469 - aux_predictions_acc: 0.0312

Epoch 18/20
Epoch 19/20
Epoch 20/20


loss : 22.9982


predictions_loss : 15.9730


aux_predictions_loss : 16.0859


predictions_acc : 0.0090


aux_predictions_acc : 0.0020






---














# Backup

In [0]:
# Get filelist
import os
# Full paths of every entry in workdir whose name starts with "train_".
workdir = "/content/drive/iMaterialist"
filelist = [
    os.path.join(workdir, entry)
    for entry in os.listdir(workdir)
    if entry.startswith("train_")
]