# VGG19 Training

In [None]:
from Models.VGG19.train_layers import train_classifier as vgg19_train
# ---------------------------------------------------------------------------
# Run switches
# ---------------------------------------------------------------------------
summary_only = False       # forwarded to the training script as -summary_only
restart_training = False   # forwarded to the training script as -restart
use_resize = True          # set here, but not passed on the VGG19 command lines of this cell

# ---------------------------------------------------------------------------
# Datasets (earlier alternatives kept as trailing comments for reference)
# ---------------------------------------------------------------------------
train_img_path = "/workspace/workspace/CVML_augdata/ZcaAugTrainImages"  # "Data/Train/AugTrainImages" | "Data/Train/TrainImages"
train_labels_path = "/workspace/workspace/CVML_augdata/ZcaAugTrainLbls.npy"  # "Data/Train/trainLbls.txt" | "Data/Train/trainAugLbls.npy"
val_img_path = "/workspace/workspace/CVML_augdata/ZcaValImages"  # "Data/Validation/ValidationImages/"
val_label_path = "Data/Validation/valLbls.txt"

# ---------------------------------------------------------------------------
# Layer range to train (both ends inclusive); this stage uses a single layer
# ---------------------------------------------------------------------------
stop_layer = "clf_softmax"
start_layer = "clf_softmax"

# ---------------------------------------------------------------------------
# Output locations and optional starting model
# ---------------------------------------------------------------------------
folder_name_to_store_params = "/workspace/workspace/zca_aug_clf_001"  # where parameters are stored
tb_path = "/workspace/workspace/zca_aug_clf_001/logdir/clf_001"       # TensorBoard log directory
input_model_path = None                                               # no input model for this stage

# ---------------------------------------------------------------------------
# Optimisation settings
# ---------------------------------------------------------------------------
max_number_of_epochs = 20      # upper bound on epochs
training_batch_size = 128
initial_learning_rate = 0.001
clf_dropout = 0.5              # classifier dropout

# Early stopping
early_stop_patience = 3          # default 3
early_stop_min_increase = 0.005  # default 0.01

# Parameters for the learning-rate plateau schedule
lr_plateau_patience = 3
lr_plateau_min_delta = 0.1
lr_plateau_factor = 0.1
# The floor equals the starting rate.
# NOTE(review): presumably this keeps the rate from being lowered in this
# stage - confirm against train_layers.py.
lr_plateau_min = initial_learning_rate

# Request histogram graphs
get_histograms = True

"""
# Train Classifier
"""

!python3 Models/VGG19/train_layers.py {train_img_path} {train_labels_path} {val_img_path} {val_label_path} -output {folder_name_to_store_params} -stop_layer {stop_layer} -start_layer {start_layer} -tb_path {tb_path} -epochs {max_number_of_epochs} -lr {initial_learning_rate} -dropout {clf_dropout} -batch_size {training_batch_size} -early_stop {early_stop_patience} {early_stop_min_increase} -lr_plateau {lr_plateau_patience} {lr_plateau_min_delta} {lr_plateau_factor} {lr_plateau_min} -summary_only {summary_only} -restart {restart_training} -histogram_graphs get_histograms

"""
# Train Block 1
"""
early_stop_patience = 6 # default 3
early_stop_min_increase = 0.002 # default 0.01
lr_plateau_patience = 3
lr_plateau_min_delta = 0.05 
lr_plateau_factor = 0.1 
lr_drops = 2
initial_learning_rate = 0.0001
lr_plateau_min = initial_learning_rate * lr_plateau_factor**lr_drops
get_histograms = False

start_layer = "clf_softmax"
stop_layer = "block2_conv1"
tb_path = "/workspace/workspace/zca_aug_clf_001/logdir/b2c1"
input_model_path = "/workspace/workspace/zca_aug_clf_001/checkpoint.h5" # default None
folder_name_to_store_params = "/workspace/workspace/zca_aug_clf_001/b2c1" # folder to store params
!python3 Models/VGG19/train_layers.py {train_img_path} {train_labels_path} {val_img_path} {val_label_path} -output {folder_name_to_store_params} -stop_layer {stop_layer} -start_layer {start_layer} -tb_path {tb_path} -epochs {max_number_of_epochs} -lr {initial_learning_rate} -dropout {clf_dropout} -batch_size {training_batch_size} -early_stop {early_stop_patience} {early_stop_min_increase} -input_model {input_model_path} -lr_plateau {lr_plateau_patience} {lr_plateau_min_delta} {lr_plateau_factor} {lr_plateau_min} -summary_only {summary_only} -restart {restart_training} -histogram_graphs get_histograms


"""
# Stop Cloud Instance
"""

import socket
gce_name = socket.gethostname()
VMNAME= gce_name
ZONE="us-central1-c"

!gcloud compute instances stop {VMNAME} --zone {ZONE} --quiet

# InceptionResNetV2 Training

In [2]:
# boolean parameters
summary_only = False
use_resize = True
restart_training = False

workspace = "/workspace/workspace"
save_dir = workspace + "/inception_inst_based"

# Datasets to use
train_img_path = workspace + "/CVML_kaggle_challenge/Data/Train/TrainImages" 
train_labels_path = workspace + "/CVML_kaggle_challenge/Data/Train/trainLbls.txt"
val_img_path = workspace + "/CVML_kaggle_challenge/Data/Validation/ValidationImages/"
val_label_path = workspace + "/CVML_kaggle_challenge/Data/Validation/valLbls.txt"

# Start and Stop layers to train between and inclusive
train_mode = "prediction"

# Tensorboard Path
tb_path = save_dir + "/logdir/clf_001"

# folder to store params
folder_name_to_store_params = save_dir

# Folder where input model is loaded
input_model_path = None

# Batch size to use
training_batch_size = 256

# Max number of Epochs to run 
max_number_of_epochs = 3

# Instance based training
instance_based = True

%cd /workspace/workspace/CVML_kaggle_challenge/

"""
# Train Classifier
"""
# Initial Learning Rate
initial_learning_rate = 0.001

# Classifier Dropout
clf_dropout = 0.2 # 0.5

# Parameters for early stopping
early_stop_patience = 3 # default 3
early_stop_min_increase = 0.01 # default 0.01

# Paramters for lr plateau
lr_plateau_patience = 3
lr_plateau_min_delta = 0.1 
lr_plateau_factor = 0.1 
lr_plateau_min = initial_learning_rate

# Get Histogram Graphs
get_histograms = False

!python3 Models/InceptionResNetV2/train_layers.py {train_img_path} {train_labels_path} {val_img_path} {val_label_path} -output {folder_name_to_store_params} -train_mode {train_mode} -tb_path {tb_path} -epochs {max_number_of_epochs} -lr {initial_learning_rate} -dropout {clf_dropout} -batch_size {training_batch_size} -early_stop {early_stop_patience} {early_stop_min_increase} -lr_plateau {lr_plateau_patience} {lr_plateau_min_delta} {lr_plateau_factor} {lr_plateau_min} -summary_only {summary_only} -restart {restart_training} -histogram_graphs {get_histograms} -inst_based {instance_based}


"""
# Train Module All
"""
max_number_of_epochs = 500
training_batch_size = 32
# Parameters for early stopping
early_stop_patience = 35 #default 3
early_stop_min_increase = 0.005 # default 0.01
lr_plateau_patience = 20
lr_plateau_min_delta = 0.01 
lr_plateau_factor = 0.1 
lr_drops = 2
initial_learning_rate = 0.0001
lr_plateau_min = initial_learning_rate * lr_plateau_factor**lr_drops
get_histograms = False


train_mode = "full"
tb_path = save_dir + "/logdir/full"
input_model_path = save_dir + "/checkpoint.h5" # default None
folder_name_to_store_params = save_dir + "/full" #+ "/all_dropout" # folder to store params
!python3 Models/InceptionResNetV2/train_layers.py {train_img_path} {train_labels_path} {val_img_path} {val_label_path} -output {folder_name_to_store_params} -train_mode {train_mode} -tb_path {tb_path} -epochs {max_number_of_epochs} -lr {initial_learning_rate} -dropout {clf_dropout} -batch_size {training_batch_size} -early_stop {early_stop_patience} {early_stop_min_increase} -lr_plateau {lr_plateau_patience} {lr_plateau_min_delta} {lr_plateau_factor} {lr_plateau_min} -summary_only {summary_only} -use_resize {use_resize} -restart {restart_training} -histogram_graphs {get_histograms} -inst_based {instance_based} -input_model {input_model_path}


"""
# Stop Cloud Instance
"""

import socket

# Instance name and zone for the shutdown command. The command itself is
# commented out, so running this cell leaves the VM up.
ZONE = "us-central1-c"
gce_name = socket.gethostname()
VMNAME = gce_name

#!gcloud compute instances stop {VMNAME} --zone {ZONE} --quiet

/jet/prs/workspace/CVML_kaggle_challenge
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
2018-05-01 14:33:43.952525: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2018-05-01 14:33:43.961588: E tensorflow/stream_executor/cuda/cuda_driver.cc:406] failed call to cuInit: CUDA_ERROR_UNKNOWN
2018-05-01 14:33:43.961642: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:145] kernel driver does not appear to be running on this host (rasmus-p100-keras-tf): /proc/driver/nvidia/version does not exist
  final_model = Model(input = inp, output = predictions)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image_input_1_2 (InputLayer)    (None, None, None, 3 0                                

activation_256_2 (Activation)   (None, None, None, 4 0           batch_normalization_256_2[0][0]  
__________________________________________________________________________________________________
conv2d_49_1 (Conv2D)            (None, None, None, 3 10240       block35_6_ac_1[0][0]             
__________________________________________________________________________________________________
conv2d_51_1 (Conv2D)            (None, None, None, 3 9216        activation_50_1[0][0]            
__________________________________________________________________________________________________
conv2d_54_1 (Conv2D)            (None, None, None, 6 27648       activation_53_1[0][0]            
__________________________________________________________________________________________________
conv2d_252_2 (Conv2D)           (None, None, None, 3 10240       block35_6_ac_2[0][0]             
__________________________________________________________________________________________________


activation_139_1 (Activation)   (None, None, None, 1 0           batch_normalization_139_1[0][0]  
__________________________________________________________________________________________________
activation_342_2 (Activation)   (None, None, None, 1 0           batch_normalization_342_2[0][0]  
__________________________________________________________________________________________________
conv2d_137_1 (Conv2D)           (None, None, None, 1 208896      block17_15_ac_1[0][0]            
__________________________________________________________________________________________________
conv2d_140_1 (Conv2D)           (None, None, None, 1 215040      activation_139_1[0][0]           
__________________________________________________________________________________________________
conv2d_340_2 (Conv2D)           (None, None, None, 1 208896      block17_15_ac_2[0][0]            
__________________________________________________________________________________________________


Epoch 1/3
terminate called after throwing an instance of 'std::bad_alloc'
  what():  std::bad_alloc
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
Using existing model: /workspace/workspace/inception_inst_based/checkpoint.h5
Traceback (most recent call last):
  File "Models/InceptionResNetV2/train_layers.py", line 296, in <module>
    instance_based=args.inst_based)
  File "Models/InceptionResNetV2/train_layers.py", line 90, in train_classifier
    final_model = load_model(input_model)
  File "/jet/var/python/lib/python3.6/site-packages/keras/models.py", line 237, in load_model
    with h5py.File(filepath, mode='r') as f:
  File "/jet/var/python/lib/python3.6/site-packages/h5py/_hl/files.py", line 269, in __init__
    fid = make_fid(name, mode, userblock_size, fapl, swmr=swmr)
  File "/jet/var/python/lib/python3.6/site-packages/h5py/_hl/files.py", line 99, in make_fid
    fid = h5f.open(name, flags, fapl=fapl)
  File "h5py/_objects.pyx", line

In [None]:
#@title Predict Test Set

input_model_folder_drive = "/workspace/workspace/inception_adam_reg_no_pre"
input_str = input_model_folder_drive+"/checkpoint.h5"

aug5_data = "/workspace/workspace/CVML_augdata/AugTestImages"
aug10_data = "/workspace/workspace/CVML_augdata/AugTestImagesLarge"

# regular prediction
output_path_drive = input_model_folder_drive + "/inception_img_preds.txt"
#!python3 Models/VGG19/predict.py -test_data Data/Test/TestImages -output {output_path_drive} -input_model {input_str}

# average instance prediction
output_path_drive = input_model_folder_drive + "/avg_inst_preds.txt"
#!python3 Models/VGG19/predict.py -test_data Data/Test/TestImages -output {output_path_drive} -input_model {input_str} -instance

# high instance prediction
output_path_drive = input_model_folder_drive + "/high_inst_preds.txt"
#!python3 Models/VGG19/predict.py -test_data Data/Test/TestImages -output {output_path_drive} -input_model {input_str} -instance

# highest aug5
output_path_drive = input_model_folder_drive + "/high_aug5_inst_preds.txt"
!python3 Models/VGG19/predict.py -test_data Data/Test/TestImages -aug_test_data {aug5_data} -output {output_path_drive} -input_model {input_str} -instance -augmented -decision_mode highest

# average aug5
output_path_drive = input_model_folder_drive + "/avg_aug5_inst_preds.txt"
!python3 Models/VGG19/predict.py -test_data Data/Test/TestImages -aug_test_data {aug5_data} -output {output_path_drive} -input_model {input_str} -instance -augmented

# weighted average aug5
output_path_drive = input_model_folder_drive + "/w_avg_aug5_inst_preds.txt"
!python3 Models/VGG19/predict.py -test_data Data/Test/TestImages -aug_test_data {aug5_data} -output {output_path_drive} -input_model {input_str} -instance -augmented -decision_mode weighted_average

# highest aug10
output_path_drive = input_model_folder_drive + "/high_aug10_inst_preds.txt"
!python3 Models/VGG19/predict.py -test_data Data/Test/TestImages -aug_test_data {aug10_data} -output {output_path_drive} -input_model {input_str} -instance -augmented -decision_mode highest

# average aug10
output_path_drive = input_model_folder_drive + "/avg_aug10_inst_preds.txt"
!python3 Models/VGG19/predict.py -test_data Data/Test/TestImages -aug_test_data {aug10_data} -output {output_path_drive} -input_model {input_str} -instance -augmented

# weighted average aug10
output_path_drive = input_model_folder_drive + "/w_avg_aug10_inst_preds.txt"
!python3 Models/VGG19/predict.py -test_data Data/Test/TestImages -aug_test_data {aug10_data} -output {output_path_drive} -input_model {input_str} -instance -augmented -decision_mode weighted_average

In [None]:
import socket
gce_name = socket.gethostname()
VMNAME= gce_name
ZONE="us-central1-c"

!gcloud compute instances stop {VMNAME} --zone {ZONE} --quiet