## Imports and config

In [None]:
#Append other folders to path
import sys
sys.path.append('../')

#Automatically reload modules
%load_ext autoreload
%autoreload 2

#Make this notebook full width
from IPython.core.display import display, HTML
%matplotlib inline
import matplotlib.pyplot as plt
display(HTML("<style>.container { width:100% !important; }</style>"))

#Imports
import numpy as np
import keras,os,gc,datetime
import logging
from Utils.zscore_images import *
from IO.load_training_data import *
from IO.load_test_data import *
from Models.NASNet_mobile import *
from keras_tqdm import TQDMNotebookCallback

# Root logger level NOTSET (numeric value 0): show all logging output
logging.basicConfig(level=logging.NOTSET)

# Seed numpy's global random generator for easier reproducibility
# (only numpy is seeded here)
np.random.seed(42)

# Intended share of samples used for training; the rest is for validation.
# NOTE(review): the model.fit call in this notebook passes its own
# validation_split and does not read this value -- confirm which is intended.
training_portion = 0.8

## Load data

In [None]:
# Load the training samples and labels from /Datasets/ without shuffling.
# If this throws an error, you probably haven't created the hdf5 dataset yet;
# in that case run /Scripts/runDataAugmentation first.
X, y = load_training_data_h5(shuffle=False)

## Setup model

In [None]:
# Set up a NASNetMobile model (the first run will load the weights)
model = NASNet_mobile()

In [None]:
# Configure Keras training: Adam optimizer (first positional argument 0.0008),
# binary cross-entropy loss, accuracy reported as metric
model.compile(
    optimizer=keras.optimizers.Adam(0.0008),
    loss=keras.losses.binary_crossentropy,
    metrics=['accuracy'],
)

##  Run training

In [None]:
# TQDMNotebookCallback is already imported in the imports cell at the top of
# the notebook, so it is not imported a second time here.
#
# NOTE(review): Keras takes the validation_split slice from the END of X/y
# before any shuffling, and the data was loaded with shuffle=False -- confirm
# that the stored sample order makes the last 10% a representative validation set.
# NOTE(review): training_portion (0.8) from the config cell is not used here;
# this call holds out 10% for validation. Confirm which value is intended.
model.fit(
    x=X,
    y=y,
    batch_size=50,
    epochs=1,
    verbose=0,  # Keras' own progress output is off; the TQDM callback reports progress
    callbacks=[TQDMNotebookCallback()],
    validation_split=0.1,
    shuffle=True,  # shuffles the training data before each epoch
)

## Create submission

In [None]:
# Free the RAM held by the training arrays before the test set is loaded
X = None
y = None
gc.collect()

# Loads the test data from /Datasets/
# If this throws an error, you probably haven't created the test hdf5 dataset yet
# In that case run /Scripts/createTestHDF5
X = load_test_data_h5()

logging.info("Loading file names")
# NOTE(review): predictions are matched to file names purely by position, so
# the order returned by glob must equal the row order of the loaded test data
# -- confirm against /Scripts/createTestHDF5.
test_files = glob(os.path.join("../Datasets/test/", '*.tif'))  # find the test file names
test_df = pd.DataFrame({'path': test_files})  # one row per test file

# The id is the file name without directory and without extension.
# os.path.basename handles the path separators of the running platform, so
# there is no Windows special case and no dependence on the directory depth.
test_df['id'] = test_df['path'].map(lambda p: os.path.basename(p).split(".")[0])

logging.info("Predicting labels")
predictions = model.predict(X, verbose=1)  # predict the labels for the test data

logging.info("Storing submission")
test_df['label'] = predictions  # store them in the dataframe
submission = test_df[["id", "label"]]
display(submission.head())  # explicit display: a bare expression mid-cell shows nothing

# Make sure the output folder exists, then write a timestamped submission file
out_dir = "../Out/"
os.makedirs(out_dir, exist_ok=True)
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
submission.to_csv(out_dir + "submission_" + timestamp + ".csv", index=False, header=True)