# Training the model from npy files
---

This study uses nearly 7,000 images from the area bounded by 36N to 40N and 074W to 078W, covering the period from 2020-05-25T22:00:00 to 2020-05-26T02:00:00.  Images are compressed to 224 x 224 pixels.

## Import useful packages

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Conv2D, Dropout, Lambda, Concatenate, Flatten, concatenate
from tensorflow.keras import Model
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale
import ktrain
import random

In [2]:
# Enumerate GPUs in PCI bus order and make only devices 0 and 1 visible.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0,1",
})

## Define the loss function

In [3]:
def haversine_loss(y_true, y_pred, denorm=(36.0, 4.0, -78.0, 4.0), R=3443.92):
    """
    Returns the mean haversine (great-circle) distance
    between arrays of normalized latitudes and longitudes.

    Note that the distance is averaged as-is; it is NOT
    squared before taking the mean.

    Args:
        y_true:  Either an np.array or a tf.constant
                 of dimensions m x 2 where m is the
                 number of observations.  Each row is
                 an ordered pair of normalized
                 [lat, long].

        y_pred:  Has the same form as y_true.

        denorm:  A tuple (lat_min, lat_range, long_min,
                 long_range) used to convert normalized
                 lat and long back to degrees:

                     lat  = lat_norm  * lat_range  + lat_min
                     long = long_norm * long_range + long_min

                 Pass (0.0, 1.0, 0.0, 1.0) when the inputs
                 are already expressed in degrees.

        R:       Float giving the radius of the earth.
                 The default value is in nautical
                 miles.  Values in other units:

                 kilometers    -> 6378.14
                 statute miles -> 3963.19
                 smoots        -> 3.748e+6

    Returns:
        Scalar tf.Tensor giving the mean haversine
        distance error, in the same units as R.  The
        dtype follows the dtype of the inputs.

    Examples:
        All examples use denorm=(0.0, 1.0, 0.0, 1.0) so
        that the inputs are interpreted as degrees.

        Input:
        y1     = np.array([[0.0, 0.0]])
        y_hat1 = np.array([[0.0, 180.0]])

        Expected result:
        pi * R ~= 10819.4 nm

        Input:
        y2     = np.array([[0.0, 0.0]])
        y_hat2 = np.array([[90.0, 0.0]])

        Expected result:
        pi * R / 2 ~= 5409.7 nm

        Input:
        Portsmouth, VA to Rota, Spain (with R=6378.14)
        y3     = tf.constant([[36.8354, -76.2983]])
        y_hat3 = tf.constant([[36.6237, -6.3601]])

        Expected result:
        approximately 6088.1 km

    Notes:
        Closely follows the JS implementation at
        https://www.movable-type.co.uk/scripts/latlong.html.
    """
    deg_to_rad = np.pi / 180

    # Convert normalized lat and long into actual degrees
    lat_min, lat_range, long_min, long_range = denorm
    lat1  = y_true[:,0] * lat_range + lat_min
    lat2  = y_pred[:,0] * lat_range + lat_min
    long1 = y_true[:,1] * long_range + long_min
    long2 = y_pred[:,1] * long_range + long_min

    # Compute phis and lambdas (radians)
    phi1 = lat1 * deg_to_rad
    phi2 = lat2 * deg_to_rad
    delta_phi    = (lat2 - lat1) * deg_to_rad
    delta_lambda = (long2 - long1) * deg_to_rad

    # Intermediate computations
    a = tf.square(tf.sin(delta_phi / 2)) + tf.cos(phi1) * tf.cos(phi2) * tf.square(tf.sin(delta_lambda / 2))
    # Floating-point rounding can push `a` marginally outside [0, 1]
    # (e.g. for near-antipodal points), which would make the square
    # roots below return NaN.  Clamp it to the valid range first.
    a = tf.clip_by_value(a, 0.0, 1.0)
    c = 2 * tf.atan2(tf.sqrt(a), tf.sqrt(1 - a))

    # Great-circle distance for every observation, in units of R
    d = R * c

    # Average the (unsquared) distances over all observations
    return tf.reduce_mean(d)

## Prepare the data

### Load the images

In [4]:
# Load the pre-split arrays from disk: images (x), targets (y) and
# time inputs (t).  The targets are presumably already normalized
# [lat, long] pairs -- TODO confirm against the preprocessing step.
x_train, x_test = np.load('x_train_big.npy'), np.load('x_test_big.npy')
y_train, y_test = np.load('y_train_big.npy'), np.load('y_test_big.npy')
t_train, t_test = np.load('t_train_big.npy'), np.load('t_test_big.npy')

In [5]:
# Smallest pixel value anywhere in the training images.
np.min(x_train)

0.0196078431372549

In [6]:
# Largest pixel value anywhere in the training images.
np.max(x_train)

1.0

In [7]:
# Dimensions of the training image array.
np.shape(x_train)

(6788, 224, 224, 1)

In [8]:
# Dimensions of the training target array.
np.shape(y_train)

(6788, 2)

In [19]:
# Mean of the second target column (index 1) over the training set.
np.mean(y_train[:, 1])

0.5012638402133798

In [25]:
# Smallest pixel value in the first training image.
np.min(x_train[0])

0.0196078431372549

## A CNN model with multiple inputs
-------

In [20]:
# Two-input network: an image branch (two convolutions) and a time input,
# merged and passed through a dense layer to a 2-unit output.

# Input shapes are taken from a single sample of each training array.
input_image = Input(shape=x_train[0].shape)
input_time = Input(shape=t_train[0].shape)

# Image branch: padding='same' keeps the spatial size, and the second
# convolution reduces the feature maps back to a single channel.
i = Conv2D(filters=5, kernel_size=10, padding='same', activation='relu')(input_image)
i = Conv2D(filters=1, kernel_size=10, padding='same', activation='relu')(i)
i = Flatten()(i)

# Time branch: flattened so it can be concatenated with the image features.
t = Flatten()(input_time)

# Merge both branches, then a dense layer with dropout.
ti = concatenate([i, t])
ti = Dense(256, activation='relu')(ti)
ti = Dropout(0.2)(ti)

# Sigmoid bounds both outputs to (0, 1); presumably this matches the
# normalized [lat, long] targets that haversine_loss denormalizes --
# TODO confirm the targets are scaled to [0, 1].
outputs = Dense(2, activation='sigmoid')(ti)

In [21]:
# Assemble the functional model from the image and time inputs.
model = Model(
    inputs=[input_image, input_time],
    outputs=outputs,
)

In [22]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 1) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 224, 224, 5)  505         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 224, 224, 1)  501         conv2d[0][0]                     
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None,)]            0                                            
______________________________________________________________________________________________

In [26]:
# Train directly on the haversine distance and report it as a metric too.
model.compile(
    loss=haversine_loss,
    optimizer='adam',
    metrics=[haversine_loss],
)

In [27]:
# Wrap the compiled model together with the train and validation splits
# in a ktrain learner; each split is ([images, times], targets).
learner = ktrain.get_learner(
    model,
    train_data=([x_train, t_train], y_train),
    val_data=([x_test, t_test], y_test),
)

In [28]:
# Train with a maximum learning rate of 2e-4.  No epoch count is given, so
# (per the log below) ktrain enables early stopping and reduce-on-plateau
# automatically and uses a triangular learning rate policy.
learner.autofit(2e-4)

early_stopping automatically enabled at patience=5
reduce_on_plateau automatically enabled at patience=2


begin training using triangular learning rate policy with max lr of 0.0002...
Train on 6788 samples, validate on 715 samples
Epoch 1/1024
Epoch 2/1024
Epoch 3/1024
Epoch 4/1024
Epoch 5/1024
Epoch 6/1024
Epoch 7/1024
Epoch 8/1024
Epoch 9/1024
Epoch 10/1024
Epoch 11/1024
Epoch 12/1024
Epoch 13/1024
Epoch 14/1024
Epoch 15/1024
Epoch 16/1024
Epoch 17/1024
Epoch 18/1024
Epoch 19/1024
Epoch 20/1024
Epoch 21/1024
Epoch 22/1024
Epoch 23/1024
Epoch 24/1024
Epoch 25/1024
Epoch 26/1024
Epoch 27/1024
Epoch 28/1024
Epoch 29/1024
Epoch 30/1024
Epoch 00030: Reducing Max LR on Plateau: new max lr will be 0.0001 (if not early_stopping).
Epoch 31/1024
Epoch 32/1024
Epoch 33/1024
Epoch 00033: Reducing Max LR on Plateau: new max lr will be 5e-05 (if not early_stopping).
Epoch 34/1024
Epoch 35/1024
Epoch 36/1024
Epoch 37/1024
Epoch 38/1024
Epoch 39/1024
Epoch 40/1024
Epoch 00040: Reducing Max LR on Pl

<tensorflow.python.keras.callbacks.History at 0x7fb9f00560b8>

## Confirming validation loss and saving the model

In [30]:
# Run the trained model on the held-out images and times.
y_hat = learner.model.predict([x_test, t_test])

In [32]:
# Recompute the loss on the held-out set outside of Keras.  The predictions
# are cast to float64 first -- presumably to match the dtype of y_test.
haversine_loss(y_test, y_hat.astype(np.float64)).numpy()

6.444400189148059

In [33]:
# Persist the trained model in HDF5 format.
# NOTE(review): the model was compiled with the custom haversine_loss, so
# reloading it will presumably need custom_objects={'haversine_loss': ...}
# or compile=False -- confirm when writing the loading code.
learner.model.save('../data/models/model_for_travis.h5')