# Model Building and Training

In this notebook we will train our model on mixed data through multiple inputs.

In [5]:
import tarfile
import pickle
import pandas as pd
import bz2
from datetime import datetime,timedelta
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.layers import concatenate
import cv2
import os
import glob
from data_processing_functions import *

%load_ext autoreload
%autoreload 2
%aimport data_processing_functions

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Read in Data:

In [6]:
model_data_dict = open_pickle('../data_rp/model_data_dict.pkl')

In next 3 cells open the images from the tar.bz2 files using the `load_sky_images()` function. This will be a numpy array of all of the images for each instance in the df. We aren't using the built in stream from directory function of keras due to the file type. The files are very large, so I would prefer not to unzip them on my machine

In [14]:
img_5_min = model_data_dict['df_5_min']['file']

In [15]:
sorted_img_5 = img_5_min.sort_values()

In [23]:
images_5_min = load_sky_images(sorted_img_5)

2014 data/Folsom_sky_images_2014.tar.bz2
deleted tar
2015 data/Folsom_sky_images_2015.tar.bz2
deleted tar
2016 data/Folsom_sky_images_2016.tar.bz2


In [7]:
# save_pickle('../data_rp/5_min_w_img.pkl',images_5_min)
# images_5_min.shape
images_5_min = open_pickle('../data_rp/5_min_w_img.pkl')

In [12]:
train_ind = model_data_dict['df_5_min']['X_train'].index
test_ind = model_data_dict['df_5_min']['X_test'].index

In [18]:
sorted_img_5 = pd.DataFrame(sorted_img_5).reset_index()
# sorted_img_5['img_matrix'] = images_5_min

In [25]:
# sorted_img_5

In [22]:
train_img_ind,test_img_ind = get_index_of_img(train_ind,test_ind,sorted_img_5)
trainImagesX,testImagesX = get_img_from_index(train_img_ind,test_img_ind,images_5_min)

### Process our image data and targets:

right now, we'll just start by scaling the images down from [0-255], to between [0-1].

In [23]:
# images_5_min = images_5_min / 255.0
trainImagesX = trainImagesX / 255.0
testImagesX = testImagesX / 255.0

In [74]:
# model_data_dict['df_5_min'].keys()

In [24]:
# find the largest irradiation in the training set and use it to
# scale our irradiation to the range [0, 1] (will lead to better
# training and convergence)
maxPrice = model_data_dict['df_5_min']['y_train'].max()
trainY = model_data_dict['df_5_min']['y_train'] / maxPrice
testY = model_data_dict['df_5_min']['y_test'] / maxPrice

save processed data:

In [26]:
save_pickle('data/training_ImagesX.pkl',trainImagesX)
save_pickle('data/testing_ImagesX.pkl',testImagesX)
save_pickle('data/TrainY.pkl',trainY)
save_pickle('data/testY.pkl',testY)
save_pickle('data/testY.pkl',testY)

In [32]:
trainImagesX.shape[0]

8000

### Build model:

The three parts of this section will be making adjustments to pre-trained CNN models, building an MLP, and then adding layers at the end to contatinate the results of these two branches of NN models, and then added fully connected and regression layers at the end.

So, let's break them out: 
1. [Update pre-trained CNN models for image data](#Update-pretrained-CNN-models)
2. [Build MLP for numeric/categorical data](#Build-MLP-for-numeric-and-categorical-data)
3. [Create end of multi-imput model](#Build-the-final-section-of-our-model)

#### Update pretrained CNN models

Starting with ResNet50, but will do the others next.

In [35]:
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

baseModelResNet = ResNet50(weights="imagenet", include_top=False,
                           input_tensor=Input(shape=(32, 32, 3)))
# trainImagesX.shape[0],
# input_shape=(trainImagesX.shape[0],32, 32, 3))

# baseModelIv3 = InceptionV3(weights="imagenet", include_top=False,
#                             input_tensor=Input(shape=(1,32, 32, 3)))

# baseModelResIv2 = InceptionResNetV2(weights="imagenet", include_top=False,
#                                       input_tensor=Input(shape=(32, 32, 3)))

In [36]:
baseModelResNet.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 38, 38, 3)    0           input_3[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 16, 16, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 16, 16, 64)   256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [37]:
chanDim = -1

# flatten the volume, then FC => RELU => BN => DROPOUT
headModel = baseModelResNet.output
# headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
headModel = Flatten()(headModel)
headModel = Dense(16)(headModel)
headModel = Activation("relu")(headModel)
headModel = BatchNormalization(axis=chanDim)(headModel)
headModel = Dropout(0.5)(headModel)
# apply another FC layer, this one to match the number of nodes
# coming out of the MLP
headModel = Dense(4)(headModel)
headModel = Activation("relu")(headModel)


baseModelResNet.trainable = False

cnn = Model(inputs=baseModelResNet.input, outputs=headModel)

#### Lets test with just a CNN:

In [38]:
chanDim = -1

# flatten the volume, then FC => RELU => BN => DROPOUT
headModel2 = baseModelResNet.output
# headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
headModel2 = Flatten()(headModel2)
headModel2 = Dense(16)(headModel2)
headModel2 = Activation("relu")(headModel2)
headModel2 = BatchNormalization(axis=chanDim)(headModel2)
headModel2 = Dropout(0.5)(headModel2)
# apply another FC layer, this one to match the number of nodes
# coming out of the MLP
headModel2 = Dense(4)(headModel)
headModel2 = Activation("relu")(headModel2)
headModel2 = Dense(1, activation="linear")(headModel2)


baseModelResNet.trainable = False

cnn2 = Model(inputs=baseModelResNet.input, outputs=headModel)

In [40]:
opt = Adam(lr=1e-3, decay=1e-3 / 200)
cnn2.compile(loss="mean_absolute_percentage_error", optimizer=opt)

cnn2.fit(
    x=trainImagesX, y=trainY,
    validation_data=(testImagesX, testY),
    epochs=200, batch_size=8)

Epoch 1/200


ValueError: in user code:

    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:754 train_step
        y_pred = self(x, training=True)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:1012 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/functional.py:425 call
        inputs, training=training, mask=mask)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/functional.py:560 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/input_spec.py:223 assert_input_compatibility
        str(tuple(shape)))

    ValueError: Input 0 of layer conv1_pad is incompatible with the layer: expected ndim=4, found ndim=3. Full shape received: (8, 32, 32)


#### Build MLP for numeric and categorical data

We're going to start with something super basic initially to see how this does.

In [11]:
def create_mlp(dim, regress=False):
    # define our MLP network
    model = Sequential()
    model.add(Dense(8, input_dim=dim, activation="relu"))
    model.add(Dense(4, activation="relu"))
    # check to see if the regression node should be added
    if regress:
        model.add(Dense(1, activation="linear"))
        # return our model
        
    return model

In [27]:
trainAttrX = model_data_dict['df_5_min']['X_train_p']
mlp = create_mlp(trainAttrX.shape[1], regress=False)

#### Build the final section of our model

Fully connected and with linear activation function for regression output.

In [28]:
# create the input to our final set of layers as the *output* of both
# the MLP and CNN
combinedInput = concatenate([mlp.output, cnn.output])

# our final FC layer head will have two dense layers, the final one
# being our regression head
x = Dense(4, activation="relu")(combinedInput)
x = Dense(1, activation="linear")(x)

# our final model will accept categorical/numerical data on the MLP
# input and images on the CNN input, outputting a single value (the
# predicted price of the house)
model = Model(inputs=[mlp.input, cnn.input], outputs=x)

In [29]:
testAttrX = model_data_dict['df_5_min']['X_test_p']

In [30]:
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="mean_absolute_percentage_error", optimizer=opt)
# train the model
print("[INFO] training model...")
model.fit(
    x=[trainAttrX, trainImagesX], y=trainY,
    validation_data=([testAttrX, testImagesX], testY),
    epochs=200, batch_size=8)

[INFO] training model...
Epoch 1/200


ValueError: in user code:

    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:754 train_step
        y_pred = self(x, training=True)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:1012 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/functional.py:425 call
        inputs, training=training, mask=mask)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/functional.py:560 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /Users/elena.morais/Documents/environments/reseach_paper/lib/python3.7/site-packages/tensorflow/python/keras/engine/input_spec.py:223 assert_input_compatibility
        str(tuple(shape)))

    ValueError: Input 0 of layer conv1_pad is incompatible with the layer: expected ndim=4, found ndim=3. Full shape received: (8, 32, 32)


In [None]:
# make predictions on the testing data
print("[INFO] predicting irridiation...")
preds = model.predict([testAttrX, testImagesX])