In [2]:
def CNN_LSTM(self):
    """Build the per-frame CNN followed by an LSTM regression head.

    A small convolutional stack encodes every frame independently (it is
    applied through ``TimeDistributed``), an LSTM consumes the resulting
    sequence of frame encodings, and a dense head reduces everything to a
    single output value.

    Reads ``self.input_shape`` (shape of the whole frame sequence) and
    ``self.image_dim`` (shape of one frame).

    Returns:
        The Keras ``Model`` mapping the frame sequence to one value. This
        method does not compile it.
    """
    frames_in = Input(shape=self.input_shape)

    # Per-frame encoder: conv -> batch-norm -> pool -> flatten -> batch-norm.
    vision_model = Sequential()
    vision_model.add(Conv2D(64, (1, 2),
                            activation='relu',
                            padding='same',
                            input_shape=self.image_dim)
                    )
    vision_model.add(BatchNormalization())
    vision_model.add(MaxPooling2D((1, 2)))
    vision_model.add(Flatten())
    vision_model.add(BatchNormalization())
    encoded_frame_sequence = TimeDistributed(vision_model)(frames_in)

    # return_sequences=True keeps one LSTM output per frame; they are
    # flattened together further down.
    encoded_video = LSTM(256, activation='tanh',
                         return_sequences=True)(encoded_frame_sequence)

    fc2 = Dense(64, activation='relu',
                kernel_regularizer=regularizers.l2(0.05))(encoded_video)

    out = Flatten()(fc2)
    out = Dropout(0.5)(out)
    output = Dense(1, activation='relu')(out)

    # The input tensor is `frames_in`; the original referenced an undefined
    # `frames_input` here, which raised NameError as soon as the method ran.
    model = Model(inputs=frames_in, outputs=output)
    return model

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display
import os
import random
import glob
import pickle
import tqdm
from model import *

Using TensorFlow backend.


In [4]:
def generator(IDs, yields, batch_size, cutoff=None, data_dir='data/PROCESSED_III/'):
    """Endlessly yield random ``(features, targets)`` batches.

    Sample IDs are drawn uniformly at random, with replacement, from ``IDs``.
    Each sample is read from ``<data_dir>/<ID>.npy``. A sample containing NaN
    is reported with ``print('no', ID)``, dropped from the pool, and another
    one is drawn in its place, so every slot of a yielded batch holds a
    freshly loaded, NaN-free sample.

    Args:
        IDs: sequence of sample identifiers (file names without ``.npy``).
        yields: mapping from sample ID to its target value.
        batch_size: number of samples in every yielded batch.
        cutoff: if not None, only the first ``cutoff`` frames of each sample
            are used; otherwise the batch holds 38 frames per sample.
        data_dir: directory that contains the ``.npy`` files.

    Yields:
        ``(batch_features, batch_yields)`` with shapes
        ``(batch_size, frames, 1, 128, 11)`` and ``(batch_size,)``.

    Raises:
        ValueError: if no NaN-free sample is left to draw from.
    """
    # Imports stay local, as in the original, so the function is
    # self-contained.
    import os
    import numpy as np
    import random

    n_frames = 38 if cutoff is None else cutoff
    usable_ids = list(IDs)

    while True:
        # Allocate new arrays for every batch: the consumer may still hold a
        # reference to the previous batch (e.g. in a prefetch queue), so
        # overwriting a shared buffer would corrupt it.
        batch_features = np.zeros((batch_size, n_frames, 1, 128, 11))
        batch_yields = np.zeros((batch_size))

        for i in range(batch_size):
            # Redraw until a NaN-free sample is found, so that no slot is
            # left holding zeros or data from an earlier batch.
            while True:
                if not usable_ids:
                    raise ValueError('no NaN-free samples available in ' + data_dir)
                ID = random.choice(usable_ids)
                # Load once; the same array is used for the check and the copy.
                sample = np.load(os.path.join(data_dir, ID + '.npy'))
                if not np.isnan(sample).any():
                    break
                print('no', ID)
                # Never reload a file that is already known to be unusable.
                usable_ids = [other for other in usable_ids if other != ID]

            # sample[:None] is the whole array, so this covers both modes.
            batch_features[i] = sample[:cutoff]
            batch_yields[i] = yields[ID]

        yield batch_features, batch_yields

# Model Training

In [5]:
# Train every candidate architecture on the data of years 2010-2015 (6 years total).

model_list = ['CNN_LSTM', 'SepCNN_LSTM', 'CONVLSTM', 'CONV3D', 'CONVLSTM_CONV3D']

# Datasets
# NOTE: unpickling can execute arbitrary code; only load a file you trust.
# The context manager closes the file handle, which the original left open.
with open('data/yields.p', 'rb') as yields_file:
    yields = pickle.load(yields_file)
y = yields
print(len(yields['train']), len(yields['validation']))

# Early stopping: abort once val_loss has not improved by at least 1e-4
# for 5 consecutive epochs.
earlystop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=5,
                          verbose=1, mode='auto')
callbacks_list = [earlystop]

# Generators (batch size 16)
training_generator = generator(list(y['train'].keys()), y['train'], 16)
validation_generator = generator(list(y['validation'].keys()), y['validation'], 16)

# validation_steps counts whole batches, so pass an integer: ceil(1353 / 16).
# The original passed the float 1353/16.
# NOTE(review): 1353 is presumably the number of validation samples - confirm.
val_steps = -(-1353 // 16)

for model_name in model_list:
    rm = ResearchModels(model_name, 38, (1, 128, 11), print_model=True)
    # NOTE(review): samples_per_epoch / nb_epoch look like the Keras 1 argument
    # names - confirm they mean what is intended on the installed version.
    rm.model.fit_generator(training_generator, validation_data=validation_generator,
                           callbacks=callbacks_list, validation_steps=val_steps,
                           samples_per_epoch=50, nb_epoch=100, verbose=0)
    rm.save(model_name)

FileNotFoundError: [Errno 2] No such file or directory: 'data/yields.p'

# Model Evaluation

In [6]:
# Evaluate every trained model on U.S. corn yields for the year 2016.
from numpy.random import seed
seed(42)
from tensorflow import set_random_seed
set_random_seed(42)
# generator() picks samples with Python's `random` module, which the two seeds
# above do not cover; seed it too so the evaluation batch is reproducible.
random.seed(42)

# One batch as large as the validation set. generator() samples with
# replacement, so this is a random draw rather than each sample exactly once.
test_gen = generator(list(yields['validation'].keys()), yields['validation'], len(yields['validation']))
X_test, y_test = next(test_gen)
abs_error = np.empty(len(model_list))

for i, model_name in enumerate(model_list):
    rm = ResearchModels('No model', 38, (1, 128, 11), saved_model=model_name)
    # evaluate() returns [loss, metric, ...]; index 1 is reported as the MAE.
    abs_error[i] = rm.model.evaluate(X_test, y_test, batch_size=16)[1]
    print('For model {}, the test mean absolute error is {:.2f}.'.format(model_name, abs_error[i]))

best_model = model_list[np.argmin(abs_error)]
print('The best model is {}.'.format(best_model))

NameError: name 'yields' is not defined

# Optimization of batch size

In [7]:
# Continue training the saved best model with several batch sizes and
# compare the resulting test error.
batch_size = [8, 32, 64]
abs_error_batch = np.empty(len(batch_size))

for i, size in enumerate(batch_size):
    # Generators
    training_generator = generator(list(y['train'].keys()), y['train'], size)
    validation_generator = generator(list(y['validation'].keys()), y['validation'], size)

    # validation_steps counts whole batches, so pass an integer: ceil(1353 / size).
    # The original passed the float 1353/size.
    # NOTE(review): 1353 is presumably the number of validation samples - confirm.
    val_steps = -(-1353 // size)

    # Every batch size starts from the same saved checkpoint of the best model.
    rm = ResearchModels('None', 38, (1, 128, 11), saved_model=best_model)
    rm.model.fit_generator(training_generator, validation_data=validation_generator,
                           callbacks=callbacks_list, validation_steps=val_steps,
                           samples_per_epoch=50, nb_epoch=100, verbose=0)

    abs_error_batch[i] = rm.model.evaluate(X_test, y_test, batch_size=size)[1]
    print('For batch {}, the test mean absolute error is {:.2f}.'.format(size, abs_error_batch[i]))

    rm.model.save(best_model + '_' + str(size))

NameError: name 'y' is not defined

# Reducing the number of frames per year

In [8]:
# Train a fresh CONVLSTM on only the first `frame` frames of each year and
# record the test error for every setting.
n_frames = [1, 5, 10, 15, 20, 25, 30, 35]
# NaN marks settings that have not been evaluated yet. The results are pickled
# after every iteration, and np.empty would put arbitrary numbers into those
# intermediate files.
abs_error_frames = np.full(len(n_frames), np.nan)

# validation_steps counts whole batches, so pass an integer: ceil(1353 / 16).
# NOTE(review): 1353 is presumably the number of validation samples - confirm.
val_steps = -(-1353 // 16)

for i, frame in enumerate(n_frames):
    # Generators
    print('Doing {} frames per year'.format(frame))
    training_generator = generator(list(y['train'].keys()), y['train'], 16, cutoff=frame)
    validation_generator = generator(list(y['validation'].keys()), y['validation'], 16, cutoff=frame)

    # This rebinds X_test / y_test to the truncated sequences, so they no
    # longer match the 38-frame models used in the earlier cells.
    test_gen = generator(list(yields['validation'].keys()), yields['validation'], len(yields['validation']), cutoff=frame)
    X_test, y_test = next(test_gen)

    rm = ResearchModels('CONVLSTM', frame, (1, 128, 11))
    rm.model.fit_generator(training_generator, validation_data=validation_generator,
                           callbacks=callbacks_list, validation_steps=val_steps,
                           samples_per_epoch=50, nb_epoch=100, verbose=0)
    abs_error_frames[i] = rm.model.evaluate(X_test, y_test, batch_size=16)[1]

    rm.model.save('CONVLSTM' + '_' + str(frame))
    print('For {} frames per year, the test mean absolute error is {:.2f}.'.format(frame, abs_error_frames[i]))

    # Checkpoint the partial results after every setting; the context manager
    # closes the file handle, which the original leaked on each iteration.
    with open("abs_error_framess.p", "wb") as results_file:
        pickle.dump(abs_error_frames, results_file)

Doing 1 frames per year


NameError: name 'y' is not defined