### Necessary Imports and Installs

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Add, GlobalAveragePooling2D, Conv2D, Dense, AveragePooling2D, \
BatchNormalization, Normalization, Dropout, Flatten, Lambda, Input, Activation, MaxPooling2D
from tensorflow.keras import Model
from tensorflow.keras.optimizers import schedules, SGD
from tensorflow.keras.callbacks import Callback, LambdaCallback
from tensorflow.keras import backend as K
import tensorflow_addons as tfa
#import tensorflow_datasets as tfds

import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
from matplotlib import scale
import time
import random
from collections import defaultdict
import math
import copy
import threading
import opendatasets as od
import tempfile
from sklearn.utils import shuffle

%pylab inline --no-import-all
from pathlib import Path
import pandas as pd
import sys

from GLC.data_loading.common import load_patch

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-by7g17iy because the default path (/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


# Dataset

## Download Dataset

In [2]:
# only uncomment if you don't have the dataset stored on disk yet
# -> have your kaggle user credentials ready
#data = od.download("https://www.kaggle.com/competitions/geolifeclef-2022-lifeclef-2022-fgvc9")

## Load Dataset from file

Set path to competition dataset here.

In [3]:
# Change this path to adapt to where you downloaded the data
DATA_PATH = Path("./geolifeclef-2022-lifeclef-2022-fgvc9/")

Run the following two commands to verify that the data path is set correctly. They should print folder and file names.

In [4]:
ls -L $DATA_PATH

[0m[01;34mmetadata[0m/      [01;34mpatches-fr[0m/  [01;34mpatches_sample[0m/  [01;34mrasters[0m/
[01;34mobservations[0m/  [01;34mpatches-us[0m/  [01;34mpre-extracted[0m/   sample_submission.csv


In [5]:
ls $DATA_PATH/observations

observations_fr_test.csv   observations_us_test.csv
observations_fr_train.csv  observations_us_train.csv


Load the observation ids of the training dataset.

In [6]:
### Training Dataset ###
# let's load the data from file
df_obs_fr = pd.read_csv(DATA_PATH / "observations" / "observations_fr_train.csv", sep=";", index_col="observation_id")
df_obs_us = pd.read_csv(DATA_PATH / "observations" / "observations_us_train.csv", sep=";", index_col="observation_id")

df_obs = pd.concat((df_obs_fr, df_obs_us))

print("Number of observations for training: {}".format(len(df_obs)))

# let's have a look at the data
df_obs.head()

Number of observations for training: 1627475


Unnamed: 0_level_0,latitude,longitude,species_id,subset
observation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10561949,45.705116,1.424622,241,train
10131188,45.146973,6.416794,101,train
10799362,46.783695,-2.072855,700,train
10392536,48.604866,-2.825003,1456,train
10335049,48.815567,-0.161431,157,train


Load the observation ids of the test dataset.

In [7]:
### Test Dataset ###
df_obs_fr_test = pd.read_csv(DATA_PATH / "observations" / "observations_fr_test.csv", sep=";", index_col="observation_id")
df_obs_us_test = pd.read_csv(DATA_PATH / "observations" / "observations_us_test.csv", sep=";", index_col="observation_id")

df_obs_test = pd.concat((df_obs_fr_test, df_obs_us_test))

print("Number of observations for testing: {}".format(len(df_obs_test)))

df_obs_test.head()

Number of observations for testing: 36421


Unnamed: 0_level_0,latitude,longitude
observation_id,Unnamed: 1_level_1,Unnamed: 2_level_1
10782781,43.601788,6.940195
10364138,46.241711,0.683586
10692017,45.181095,1.533459
10222322,46.93845,5.298678
10241950,45.017433,0.960736


Load suggested landcover alignment (only relevant if you're using landcover data later).

In [8]:
df_suggested_landcover_alignment = pd.read_csv(DATA_PATH / "metadata" / "landcover_suggested_alignment.csv", sep=";")
print(df_suggested_landcover_alignment.head())
landcover_mapping = df_suggested_landcover_alignment["suggested_landcover_code"].values

   landcover_code  suggested_landcover_code suggested_landcover_label
0               0                         0              Missing Data
1               1                        11          Cultivated Crops
2               2                        11          Cultivated Crops
3               3                         6       Broad-leaved Forest
4               4                         7         Coniferous Forest


### Train/Val Split Labels
Retrieve the train/val split provided, and load the labels of the train and val set elements.

In [9]:
obs_id_train = df_obs.index[df_obs["subset"] == "train"].values
obs_id_val = df_obs.index[df_obs["subset"] == "val"].values

y_train = df_obs.loc[obs_id_train]["species_id"].values
y_val = df_obs.loc[obs_id_val]["species_id"].values

n_val = len(obs_id_val)
print("Training set size: {} ({:.1%} of train observations)".format(len(y_train), len(y_train) / len(df_obs)))
print("Validation set size: {} ({:.1%} of train observations)".format(n_val, n_val / len(df_obs)))

Training set size: 1587395 (97.5% of train observations)
Validation set size: 40080 (2.5% of train observations)


**Let's get the environmental vectors.**

In [10]:
from sklearn.impute import SimpleImputer
#imp = SimpleImputer(
#    missing_values=np.nan,
#    strategy="constant",
#    fill_value=np.finfo(np.float32).min,
#)
df_env = pd.read_csv("./geolifeclef-2022-lifeclef-2022-fgvc9/pre-extracted/environmental_vectors.csv", sep=";", index_col="observation_id")
a  = df_env.loc(0)
columnsNamesArr = df_env.columns.values
#print(columnsNamesArr)
rowNames = df_env.index.values
#print(rowNames)
my_imputer = SimpleImputer()
#print(df_env.shape)
#print(my_imputer.fit_transform(df_env)[0])

df_env = pd.DataFrame(my_imputer.fit_transform(df_env))
df_env.set_axis(rowNames, axis='index', inplace = True)
df_env.set_axis(columnsNamesArr, axis='columns', inplace = True)
#print(df_env.shape)

df_env.head()

Unnamed: 0,bio_1,bio_2,bio_3,bio_4,bio_5,bio_6,bio_7,bio_8,bio_9,bio_10,...,bio_18,bio_19,bdticm,bldfie,cecsol,clyppt,orcdrc,phihox,sltppt,sndppt
10000000,1.420833,6.908333,29.272598,614.1493,15.1,-8.5,23.6,-1.0,9.183333,9.466667,...,248.0,358.0,2082.0,988.0,29.0,13.0,63.0,62.0,34.0,53.0
10000001,8.8375,9.858334,37.771393,586.8139,23.8,-2.3,26.099998,6.016667,16.383333,16.383333,...,226.0,288.0,1816.0,1142.0,20.0,22.0,39.0,58.0,41.0,36.0
10000002,6.241667,8.35,32.239384,632.8609,21.0,-4.9,25.9,3.033333,14.2,14.2,...,268.0,317.0,1346.0,1075.0,29.0,22.0,54.0,59.0,40.0,38.0
10000003,12.554167,9.525001,40.189877,541.80865,25.9,2.2,23.699999,6.85,19.35,19.35,...,157.0,257.0,1227.0,1383.0,21.0,28.0,18.0,71.0,46.0,25.0
10000004,8.029167,10.075,36.636364,633.0175,23.7,-3.8,27.5,4.616667,16.083334,16.083334,...,214.0,280.0,2833.0,1202.0,24.0,25.0,33.0,69.0,38.0,37.0


### Obtain train, val and test set.

In [11]:
### Kennedy's Train, Val and Test Split ###
# CHOOSE SUBSET FOR 30 Labels
label_amount = 30

import random
subset_size = 0
obs_list = list()
obs_test_list = list()
import numpy as np
# iterate over a subset of the labels
m = 0
for y in (np.unique(y_train)[:]):
    #print("in")
    # for each label, retrieve all corresponding observation ids
    obs = df_obs.index[(df_obs["species_id"] == y)]
   
    #print(len(obs))
    #print(counter)
    #print(obs)
    #print(len(obs))
    if (len(obs) >= 2000 and len(obs) <= 3000):
      t = set(df_obs.index[(df_obs["species_id"] == y) & (df_obs["subset"] == "train")].values)
      m += len(t)
      ten_perc = int(len(t)/10)
      random.seed(3)
      test = random.sample(t, ten_perc)
      train = t-set(test)
      #print(train)
      #print("here")
      obs_test_list.append(list(test))
      obs_list.append(list(train))
      subset_size += 1
    if (subset_size >= label_amount):
      #print("break")
      break
print(m)   
# we now have a numpy array of all observation ids corresponding to this subset of labels
obs_id_train = np.concatenate(obs_list)
obs_id_test = np.concatenate(obs_test_list)
gps_train = np.concatenate((df_obs.loc[obs_id_train]["latitude"].values, df_obs.loc[obs_id_train]["longitude"].values))
# obtain the labels in the right order 
y_train = df_obs.loc[obs_id_train]["species_id"].values
y_test = df_obs.loc[obs_id_test]["species_id"].values
print(y_train.size)

print()
print(y_test.size)
print(y_train[2])
print()

70294
63278

7016
5



In [12]:
#subset_size = 8
obs_list_1 = list()
print(y_val.size)

# iterate over a subset of the labels
counter = 0
print(y_val[0])
for y in (np.unique(y_val)[:]):
    # for each label, retrieve all corresponding observation ids
    if (y in y_train):
      #obs = df_obs.index[df_obs["species_id"] == y].values
      v = df_obs.index[(df_obs["species_id"] == y) & (df_obs["subset"] == "val")].values
      obs_list_1.append(v)
# we now have a numpy array of all observation ids corresponding to this subset of labels
obs_id_val = np.concatenate(obs_list_1)

# obtain the labels in the right order 
y_val = df_obs.loc[obs_id_val]["species_id"].values
gps_val = np.concatenate((df_obs.loc[obs_id_val]["latitude"].values, df_obs.loc[obs_id_val]["longitude"].values))

print(y_val.size)
print(obs_id_val == obs_id_train)

40080
11
1831
False


  print(obs_id_val == obs_id_train)


In [13]:
train_dict_count = defaultdict(lambda: 0)
for y in y_train:
    train_dict_count[y] += 1
    
print("training: ")
for key, value in train_dict_count.items():
    print("label {:>4}: {:.2f}%".format(key, value/len(y_train)))

print()

val_dict_count = defaultdict(lambda: 0)
for y in y_val:
    val_dict_count[y] += 1
    
print("validation: ")
for key, value in val_dict_count.items():
    print("label {:>4}: {:.2f}%".format(key, value/len(y_val)))

training: 
label    5: 0.04%
label   68: 0.03%
label  125: 0.04%
label  129: 0.03%
label  223: 0.03%
label  553: 0.03%
label  570: 0.03%
label  605: 0.03%
label  679: 0.03%
label  692: 0.03%
label  739: 0.03%
label  979: 0.04%
label 1075: 0.04%
label 1155: 0.03%
label 1916: 0.03%
label 2196: 0.03%
label 2524: 0.04%
label 2821: 0.03%
label 2950: 0.03%
label 2992: 0.03%
label 3253: 0.03%
label 4940: 0.04%
label 4991: 0.04%
label 4999: 0.04%
label 5025: 0.03%
label 5086: 0.03%
label 5168: 0.03%
label 5192: 0.03%
label 5207: 0.04%
label 5228: 0.04%

validation: 
label    5: 0.03%
label   68: 0.03%
label  125: 0.02%
label  129: 0.03%
label  223: 0.03%
label  553: 0.03%
label  570: 0.03%
label  605: 0.03%
label  679: 0.02%
label  692: 0.04%
label  739: 0.03%
label  979: 0.03%
label 1075: 0.03%
label 1155: 0.03%
label 1916: 0.03%
label 2196: 0.03%
label 2524: 0.04%
label 2821: 0.04%
label 2950: 0.05%
label 2992: 0.05%
label 3253: 0.03%
label 4940: 0.04%
label 4991: 0.01%
label 4999: 0.04%
lab

In [14]:
np.unique(y_train) == np.unique(y_val)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

### Remap the labels s.t. they go from 0 to n-1
(NAN fix is here)

In [15]:
# create a map s.t. the labels will go from 0 to n-1
map_labels = dict()
i = 0
for l in np.unique(y_train):
    map_labels[l] = i
    i+=1
print(map_labels)

{5: 0, 68: 1, 125: 2, 129: 3, 223: 4, 553: 5, 570: 6, 605: 7, 679: 8, 692: 9, 739: 10, 979: 11, 1075: 12, 1155: 13, 1916: 14, 2196: 15, 2524: 16, 2821: 17, 2950: 18, 2992: 19, 3253: 20, 4940: 21, 4991: 22, 4999: 23, 5025: 24, 5086: 25, 5168: 26, 5192: 27, 5207: 28, 5228: 29}


In [16]:
# apply the map to the training labels
y_train_normalized = np.zeros(np.shape(y_train), dtype='int64')

for i in range(len(y_train)):
    y_train_normalized[i] = map_labels[y_train[i]]
    
# shuffle together
obs_id_train, y_train = shuffle(obs_id_train, y_train_normalized)

no_output_neurons = len(np.unique(y_train))
print("# output neurons: ", no_output_neurons)

# output neurons:  30


In [17]:
# apply the map to the validation labels
y_val_normalized = np.zeros(np.shape(y_val), dtype='int64')

for i in range(len(y_val)):
    y_val_normalized[i] = map_labels[y_val[i]]
    
# shuffle together
obs_id_val, y_val = shuffle(obs_id_val, y_val_normalized)

## Write input pipeline to load batches as we train.

### Custom Generator
Since dataset is too large to load it all into memory once, we need to load it from disk in batches as we train. Such a generator can later be passed into model.fit() instead of a train and/or validation dataset.

In [18]:
class Patches_Generator(tf.keras.utils.Sequence) :
  
    def __init__(self, obs_ids, labels, batch_size) :
        self.obs_ids = obs_ids
        self.labels = labels
        self.batch_size = batch_size
        
        # to make the generator thread safe 
        self.lock = threading.Lock()

    def __len__(self) :
        return (np.ceil(len(self.obs_ids) / float(self.batch_size))).astype(int)
  
    # returns one batch
    def __getitem__(self, idx) :
        X_batch = list()
        y_batch = list()

        for i in range(idx * self.batch_size, (idx+1) * self.batch_size):
            if i >= len(self.obs_ids): break
            
            patch = load_patch(self.obs_ids[i], DATA_PATH, data='rgb')
            X_batch.append(patch[0])
            y_batch.append(self.labels[i])

        with self.lock:
            return np.asarray(X_batch), np.array(y_batch)

In [19]:
import threading

class Environmental_Patches_Generator(tf.keras.utils.Sequence) :
  
    def __init__(self, obs_ids, labels, batch_size) :
        self.obs_ids = obs_ids
        self.labels = labels
        self.batch_size = batch_size
        #self.gps = gps
        #self.extractor = extractor
        #print("INIT")
        # to make the generator thread safe 
        self.lock = threading.Lock()

    def __len__(self) :
        return (np.ceil(len(self.obs_ids) / float(self.batch_size))).astype(int)
  
    # returns one batch
    def __getitem__(self, idx) :
        X_batch = list()
        y_batch = list()
        X_env_batch = list()

        #print("ONE BATCH")
        for i in range(idx * self.batch_size, (idx+1) * self.batch_size):
            if i >= len(self.obs_ids): break
            
            rgb, near_ir, landcover, altitude = load_patch(self.obs_ids[i], DATA_PATH, data='all')
            ni = near_ir.reshape(256, 256, 1)
            lc = landcover.reshape(256, 256, 1)
            alt = altitude.reshape(256, 256, 1)

            patch = np.concatenate((rgb, ni, lc, alt), axis=2)

            #cs = MinMaxScaler()
            #print("PATCH GENERATOR")
            #print((df_env.loc[self.obs_ids[i]].values).shape)
            #print(cs.fit_transform(df_env.loc[self.obs_ids[i]].values).shape)
            #k = input()
            X_env_batch.append(df_env.loc[self.obs_ids[i]].values)
            #X_env_batch.append(df_env[self.obs_ids[i], :])
            #X_env_batch.append(cs.fit_transform(df_env.loc[self.obs_ids[i]].values.reshape(-1,1)))
            X_batch.append(patch)
            y_batch.append(self.labels[i])

        with self.lock:
            
            #return {'input_1': np.asarray(X_batch), 'input_2': np.asarray(X_env_batch)}, np.asarray(np.array(y_batch))
            #return np.asarray(X_batch), np.array(y_batch)
            return (np.asarray(X_batch), np.asarray(X_env_batch)), np.array(y_batch)

# First Simple Neural Network
Let's create a first neural network as a baseline to see how it performs.

In [20]:
# # for distributed training (that is, using multiple GPUs for data parallelization)
# # https://www.tensorflow.org/guide/distributed_training#use_tfdistributestrategy_with_keras_modelfit
# mirrored_strategy = tf.distribute.MirroredStrategy()

In [21]:
# returns a simple convolutional neural net
def complex_model(input_shape, learning_rate=0.1, output_neurons=46):
    
    # for distributed training
    #with mirrored_strategy.scope():
    
    model = tf.keras.models.Sequential()
    dropout_prob = 0.1
    
    random.seed(42)

    he = tf.keras.initializers.HeNormal(seed=42)
    
    # 1. Preprocessing
    model.add(tf.keras.layers.Rescaling(1./255))
    #model.add(tf.keras.layers.RandomFlip("horizontal"))
    #model.add(tf.keras.layers.RandomRotation(factor=0.02))
    model.add(tf.keras.layers.RandomContrast(factor=0.1))
    model.add(tf.keras.layers.RandomCrop(input_shape[0], input_shape[1]))
    #model.add(tf.keras.layers.RandomZoom(-0.1, 0.1))

    # 2. Convolutional Layers
    # 64 units
    model.add(Conv2D(64, kernel_size=3, activation='relu', padding='valid', input_shape=input_shape,
                     kernel_initializer=he))
    model.add(Dropout(dropout_prob))

    model.add(Conv2D(64, kernel_size=3, activation='relu', padding='valid',
                     kernel_initializer=he))
    model.add(MaxPooling2D())
    model.add(Dropout(dropout_prob))
    
    # 128 units
    model.add(Conv2D(128, kernel_size=3, activation='relu', padding='valid',
                     kernel_initializer=he))
    model.add(Dropout(dropout_prob))
    
    model.add(Conv2D(128, kernel_size=3, activation='relu', padding='valid',
                     kernel_initializer=he))
    model.add(Dropout(dropout_prob))
    
    model.add(Conv2D(128, kernel_size=3, activation='relu', padding='valid',
                     kernel_initializer=he))
    model.add(MaxPooling2D())
    model.add(Dropout(dropout_prob))
    
    # 256 units
    model.add(Conv2D(256, kernel_size=3, activation='relu', padding='valid',
                     kernel_initializer=he))
    model.add(MaxPooling2D())
    model.add(Dropout(dropout_prob))
    
    model.add(Conv2D(256, kernel_size=3, activation='relu', padding='valid',
                     kernel_initializer=he))
    model.add(MaxPooling2D())
    model.add(Dropout(dropout_prob))

    model.add(Conv2D(256, kernel_size=3, activation='relu', padding='valid',
                     kernel_initializer=he))
    model.add(MaxPooling2D())
    model.add(Dropout(dropout_prob))

    
    # from convolutional layers to dense layers
    model.add(tf.keras.layers.Flatten())

    
    # 3. Dense Layers
    model.add(Dense(64, activation='relu', kernel_initializer=he))
    model.add(Dropout(dropout_prob))
    
    model.add(Dense(128, activation='relu', kernel_initializer=he))
    model.add(Dropout(dropout_prob))
    
    model.add(Dense(256, activation='relu', kernel_initializer=he))
    model.add(Dropout(dropout_prob))

    # 4. Output Layer
    model.add(Dense(output_neurons, activation='softmax'))
    
    # compire the model
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
                           tf.keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy")])
    
    return model

In [24]:
def multi_modal_complex_model(input_shape1=(256, 256, 6), input_shape2=(29), learning_rate=0.001, output_neurons=30):
    #with mirrored_strategy.scope():

        # Inputs
        patch_input = tf.keras.layers.Input(shape=input_shape1, dtype='float32')
        tabular_input = tf.keras.layers.Input(shape=input_shape2, dtype='float32')  
        
        # hyperparameters
        dropout_prob = 0.0
        he = tf.keras.initializers.HeNormal(seed=42)

        # Augment data
        # augmented = data_augmentation_for_visualization(patch_input)
        x = tf.keras.layers.Rescaling(1./255)(patch_input)
        x = tf.keras.layers.RandomContrast(factor=0.1)(x)
        x = tf.keras.layers.RandomCrop(input_shape1[0], input_shape1[1])(x)

        # From Scratch model
        # 64 units
        x = tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu', padding='valid', kernel_initializer=he)(x)
        x = Dropout(dropout_prob)(x)
        
        x = tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu', padding='valid', kernel_initializer=he)(x)
        x = MaxPooling2D()(x)
        x = Dropout(dropout_prob)(x)
        
        # 128 units
        x = tf.keras.layers.Conv2D(128, kernel_size=3, activation='relu', padding='valid', kernel_initializer=he)(x)
        x = Dropout(dropout_prob)(x)
        
        x = tf.keras.layers.Conv2D(128, kernel_size=3, activation='relu', padding='valid', kernel_initializer=he)(x)
        x = Dropout(dropout_prob)(x)
        
        x = tf.keras.layers.Conv2D(128, kernel_size=3, activation='relu', padding='valid', kernel_initializer=he)(x)
        x = MaxPooling2D()(x)
        x = Dropout(dropout_prob)(x)
        
        # 256 units
        x = tf.keras.layers.Conv2D(256, kernel_size=3, activation='relu', padding='valid', kernel_initializer=he)(x)
        x = MaxPooling2D()(x)
        x = Dropout(dropout_prob)(x)
        
        x = tf.keras.layers.Conv2D(256, kernel_size=3, activation='relu', padding='valid', kernel_initializer=he)(x)
        x = MaxPooling2D()(x)
        x = Dropout(dropout_prob)(x)
        
        x = tf.keras.layers.Conv2D(256, kernel_size=3, activation='relu', padding='valid', kernel_initializer=he)(x)
        x = MaxPooling2D()(x)
        x = Dropout(dropout_prob)(x)

        # Add Dense layers for images
        x = tf.keras.layers.Flatten()(x)
        x = tf.keras.layers.Dense(64, activation='relu', kernel_initializer=he)(x)
        x = Dropout(dropout_prob)(x)
        
        x = tf.keras.layers.Dense(128, activation='relu', kernel_initializer=he)(x)
        x = Dropout(dropout_prob)(x)
    
        x = tf.keras.layers.Dense(256, activation='relu', kernel_initializer=he)(x)
        x = Dropout(dropout_prob)(x)

        # Add Dense layers for Tabular data
        y = tf.keras.layers.Dense(32, activation='relu')(tabular_input)
        y = tf.keras.layers.Dense(128, activation='relu')(y)
        y = tf.keras.layers.Dense(256, activation='relu')(y)
        y = tf.keras.layers.Dense(512, activation='relu')(y)

        # Concatenate Image and tabular weights
        z = tf.keras.layers.Concatenate(axis=1)([x, y])

        # Add Classification Head
        z = tf.keras.layers.Dense(128, activation='relu')(z)
        classifier = tf.keras.layers.Dense(output_neurons, name='outputs', activation='softmax')(z)

        # Define inputs and outputs
        model = tf.keras.Model(inputs=[patch_input, tabular_input], outputs=classifier)

        # Optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        
        # Compile model
        model.compile(optimizer=optimizer,
                      loss=tf.keras.losses.sparse_categorical_crossentropy,
                      metrics=[
                          tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
                          tf.keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy")
                      ]
                      )
        
        return model

## Training Prep

### Settings used when parallelizing the I/O Pipeline in model.fit().

I used the setting values in the cell below for a high-cpu vm on GCP with the following specs:
- machine type: n1-highcpu-96 (96 CPU cores)
- vCPUs to core ratio: 2 vCPUs per core (making a theoretical max value for num_threads of 96 * 2 = 192)
- 4 x NVIDIA Tesla T4 GPUs

In [25]:
# Remember to tune the learning rate accordingly.
BATCHSIZE = 64

# The maximun value for num_threads is dependent on amount of CPU cores:
# amount of CPU cores * vCPUs to core ratio = theoretical max of NUM_THREADS
NUM_THREADS = 11

# The more batches we prefetch, the less idle the GPUs will be. 
# To check GPU usage:
# 1. Run nvidia-smi -l 1 from the terminal to monitor the GPU usage during training. 
# 2. Try to get close to 100% for all GPUs by adjusting the value below (and the two above). Due to the overhead
#    from tf.distribute.MirroredStrategy(), you won't be able to consistently get 100% for all GPUs. But try to 
#    get close.
# 3. Be aware that RAM limits the amount of batches you can prefetch.
PRE_FETCH_NUM_BATCHES = int(NUM_THREADS * 70) 

### Generators
Create generators that will read training / validation data from disk during training.

In [26]:
#train_generator = Patches_Generator(obs_id_train, y_train, BATCHSIZE)

In [27]:
#val_generator = Patches_Generator(obs_id_val, y_val, BATCHSIZE)

In [28]:
train_generator = Environmental_Patches_Generator(obs_id_train, y_train, BATCHSIZE)
val_generator = Environmental_Patches_Generator(obs_id_val, y_val, BATCHSIZE)

# # converting our train dataset to tf.data.Dataset
# tf_train_dataset = tf.data.Dataset.from_generator(
#     lambda: train_generator ,  # Our generator 
#     output_types = ({'input_1': tf.float32 , 'input_2': tf.float32}, tf.float32) , # How we're expecting our output dtype
# #    output_shapes = ({'input_1': [BATCH_SIZE, 256 , 256, 6], 'input_2': [BATCH_SIZE, 29]} , [BATCH_SIZE, ]) # How we're expecting our output shape
# )

# tf_val_dataset = tf.data.Dataset.from_generator(
#     lambda: val_generator , 
#     output_types = ({'input_1': tf.float32 , 'input_2': tf.float32}, tf.float32),
# #    output_shapes = ({'input_1': [BATCH_SIZE, 256 , 256, 6], 'input_2': [BATCH_SIZE, 29]} , [BATCH_SIZE, ]) 
# )

### Training

Use the cyclical learning rate policy (with exponential decay).

In [None]:
STEPS_PER_EPOCH = len(y_train)//BATCHSIZE

In [None]:
# define cyclical learning rate policy
lr_min = 1e-6
lr_max = 1e-3

clr = tfa.optimizers.CyclicalLearningRate(
        initial_learning_rate=lr_min,
        maximal_learning_rate=lr_max,
        scale_fn=lambda x: 1/(2.**(x-1)), # exponential decay
        step_size=2 * STEPS_PER_EPOCH
)

Create the model.

In [29]:
# create the network
#model = simple_model((256, 256, 3), learning_rate=clr, output_neurons=len(np.unique(y_train)))
#model = simple_model_with_gps([(256, 256, 3), np.shape(gps)], learning_rate=clr, output_neurons=len(np.unique(y_train)))

#model = simple_model((256, 256, 3), output_neurons = no_output_neurons, learning_rate=0.00001)

# create the model
input_shape1 = (256, 256, 6)
input_shape2 = (27) # maybe (29) ?

model = multi_modal_complex_model(input_shape1, input_shape2, learning_rate=clr, output_neurons=30)


Train the network.

In [30]:
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10, 
                                              verbose=0, mode='auto', baseline=None, restore_best_weights=True)

In [31]:
history = model.fit(train_generator, epochs=100, callbacks=[early_stop], #steps_per_epoch=STEPS_PER_EPOCH, 
                    validation_data=val_generator,
                    # for parallelization of reading from disk (I/O) pipeline
                    max_queue_size=PRE_FETCH_NUM_BATCHES, workers=NUM_THREADS, use_multiprocessing=True)

Epoch 1/100


InternalError: Graph execution error:

Detected at node 'model/dense_3/MatMul' defined at (most recent call last):
    File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "/usr/local/lib/python3.8/dist-packages/traitlets/config/application.py", line 846, in launch_instance
      app.start()
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelapp.py", line 505, in start
      self.io_loop.start()
    File "/usr/local/lib/python3.8/dist-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/usr/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/usr/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/usr/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/usr/local/lib/python3.8/dist-packages/tornado/ioloop.py", line 688, in <lambda>
      lambda f: self._run_callback(functools.partial(callback, future))
    File "/usr/local/lib/python3.8/dist-packages/tornado/ioloop.py", line 741, in _run_callback
      ret = callback()
    File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 814, in inner
      self.ctx_run(self.run)
    File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 775, in run
      yielded = self.gen.send(value)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelbase.py", line 365, in process_one
      yield gen.maybe_future(dispatch(*args))
    File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 234, in wrapper
      yielded = ctx_run(next, result)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelbase.py", line 272, in dispatch_shell
      yield gen.maybe_future(handler(stream, idents, msg))
    File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 234, in wrapper
      yielded = ctx_run(next, result)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelbase.py", line 540, in execute_request
      self.do_execute(
    File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 234, in wrapper
      yielded = ctx_run(next, result)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/ipkernel.py", line 294, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/zmqshell.py", line 536, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 2768, in run_cell
      result = self._run_cell(
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 2814, in _run_cell
      return runner(coro)
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3012, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3191, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3251, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "<ipython-input-31-fa8289c9af32>", line 1, in <module>
      history = model.fit(train_generator, epochs=100, callbacks=[early_stop], #steps_per_epoch=STEPS_PER_EPOCH,
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 859, in train_step
      y_pred = self(x, training=True)
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py", line 451, in call
      return self._run_internal_graph(
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py", line 589, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/layers/core/dense.py", line 219, in call
      outputs = tf.matmul(a=inputs, b=self.kernel)
Node: 'model/dense_3/MatMul'
Detected at node 'model/dense_3/MatMul' defined at (most recent call last):
    File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "/usr/local/lib/python3.8/dist-packages/traitlets/config/application.py", line 846, in launch_instance
      app.start()
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelapp.py", line 505, in start
      self.io_loop.start()
    File "/usr/local/lib/python3.8/dist-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/usr/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/usr/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/usr/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/usr/local/lib/python3.8/dist-packages/tornado/ioloop.py", line 688, in <lambda>
      lambda f: self._run_callback(functools.partial(callback, future))
    File "/usr/local/lib/python3.8/dist-packages/tornado/ioloop.py", line 741, in _run_callback
      ret = callback()
    File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 814, in inner
      self.ctx_run(self.run)
    File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 775, in run
      yielded = self.gen.send(value)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelbase.py", line 365, in process_one
      yield gen.maybe_future(dispatch(*args))
    File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 234, in wrapper
      yielded = ctx_run(next, result)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelbase.py", line 272, in dispatch_shell
      yield gen.maybe_future(handler(stream, idents, msg))
    File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 234, in wrapper
      yielded = ctx_run(next, result)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelbase.py", line 540, in execute_request
      self.do_execute(
    File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 234, in wrapper
      yielded = ctx_run(next, result)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/ipkernel.py", line 294, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/usr/local/lib/python3.8/dist-packages/ipykernel/zmqshell.py", line 536, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 2768, in run_cell
      result = self._run_cell(
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 2814, in _run_cell
      return runner(coro)
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3012, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3191, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3251, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "<ipython-input-31-fa8289c9af32>", line 1, in <module>
      history = model.fit(train_generator, epochs=100, callbacks=[early_stop], #steps_per_epoch=STEPS_PER_EPOCH,
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 859, in train_step
      y_pred = self(x, training=True)
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py", line 451, in call
      return self._run_internal_graph(
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py", line 589, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/layers/core/dense.py", line 219, in call
      outputs = tf.matmul(a=inputs, b=self.kernel)
Node: 'model/dense_3/MatMul'
2 root error(s) found.
  (0) INTERNAL:  Attempting to perform BLAS operation using StreamExecutor without BLAS support
	 [[{{node model/dense_3/MatMul}}]]
	 [[model/random_crop/cond/pivot_t/_4/_25]]
  (1) INTERNAL:  Attempting to perform BLAS operation using StreamExecutor without BLAS support
	 [[{{node model/dense_3/MatMul}}]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_2710]

In [None]:
model.save('multi_complex_cnn_final_data')