In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import random
import cv2
import os

import resnet

First we need to set up some basic variables such as our batch size, country and data directories/paths 

In [3]:
# Country code; image files are selected by checking that this substring occurs in their file name.
COUNTRY = "phl"
# CSV that is loaded into the scores dataframe (`df`).
CSV_PATH = "../../CCI/hmbaier/cci_example.csv"
# Directory that is scanned for image files (note the trailing slash).
IMAGERY_DIR = "../../CCI/hmbaier/"
# Number of samples per batch in the tf.data datasets.
BATCH_SIZE = 64

['../../CCI/hmbaier/107024_phl_2018-01-01_2018-03-31.png',
 '../../CCI/hmbaier/107025_phl_2018-01-01_2018-03-31.png',
 '../../CCI/hmbaier/107026_phl_2018-01-01_2018-03-31.png',
 '../../CCI/hmbaier/107027_phl_2013-09-01_2014-06-30.png',
 '../../CCI/hmbaier/107030_phl_2013-09-01_2014-06-30.png']

Next, we create a variable called `files` that contains the path of every image in our base folder, subset to just the images for our selected country.

In [None]:
# Build the list of image paths for the selected country.
# - os.listdir returns entries in arbitrary, OS-dependent order, so the result
#   is sorted to make the listing (and anything derived from it, such as the
#   train/validation split) repeatable between runs and machines.
# - os.path.join produces exactly the same strings as plain concatenation while
#   IMAGERY_DIR ends with a slash, and keeps working if the slash is dropped.
files = sorted(
    os.path.join(IMAGERY_DIR, name)
    for name in os.listdir(IMAGERY_DIR)
    if COUNTRY in name
)

# Preview the first few paths.
files[0:5]

We will use a type of Python object called a generator for our dataloader. You can read more about them here: https://realpython.com/introduction-to-python-generators/

In [5]:
def train_test_split(files, split, seed=None):

    """ Split data into training and validation sets

    Args:
        files: list of items (here: image paths) to split.
        split: fraction of `files`, between 0 and 1, that goes to training.
        seed: optional seed for a private random generator so the split can be
            reproduced. When None (the default) the module-level `random`
            state is used, exactly as before.

    Returns:
        (train, val): `train` holds int(len(files) * split) randomly sampled
        items; `val` holds every item that was not sampled, in the order it
        appears in `files`.

    Raises:
        ValueError: if `split` is outside [0, 1].
    """

    if not 0 <= split <= 1:
        raise ValueError(f"split must be between 0 and 1, got {split!r}")

    train_num = int(len(files) * split)

    # A private Random instance keeps a seeded split from disturbing (or being
    # disturbed by) other users of the global random state.
    rng = random if seed is None else random.Random(seed)
    train = rng.sample(files, train_num)

    # Membership tests against a set keep this step linear in len(files); the
    # list-based test was quadratic, which is noticeable with thousands of images.
    try:
        chosen = set(train)
    except TypeError:
        # Unhashable items: fall back to the (slower) list membership test.
        chosen = train
    val = [i for i in files if i not in chosen]

    return train, val


def _school_id_from_path(path):

    """ Return the school id: the text before the first underscore of the file name """

    # Work from the file name only, so the result does not depend on how many
    # directories deep the imagery folder happens to be.
    return os.path.basename(path).split("_")[0]


def _iter_samples(paths):

    """ Yield one (image array, score array) tuple per image path in `paths`

    Reads the module-level dataframe `df` (columns `school_id` and `y`).

    Raises:
        KeyError: if `df` has no row for the school id found in a file name.
        FileNotFoundError: if an image cannot be read.
    """

    # The id parsed from a file name is a string, while `school_id` loaded from
    # the CSV may be numeric; comparing those directly never matches and gives
    # an empty label. Cast the column to strings once per pass over the data.
    school_ids = df["school_id"].astype(str)

    for path in paths:

        # Grab the school_id from the image name
        school_id = _school_id_from_path(path)

        # Grab the test score for the current school from our scores dataframe
        score = df.loc[school_ids == school_id, "y"].to_numpy()
        if score.size == 0:
            raise KeyError(f"No row with school_id {school_id} found for image {path}")

        # cv2.imread returns None instead of raising when a file cannot be read
        im = cv2.imread(path)
        if im is None:
            raise FileNotFoundError(f"Could not read image {path}")

        # Normalize the image by dividing it by its maximum value (this
        # normalization is super important!). An all-zero image is left as
        # zeros rather than divided by zero.
        peak = np.max(im)
        im = im / peak if peak > 0 else im.astype(np.float64)

        # Create a tuple with (image array, test score) and return it
        yield ( np.array(im), np.reshape(score, (-1, 1)) )


def get_train():

    """ Training data generator: yields (image, score) for every path in `train_files` """

    yield from _iter_samples(train_files)


def get_val():

    """ Validation data generator: yields (image, score) for every path in `val_files` """

    yield from _iter_samples(val_files)

Read in our test scores dataframe

In [6]:
# Load the per-school table; the data generators look rows up by `school_id`
# and use column `y` as the label.
df = pd.read_csv(CSV_PATH)
# Preview the first rows.
df.head()

Unnamed: 0,country,school_id,test_score,scaled_score,y
0,phl,107022,105.3,35.583573,0
1,phl,107023,137.05,58.458213,1
2,phl,107024,142.39,62.305476,1
3,phl,107025,166.03,79.337176,1
4,phl,107026,152.81,69.81268,1


Create a TensorFlow dataloader using the `tf.data.Dataset.from_generator` method

In [8]:
# Fraction of the images used for training; the rest is held out for validation.
TRAIN_SPLIT = .75
# Seed for Python's `random` module, which train_test_split samples from.
SEED = 42

print(f"Number of image files for {COUNTRY}: {len(files)}")

# Seed the sampler and sort the input so the same images land in the same
# split on every Restart & Run All (the directory listing order is arbitrary).
random.seed(SEED)
train_files, val_files = train_test_split(sorted(files), TRAIN_SPLIT)

# The generators are passed uncalled: tf.data invokes them each time the
# dataset is iterated, and they read train_files / val_files at that point.
train_dataset = tf.data.Dataset.from_generator(generator = get_train, output_types = (tf.float32, tf.float32)).batch(BATCH_SIZE)
val_dataset = tf.data.Dataset.from_generator(generator = get_val, output_types = (tf.float32, tf.float32)).batch(BATCH_SIZE)

print("Number of training files: ", len(train_files))
print("Number of validation files: ", len(val_files))

Number of nCDF files:  5502


Create and compile our model using our selected parameters and metrics. In this case, I am using the MeanAbsoluteError loss and a learning rate of 0.0001 along with the Adam optimizer.

In [10]:
# Shape of one input image passed to the network: (height, width, channels).
INPUT_SHAPE = (256, 256, 3)
# Step size for the Adam optimizer.
LEARNING_RATE = 0.0001

# `resnet` is a project-local module; `classes = 1` presumably requests a
# single output unit - TODO confirm against resnet.resnet56.
model = resnet.resnet56(img_input = tf.keras.layers.Input(INPUT_SHAPE), classes = 1)

model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate = LEARNING_RATE),
    loss = tf.keras.losses.MeanAbsoluteError(),
    # Report MAE through the metrics API rather than passing a loss object as a metric.
    metrics = [tf.keras.metrics.MeanAbsoluteError()]
)

And finally, train your model!

In [None]:
# Index of the GPU to train on when the machine has that many.
PREFERRED_GPU = 5
# Number of passes over the training dataset.
EPOCHS = 25

# Pinning a device that does not exist makes training fail, so fall back to
# the first GPU, or to the CPU, when the preferred GPU is not available.
gpus = tf.config.list_logical_devices("GPU")
if len(gpus) > PREFERRED_GPU:
    device = f"/device:GPU:{PREFERRED_GPU}"
elif gpus:
    device = "/device:GPU:0"
else:
    device = "/device:CPU:0"

with tf.device(device):

    # Keep the History object so the loss curves can be inspected afterwards.
    # (The stray trailing comma after the call, which only built a throwaway
    # one-element tuple, has been removed.)
    history = model.fit(train_dataset,
                        epochs = EPOCHS,
                        validation_data = val_dataset)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25