In [None]:
import time
import os
import gc

import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.utils import shuffle, resample
from sklearn.metrics import mean_squared_error, r2_score
np.set_printoptions(precision=6, suppress=True)

import PIL
from PIL import Image

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
tf.__version__

In [None]:
# Ask TensorFlow to grow GPU memory on demand for every visible GPU, then
# report how many physical and logical GPUs are available. A RuntimeError
# raised by TensorFlow during this setup is printed rather than propagated.
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        print(err)

In [None]:
# Distribution strategy object; the model is later built and compiled/fitted
# inside `strategy.scope()` blocks.
strategy = tf.distribute.MirroredStrategy()

# Hyperparameters

In [None]:
# File the ModelCheckpoint callback writes the best weights to, and from which
# they are reloaded after training.
BEST_PATH = './models/pg_convnet.h5'

# Batching / schedule.
BATCH_SIZE = 256
TRAINING_EPOCHS = 200   # upper bound; early stopping may end training sooner

# Adam optimizer settings.
LEARNING_RATE = 1.5e-3
EPSILON = 1e-5

# Data preparation

In [None]:
# Root folder of the image set. Only entries whose names start with
# 'LAI_OVER' or 'LAI2_OVER' are kept, in ascending name order.
DIRECTORY = './images/2020_S/'
dir_list = os.listdir(DIRECTORY)
dataset_list = sorted(
    entry for entry in dir_list
    if entry.startswith(('LAI_OVER', 'LAI2_OVER'))
)

In [None]:
# Load the regression result table; the CSV column named 'Unnamed: 0' becomes
# the index. The loading loop below iterates this index and tests each value
# with `in` against folder names, so the index values are presumably date
# strings -- TODO confirm against the CSV.
output_df = pd.read_csv('./results/2020_S/pg_reg_result.csv', index_col='Unnamed: 0')

In [None]:
# Build three parallel lists with one entry per .jpg image:
#   data_indices  -- pd.Timestamp built from the matching output_df index value
#   input_images  -- the image converted with img_to_array
#   output_labels -- the output_df row values for that index value
# An image is included when its folder name contains the index value.
data_indices = []
input_images = []
output_labels = []
for date_key in output_df.index:
    for subdir in dataset_list:
        if date_key not in subdir:
            continue
        folder = os.path.join(DIRECTORY, subdir)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore the seeded resample/shuffle that follow)
        # reproducible across machines and runs.
        jpg_names = sorted(name for name in os.listdir(folder) if name.endswith('.jpg'))
        for file_name in jpg_names:
            # The context manager closes the underlying file handle as soon as
            # the pixels have been copied into an array.
            with Image.open(os.path.join(folder, file_name)) as image:
                pixels = img_to_array(image)
            data_indices.append(pd.Timestamp(date_key))
            input_images.append(pixels)
            output_labels.append(output_df.loc[date_key].values)

In [None]:
# Turn the per-image lists into arrays; np.stack joins along a new leading
# (sample) axis by default.
data_indices = np.array(data_indices)
output_labels = np.stack(output_labels)
input_images = np.stack(input_images)

In [None]:
# Sanity check: sample count, image array shape, label array shape (one per line).
print(len(data_indices), input_images.shape, output_labels.shape, sep='\n')

In [None]:
# Draw a seeded random subset, without replacement, applied consistently to
# all three arrays. The subset size is capped at the number of available
# samples: with replace=False, asking for more samples than exist raises
# ValueError.
n_keep = min(1000, len(data_indices))
data_indices, input_images, output_labels = resample(
    data_indices, input_images, output_labels,
    n_samples=n_keep, replace=False, random_state=4574)

In [None]:
# Same sanity check after subsampling: count, image shape, label shape.
print(
    len(data_indices),
    input_images.shape,
    output_labels.shape,
    sep='\n',
)

In [None]:
# Column-wise extremes of the label array (reduced over the sample axis);
# the min-max scaling of the labels uses these.
OUTPUT_MINS = np.min(output_labels, axis=0)
OUTPUT_MAXS = np.max(output_labels, axis=0)

In [None]:
# Min-max scale every label column using OUTPUT_MINS / OUTPUT_MAXS.
# NOTE: this overwrites output_labels, so re-running the cell would rescale
# labels that are already scaled.
label_span = OUTPUT_MAXS - OUTPUT_MINS
# A column whose values are all equal has zero span; divide by 1 there so the
# column becomes 0 instead of NaN (0/0), which would turn the loss into NaN.
label_span = np.where(label_span == 0, 1, label_span)
output_labels = (output_labels - OUTPUT_MINS) / label_span

In [None]:
# Explicit garbage-collection pass before the arrays are shuffled and split.
gc.collect()

In [None]:
# Shuffle indices, images and labels together with a fixed seed so the
# train/test split taken from this order is repeatable.
data_indices, input_images, output_labels = shuffle(data_indices, input_images, output_labels, random_state=4574)

In [None]:
# Number of training samples: 70 % of the data set, truncated to an integer.
num_train = int(0.7 * len(input_images))

In [None]:
# The first num_train samples form the training set; everything after them is
# held out for validation/testing.
train_index, test_index = data_indices[:num_train], data_indices[num_train:]
train_input, test_input = input_images[:num_train], input_images[num_train:]
train_label, test_label = output_labels[:num_train], output_labels[num_train:]

In [None]:
# Generator settings: pixel values are multiplied by 1/255, and random
# shifts, shear, zoom and horizontal flips are applied; pixels exposed by a
# transform are filled from the nearest valid pixel.
augmentation_options = dict(
    rescale=1/255,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Training batches: rescaled and randomly augmented by `datagen`.
train_iterator = datagen.flow(train_input, train_label, batch_size=BATCH_SIZE)

# Held-out batches: rescale by 1/255 only. Random shifts/shear/zoom/flips on
# validation data make val_loss noisy, and val_loss drives the LR schedule,
# checkpointing and early stopping. Rescale-only also matches the plain
# `test_input/255` used for prediction. shuffle=False keeps batch order fixed.
eval_datagen = ImageDataGenerator(rescale=1/255)
test_iterator = eval_datagen.flow(test_input, test_label, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Explicit garbage-collection pass before the model is built.
gc.collect()

# Model construction

In [None]:
with strategy.scope():
    # (filters, kernel_size) for each convolution stage; every stage is a
    # 'same'-padded ReLU Conv2D followed by a default MaxPooling2D.
    conv_stages = [(32, 2), (64, 2), (128, 2), (128, 3), (256, 3), (512, 3), (1024, 3)]

    network_layers = []
    for stage_number, (n_filters, kernel) in enumerate(conv_stages):
        # Only the first layer declares the input shape: 512x512 images, 3 channels.
        first_layer_args = {'input_shape': (512, 512, 3)} if stage_number == 0 else {}
        network_layers.append(
            layers.Conv2D(n_filters, kernel, padding='same', activation='relu', **first_layer_args))
        network_layers.append(layers.MaxPooling2D())

    # Regression head: flatten, one hidden dense layer, then 4 sigmoid outputs.
    network_layers.append(layers.Flatten())
    network_layers.append(layers.Dense(128, activation='relu'))
    network_layers.append(layers.Dense(4, activation='sigmoid'))

    model = Sequential(network_layers)

In [None]:
# Print the model's layer summary.
model.summary()

In [None]:
# All three callbacks monitor the validation loss.

# Halve the learning rate after 2 epochs without a val_loss improvement of at least 1e-4.
callbacks = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=2,
    min_delta=0.0001,
    cooldown=0,
    min_lr=0,
    verbose=0,
)

# Checked every epoch; writes weights only, and only when val_loss improves.
save = keras.callbacks.ModelCheckpoint(
    BEST_PATH,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    save_freq='epoch',
    verbose=0,
)

# Stop training after 5 epochs without a val_loss improvement of at least 1e-4.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.0001,
    patience=5,
)

In [None]:
with strategy.scope():
    # Adam with the notebook's learning rate and epsilon; mean absolute error loss.
    opt = tf.keras.optimizers.Adam(epsilon=EPSILON, learning_rate=LEARNING_RATE)
    model.compile(loss='mae', optimizer=opt)

    # Train for up to TRAINING_EPOCHS epochs, validating on test_iterator each
    # epoch; the callbacks adjust the learning rate, checkpoint the best
    # weights and stop early.
    model.fit(
        train_iterator,
        validation_data=test_iterator,
        epochs=TRAINING_EPOCHS,
        callbacks=[callbacks, save, early_stop],
        verbose=1,
    )

In [None]:
# Restore the weights stored at BEST_PATH, the file the ModelCheckpoint
# callback writes with save_best_only=True (lowest val_loss seen in training).
model.load_weights(BEST_PATH)

In [None]:
# Predict on the held-out images, dividing by 255 to match the 1/255 rescale
# the generators apply. `model` is the network that was trained and had its
# best weights reloaded; no `trained_model` is defined in the visible notebook.
pred_result = model.predict(test_input/255)

In [None]:
# Prediction and label array shapes, one per line, for a quick consistency check.
print(pred_result.shape, test_label.shape, sep='\n')