In [None]:
import time
import os

import pandas as pd
import numpy as np
np.set_printoptions(precision=6, suppress=True)

from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error, r2_score

import tensorflow as tf
from tensorflow.keras import *
tf.__version__

In [None]:
from tensorflow.keras.metrics import Metric
class RSquare(Metric):
    """Streaming R^2 score (coefficient of determination).

    The metric tells how close the data are to the fitted regression line.
    - The highest score is 1.0: the predictions perfectly account for the
      variation in the target.
    - A score of 0.0 means the predictions do not account for the
      variation in the target.
    - The score can be negative if the model is worse than that.

    Four running totals are accumulated over every element passed to
    `update_state`: sum of targets, sum of squared targets, sum of squared
    residuals and the number of elements.

    Usage:
    ```python
    actuals = tf.constant([1, 4, 3], dtype=tf.float32)
    preds = tf.constant([2, 4, 4], dtype=tf.float32)
    metric = RSquare()
    metric.update_state(actuals, preds)
    print('R^2 score is: ', metric.result().numpy()) # 0.57142866
    ```
    """

    def __init__(self, name='r_square', dtype=tf.float32):
        super(RSquare, self).__init__(name=name, dtype=dtype)
        self.squared_sum = self.add_weight("squared_sum", initializer="zeros")
        self.sum = self.add_weight("sum", initializer="zeros")
        self.res = self.add_weight("residual", initializer="zeros")
        self.count = self.add_weight("count", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate the statistics of one batch.

        Args:
            y_true: ground-truth values (tensor or array-like).
            y_pred: predicted values, same shape as `y_true`.
            sample_weight: accepted so that Keras (which passes this keyword
                from `compile`/`fit`/`evaluate`) can call the metric; it is
                currently ignored, i.e. every element has weight 1.
        """
        # tf.cast also accepts tensors that already carry another dtype,
        # where convert_to_tensor(..., tf.float32) would raise.
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)
        self.squared_sum.assign_add(tf.reduce_sum(y_true**2))
        self.sum.assign_add(tf.reduce_sum(y_true))
        self.res.assign_add(
            tf.reduce_sum(tf.square(tf.subtract(y_true, y_pred))))
        # The sums above run over *all* elements, so the count must be the
        # element count too (not just the leading dimension); otherwise the
        # mean is wrong for targets with more than one column.
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))

    def result(self):
        """Return 1 - SS_res / SS_tot computed from the running totals.

        Note: the value is NaN/inf when nothing has been accumulated yet or
        when all accumulated targets are identical (SS_tot == 0).
        """
        mean = self.sum / self.count
        # SS_tot = sum((y - mean)^2), expanded in terms of the running totals.
        total = self.squared_sum - 2 * self.sum * mean + self.count * mean**2
        return 1 - (self.res / total)

    def reset_state(self):
        """Zero every running total (Keras calls this at the start of each epoch)."""
        self.squared_sum.assign(0.0)
        self.sum.assign(0.0)
        self.res.assign(0.0)
        self.count.assign(0.0)

    def reset_states(self):
        """Older spelling of `reset_state`, kept for existing callers."""
        self.reset_state()

In [None]:
# Read the greenhouse measurements; the first (unnamed) CSV column is the index.
target_df = pd.read_csv('./results/SW2_greenhouse.csv', index_col='Unnamed: 0')
# Keep only the five sensor columns used below, in this fixed order.
target_df = target_df.loc[:, ['temp', 'hum', 'rad', 'loadcell_1', 'loadcell_2']]
# Turn the index into timestamps.
target_df.index = pd.DatetimeIndex(target_df.index)
# Interpolate gaps, filling at most 35 consecutive missing rows per gap;
# anything beyond that stays NaN.
target_df = target_df.interpolate(limit=35)

In [None]:
# Number of rows in each window that is sliced from the table further down.
SCREEN_SIZE = 50
# Checkpoint directory; the window length is part of its name.
BEST_PATH = './checkpoints/UNet_loadcell_%d' % SCREEN_SIZE

# Per-column extrema of the loaded table, used for min-max scaling.
MINS = target_df.min().to_numpy()
MAXS = target_df.max().to_numpy()

In [None]:
class ConvBlock(layers.Layer):
    """Two consecutive Conv2D -> BatchNormalization -> leaky-ReLU stages.

    Both convolutions share the same number of filters and kernel size, use
    'same' padding and He-normal kernel initialisation.
    """

    def __init__(self, filters, kernel_size):
        super().__init__()
        self.filters = filters
        self.kernel_size = kernel_size

        # Stage 1
        self.conv1 = layers.Conv2D(
            self.filters,
            self.kernel_size,
            kernel_initializer='he_normal',
            padding='same',
        )
        self.batch1 = layers.BatchNormalization()
        self.activation1 = layers.Activation(tf.nn.leaky_relu)

        # Stage 2
        self.conv2 = layers.Conv2D(
            self.filters,
            self.kernel_size,
            kernel_initializer='he_normal',
            padding='same',
        )
        self.batch2 = layers.BatchNormalization()
        self.activation2 = layers.Activation(tf.nn.leaky_relu)

    def call(self, inp, TRAINING):
        """Run both stages; `TRAINING` is forwarded to the batch-norm layers."""
        stage1 = self.conv1(inp)
        stage1 = self.batch1(stage1, training=TRAINING)
        stage1 = self.activation1(stage1)

        stage2 = self.conv2(stage1)
        stage2 = self.batch2(stage2, training=TRAINING)
        return self.activation2(stage2)

In [None]:
class DeconvBlock(layers.Layer):
    """A single Conv2DTranspose followed by a leaky-ReLU activation.

    The transposed convolution uses 'same' padding and He-normal kernel
    initialisation with the given filters, kernel size and strides.
    """

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides

        self.deconv1 = layers.Conv2DTranspose(
            self.filters,
            self.kernel_size,
            kernel_initializer='he_normal',
            strides=self.strides,
            padding='same',
        )
        self.activation1 = layers.Activation(tf.nn.leaky_relu)

    def call(self, inp):
        """Apply the transposed convolution, then the activation."""
        upsampled = self.deconv1(inp)
        return self.activation1(upsampled)

In [None]:
class UNet(Model):
    """U-Net style encoder/decoder built from ConvBlock and DeconvBlock.

    Encoder: four ConvBlocks (64, 128, 256, 512 filters), each followed by
    max pooling. Bottleneck: one ConvBlock with 1024 filters. Decoder: four
    DeconvBlocks (512, 256, 128, 64 filters), each concatenated with the
    matching encoder output and refined by a ConvBlock. The output layer is
    a 1x1 convolution with a sigmoid activation and a single channel.
    """

    def __init__(self):
        super().__init__()

        # --- Encoder ---
        self.conv_block1 = ConvBlock(64, (2, 2))
        self.pool1 = layers.MaxPooling2D()

        self.conv_block2 = ConvBlock(128, (2, 2))
        self.pool2 = layers.MaxPooling2D()

        self.conv_block3 = ConvBlock(256, (2, 2))
        self.pool3 = layers.MaxPooling2D()

        self.conv_block4 = ConvBlock(512, (2, 2))
        self.pool4 = layers.MaxPooling2D()

        # --- Bottleneck ---
        self.conv_bottom = ConvBlock(1024, (2, 2))

        # --- Decoder (upsample, then refine after the skip concatenation) ---
        self.deconv_block1 = DeconvBlock(512, (2, 2), (2, 2))
        self.conv_block_r1 = ConvBlock(512, (2, 2))

        self.deconv_block2 = DeconvBlock(256, (2, 2), (2, 2))
        self.conv_block_r2 = ConvBlock(256, (2, 2))

        self.deconv_block3 = DeconvBlock(128, (2, 2), (2, 2))
        self.conv_block_r3 = ConvBlock(128, (2, 2))

        self.deconv_block4 = DeconvBlock(64, (2, 2), (2, 2))
        self.conv_block_r4 = ConvBlock(64, (2, 2))

        # Asymmetric zero padding applied after the third upsampling step.
        # NOTE(review): presumably compensates for an odd spatial size lost in
        # pooling so the skip concatenation shapes match — confirm.
        self.padding = layers.ZeroPadding2D(((1, 0), (0, 1)))
        self.output_conv = layers.Conv2D(1, (1, 1), activation='sigmoid')

    def call(self, inp, TRAINING=False):
        """Forward pass; `TRAINING` is handed to every ConvBlock."""
        # Contracting path: keep each pre-pooling output for the skip links.
        skip1 = self.conv_block1(inp, TRAINING)
        down1 = self.pool1(skip1)

        skip2 = self.conv_block2(down1, TRAINING)
        down2 = self.pool2(skip2)

        skip3 = self.conv_block3(down2, TRAINING)
        down3 = self.pool3(skip3)

        skip4 = self.conv_block4(down3, TRAINING)
        down4 = self.pool4(skip4)

        bottom = self.conv_bottom(down4, TRAINING)

        # Expanding path: upsample, concatenate with the skip, refine.
        up1 = self.deconv_block1(bottom)
        up1 = layers.concatenate([up1, skip4])
        up1 = self.conv_block_r1(up1, TRAINING)

        up2 = self.deconv_block2(up1)
        up2 = layers.concatenate([up2, skip3])
        up2 = self.conv_block_r2(up2, TRAINING)

        up3 = self.deconv_block3(up2)
        up3 = self.padding(up3)
        up3 = layers.concatenate([up3, skip2])
        up3 = self.conv_block_r3(up3, TRAINING)

        up4 = self.deconv_block4(up3)
        up4 = layers.concatenate([up4, skip1])
        up4 = self.conv_block_r4(up4, TRAINING)

        return self.output_conv(up4)

In [None]:
# Build the model and restore the most recent checkpoint from BEST_PATH.
unet_model = UNet()

checkpoint_path = BEST_PATH
ckpt = tf.train.Checkpoint(unet_model=unet_model)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=10)

# latest_checkpoint is None when the directory holds no checkpoint.
latest_checkpoint = ckpt_manager.latest_checkpoint
if latest_checkpoint:
    ckpt.restore(latest_checkpoint)
    # Print the full checkpoint prefix rather than just its last two characters.
    print('Checkpoint ' + latest_checkpoint + ' restored.')
else:
    # Without this message the notebook would silently go on to predict with
    # an untrained network and write meaningless values to disk.
    print('WARNING: no checkpoint found in ' + checkpoint_path +
          '; unet_model will run with untrained weights.')

In [None]:
def index_selector(x, sr):
    """Greedily pick indices, dropping the neighbours of every pick.

    Args:
        x: candidate indices (the caller passes them sorted ascending).
        sr: screen size; after an index `i` is picked, every candidate in
            `np.arange(i - sr, i + sr)` is removed from further consideration.

    Returns:
        np.ndarray with the picked indices, in the order they were picked.
    """
    remaining = x
    picked = []
    while len(remaining) > 0:
        anchor = remaining[0]
        picked.append(anchor)
        # Everything inside the window around the anchor is discarded.
        window = np.arange(anchor - sr, anchor + sr)
        remaining = np.setdiff1d(remaining, window, assume_unique=True)
    return np.array(picked)

In [None]:
# Fill the missing values of the min-max scaled sensor table with predictions
# from unet_model, then write the de-normalised result to CSV.
datetime = pd.DatetimeIndex(target_df.index)
recover_df = (target_df - MINS)/(MAXS - MINS)
recover_df = pd.DataFrame(recover_df, index=target_df.index, columns=target_df.columns)
recover_df.index = pd.DatetimeIndex(recover_df.index)
filling_indices = np.array([True])
prev_filling_indices = np.array([False])
error = 0
iteration = 0
n_features = target_df.shape[1]
# Number of smallest/largest replica predictions dropped before averaging.
OUTLIER = 2
while True:
    iteration += 1
    # Only a single pass is executed: the loop exits as soon as a second
    # iteration would start.
    if iteration > 1:
        break
    # 1 where a value is present (>= 0 after scaling), 0 where it is missing.
    new_mask = recover_df.mask(recover_df >= 0, 1).where(recover_df >= 0, 0).astype('int').values
    # Count passes that selected exactly the same windows as the previous one.
    if np.array_equal(prev_filling_indices, filling_indices):
        error += 1
        if error > 50:
            print('TOO MANY ERRORS!')
            break

    # Missing entries are encoded as -1 in the array handed to the network.
    missing_df = recover_df.copy().fillna(-1).values

    # Loop-invariant masks, computed once instead of once per window position
    # (the row mean was previously recomputed over the whole table each time).
    row_all_missing = missing_df.mean(axis=1) == -1  # rows whose every column is missing
    cell_missing = missing_df == -1

    temp_current = []
    temp_prev = []
    temp_next = []
    temp_mask = []
    missing_counts_c = []
    missing_counts_r = []
    datetimes = []
    for INDEX in range(SCREEN_SIZE*2, missing_df.shape[0]-SCREEN_SIZE):
        # Fully-missing rows across the previous + current + next screens.
        missing_counts_r.append(
            np.count_nonzero(row_all_missing[(INDEX-SCREEN_SIZE*2):INDEX+SCREEN_SIZE]))
        # Missing values inside the current screen.
        missing_counts_c.append(
            np.count_nonzero(cell_missing[(INDEX-SCREEN_SIZE):INDEX, :]))
    missing_counts_r = np.array(missing_counts_r)
    missing_counts_c = np.array(missing_counts_c)
    drop_index = np.arange(0, missing_counts_r.shape[0])
    if error > 1:
        # After repeated stalls, accept any window that has something to fill.
        drop_index = drop_index[(missing_counts_c > 0)]
    else:
        drop_index = drop_index[(missing_counts_r < SCREEN_SIZE*2) & (missing_counts_c > 0)]
    count_sort = index_selector(drop_index, SCREEN_SIZE)

    # Shift back from loop positions to row positions in missing_df.
    filling_indices = count_sort + SCREEN_SIZE*2
    if len(filling_indices) == 0:
        print('recovery ended.')
        break
    for INDEX_ in filling_indices:
        temp_prev.append(missing_df[(INDEX_-SCREEN_SIZE*2):(INDEX_-SCREEN_SIZE), :])
        temp_current.append(missing_df[(INDEX_-SCREEN_SIZE):INDEX_, :])
        temp_next.append(missing_df[INDEX_:INDEX_+SCREEN_SIZE, :])
        temp_mask.append(new_mask[(INDEX_-SCREEN_SIZE):INDEX_, :])
        datetimes.append(datetime[(INDEX_-SCREEN_SIZE):INDEX_])

    temp_prev = np.stack(temp_prev)
    temp_current = np.stack(temp_current)
    temp_next = np.stack(temp_next)
    temp_mask = np.stack(temp_mask)
    datetimes = np.array(datetimes)

    # Channels: current screen, mask, previous screen, next screen.
    test_input = np.stack([temp_current, temp_mask, temp_prev, temp_next], axis = -1)
    # Tile the feature axis so each column appears SCREEN_SIZE // n_features times.
    test_input = np.concatenate([test_input]*(SCREEN_SIZE // n_features), axis=2)

    pred_result = unet_model.predict(test_input)
    # The replicas are laid out along axis 2 (the tiled feature axis), so the
    # replica count is taken from that axis rather than from axis 1.
    n_repeats = pred_result.shape[2] // n_features
    avg_pred = []
    for col in range(n_features):
        replicas = [
            pred_result[..., rep*n_features:(rep+1)*n_features, 0][..., col]
            for rep in range(n_repeats)
        ]
        replicas = np.stack(replicas, axis=2)
        # Trimmed mean: sort the replicas and drop OUTLIER values at each end.
        replicas.sort(axis=2)
        avg_pred.append(replicas[..., OUTLIER:-OUTLIER].mean(axis=2))
    avg_pred = np.stack(avg_pred, axis=2)
    masking = test_input[..., 1]
    avg_masking = masking[..., :n_features]
    # Positions that already hold a value (mask == 1) become NaN, so that
    # combine_first below only fills the missing ones.
    final_pred = np.ma.array(avg_pred, mask=avg_masking, fill_value=np.nan).filled()
    pred_df = pd.DataFrame(final_pred.reshape(-1, n_features),
                           index = pd.DatetimeIndex(datetimes.reshape(-1, 1)[:, 0]),
                           columns=target_df.columns)
    recover_df[recover_df == -1] = np.nan
    recover_df = recover_df.combine_first(pred_df)

    prev_filling_indices = filling_indices
    print('%04d iteration...' % iteration)
# Undo the min-max scaling before writing the result.
((MAXS-MINS)*recover_df + MINS).to_csv('./results/recovered_SW2_gh_part.csv')
print('Done.')

In [None]:
# Table size overall, then counting only the rows without any NaN.
full_shape = recover_df.shape
complete_shape = recover_df.dropna().shape
print(full_shape)
print(complete_shape)

In [None]:
# ((MAXS-MINS)*recover_df + MINS).to_csv('./results/recovered_SW2_gh.csv')
# print('Done.')