In [1]:
# importing modules
import datetime
import os
import random

import h5py
import numpy as np
import tensorflow as tf
from sklearn.metrics import r2_score

In [2]:
# run important functions
def normalizingLabels(adta):
    '''
    Min-max scale the input to the range [0, 1].

    Parameters
    ----------
    adta : array-like
        Values to normalize. The minimum and maximum are taken over all
        elements, regardless of shape.

    Returns
    -------
    norm : numpy.ndarray
        ``(adta - min) / (max - min)``, same shape as the input. When every
        value is identical the result is all zeros instead of NaN.
    minStat, maxStat : scalar
        Global minimum and maximum of the input. Keep them to map a
        normalized value back: ``value * (maxStat - minStat) + minStat``.
    '''
    adta = np.asarray(adta)
    minStat = np.min(adta)
    maxStat = np.max(adta)
    valueRange = maxStat - minStat

    if valueRange == 0:
        # Constant input: dividing by a zero range would yield NaN everywhere.
        # Zeros are consistent with the inverse transform, which then maps
        # every value back to minStat.
        return np.zeros(adta.shape, dtype=float), minStat, maxStat

    norm = (adta - minStat) / valueRange

    return norm, minStat, maxStat

def splitTrainTest(data, label, startBound=None, endBound=None, split=0.8, shuffle=False, randomSeed=None):
    '''
    Split paired samples and labels into a train part and a test part.

    Parameters
    ----------
    data, label : sequence or numpy.ndarray
        Samples and their labels, paired by position along the first axis.
    startBound, endBound : int or None
        Optional slice bounds applied (after shuffling) before the split.
    split : float
        Fraction of the bounded samples that goes to the train part.
    shuffle : bool
        When True, samples and labels are shuffled together first and are
        returned as numpy arrays.
    randomSeed : int or None
        Seed for the shuffle. The same seed always gives the same order.

    Returns
    -------
    (trainData, trainLabel), (testData, testLabel)

    Raises
    ------
    ValueError
        If ``data`` and ``label`` do not have the same length.
    '''
    if len(data) != len(label):
        # Mismatched lengths would silently pair samples with wrong labels.
        raise ValueError('data and label must have the same length, got {} and {}'.format(len(data), len(label)))

    if shuffle:
        if hasattr(data, 'shape') and hasattr(label, 'shape'):
            print(data.shape, label.shape)

        # A private generator leaves the global `random` state untouched.
        # Shuffling an index list produces the same permutation as shuffling
        # the zipped (data, label) pairs with the same seed, because
        # random.shuffle only depends on the list length.
        rng = random.Random(randomSeed)
        order = list(range(len(label)))
        rng.shuffle(order)
        order = np.asarray(order, dtype=np.intp)

        data = np.asarray(data)[order]
        label = np.asarray(label)[order]

    boundData = data[startBound:endBound]
    boundLabel = label[startBound:endBound]

    splitBound = round(split*len(boundLabel))
    trainData = boundData[:splitBound]
    trainLabel = boundLabel[:splitBound]
    testData = boundData[splitBound:]
    testLabel = boundLabel[splitBound:]

    return (trainData, trainLabel), (testData, testLabel)
  
def model6(flayer, slayer, epoch, data=None, label=None):
    '''
    Build, compile and train a dense regression network with two hidden layers.

    Parameters
    ----------
    flayer : int
        Number of units in the first hidden layer.
    slayer : int
        Number of units in the second hidden layer.
    epoch : int
        Number of training epochs.
    data, label : numpy.ndarray, optional
        Training samples and targets. When omitted, the notebook-level
        variables ``trainData`` and ``trainLabel`` are used, so existing
        calls ``model6(flayer, slayer, epoch)`` keep working.

    Returns
    -------
    tf.keras.Model
        The trained model (single linear output unit).
    '''
    # Backward-compatible fallback to the notebook globals.
    if data is None:
        data = trainData
    if label is None:
        label = trainLabel

    # Drop Keras state left over from the previous model in the grid search.
    tf.keras.backend.clear_session()
    initializer = tf.keras.initializers.GlorotNormal()
    model = tf.keras.Sequential([
      tf.keras.layers.Flatten(input_shape=(data.shape[1],)),
      tf.keras.layers.Dense(flayer, activation='relu', kernel_initializer=initializer),
      tf.keras.layers.Dense(slayer, activation='relu', kernel_initializer=initializer),
      tf.keras.layers.Dense(1)
    ])

    # compiling model (Adam with default settings, same as the 'adam' alias)
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.MeanSquaredError(),
                  metrics=[tf.keras.metrics.RootMeanSquaredError()])

    # feed and train the model
    model.fit(data, label, epochs=epoch, verbose=0)
    return model

def rmse(yreal, ypred):
    '''
    Root mean squared error between two sets of values.

    Both inputs are flattened first, so an (n,) array and an (n, 1) array are
    compared element by element instead of broadcasting to an (n, n) matrix.
    '''
    yreal = np.ravel(yreal)
    ypred = np.ravel(ypred)
    return np.sqrt(np.mean(np.square(yreal-ypred)))

def nse(yreal, ypred):
    '''
    Nash-Sutcliffe efficiency: 1 - sum((ypred - yreal)^2) / sum((yreal - mean(yreal))^2).

    Both inputs are flattened first, so an (n,) array and an (n, 1) array are
    compared element by element instead of broadcasting to an (n, n) matrix.
    If every value of ``yreal`` is identical the denominator is zero and the
    result is not finite.
    '''
    yreal = np.ravel(yreal)
    ypred = np.ravel(ypred)
    a = np.sum(np.square(ypred-yreal))
    b = np.sum(np.square(yreal-np.mean(yreal)))
    return 1-(a/b)

In [3]:
# Grid search over hidden-layer sizes and epoch counts for every input dataset:
#   1. load the water-level labels and normalize them
#   2. for each dataset: load it, split it into train and test sets
#   3. for each (flayer, slayer, epoch): train, evaluate, save statistics and model

# model variables
ROOT_PATH = './'
ITERATION = '69'
DATASETSNAME = ['cloudpsfqvaporrain', 'cloudpsfqvaporsst', 'cloudpsfqvaporwind', 'cloudpsfqvaporwinu', 'cloudpsfqvaporwn10', 'cloudpsfrainsst', 'cloudpsfrainwind', 'cloudpsfrainwinu', 'cloudpsfrainwn10', 'cloudpsfsstwind', 'cloudpsfsstwinu', 'cloudpsfsstwn10', 'cloudpsfwindwinu', 'cloudpsfwindwn10']
WLSTATION = 'manggarai'
DIMENSION = 400
FLAYERS = (4,8,12,16)
SLAYERS = (1,2,3,4,5,6)
EPOCHS = (50,100,250,500)
RANDOM_SEED = 10

# Create the output folders up front, so a missing directory cannot fail the
# run after a model has already been trained.
STATISTICS_DIR = '{}models_statistics/'.format(ROOT_PATH)
MODELS_DIR = '{}models/{}/'.format(ROOT_PATH, ITERATION)
os.makedirs(STATISTICS_DIR, exist_ok=True)
os.makedirs(MODELS_DIR, exist_ok=True)

# Best-effort reproducibility of weight initialization and training.
# GPU kernels may still introduce small run-to-run differences.
tf.random.set_seed(RANDOM_SEED)

# load katulampa / manggarai adta
with h5py.File(f'{ROOT_PATH}adta_{WLSTATION}.hdf5', 'r') as f:
    adta = f['datas'][()]
normalizedLabel, minStat, maxStat = normalizingLabels(adta)

loops = 0
# To resume an interrupted run, set HOTSTART to the number of combinations
# already finished and skip iterations while loops < HOTSTART.
#HOTSTART = 202
for datasetname in DATASETSNAME:
    # Load dataset
    datasetPath = f'{ROOT_PATH}dataset/{datasetname}{DIMENSION}f.hdf5'
    if not os.path.isfile(datasetPath):
        # Some double, triple and tetra datasets were saved without the
        # trailing "f" in the filename although they were flattened as well.
        datasetPath = f'{ROOT_PATH}dataset/{datasetname}{DIMENSION}.hdf5'
    with h5py.File(datasetPath, 'r') as f:
        data = f['datas'][()]

    # split train and test set
    # (trainData / trainLabel are also read by model6 as notebook globals)
    (trainData, trainLabel), (testData, testLabel) = splitTrainTest(data, normalizedLabel, split=0.7, shuffle=True, randomSeed=RANDOM_SEED)

    # Labels back on the original scale; identical for every model trained
    # on this dataset, so computed once per dataset.
    realTestLabel = testLabel*(maxStat-minStat)+minStat
    realTrainLabel = trainLabel*(maxStat-minStat)+minStat

    for flayer in FLAYERS:
        for slayer in SLAYERS:
            for epoch in EPOCHS:
                # hotstart
                #if loops < HOTSTART:
                    #loops+=1
                    #continue

                tick = datetime.datetime.now()
                model = model6(flayer, slayer, epoch)

                # evaluating model accuracy
                # The extra ReLU clamps negative normalized predictions to 0.
                prediction_model = tf.keras.Sequential([model,
                                                        tf.keras.layers.ReLU()])
                # Flatten (n, 1) predictions to (n,) once, so every metric
                # below compares them element-wise with the (n,) labels and
                # cannot broadcast to an (n, n) matrix.
                testPredictions = prediction_model.predict(testData).reshape(-1)
                trainPredictions = prediction_model.predict(trainData).reshape(-1)

                # make predictions (back on the original scale)
                testPredictions = testPredictions*(maxStat-minStat)+minStat
                trainPredictions = trainPredictions*(maxStat-minStat)+minStat

                # Mean Squared Error :
                mse = tf.keras.losses.MeanSquaredError()
                mseTestError = mse(realTestLabel, testPredictions).numpy()
                mseTrainError = mse(realTrainLabel, trainPredictions).numpy()

                # RMSE
                rmseTest = rmse(testPredictions, realTestLabel)
                rmseTrain = rmse(trainPredictions, realTrainLabel)

                # NSE
                nseTest = nse(realTestLabel, testPredictions)
                nseTrain = nse(realTrainLabel, trainPredictions)

                # R^2
                r2Test = r2_score(realTestLabel, testPredictions)
                r2Train = r2_score(realTrainLabel, trainPredictions)

                # save statistics to csv
                # columns: flayer, slayer, epoch, mseTrain, mseTest, rmseTrain,
                #          rmseTest, r2Train, r2Test, nseTrain, nseTest
                statistics = '{},{},{},{},{},{},{},{},{},{},{}\n'.format(flayer, slayer, epoch, mseTrainError, mseTestError, rmseTrain, rmseTest, r2Train, r2Test, nseTrain, nseTest)
                with open('{}{}_GS_{}.csv'.format(STATISTICS_DIR, ITERATION, datasetname), 'a') as stat:
                    stat.write(statistics)

                # save model to drive
                model.save('{}{}_GS_{}_{}_{}.h5'.format(MODELS_DIR, datasetname, flayer, slayer, epoch))

                # loop identifier :
                tock = datetime.datetime.now()
                print('{} : {} - {} - {} - {} : time : {} - R^2 err : train[{}] test[{}]'.format(loops, f'{datasetname}{DIMENSION}', flayer, slayer, epoch, tock-tick, r2Train, r2Test))
                loops+=1
      

(10727, 1600) (10727,)
0 : cloudpsfqvaporrain400 - 4 - 1 - 50 : time : 0:00:16.299006 - R^2 err : train[-0.002581877648257791] test[-0.00095952058613058]
1 : cloudpsfqvaporrain400 - 4 - 1 - 100 : time : 0:00:32.492999 - R^2 err : train[-0.00347065720185058] test[-0.006934083792274892]
2 : cloudpsfqvaporrain400 - 4 - 1 - 250 : time : 0:01:26.220697 - R^2 err : train[-2.6587379742259998e-06] test[-0.00040932968122842084]
3 : cloudpsfqvaporrain400 - 4 - 1 - 500 : time : 0:03:27.155003 - R^2 err : train[-0.0007706540422971475] test[-4.863894080719966e-05]
4 : cloudpsfqvaporrain400 - 4 - 2 - 50 : time : 0:00:21.820994 - R^2 err : train[0.1984780532618151] test[0.192370203466126]
5 : cloudpsfqvaporrain400 - 4 - 2 - 100 : time : 0:00:41.906006 - R^2 err : train[-1.3052448622596913e-05] test[-0.0006600420378253258]
6 : cloudpsfqvaporrain400 - 4 - 2 - 250 : time : 0:01:41.395001 - R^2 err : train[-5.299863596497367e-05] test[-0.0008708499489316068]
7 : cloudpsfqvaporrain400 - 4 - 2 - 500 : time