In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys


import functions.parse_data as parse
import functions.handy_functions as hf

from multivariate_quantile_regression.network_model import QuantileNetwork
from py_torch_qrnn_adapt.funcs import Sanity, Scenario1, Scenario2, Scenario3, Scenario4, Scenario5,\
                  MultivariateScenario1, MultivariateScenario2

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split




In [2]:
# Names of the `Cloud_B*` band columns; passed to hf.add_MSI_noise in a later cell.
channel_labels = [
    'Cloud_B02', 'Cloud_B03', 'Cloud_B04', 'Cloud_B05', 'Cloud_B06', 'Cloud_B07',
    'Cloud_B08', 'Cloud_B09', 'Cloud_B10', 'Cloud_B11', 'Cloud_B12', 'Cloud_B13',
]

# Parse each .dat file into its own table.
data_water = parse.parse('cloudrm_water.dat')
data_clear = parse.parse('cloudrm_clear.dat')
data_ice = parse.parse('cloudrm_ice.dat')
data_mixed = parse.parse('cloudrm_mixed.dat')

# Concatenate all four datasets into one table.
data_all = pd.concat((data_water, data_clear, data_ice, data_mixed))

In [3]:
# Rebuild the combined table from the frames parsed in the previous cell instead of
# reading the four .dat files a second time. Starting from a fresh concat (rather than
# from the existing `data_all`) keeps this cell safe to re-run: the drop below always
# sees the original columns.
data_all = pd.concat([data_water, data_clear, data_ice, data_mixed])

# Remove the columns that are not used as inputs or targets by the later cells.
data_all = data_all.drop(columns=['Surface_Desc', 'Cloud_B01', 'Clear_B01'])

# Add noise to the `Cloud_B*` band columns (the helper prints the standard deviation per band).
data_all = hf.add_MSI_noise(data_all, channel_labels)

Noise standard deviation for Cloud_B02: 0.00335001428051948
Noise standard deviation for Cloud_B03: 0.002912530185416667
Noise standard deviation for Cloud_B04: 0.004058081082042254
Noise standard deviation for Cloud_B05: 0.0046524891611111115
Noise standard deviation for Cloud_B06: 0.007455351321348316
Noise standard deviation for Cloud_B07: 0.008871707484285717
Noise standard deviation for Cloud_B08: 0.04489677938000001
Noise standard deviation for Cloud_B09: 0.005688141120114942
Noise standard deviation for Cloud_B10: 0.003909328971491229
Noise standard deviation for Cloud_B11: 0.0014014724139999996
Noise standard deviation for Cloud_B12: 0.005030040539999999
Noise standard deviation for Cloud_B13: 0.004041267081999999


In [4]:
##Train/test split (the validation subset is drawn from the training part in the next cell)##

# Model inputs: the `Cloud_B*` band columns plus the three angle columns.
X_labels = list(channel_labels) + ['Sat_Zenith_Angle', 'Sun_Zenith_Angle', 'Azimuth_Diff_Angle']

# Targets: the `Clear_B*` band columns.
# 'GOT' and 'Water_Vapor' are left out, and band 1 is not used.
y_labels = ['Clear_B02', 'Clear_B03', 'Clear_B04', 'Clear_B05', 'Clear_B06', 'Clear_B07',
            'Clear_B08', 'Clear_B09', 'Clear_B10', 'Clear_B11', 'Clear_B12', 'Clear_B13']

df = hf.normalise_input_df(data_all, X_labels)
df = hf.add_noise(df, X_labels, sigma=0.001)

# NOTE(review): `num_epochs` is not read by any cell visible in this notebook (training
# uses `nepochs`), and `batch_size` is assigned again before the fit call.
num_epochs = 2
batch_size = 100

##Split data##
X = df[X_labels]
y = df[y_labels]

# Fixed seed so the same rows land in the test set on every Restart & Run All.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=SPLIT_SEED)

In [5]:
import random

# Fixed seed so the same validation rows are drawn on every Restart & Run All.
VAL_SEED = 0
val_size = 0.05

n_train_rows = len(X_train)

# Positional row indices (into X_train / y_train) held out for validation.
validation_indices = np.array(
    random.Random(VAL_SEED).sample(range(n_train_rows), int(n_train_rows * val_size))
)

# Every remaining position is used for training. A set gives constant-time membership
# tests, so this is one pass over the rows instead of scanning all validation indices
# for each row.
held_out = set(validation_indices.tolist())
train_indices = [i for i in range(n_train_rows) if i not in held_out]

In [6]:
# Number of row positions left for training once the validation indices are held out.
len(train_indices)

180500

In [7]:
# Quantile levels handed to the network, plus the training hyper-parameters.
quantiles = np.array([0.1, 0.5, 0.9])
batch_size = 100
nepochs = 10

# Fit on the training rows; the validation rows are passed separately by index.
model = QuantileNetwork(quantiles=quantiles)
model.fit(
    X_train.to_numpy(),
    y_train.to_numpy(),
    train_indices,
    validation_indices,
    batch_size=batch_size,
    nepochs=nepochs,
)

Epoch 1


Batch number: 1805it [00:02, 611.06it/s]


Training loss 192.5401153564453 Validation loss 34997.984375
Epoch 2


Batch number: 1805it [00:03, 542.69it/s]


Training loss 177.94204711914062 Validation loss 32594.345703125
Epoch 3


Batch number: 1805it [00:03, 540.50it/s]

Training loss 166.33506774902344 Validation loss 30607.955078125
Epoch 4



Batch number: 1805it [00:03, 513.22it/s]

Training loss 156.8061065673828 Validation loss 28993.8984375
Epoch 5



Batch number: 1805it [00:03, 505.26it/s]


Training loss 149.3293914794922 Validation loss 27770.93359375
Epoch 6


Batch number: 1805it [00:03, 532.59it/s]


Training loss 143.88873291015625 Validation loss 26920.9921875
Epoch 7


Batch number: 1805it [00:03, 500.83it/s]

Training loss 140.3376007080078 Validation loss 26387.443359375
Epoch 8



Batch number: 1805it [00:03, 495.08it/s]


Training loss 138.048828125 Validation loss 26014.38671875
Epoch 9


Batch number: 1805it [00:03, 530.54it/s]


Training loss 136.27542114257812 Validation loss 25699.357421875
Epoch 10


Batch number: 1805it [00:03, 506.65it/s]


Training loss 134.6904754638672 Validation loss 25408.068359375


In [8]:
# Predict on the held-out test inputs. Later cells index the result as preds[i, j, k]
# with k in {0, 1, 2}; presumably the last axis follows the order of `quantiles`
# (0.1, 0.5, 0.9) — TODO confirm against QuantileNetwork.predict.
preds = model.predict(X_test.to_numpy())

In [9]:
# MSE between the `Clear_B*` test targets and slice 1 of the last prediction axis
# (presumably the 0.5 quantile, i.e. the median prediction — confirm).
mean_squared_error(y_test.to_numpy(),preds[:,:,1])

0.07781808355937571

In [10]:
def PSNR(y_true,y_pred):
    """Peak signal-to-noise ratio, in dB, of ``y_pred`` measured against ``y_true``.

    Computed as ``10 * log10(peak**2 / mse)``, where ``peak`` is the largest value
    anywhere in ``y_true`` and ``mse`` is the mean squared difference over all elements.

    Both inputs are converted to NumPy arrays first, so a DataFrame and an ndarray
    give the same scalar result regardless of how the installed pandas version
    reduces a DataFrame under ``np.amax``.

    Parameters
    ----------
    y_true : array-like
        Reference values. 1-D input is treated as a single output column.
    y_pred : array-like
        Predicted values, same shape as ``y_true``.

    Returns
    -------
    float
        The PSNR in dB; ``inf`` when the prediction matches exactly.

    Raises
    ------
    ValueError
        If the inputs are empty or their shapes differ.
    """
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)

    # Treat 1-D input as one output column so (n,) and (n, 1) are interchangeable.
    if true_arr.ndim == 1:
        true_arr = true_arr.reshape(-1, 1)
    if pred_arr.ndim == 1:
        pred_arr = pred_arr.reshape(-1, 1)

    # Fail loudly instead of letting NumPy broadcast mismatched inputs.
    if true_arr.shape != pred_arr.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {true_arr.shape} and {pred_arr.shape}"
        )
    if true_arr.size == 0:
        raise ValueError("y_true and y_pred must not be empty")

    mse = float(np.mean((true_arr - pred_arr) ** 2))
    peak = float(np.max(true_arr))

    # A perfect prediction has zero error; report infinity rather than dividing by zero.
    if mse == 0.0:
        return float('inf')

    return float(10 * np.log10(peak ** 2 / mse))

In [11]:
# PSNR of slice 1 of the last prediction axis (presumably the median quantile — confirm).
# The targets are passed as a NumPy array, as in the MSE cell, so the peak value is taken
# over the whole array rather than depending on how pandas reduces a DataFrame.
PSNR(y_test.to_numpy(),preds[:,:,1])

20.426843537295962

In [12]:
# Number of rows in the test targets.
y_test.shape[0]

10000

In [13]:
# NumPy copy of the test targets, used for positional [i, j] indexing in the cells below.
y_test_np=y_test.to_numpy()

In [14]:
def calc_outrate(y_test,preds):
    """Fraction of target values that fall outside the predicted interval.

    A value ``y_test[i, j]`` counts as outside when it is strictly below
    ``preds[i, j, 0]`` or strictly above ``preds[i, j, 2]``.

    The comparison uses the ``y_test`` argument itself rather than a notebook-level
    variable, so the function works for any targets passed in.

    Parameters
    ----------
    y_test : array-like, 2-D
        True target values.
    preds : array-like, 3-D
        Predictions; slices 0 and 2 of the last axis are used as the lower and
        upper bounds respectively.

    Returns
    -------
    float
        Number of outside values divided by the total number of target values.

    Raises
    ------
    ValueError
        If ``y_test`` is empty or its shape does not match the bound slices.
    """
    targets = np.asarray(y_test)
    bounds = np.asarray(preds)

    lower = bounds[:, :, 0]
    upper = bounds[:, :, 2]

    # Guard against silent broadcasting between mismatched targets and predictions.
    if targets.shape != lower.shape:
        raise ValueError(
            f"y_test has shape {targets.shape} but preds provides bounds of shape {lower.shape}"
        )
    if targets.size == 0:
        raise ValueError("y_test must contain at least one value")

    # Element-wise version of the original double loop.
    outside = (targets < lower) | (targets > upper)
    outrate = int(np.count_nonzero(outside)) / targets.size
    return outrate

In [15]:
# Share of test targets lying outside the band between prediction slices 0 and 2,
# computed with the helper defined above instead of a second copy of its loop.
# NOTE(review): if those slices are the 0.1 and 0.9 quantiles, a well-calibrated model
# would leave roughly 20% of targets outside — compare the value below against that.
outrate = calc_outrate(y_test_np, preds)

outrate

0.026941666666666666