In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys

%matplotlib inline
from IPython import display


import functions.parse_data as parse
import functions.handy_functions as hf
import torch.nn as nn
import torch.optim as optim
import torch


from multivariate_quantile_regression.network_model import QuantileNetwork

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

from tqdm import tqdm

import os

In [2]:
# Pick the compute device once, up front: when PyTorch can see a CUDA GPU,
# device 0 becomes the default device for tensors created afterwards.
if not torch.cuda.is_available():
    # No usable GPU: keep PyTorch's default device as it is.
    print("CUDA is not available. Using CPU.")
else:
    torch.set_default_device(0)
    print("CUDA is available. Using GPU.")

# Sanity check: a freshly created tensor reports the device it was placed on.
tensor = torch.randn(3, 3)
print("Tensor is on device:", tensor.device)

CUDA is available. Using GPU.
Tensor is on device: cuda:0


In [5]:
# Cloudy-scene band columns; only referenced by the optional noise step that is
# commented out at the bottom of this cell.
channel_labels = ['Cloud_B02', 'Cloud_B03', 'Cloud_B04', 'Cloud_B05', 'Cloud_B06', 'Cloud_B07',
                  'Cloud_B08', 'Cloud_B09', 'Cloud_B10', 'Cloud_B11', 'Cloud_B12', 'Cloud_B13']

data_water = parse.parse('cloudrm_water.dat')
data_clear = parse.parse('cloudrm_clear.dat')
data_ice = parse.parse('cloudrm_ice.dat')
data_mixed = parse.parse('cloudrm_mixed.dat')

# Concatenate the datasets (the clear-sky file is loaded but currently left out).
# ignore_index=True gives the combined frame a unique 0..N-1 index. Without it the
# row labels of the individual files are kept and can repeat (presumably each file
# starts at 0 again -- verify against parse.parse), and the split cell further down
# uses those labels as *positions* into df_truth, which would then select the wrong
# rows.
#data_all=pd.concat([data_water, data_clear, data_ice, data_mixed])
data_all = pd.concat([data_water, data_ice, data_mixed], ignore_index=True)
#data_all=pd.concat([data_ice])
data_all = data_all.drop(columns=['Surface_Desc', 'Cloud_B01', 'Clear_B01'])

# Snapshot taken before filtering: label i of data_all is row i of df_truth.
df_truth = data_all.copy()

# Keep only the rows whose COT column is below 3.6.
data_all = data_all[data_all['COT'] < 3.6]

#data_all=hf.add_MSI_noise(data_all,channel_labels)

In [6]:
##Train test validation split##
# Input features: twelve cloudy-scene band columns plus three angle columns.
X_labels = ['Cloud_B02', 'Cloud_B03', 'Cloud_B04', 'Cloud_B05', 'Cloud_B06',
            'Cloud_B07', 'Cloud_B08', 'Cloud_B09', 'Cloud_B10', 'Cloud_B11', 'Cloud_B12', 'Cloud_B13',
            'Sat_Zenith_Angle', 'Sun_Zenith_Angle', 'Azimuth_Diff_Angle']

#Leave out 'GOT', 'Water_Vapor'
#Band 1 no go.
y_labels = ['Clear_B12']

# Feature columns of the unfiltered frame, kept untouched by any preprocessing below.
X_truth = df_truth[X_labels]

df = data_all.copy()
#df=hf.normalise_input_df(data_all,X_labels)
#df=hf.add_noise(df,X_labels,sigma=0.001)

##Split data##
X = df[X_labels]
y = df[y_labels]

# Fixed seed so the 5 % held-out test set is identical on every Restart & Run All.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.05, random_state=SPLIT_SEED
)

# Save truth values without normalisation.
# NOTE(review): .iloc treats the index labels of X_test as row positions in X_truth;
# that is only correct when df_truth carries a unique 0..N-1 index -- confirm.
X_test_truth = X_truth.iloc[X_test.index]

In [7]:
# Training schedule: number of passes over the training rows, and rows per mini-batch.
nepochs, batch_size = 300, 500

In [15]:
import random

val_size = 0.05   # fraction of the training rows held out for validation
VAL_SEED = 42     # fixed seed so the validation rows are the same on every run

n_train_rows = len(X_train)
n_val_rows = int(n_train_rows * val_size)

# Row positions (0..n_train_rows-1) drawn without replacement. A local Random
# instance keeps the draw reproducible without reseeding the global generator.
validation_indices = np.array(
    random.Random(VAL_SEED).sample(range(n_train_rows), n_val_rows)
)

# Every remaining position, in ascending order. Set membership is O(1) per row;
# scanning the whole validation array for each row would be quadratic.
held_out = set(validation_indices.tolist())
train_indices = [i for i in range(n_train_rows) if i not in held_out]

quantiles = np.array([0.1, 0.5, 0.9])

In [16]:
class MLP(nn.Module):
    """Fully connected network with two hidden ReLU layers and dropout after the first.

    Args:
        n_inputs: Number of input features per sample.
        n_outputs: Width of the final linear layer. Defaults to 1 (a single
            predicted value per sample).
        n_nodes: Width of both hidden layers. Defaults to 64.
        dropout: Dropout probability applied after the first hidden
            activation. Defaults to 0.2.
    """

    def __init__(self, n_inputs, n_outputs=1, n_nodes=64, dropout=0.2):
        super().__init__()
        # Layer order is unchanged, so parameter names stay linear.0 / linear.3 / linear.5.
        self.linear = nn.Sequential(
            nn.Linear(n_inputs, n_nodes),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(n_nodes, n_nodes),
            nn.ReLU(),
            nn.Linear(n_nodes, n_outputs),  # output dimension is n_outputs
        )

    def forward(self, X):
        """Run X through the layer stack; the last dimension of the result is n_outputs."""
        return self.linear(X)

In [17]:
# The network takes one input per feature column listed in X_labels.
model = MLP(n_inputs=len(X_labels))

# Adam over all model parameters (learning rate 0.03), paired with an MSE loss.
optimizer = optim.Adam(params=model.parameters(), lr=0.03)
loss_fn = nn.MSELoss()

In [18]:
from multivariate_quantile_regression.utils import batches

In [None]:
# Features and targets as float tensors, built once. Both come from the same split,
# so a row position in train_indices / validation_indices addresses matching rows.
tX = torch.tensor(X_train.to_numpy(), dtype=torch.float)#,device=device)
tY = torch.tensor(y_train.to_numpy(), dtype=torch.float)#,device=device)

for epoch in range(nepochs):

    # ---- training pass ----
    model.train() #Dropout active; must be re-enabled after each validation pass
    train_loss = torch.tensor([0], dtype=torch.float)#,device=device)

    for batch in batches(train_indices, batch_size, shuffle=True):

        idx = torch.tensor(batch, dtype=torch.int64)#,device=device)

        optimizer.zero_grad() #Reset gradient

        yhat = model(tX[idx]) #Run algorithm

        loss = loss_fn(yhat, tY[idx]) #Compare predictions with the targets of the same rows
        loss.backward() #Calculate gradient

        optimizer.step()

        train_loss = train_loss + loss.detach() #Increment loss without keeping the graph

    # ---- validation pass ----
    model.eval() #Set evaluation mode (dropout off)
    validation_loss = torch.tensor([0], dtype=torch.float)#,device=device)

    for batch_idx, batch in enumerate(batches(validation_indices, batch_size, shuffle=False)):

        idx = torch.tensor(batch, dtype=torch.int64)#,device=device)

        with torch.no_grad(): #No gradients are needed for evaluation
            yhat = model(tX[idx])

        validation_loss = validation_loss + loss_fn(yhat, tY[idx]).sum()