In [1]:
from netCDF4 import Dataset
from skimage import filters
import torch.nn as nn
import netCDF4 as nc
import pandas as pd
import numpy as np
import random
import torch
import os
import matplotlib.pyplot as plt
from models.forecastors.GeneralUnet.general_unet import *
from models.forecastors.utils.loss import * 
import torch.optim as optim
from models.utils.configtrain import *
from data_prep.config.env import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def setup_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds NumPy, Python's ``random`` module and PyTorch (CPU and all CUDA
    devices), and configures cuDNN for reproducible runs.

    Args:
        seed: Integer seed applied to every generator.
    """
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)  # cpu
    torch.cuda.manual_seed_all(seed)  # every visible GPU
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different convolution algorithms from run
    # to run, so it has to be switched off for results to be repeatable.
    torch.backends.cudnn.benchmark = False
setup_seed(30)

In [3]:
import matplotlib.pyplot as plt
from netCDF4 import Dataset
from skimage import filters
import torch.nn as nn
import netCDF4 as nc
import pandas as pd
import numpy as np
import random
import torch
import os
from skimage import filters


DATAPATH = os.environ.get("DATAPATH","/home/resifis/Desktop/kaustcode/Packages/processed_clean_data")



class Dataset2D():
    """Map-style dataset pairing six atmospheric fields with a solar target.

    Every sample is a dict with:
      * ``'data'``   - the six input fields (high/mid/low cloud, aerosol,
        ozone, water vapour) stacked along a new leading channel axis, each
        flipped along its first axis and divided by its own maximum.
      * ``'target'`` - the selected irradiance field, flipped the same way and
        divided by its own maximum.

    All fields are read lazily from netCDF files located under ``DATAPATH``.
    """

    # netCDF file (relative to DATAPATH) backing each input field.
    _INPUT_FILES = {
        "hcloud": "hcloud.nc",
        "mcloud": "mcloud.nc",
        "lcloud": "lcloud.nc",
        "water_vapor": "water_vapor_new.nc",
        "ozone": "ozone.nc",
    }
    # netCDF file (relative to DATAPATH) backing each target type.
    _TARGET_FILES = {
        "GHI": "ghi.nc",
        "DHI": "dhi.nc",
    }

    def __init__(self,solar_type,data_type = "Train"):
        """Open all input files and the target file for ``solar_type``.

        Args:
            solar_type: ``"GHI"`` or ``"DHI"``; any other value selects the
                DNI file. Its lower-cased form is used as the target
                variable name.
            data_type: Currently unused; kept for interface compatibility.
        """
        self.solar_type = solar_type
        self.hcloud = Dataset2D._read_data("hcloud")
        self.mcloud = Dataset2D._read_data("mcloud")
        self.lcloud = Dataset2D._read_data("lcloud")
        self.water_vapor = Dataset2D._read_data("water_vapor")
        self.ozone = Dataset2D._read_data("ozone")
        self.aerosol = Dataset2D._read_data("aerosol")
        # NOTE(review): hard-coded number of samples - presumably the length
        # of the time axis of the files; confirm against the data.
        self.len = 48222
        self.target_data = Dataset2D._read_target(self.solar_type)
        self.output = dict()
        # Added to every normalisation denominator to avoid division by zero.
        self.eps = 1e-5

    @staticmethod
    def _read_data(data_type):
        """Open the netCDF file backing the input field ``data_type``.

        Any name other than the five listed in ``_INPUT_FILES`` (including
        ``"aerosol"``) opens the aerosol file ``aod.nc``.
        """
        file_name = Dataset2D._INPUT_FILES.get(data_type, "aod.nc")
        return Dataset(os.path.join(DATAPATH, file_name))

    @staticmethod
    def _read_target(target_type):
        """Open the netCDF file for the target; unknown types open ``dni.nc``."""
        file_name = Dataset2D._TARGET_FILES.get(target_type, "dni.nc")
        return Dataset(os.path.join(DATAPATH, file_name))

    @staticmethod
    def _filtering(list_data):
        """Return a Sobel-filtered float tensor for every entry of ``list_data``."""
        return [torch.tensor(filters.sobel(np.array(entry)), dtype = torch.float)
                for entry in list_data]

    def _load_normalized(self, handle, var_name, item):
        """Read one field, flip it along axis 0, add a channel axis, scale by its max."""
        field = torch.tensor(handle.variables[var_name][item,:,:], dtype = torch.float)
        return torch.flip(field, dims = [0]).unsqueeze(0)/(field.max()+self.eps)

    def _get_tensors(self,item):
        """Return ``(inputs, target)`` for sample ``item``.

        ``inputs`` is a list of six tensors (one per field, each with a
        leading channel axis of size 1); ``target`` is the flipped but not
        yet normalised target tensor.
        """
        # Order defines the channel order of the stacked input.
        sources = (
            (self.hcloud, "cc"),
            (self.mcloud, "cc"),
            (self.lcloud, "cc"),
            (self.aerosol, "aod5503d"),
            (self.ozone, "o3rad"),
            (self.water_vapor, "qvapor"),
        )
        all_data = [self._load_normalized(handle, var_name, item)
                    for handle, var_name in sources]

        target = torch.tensor(self.target_data.variables[self.solar_type.lower()][item],
                              dtype = torch.float)
        target = torch.flip(target,dims = [0])
        return all_data,target

    def __len__(self):
        """Number of samples exposed by the dataset."""
        return self.len

    def __getitem__(self,item):
        """Return ``{'data': stacked inputs, 'target': normalised target}``."""
        out = dict()
        all_data,target = self._get_tensors(item)
        out['data'] = torch.cat(all_data,dim = 0)
        # eps keeps an all-zero target frame finite (0/0 would give NaN) and
        # matches how the input channels are normalised.
        out['target'] = target/(target.max()+self.eps)
        return out
        

In [4]:
# Build the dataset with GHI as the prediction target.
DataSet = Dataset2D("GHI")

In [5]:
# Six input channels: one per field that Dataset2D concatenates into 'data'.
input_features = 6  
# Single output channel (the training loop squeezes dim 1 of the prediction).
output_feature = 1
model = UNet2D(in_channels = input_features,
               out_channels = output_feature,
            )

def init_weights(m):
    """Initialise one module in place; intended for ``model.apply(init_weights)``.

    * ``Conv2d`` / ``ConvTranspose2d`` / ``BatchNorm2d``: weights drawn from a
      standard normal distribution.
    * ``Linear``: Xavier-uniform weights.
    * In all of the above, the bias (when present) is filled with 1.

    Any other module type is left untouched.

    Args:
        m: The module to initialise.
    """
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d)):
        # BatchNorm2d(affine=False) has no weight to initialise.
        if m.weight is not None:
            torch.nn.init.normal_(m.weight)
    elif isinstance(m, nn.Linear):
        # In-place variant; the un-suffixed name is a deprecated alias.
        torch.nn.init.xavier_uniform_(m.weight)
    else:
        return

    # Layers built with bias=False (or affine=False) expose bias as None.
    if m.bias is not None:
        m.bias.data.fill_(1)

# Loss selected by name; "RMSE" is the root of the mean squared error.
# NOTE(review): `Loss` is presumably provided by one of the star imports at the
# top of the notebook - confirm which definition is actually in scope here.
criterion = Loss("RMSE")
# Use the first GPU when available, otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
criterion = criterion.to(device)
model = model.to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.001)
# Multiply the learning rate by 0.2 once the monitored value has stopped
# improving for more than 3 consecutive scheduler steps.
# NOTE(review): recent PyTorch releases deprecate the `verbose` argument of
# ReduceLROnPlateau - confirm against the installed version.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       factor = 0.2,
                                                       patience = 3,
                                                       verbose = True)

# Data-loader and loop settings read as globals by the Training class.
# The shuffle flags must stay False: the loaders are built with an explicit
# sampler, and DataLoader rejects shuffle=True together with a sampler.
shuffle_trainloader = False
train_batch_size = 16
shuffle_validloader = False
valid_batch_size = 16
epochs = 100
verbose = True

In [6]:
from tqdm import tqdm
import torch
import os
from tqdm import tqdm
import torch.nn as nn
import torchvision
import neptune.new as neptune
from neptune.new.types import File
import torch.nn.init as weight_init
from torch.utils.data import RandomSampler
import matplotlib.pyplot as plt
plt.style.use('classic')





MODELS_WEIGHTS = os.environ.get("MODELS_WEIGHTS","/home/resifis/Desktop/kaustcode/Packages/weights2d")


class AverageMeter:
    """
    Keeps the latest value together with a weighted running mean
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the last value, the running mean, the weighted sum and the count."""
        for field in ("val", "avg", "sum", "count"):
            setattr(self, field, 0)

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the running mean."""
        self.count += n
        self.sum += val * n
        self.val = val
        self.avg = self.sum / self.count


class Training():
    """Training / validation driver for the 2-D model.

    The methods rely on module-level names defined in earlier cells:
    ``device``, ``criterion``, ``optimizer``, ``scheduler``, ``epochs``,
    ``verbose``, ``init_weights``, the batch-size / shuffle settings and
    ``MODELS_WEIGHTS``.
    """

    def __init__(self):
        self.aa = 1  # unused placeholder, kept for compatibility

    def _initialize_weights(self,net):
        """Overwrite every parameter of ``net`` with standard-normal samples."""
        for name, param in net.named_parameters():
            weight_init.normal_(param)

    def train_fn(self,model,train_loader):
        """Run one optimisation pass over ``train_loader``.

        Returns:
            The mean of the per-batch losses (each batch weighted equally).
        """
        model.train()

        tr_loss = 0
        counter = 0
        losses = AverageMeter()
        tqt = tqdm(enumerate(train_loader),total = len(train_loader))
        for index,train_batch in tqt:
            data = train_batch["data"].to(device)
            target = train_batch["target"].to(device)
            # Drop the channel axis so the prediction matches the target shape.
            pred_target = model(data).squeeze(1)
            train_loss = criterion(pred_target,target)
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
            tr_loss += train_loss.item()
            counter = counter + 1
            # Sample-weighted running mean, shown only in the progress bar.
            losses.update(train_loss.item(),pred_target.size(0))
            tqt.set_postfix(Loss = losses.avg, Batch_number = index )
        return tr_loss/counter

    def valid_fn(self,model,validation_loader):
        """Evaluate ``model`` on ``validation_loader`` without gradients.

        Returns:
            The sample-weighted mean loss over the loader.
        """
        model.eval()
        losses = AverageMeter()
        tqt = tqdm(enumerate(validation_loader),total = len(validation_loader))
        with torch.no_grad():
            for index, valid_batch in tqt :
                data = valid_batch["data"].to(device)
                target = valid_batch["target"].to(device)
                pred_target = model(data).squeeze(1)
                validation_loss = criterion(pred_target,target)
                losses.update(validation_loss.item(),pred_target.size(0))
                tqt.set_postfix(loss = losses.avg, batch_number = index)
        return losses.avg

    @staticmethod
    def checkpoints(epoch,model,optimizer,loss):
        """Save model and optimiser state for ``epoch`` under ``MODELS_WEIGHTS``."""
        # Create the target directory on first use instead of failing in save.
        os.makedirs(MODELS_WEIGHTS, exist_ok = True)
        path = os.path.join(MODELS_WEIGHTS,f"epoch_{epoch}.pt")
        torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
                }, path)

    def get_dataloader(self,train_dataset,valid_dataset):
        """Build loaders that draw a fresh random subset on every call.

        Each call samples 1500 training and 1000 validation indices with
        replacement.

        Returns:
            ``(train_data_loader, valid_data_loader)``
        """
        train_sampler = RandomSampler(train_dataset,replacement = True,num_samples = 1500)
        valid_sampler = RandomSampler(valid_dataset,replacement = True,num_samples = 1000)

        # `shuffle` must remain False: DataLoader raises when a sampler is
        # supplied together with shuffle=True.
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        shuffle = shuffle_trainloader,
                                                        batch_size = train_batch_size,
                                                        sampler = train_sampler,
                                                       )
        valid_data_loader = torch.utils.data.DataLoader(valid_dataset,
                                                        shuffle = shuffle_validloader,
                                                        batch_size = valid_batch_size,
                                                        sampler = valid_sampler,
                                                       )
        return train_data_loader,valid_data_loader

    def fit(self,model,train_dataset,valid_dataset,early_stopping_patience = None):
        """Train for up to ``epochs`` epochs with checkpointing and LR scheduling.

        Args:
            model: Network to train; re-initialised with ``init_weights``.
            train_dataset: Dataset sampled for the training pass.
            valid_dataset: Dataset sampled for the validation pass.
            early_stopping_patience: Number of consecutive non-improving
                epochs after which training stops. ``None`` (default) keeps
                the previous behaviour of using ``epochs`` as the threshold.
        """
        if early_stopping_patience is None:
            early_stopping_patience = epochs

        train_loss = []
        valid_loss = []
        best = float("inf")
        # Defined up front so the non-improving branch can never read it unset
        # (e.g. when the first validation loss is NaN).
        patience = 0
        model = model.apply(init_weights)
        for epoch in range(epochs):
            train_data_loader,valid_data_loader = self.get_dataloader(train_dataset,valid_dataset)
            if verbose :
                print(f".........EPOCH {epoch}........")
            tr_loss = self.train_fn(model,train_data_loader)
            train_loss.append(tr_loss)
            if verbose :
                print(f".........Train Loss = {tr_loss}........")
            val_loss = self.valid_fn(model,valid_data_loader)
            valid_loss.append(val_loss)
            Training.checkpoints(epoch,model,optimizer,val_loss)
            scheduler.step(val_loss)
            if verbose:
                print(f"...........Validation Loss = {val_loss}.......")

            if val_loss < best :
                best = val_loss
                patience = 0
            else:
                print("Score is not improving with patient = ",patience)
                patience +=1

            if patience >= early_stopping_patience:
                print(f"Early Stopping on Epoch {epoch}")
                print(f"Best Loss = {best}")
                break

        # Persist the final weights, then reload them from disk.
        os.makedirs(MODELS_WEIGHTS, exist_ok = True)
        PATH = os.path.join(MODELS_WEIGHTS,"model_2d.pth")
        torch.save(model.state_dict(),PATH)
        model.load_state_dict(torch.load(PATH))

In [7]:
# Launch training.
# NOTE(review): the same dataset object is passed for both training and
# validation, so the validation loss is not measured on held-out data.
job = Training()
job.fit(model,DataSet,DataSet)

model bias initial   Parameter containing:
tensor([ 0.0222, -0.0062,  0.0366,  0.0068, -0.0202,  0.0180, -0.0076, -0.0323,
        -0.0175, -0.0118,  0.0301,  0.0367,  0.0254, -0.0237,  0.0319, -0.0165,
        -0.0102, -0.0232,  0.0097,  0.0164,  0.0173,  0.0121,  0.0164,  0.0231,
        -0.0361,  0.0182, -0.0256,  0.0051,  0.0108, -0.0095,  0.0328,  0.0123,
        -0.0242, -0.0080, -0.0074, -0.0152, -0.0092, -0.0101, -0.0119,  0.0298,
         0.0237, -0.0022,  0.0285, -0.0283,  0.0207, -0.0267, -0.0043,  0.0171,
        -0.0102,  0.0146, -0.0038,  0.0349,  0.0150, -0.0285, -0.0205,  0.0293,
        -0.0387, -0.0060,  0.0193, -0.0317,  0.0057, -0.0403, -0.0104,  0.0122],
       device='cuda:0', requires_grad=True)
model after intialization Parameter containing:
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 

100%|██████████████| 94/94 [00:25<00:00,  3.73it/s, Batch_number=93, Loss=0.449]


.........Train Loss = 0.4490426480770111........


100%|██████████████| 63/63 [00:08<00:00,  7.13it/s, batch_number=62, loss=0.452]


model bias after 0 epoch    Parameter containing:
tensor([0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991,
        0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991,
        0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991,
        0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991,
        0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991,
        0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991,
        0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991, 0.9991,
        0.9991], device='cuda:0', requires_grad=True)
...........Validation Loss = 0.45152729415893555.......
.........EPOCH 1........


 47%|██████▌       | 44/94 [00:09<00:10,  4.67it/s, Batch_number=43, Loss=0.453]


KeyboardInterrupt: 

In [None]:
# Take the first sample, add a leading batch axis and move it to the model's device.
input_ = DataSet[0]['data'].unsqueeze(0)
input_ = input_.to(device)

In [None]:
# Inspect the dimensions of the prepared input tensor.
input_.shape

In [None]:
# Run inference in evaluation mode and without building an autograd graph, so
# train-only layer behaviour is disabled and no gradient memory is kept.
model.eval()
with torch.no_grad():
    out = model(input_)

In [None]:
# Smallest value in the prediction.
out.min()

In [None]:
# Detach the prediction from the autograd graph and copy it to host memory.
out = out.detach().cpu()

In [None]:
# imshow needs a 2-D (or H x W x channels) image, so drop the singleton
# batch/channel axes of the prediction before plotting.
plt.imshow(out.squeeze())

In [None]:
class Loss(nn.Module):
    """Loss selected by name at construction time.

    Supported names: ``"RMSE"``, ``"MSE"``, ``"L1Loss"``, ``"PSNR"`` and
    ``"SSIM"``.

    Args:
        loss_name: Name of the loss that ``forward`` should compute.
    """

    def __init__(self,loss_name):
        super(Loss,self).__init__()
        self.mse = nn.MSELoss()
        self.l1  = nn.L1Loss()
        self.loss_name = loss_name
        self.psnr = PSNR()
        self.ssim = SSIM3D(window_size = 11)

    def forward(self,yhat,y):
        """Compute the configured loss between prediction ``yhat`` and target ``y``.

        Raises:
            ValueError: If ``loss_name`` is not one of the supported names
                (previously this case silently returned ``None``).
        """
        if self.loss_name == "RMSE":
            return torch.sqrt(self.mse(yhat,y))
        if self.loss_name == "MSE":
            return self.mse(yhat,y)
        if self.loss_name == "L1Loss":
            return self.l1(yhat,y)
        if self.loss_name == "PSNR":
            # NOTE(review): returned as-is; confirm that the PSNR module
            # yields a quantity that should be minimised.
            return self.psnr(yhat,y)
        if self.loss_name == "SSIM":
            # Similarity is turned into a dissimilarity so it can be minimised.
            return 1-self.ssim(yhat,y)
        raise ValueError(
            f"Unsupported loss_name {self.loss_name!r}; expected one of "
            "'RMSE', 'MSE', 'L1Loss', 'PSNR', 'SSIM'"
        )