<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Indicator" data-toc-modified-id="Indicator-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Indicator</a></span></li><li><span><a href="#Dataloader" data-toc-modified-id="Dataloader-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Dataloader</a></span></li></ul></div>

# Indicator

In [1]:
from sklearn.metrics import confusion_matrix
import numpy as np

In [2]:
class Indicator:
    """Accumulate ground-truth / predicted values and derive skill scores.

    Callers append values to the public lists ``y_true`` and ``y_pred`` and
    then call ``indicator_cls`` (binary labels) or ``indicator_reg``
    (continuous values). ``reset`` empties both lists.
    """

    def __init__(self):
        self.y_true = []
        self.y_pred = []

    @staticmethod
    def _ratio(numerator, denominator):
        """Return ``numerator / denominator``, or NaN if the denominator is 0."""
        if denominator == 0:
            return float('nan')
        return numerator / denominator

    def indicator_cls(self):
        """Compute binary classification scores from the accumulated labels.

        Returns:
            dict with keys
            'POD' = tp / (tp + fn),
            'FAR' = fp / (tp + fp),
            'CSI' = tp / (tp + fn + fp).
            A score whose denominator is zero is NaN.
        """
        cm = confusion_matrix(self.y_true, self.y_pred)
        if cm.shape == (1, 1):
            # Only one class occurred in y_true and y_pred, so the matrix
            # collapsed to 1x1 and cannot be unpacked into four counts.
            # Pin the label set to the 0/1 encoding to get the full 2x2 matrix.
            cm = confusion_matrix(self.y_true, self.y_pred, labels=[0, 1])
        tn, fp, fn, tp = cm.ravel()
        return {
            'POD': self._ratio(tp, tp + fn),
            'FAR': self._ratio(fp, tp + fp),
            'CSI': self._ratio(tp, tp + fn + fp),
        }

    def indicator_reg(self):
        """Compute regression scores from the accumulated values.

        Returns:
            dict with keys
            'CC'   = cov(y_true, y_pred) / sqrt(var(y_true) * var(y_pred)),
            'BIAS' = sum(y_pred - y_true) / sum(y_true),
            'MSE'  = mean((y_pred - y_true) ** 2).
        """
        y_true = np.array(self.y_true)
        y_pred = np.array(self.y_pred)
        error = y_pred - y_true

        # 2x2 covariance matrix: rows/columns are (y_true, y_pred).
        cov = np.cov([y_true, y_pred])
        CC = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
        BIAS = np.sum(error) / np.sum(y_true)
        MSE = np.mean(error ** 2)
        return {'CC': CC, 'BIAS': BIAS, 'MSE': MSE}

    def reset(self):
        """Discard all accumulated values."""
        self.y_true = []
        self.y_pred = []

In [3]:
# Smoke-test the classification scores on a tiny hand-made binary example.
indicator = Indicator()

y_true = np.array([0, 1, 0, 1])
y_pred = np.array([0, 0, 0, 1])
indicator.y_true += y_true.tolist()
indicator.y_pred += y_pred.tolist()
print(indicator.indicator_cls())

# Start from empty lists before switching to the regression example.
indicator.reset()

# Smoke-test the regression scores on unseeded random data
# (the printed numbers therefore change from run to run).
y_true = np.random.rand(10)
y_pred = np.random.rand(10)
indicator.y_true += y_true.tolist()
indicator.y_pred += y_pred.tolist()
print(indicator.indicator_reg())

{'POD': 0.5, 'FAR': 0.0, 'CSI': 0.5}
{'CC': 0.5219991862216652, 'BIAS': -0.16938088345983845, 'MSE': 0.08807572443283634}


# Dataloader

In [54]:
from datetime import date
from datetime import timedelta


In [60]:
# Count the days separating the two calendar dates.
start_date = date(year=2011, month=12, day=31)
end_date = date(year=2012, month=10, day=1)

diff = end_date - start_date  # a datetime.timedelta
print(diff.days)

275


In [2]:
from torch.utils.data import Dataset
from datetime import date
from datetime import timedelta
import numpy as np
import random
import tqdm
import os

# Directory holding the hourly training archives. Defaults to the original
# location; set the IR_DATA_DIR environment variable to point elsewhere.
IR_DATA_DIR = os.environ.get(
    'IR_DATA_DIR', '/usr/commondata/weather/IR_data/IR_dataset_QingHua')

# np.load on an .npz returns a lazy NpzFile that keeps the file open, so read
# the single stored array ('arr_0') inside a context manager to close it.
# GOSE is used as the model input (X) and StageIV as the target (Y).
with np.load(os.path.join(IR_DATA_DIR, 'X_train_hourly.npz')) as npz_file:
    GOSE = npz_file['arr_0']
with np.load(os.path.join(IR_DATA_DIR, 'Y_train_hourly.npz')) as npz_file:
    StageIV = npz_file['arr_0']

In [49]:
class IRDataset(Dataset):
    """Map-style dataset of windows cropped from X around grid points of Y.

    Sample locations ``(T, row, col)`` are taken on a regular grid with stride
    ``win_size`` and split into rain (``Y > 0.1``) and no-rain pools. A fixed
    number of locations is then drawn *with replacement* from the pools and
    sliced into 60% train / 20% test / 20% val by position. Because draws are
    with replacement (and each instance draws independently), the same location
    can appear in more than one split.

    Args:
        task: 'identification' (equal draws from the rain and no-rain pools)
            or 'estimation' (draws from the rain pool only).
        mode: 'train', 'test' or 'val'; selects the slice served by this
            instance.
        shuffle: shuffle the drawn samples before slicing.
        win_size: half-width of the cropped window and stride of the grid; the
            crop spans ``2 * win_size + 1`` cells along each of the last two axes.
        X: input array indexed as ``X[T]``; defaults to the module-level ``GOSE``.
        Y: target array indexed as ``Y[T, row, col]``; defaults to the
            module-level ``StageIV``.
        n_samples: number of draws per pool; defaults to 340000 for
            'identification' and 470000 for 'estimation'.

    Raises:
        ValueError: if ``task`` or ``mode`` is not one of the supported values.
    """

    # Draws per pool when n_samples is not given.
    DEFAULT_N_SAMPLES = {'identification': 340000, 'estimation': 470000}
    # (start, stop) fractions of the drawn samples served by each mode.
    SPLIT_FRACTIONS = {'train': (0.0, 0.6), 'test': (0.6, 0.8), 'val': (0.8, 1.0)}

    def __init__(self, task='identification', mode='train', shuffle=False, win_size=14,
                 X=None, Y=None, n_samples=None):
        # Validate first so a typo fails immediately rather than after the scan
        # of Y (or, as before, with an AttributeError on first use).
        if task not in self.DEFAULT_N_SAMPLES:
            raise ValueError(
                "task must be one of %s, got %r" % (sorted(self.DEFAULT_N_SAMPLES), task))
        if mode not in self.SPLIT_FRACTIONS:
            raise ValueError(
                "mode must be one of %s, got %r" % (sorted(self.SPLIT_FRACTIONS), mode))

        self.X = GOSE if X is None else X
        self.Y = StageIV if Y is None else Y
        self.win_size = win_size
        self.R_samples, self.NR_samples = self.split_R_NR(self.Y)

        k = self.DEFAULT_N_SAMPLES[task] if n_samples is None else n_samples
        if task == 'identification':
            # Balanced classes: k rain locations followed by k no-rain locations.
            self.samples = np.vstack([np.array(random.choices(self.R_samples, k=k)),
                                      np.array(random.choices(self.NR_samples, k=k))])
        else:
            self.samples = np.array(random.choices(self.R_samples, k=k))

        if shuffle:
            np.random.shuffle(self.samples)
        L = len(self.samples)

        self.mode = mode
        lo, hi = self.SPLIT_FRACTIONS[mode]
        self.sample_idx = range(int(L * lo), int(L * hi))
        self.L = len(self.sample_idx)

    def safe_crop_center(self, img, x, y, cropx, cropy):
        """Crop a window centred on (x, y), or return None if it does not fit.

        ``x`` indexes the second-to-last axis (height) and ``y`` the last axis
        (width). The window covers ``[x - cropx, x + cropx]`` by
        ``[y - cropy, y + cropy]`` inclusive. Only 2-D and 3-D inputs are
        handled; any other rank returns None.
        """
        if len(img.shape) not in (2, 3):
            return None
        H, W = img.shape[-2], img.shape[-1]
        startx, endx = x - cropx, x + cropx + 1
        starty, endy = y - cropy, y + cropy + 1
        # End indices are exclusive, so a window may end exactly at H / W.
        if startx < 0 or starty < 0 or endx > H or endy > W:
            return None
        return img[..., startx:endx, starty:endy]

    def unsafe_crop_center(self, img, x, y, cropx, cropy):
        """Crop a window centred on (x, y) without any bounds checking.

        Same window definition as ``safe_crop_center``. The caller must
        guarantee the window lies inside the image: out-of-range bounds are
        silently truncated or wrapped by slicing. Only 2-D and 3-D inputs are
        handled; any other rank returns None.
        """
        if len(img.shape) not in (2, 3):
            return None
        startx, endx = x - cropx, x + cropx + 1
        starty, endy = y - cropy, y + cropy + 1
        return img[..., startx:endx, starty:endy]

    def split_R_NR(self, StageIV):
        """Split the strided grid points of ``StageIV`` into rain / no-rain.

        Grid points are ``range(win_size, dim - win_size, win_size)`` along the
        last two axes, for every index of the first axis. A point is "rain" if
        its value is ``> 0.1``; everything else (including NaN) is "no rain".

        Returns:
            (R_samples, NR_samples): two integer arrays of shape (n, 3) whose
            rows are ``(T, row, col)``, ordered by T, then row, then col.
        """
        StageIV = np.asarray(StageIV)
        win = self.win_size
        rows = np.arange(win, StageIV.shape[1] - win, win)
        cols = np.arange(win, StageIV.shape[2] - win, win)

        # Values at the grid points only, shape (T, len(rows), len(cols)).
        grid = StageIV[:, rows[:, None], cols[None, :]]
        is_rain = grid > 0.1

        def to_samples(mask):
            # argwhere walks the mask in C order, i.e. T-major, then row, then col.
            idx = np.argwhere(mask)
            return np.column_stack([idx[:, 0], rows[idx[:, 1]], cols[idx[:, 2]]])

        return to_samples(is_rain), to_samples(~is_rain)

    def __getitem__(self, idx):
        """Return ``(X_crop, Y_value, T, row, col)`` for the idx-th sample of this mode.

        Raises:
            IndexError: if ``idx`` is outside ``range(len(self))``.
        """
        # Map the position within this split to a position in the full sample
        # list; indexing self.samples directly would always serve the first
        # (training) slice regardless of mode.
        T, row, col = self.samples[self.sample_idx[idx]]
        X_croped = self.unsafe_crop_center(self.X[T], row, col, self.win_size, self.win_size)
        Y_croped = self.Y[T, row, col]
        return X_croped, Y_croped, T, row, col

    def __len__(self):
        """Number of samples in the slice selected by ``mode``."""
        return self.L

    def name(self):
        """Return the dataset's display name."""
        return 'IRDataset'

In [47]:
# Build the shuffled training split with the default task and window size.
dataset = IRDataset(shuffle=True, mode='train')

In [48]:
# Pull the first sample and show the crop's shape, the target value and the
# (T, row, col) location it was taken from.
(X_croped, Y_croped, T, row, col) = dataset[0]
print(X_croped.shape, Y_croped, T, row, col)

(3, 29, 29) 0.0 32 182 336
