<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Indicator" data-toc-modified-id="Indicator-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Indicator</a></span></li><li><span><a href="#Dataloader" data-toc-modified-id="Dataloader-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Dataloader</a></span></li></ul></div>

# Indicator

In [2]:
from sklearn.metrics import confusion_matrix
import numpy as np

In [57]:
class Indicator:
    """Accumulate ground-truth / predicted values and derive skill scores.

    Callers append to the plain lists ``y_true`` and ``y_pred`` and then call
    ``indicator_cls`` (binary detection scores) or ``indicator_reg``
    (regression scores). ``reset`` empties both lists again.
    """

    def __init__(self):
        self.y_true = []
        self.y_pred = []

    @staticmethod
    def _ratio(numerator, denominator):
        """Return ``numerator / denominator``, or NaN when the denominator is zero."""
        return numerator / denominator if denominator else float('nan')

    def indicator_cls(self):
        """Compute POD, FAR and CSI from the accumulated binary labels.

        Labels are expected to be 0 (negative) and 1 (positive).

        Returns:
            dict with keys ``'POD'`` = tp/(tp+fn), ``'FAR'`` = fp/(tp+fp) and
            ``'CSI'`` = tp/(tp+fn+fp). A score whose denominator is zero is NaN.

        Raises:
            ValueError: propagated from ``confusion_matrix`` when ``y_true`` is
                non-empty but contains neither 0 nor 1.
        """
        # labels=[0, 1] pins the matrix to 2x2. Without it, a batch holding a
        # single class yields a 1x1 matrix and the 4-way unpack below fails.
        tn, fp, fn, tp = confusion_matrix(
            self.y_true, self.y_pred, labels=[0, 1]
        ).ravel()
        return {
            'POD': self._ratio(tp, tp + fn),
            'FAR': self._ratio(fp, tp + fp),
            'CSI': self._ratio(tp, tp + fn + fp),
        }

    def indicator_reg(self):
        """Compute CC, BIAS and MSE from the accumulated values.

        Returns:
            dict with keys ``'CC'`` (covariance of truth and prediction divided
            by the square root of the product of their variances), ``'BIAS'``
            (sum(pred - true) / sum(true)) and ``'MSE'`` (mean squared error).
        """
        truth = np.array(self.y_true)
        pred = np.array(self.y_pred)
        error = pred - truth

        # 2x2 covariance matrix: row/column 0 is the truth, 1 the prediction.
        cov = np.cov([self.y_true, self.y_pred])
        CC = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
        BIAS = np.sum(error) / np.sum(self.y_true)
        MSE = np.mean(error ** 2)
        return {'CC': CC, 'BIAS': BIAS, 'MSE': MSE}

    def reset(self):
        """Discard everything accumulated so far."""
        self.y_true = []
        self.y_pred = []

In [58]:
# Smoke test of the classification scores on a tiny hand-written binary example.
indicator = Indicator()

y_true = np.array([0, 1, 0, 1])
y_pred = np.array([0, 0, 0, 1])
indicator.y_true += y_true.tolist()
indicator.y_pred += y_pred.tolist()
print(indicator.indicator_cls())
indicator.reset()

# Smoke test of the regression scores on ten uniform random values per series
# (unseeded, so the printed numbers differ between runs).
y_true = np.random.rand(10)
y_pred = np.random.rand(10)
indicator.y_true += y_true.tolist()
indicator.y_pred += y_pred.tolist()
print(indicator.indicator_reg())

{'POD': 0.5, 'FAR': 0.0, 'CSI': 0.5}
{'CC': 0.3769509070511113, 'BIAS': 0.516089721511217, 'MSE': 0.12756994742513755}


# Dataloader

In [1]:
from torch.utils.data import Dataset
from datetime import date
from datetime import timedelta
import numpy as np
import random
import tqdm

# Load the two input dictionaries; `.item()` unwraps the 0-d object array that
# np.save produces for a Python object.
# NOTE(review): allow_pickle=True unpickles arbitrary objects, which can execute
# code on load -- only use this on trusted files.
# NOTE(review): absolute, machine-specific paths; consider a configurable data dir.
GOSE=np.load('/usr/commondata/weather/New/GOSE.npy',allow_pickle=True).item()
StageIV=np.load('/usr/commondata/weather/New/StageIV.npy',allow_pickle=True).item()

def date2num(start_date,end_dates):
    """Turn 'YYYYMMDD<suffix>' stamps into '<day offset>.<suffix>' strings.

    Args:
        start_date: ``datetime.date`` the day offset is counted from.
        end_dates: iterable of strings whose first eight characters are
            YYYYMMDD; whatever follows is copied verbatim after the dot.

    Returns:
        list of strings, one per input stamp, in input order.
    """
    def _convert(stamp):
        stamp_day = date(int(stamp[:4]), int(stamp[4:6]), int(stamp[6:8]))
        offset_days = (stamp_day - start_date).days
        return '{}.{}'.format(offset_days, stamp[8:])

    return [_convert(stamp) for stamp in end_dates]
    
# Re-key StageIV in place: every original date-string key is replaced by the
# '<day offset>.<suffix>' key that date2num derives relative to 2011-12-31.
# The key list is snapshotted first so the dict can be mutated while looping.
# NOTE: not idempotent -- running this cell twice on the same dict will fail,
# because the converted keys are no longer in the date-string format.
start_date = date(2011, 12, 31)
end_dates = list(StageIV.keys())
StageIV_keys = date2num(start_date, end_dates)
for i, old_key in enumerate(end_dates):
    StageIV[StageIV_keys[i]] = StageIV.pop(old_key)


# Pick which precomputed sample list to load: the '_B' file when `balance` is
# set, the other file otherwise.
balance = True
samples_path = (
    '/usr/commondata/weather/New/samples_B.npy'
    if balance
    else '/usr/commondata/weather/New/samples.npy'
)
global_samples = np.load(samples_path)

In [2]:
class IRDataset(Dataset):
    """Dataset of fixed-size patches cut from paired X / Y fields.

    ``X`` is the module-level ``GOSE`` mapping and ``Y`` the module-level
    ``StageIV`` mapping; both are looked up with the same key. ``samples`` is
    the module-level ``global_samples`` array of ``(key, i, j)`` rows, where
    ``(i, j)`` is the patch centre. The sample list is split by position into
    train (first 60%), test (next 20%) and val (last 20%).
    """

    # Half-width of a patch: crops span 2 * CROP_RADIUS + 1 pixels per side.
    CROP_RADIUS = 14
    # `sampling` puts a centre pixel strictly above this value in the first
    # ("R") group, everything else in the second ("NR") group.
    # NOTE(review): presumably rain / no-rain -- confirm.
    POSITIVE_THRESHOLD = 0.1
    # Fractions [start, end) of the sample list assigned to each mode.
    SPLITS = {'train': (0.0, 0.6), 'test': (0.6, 0.8), 'val': (0.8, 1.0)}

    def __init__(self,mode='train',balance=True):
        """Select the slice of the global sample list that belongs to ``mode``.

        Args:
            mode: one of ``'train'``, ``'test'`` or ``'val'``.
            balance: accepted for compatibility but not used here; which sample
                file is loaded is decided by the module-level ``balance`` flag
                when ``global_samples`` is read.

        Raises:
            ValueError: if ``mode`` is not a known split name.
        """
        if mode not in self.SPLITS:
            raise ValueError(
                "mode must be one of {}, got {!r}".format(sorted(self.SPLITS), mode)
            )

        self.X = GOSE
        self.Y = StageIV
        self.samples = global_samples
        self.mode = mode

        L = len(self.samples)
        lo, hi = self.SPLITS[mode]
        self.sample_idx = range(int(L * lo), int(L * hi))
        self.L = len(self.sample_idx)

    def crop_center(self,img,x,y,cropx,cropy):
        """Cut the window centred on ``(x, y)`` out of the last two axes of ``img``.

        Args:
            img: 2-D ``(H, W)`` or 3-D ``(C, H, W)`` array.
            x, y: centre index along the H and W axis respectively.
            cropx, cropy: half-widths; the window is ``2*crop+1`` wide per axis.

        Returns:
            The cropped view, or ``None`` when the window does not fit or
            ``img`` is neither 2-D nor 3-D.
        """
        if len(img.shape) not in (2, 3):
            return None
        H, W = img.shape[-2:]

        startx, endx = x - cropx, x + cropx + 1
        starty, endy = y - cropy, y + cropy + 1

        # The y extent is checked against W (the original compared it with H,
        # which is wrong for non-square fields).
        # NOTE(review): `>=` also rejects a window ending exactly on the last
        # row/column; left unchanged to preserve the existing sample set.
        if startx < 0 or starty < 0 or endx >= H or endy >= W:
            return None
        return img[..., startx:endx, starty:endy]

    def sampling(self,key,img):
        """Enumerate every valid patch centre of one 2-D field.

        Centres whose value exceeds ``POSITIVE_THRESHOLD`` go to the "R" group,
        the rest to the "NR" group.

        Returns:
            ``(all_samples, balanced_samples)``: both are lists of
            ``(key, i, j)`` tuples starting with the R group; the balanced list
            is followed by a random subset of NR no larger than the R group.
        """
        r = self.CROP_RADIUS
        R_samples = []
        NR_samples = []
        for i in range(img.shape[0]):
            for j in range(img.shape[1]):
                Y = self.crop_center(img, i, j, r, r)
                if Y is None:
                    continue
                # Y[r, r] is the centre pixel of the crop, i.e. img[i, j].
                if Y[r, r] > self.POSITIVE_THRESHOLD:
                    R_samples.append((key, i, j))
                else:
                    NR_samples.append((key, i, j))

        # random.sample raises if asked for more items than exist, so cap the
        # draw when the R group is larger than the NR group.
        n_draw = min(len(R_samples), len(NR_samples))
        NR_samples_B = random.sample(NR_samples, n_draw)
        return R_samples + NR_samples, R_samples + NR_samples_B

    def get_samples(self):
        """Run ``sampling`` over every key present in both ``X`` and ``Y``.

        Also stores the sorted list of shared keys in ``self.useful_keys``.

        Returns:
            ``(samples, samples_B)``: concatenated full and balanced lists.
        """
        self.useful_keys = sorted(set(self.X.keys()) & set(self.Y.keys()))

        samples = []
        samples_B = []
        for key in tqdm.tqdm(self.useful_keys):
            samples_tmp, samples_B_tmp = self.sampling(key, self.Y[key])
            samples += samples_tmp
            samples_B += samples_B_tmp
        return samples, samples_B

    def save_samples(self):
        """Regenerate both sample lists and write them to the ``.npy`` files."""
        # Use this instance rather than a global named `dataset`.
        samples, samples_B = self.get_samples()
        samples = np.array(samples)
        samples_B = np.array(samples_B)
        np.save('/usr/commondata/weather/New/samples_B.npy',samples_B)
        np.save('/usr/commondata/weather/New/samples.npy',samples)

    def __getitem__(self, idx):
        """Return ``(X_crop, Y_crop, i, j, key)`` for item ``idx`` of this split.

        Raises:
            IndexError: if ``idx`` is outside this split.
        """
        # Map the split-local index to a row of the global sample list, so the
        # train/test/val splits actually serve different samples.
        key, i, j = self.samples[self.sample_idx[idx]]
        X, Y = self.X[key], self.Y[key]
        # Rows of the saved array may hold i and j as strings.
        i, j = int(i), int(j)
        r = self.CROP_RADIUS
        X_croped = self.crop_center(X, i, j, r, r)
        Y_croped = self.crop_center(Y, i, j, r, r)
        return X_croped, Y_croped, i, j, key

    def __len__(self):
        """Number of samples in the selected split."""
        return self.L

    def name(self):
        """Return the dataset's display name."""
        return 'IRDataset'

In [3]:
# Build the 'test' split of the sample list (per IRDataset.__init__: the
# 60%-80% slice of global_samples).
dataset=IRDataset(mode='test',balance=True)

In [9]:
# Gather every GOSE value into one array with a new leading axis.
# NOTE(review): assumes all values share the same shape -- confirm.
data = np.array([*GOSE.values()])

In [18]:
# Overwrite NaN entries with 0 in place. The zeros then count towards any
# statistic computed from `data` afterwards.
nan_mask = np.isnan(data)
data[nan_mask] = 0

In [19]:
# Print mean and variance over all remaining axes for indices 0..4 of axis 1.
for i in range(5):
    channel = data[:, i]
    mean, var = np.mean(channel), np.var(channel)
    print(mean, var)

422.6626917616589 489.64054904540046
442.8474696204027 1279.0312110649393
442.8474696204027 1279.0312110649393
-199.98306597322008 3892.523190931211
442.8474696204027 1279.0312110649393


In [13]:
# Does index 0 of axis 1 contain any NaN?
np.any(np.isnan(data[:, 0]))

True

In [16]:
import matplotlib.pyplot as plt