In [1]:
import torch
import json
from PIL import Image
from models.predict import Custom_AlexNet
from torchvision.transforms import Compose, Resize, ToTensor
from torch.nn.functional import softmax
import warnings
warnings.simplefilter("ignore", Warning)
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import os
from sklearn.metrics import confusion_matrix
from datetime import timedelta

In [2]:
class MyJP2Dataset(Dataset):
    """Image dataset described by a CSV of (relative image path, label) rows.

    Column 0 of the CSV is joined onto ``root_dir`` to locate the image and
    column 1 is parsed as an integer label.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file: Path of the annotation CSV, read with ``pd.read_csv``.
            root_dir: Directory that the paths in column 0 are relative to.
            transform: Optional callable applied to the opened PIL image.
        """
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label, image_path)`` for the row at ``index``."""
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        hmi = Image.open(img_path)

        # Fall back to the untransformed image. Previously ``image`` was left
        # unbound (UnboundLocalError) whenever no transform was supplied,
        # which is the constructor's default.
        image = self.transform(hmi) if self.transform else hmi

        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))

        return (image, y_label, img_path)

    def __len__(self):
        """Number of rows in the annotation CSV."""
        return len(self.annotations)

In [3]:
# Pick the compute device: the first CUDA GPU when one is available, else the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Turn on cuDNN's benchmark mode, then report which device was selected.
torch.backends.cudnn.benchmark = True
print(device)

cuda:0


In [4]:
# Load Data
# Image root plus one validation CSV per cross-validation fold.
datapath = '/data/hmi_jpgs/'
partition1_path = '/data/hmi_jpgs/vids_data_labels/Fold1_val.csv'
partition2_path = '/data/hmi_jpgs/vids_data_labels/Fold2_val.csv'
partition3_path = '/data/hmi_jpgs/vids_data_labels/Fold3_val.csv'
partition4_path = '/data/hmi_jpgs/vids_data_labels/Fold4_val.csv'

# Shared preprocessing: Resize(512) followed by conversion to a tensor.
transformations = Compose([Resize(512), ToTensor()])


def _make_fold_dataset(fold_csv):
    """Build the dataset for one fold CSV with the shared root and transform."""
    return MyJP2Dataset(csv_file=fold_csv, root_dir=datapath, transform=transformations)


part1 = _make_fold_dataset(partition1_path)
part2 = _make_fold_dataset(partition2_path)
part3 = _make_fold_dataset(partition3_path)
part4 = _make_fold_dataset(partition4_path)

In [5]:
# Every fold uses the same loader settings; shuffling is disabled so batches
# are drawn in dataset order.
_LOADER_OPTIONS = dict(batch_size=24, num_workers=4, shuffle=False)

part1_loader = DataLoader(dataset=part1, **_LOADER_OPTIONS)
part2_loader = DataLoader(dataset=part2, **_LOADER_OPTIONS)
part3_loader = DataLoader(dataset=part3, **_LOADER_OPTIONS)
part4_loader = DataLoader(dataset=part4, **_LOADER_OPTIONS)

In [6]:
# One trained checkpoint per cross-validation fold.
model_PATH1 = 'trained_models/fold1_final.pth'
model_PATH2 = 'trained_models/fold2_final.pth'
model_PATH3 = 'trained_models/fold3_final.pth'
model_PATH4 = 'trained_models/fold4_final.pth'

# map_location=device remaps tensors that were saved from a GPU, so the
# checkpoints also load when this notebook falls back to the CPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints that come
# from a trusted source.
weights1 = torch.load(model_PATH1, map_location=device)
weights2 = torch.load(model_PATH2, map_location=device)
weights3 = torch.load(model_PATH3, map_location=device)
weights4 = torch.load(model_PATH4, map_location=device)

# A single model instance is reused for every fold; its weights are swapped
# in from the matching checkpoint before each evaluation.
test_model = Custom_AlexNet().to(device)

# TODO: generalize checkpoint loading instead of one variable per fold.


In [7]:
def sklearn_Compatible_preds_and_targets(model_prediction_list, model_target_list, model_path_list):
    """Flatten per-batch model outputs into flat Python lists.

    Args:
        model_prediction_list: Iterable of per-batch prediction tensors.
        model_target_list: Iterable of per-batch target tensors.
        model_path_list: Iterable of per-batch sequences of image paths.

    Returns:
        Tuple ``(preds, tgts, path)`` of flat lists, in batch order. Each
        tensor batch is flattened to one dimension before being appended.
    """
    # reshape(-1) always yields a 1-D tensor. The previous squeeze() collapsed
    # a single-sample batch to a 0-d tensor, whose tolist() is a bare scalar
    # that the flattening loop could not iterate over (TypeError).
    preds = [value for batch in model_prediction_list for value in batch.reshape(-1).tolist()]
    tgts = [value for batch in model_target_list for value in batch.reshape(-1).tolist()]
    path = [item for batch in model_path_list for item in batch]
    return preds, tgts, path


def accuracy_score(prediction, target):
    """Compute the TSS and HSS2 skill scores for binary predictions.

    Prints the confusion-matrix counts as a side effect.

    Args:
        prediction: Predicted labels (0 or 1).
        target: True labels (0 or 1).

    Returns:
        Tuple ``(TSS, HSS)``. HSS is reported as 0.0 when its denominator is
        zero (for example when every target and every prediction is 0).
    """
    # Pin the label set so the matrix is always 2x2; without it a sample that
    # contains a single class yields a 1x1 matrix and the unpacking fails.
    TN, FP, FN, TP = confusion_matrix(target, prediction, labels=[0, 1]).ravel()
    print("TP: ", TP, "FP: ", FP, "TN: ", TN, "FN: ", FN)

    # TSS = true-positive rate (recall) minus false-positive rate.
    tp_rate = TP / float(TP + FN) if TP > 0 else 0
    fp_rate = FP / float(FP + TN) if FP > 0 else 0
    TSS = tp_rate - fp_rate

    # HSS2 Computation
    N = TN + FP
    P = TP + FN
    denominator = float(P * (FN + TN) + (TP + FP) * N)
    # Guard the degenerate case instead of dividing by zero.
    HSS = (2 * (TP * TN - FN * FP)) / denominator if denominator else 0.0

    return TSS, HSS

In [8]:
def predict(checkpoint, test_loader, desc ):
    """Evaluate one checkpoint on one loader and tabulate per-image results.

    Loads ``checkpoint['model_state_dict']`` into the module-level
    ``test_model``, runs it in eval mode over ``test_loader`` and prints the
    scores returned by ``accuracy_score`` plus the target class counts.

    Args:
        checkpoint: Mapping holding the weights under ``'model_state_dict'``.
        test_loader: Iterable yielding ``(images, labels, paths)`` batches.
        desc: Heading printed before the results.

    Returns:
        DataFrame with columns ``timestamp``, ``flare_prob`` and ``target``.
    """
    test_model.load_state_dict(checkpoint['model_state_dict'])
    test_model.eval()
    print('***********************', desc, '*************************')

    batch_targets, batch_probs, batch_paths = [], [], []
    with torch.no_grad():
        for images, labels, paths in test_loader:
            # Move the batch onto the selected device.
            images = images.to(device=device)
            labels = labels.to(device=device)
            batch_targets.append(labels)
            batch_paths.append(list(paths))

            # Forward pass; keep only the softmax probability of class 1.
            scores = test_model(images)
            class_probs = softmax(scores, dim=1)
            batch_probs.append(class_probs[:, 1])

    flare_probs, targets, image_paths = sklearn_Compatible_preds_and_targets(
        batch_probs, batch_targets, batch_paths
    )

    # Hard labels at a 0.5 cut-off, used only for the printed skill scores.
    hard_labels = [int(prob >= 0.5) for prob in flare_probs]
    print(accuracy_score(hard_labels, targets))

    prob_list = pd.DataFrame({
        'timestamp': image_paths,
        'flare_prob': flare_probs,
        'target': targets,
    })
    print(prob_list['target'].value_counts())

    # Keep characters 31..-4 of each path and parse them as a timestamp.
    # NOTE(review): the fixed offset presumably skips the directory part and a
    # filename prefix, and drops a 4-character extension -- confirm against
    # the actual path layout.
    stamp_text = prob_list['timestamp'].str[31:-4]
    prob_list['timestamp'] = stamp_text
    prob_list['timestamp'] = pd.to_datetime(prob_list['timestamp'], format='%Y.%m.%d_%H.%M.%S')
    return prob_list
        


In [9]:
# Evaluate each fold's checkpoint on that fold's own validation loader.
fold1 = predict(checkpoint=weights1, test_loader=part1_loader, desc='Fold-1 Results')
fold2 = predict(checkpoint=weights2, test_loader=part2_loader, desc='Fold-2 Results')
fold3 = predict(checkpoint=weights3, test_loader=part3_loader, desc='Fold-3 Results')
fold4 = predict(checkpoint=weights4, test_loader=part4_loader, desc='Fold-4 Results')

*********************** Fold-1 Results *************************
TP:  1328 FP:  1304 TN:  11150 FN:  1006
(0.46427497578406446, 0.4413785199896545)
0    12454
1     2334
Name: target, dtype: int64
*********************** Fold-2 Results *************************
TP:  1380 FP:  5324 TN:  8531 FN:  232
(0.47181379638277693, 0.19694653229950476)
0    13855
1     1612
Name: target, dtype: int64
*********************** Fold-3 Results *************************
TP:  1155 FP:  1738 TN:  12570 FN:  1209
(0.36710817419242225, 0.33574905430288804)
0    14308
1     2364
Name: target, dtype: int64
*********************** Fold-4 Results *************************
TP:  2443 FP:  5041 TN:  8991 FN:  247
(0.5489281535989963, 0.319097109146011)
0    14032
1     2690
Name: target, dtype: int64


In [10]:
# Write each fold's result table to its own CSV file in the working directory.
_RESULT_COLUMNS = ['timestamp', 'flare_prob', 'target']
for _fold_frame, _csv_name in (
    (fold1, r'fold1_res.csv'),
    (fold2, r'fold2_res.csv'),
    (fold3, r'fold3_res.csv'),
    (fold4, r'fold4_res.csv'),
):
    _fold_frame.to_csv(_csv_name, index=False, header=True, columns=_RESULT_COLUMNS)

In [11]:
# Display the Fold-1 result table (timestamp, flare_prob, target) as the cell output.
fold1

Unnamed: 0,timestamp,flare_prob,target
0,2011-01-01 00:00:00,0.005396,0
1,2011-01-01 01:00:00,0.016785,0
2,2011-01-01 02:00:00,0.018118,0
3,2011-01-01 03:00:00,0.006522,0
4,2011-01-01 04:00:00,0.009711,0
...,...,...,...
14783,2018-03-31 19:00:00,0.000375,0
14784,2018-03-31 20:00:00,0.000551,0
14785,2018-03-31 21:00:00,0.000722,0
14786,2018-03-31 22:00:00,0.000584,0


In [15]:
def averaging(df, hours=12):
    """Smooth flare probabilities with a trailing time-window mean.

    For every row, averages ``flare_prob`` over all rows whose timestamp lies
    in the half-open window ``(t - hours, t]``, where ``t`` is that row's
    timestamp.

    Args:
        df: DataFrame with ``timestamp``, ``flare_prob`` and ``target`` columns.
        hours: Window length in hours (default 12, the original fixed value).

    Returns:
        DataFrame with columns ``time`` (timestamp as string), ``prob``
        (window mean) and ``tar`` (the row's own target).
    """
    window = timedelta(hours=hours)
    # Work positionally: the former ``df.timestamp[i]`` was a label lookup and
    # failed (or picked the wrong row) whenever the index was not 0..n-1.
    timestamps = df['timestamp'].reset_index(drop=True)
    probs = df['flare_prob'].reset_index(drop=True)
    targets = df['target'].to_numpy()

    rows = []
    for i, now in enumerate(timestamps):
        in_window = (timestamps <= now) & (timestamps > now - window)
        rows.append([str(now), probs[in_window].mean(), targets[i]])
    return pd.DataFrame(rows, columns=['time', 'prob', 'tar'])
    

def max_voting(df, hours=12):
    """Smooth predictions by majority vote over a trailing time window.

    For every row, the probabilities of all rows whose timestamp lies in
    ``(t - hours, t]`` are binarised with ``threshold`` and the most frequent
    label is kept. On a tie the smaller label (0) wins, because ``np.unique``
    returns sorted values and ``np.argmax`` picks the first maximum.

    Args:
        df: DataFrame with ``timestamp``, ``flare_prob`` and ``target`` columns.
        hours: Window length in hours (default 12, the original fixed value).

    Returns:
        DataFrame with columns ``time`` (timestamp as string), ``prob``
        (winning 0/1 label) and ``tar`` (the row's own target).
    """
    window = timedelta(hours=hours)
    # Work positionally: the former ``df.timestamp[i]`` was a label lookup and
    # failed (or picked the wrong row) whenever the index was not 0..n-1.
    timestamps = df['timestamp'].reset_index(drop=True)
    probs = df['flare_prob'].reset_index(drop=True)
    targets = df['target'].to_numpy()

    rows = []
    for i, now in enumerate(timestamps):
        in_window = (timestamps <= now) & (timestamps > now - window)
        votes = threshold(probs[in_window])
        vals, counts = np.unique(votes, return_counts=True)
        rows.append([str(now), vals[np.argmax(counts)], targets[i]])
    return pd.DataFrame(rows, columns=['time', 'prob', 'tar'])
    

def weighted(df):
    """Smooth flare probabilities with a recency-weighted 6-hour average.

    For every row, collects the rows whose timestamp lies in ``(t - 6h, t]``.
    When the window holds exactly one sample per weight, the result is the
    normalised weighted average of those samples; otherwise (for example at
    the start of the series) it falls back to the plain mean.

    The weights are applied in row order, so the largest weight lands on the
    newest sample only if ``df`` is in chronological order.
    NOTE(review): chronological row order is assumed, not checked.

    Args:
        df: DataFrame with ``timestamp``, ``flare_prob`` and ``target`` columns.

    Returns:
        DataFrame with columns ``time`` (timestamp as string), ``prob``
        (smoothed probability) and ``tar`` (the row's own target).
    """
    weights = np.array([0.025, 0.026, 0.027, 0.0285, 0.0295, 0.0305])
    window = timedelta(hours=6)

    # Work positionally: the former ``df.timestamp[i]`` was a label lookup and
    # failed (or picked the wrong row) whenever the index was not 0..n-1.
    timestamps = df['timestamp'].reset_index(drop=True)
    probs = df['flare_prob'].reset_index(drop=True)
    targets = df['target'].to_numpy()

    rows = []
    for i, now in enumerate(timestamps):
        in_window = (timestamps <= now) & (timestamps > now - window)
        recent = probs[in_window]
        # The old test (size < 12) could never pass for a 6-sample window, so
        # the weights were dead code; and mean(p * w) is not a weighted mean
        # because the weights do not sum to 1. np.average normalises them.
        if len(recent) == len(weights):
            prob = np.average(recent.to_numpy(), weights=weights)
        else:
            prob = recent.mean()
        rows.append([str(now), prob, targets[i]])
    return pd.DataFrame(rows, columns=['time', 'prob', 'tar'])

def threshold(df):
    """Binarise a pandas object of probabilities at 0.5.

    Args:
        df: Pandas object exposing ``to_numpy()`` whose values flatten to
            ``len(df)`` entries.

    Returns:
        1-D integer array holding 1 where the value is >= 0.5 and 0 elsewhere.
    """
    flat = df.to_numpy().reshape(len(df),)
    return (flat >= 0.5).astype(int)

In [16]:
def compare_sequential(fold, func, desc):
    """Apply one smoothing strategy to a fold and print its skill scores.

    Args:
        fold: Per-fold results DataFrame handed to ``func``.
        func: Smoothing function returning a frame with ``prob`` and ``tar``.
        desc: Heading printed above the scores.
    """
    smoothed = func(fold)
    hard_labels = threshold(smoothed['prob'])
    true_labels = smoothed['tar'].to_numpy().reshape(len(smoothed['tar']),)

    # accuracy_score prints the confusion counts itself, before the heading.
    TSS, HSS = accuracy_score(hard_labels, true_labels)

    print('************************', desc, '***************************')
    print('TSS: {:.4f} | HSS: {:.4f}'.format(TSS, HSS))
    print('\n\n')


In [17]:
#Averaging
compare_sequential(fold1, averaging, 'Averaging Fold-1')
compare_sequential(fold2, averaging, 'Averaging Fold-2')
compare_sequential(fold3, averaging, 'Averaging Fold-3')
compare_sequential(fold4, averaging, 'Averaging Fold-4')

#Max Voting
compare_sequential(fold1, max_voting, 'max_voting Fold-1')
compare_sequential(fold2, max_voting, 'max_voting Fold-2')
compare_sequential(fold3, max_voting, 'max_voting Fold-3')
compare_sequential(fold4, max_voting, 'max_voting Fold-4')

#Weighted
compare_sequential(fold1, weighted, 'weighted Fold-1')
compare_sequential(fold2, weighted, 'weighted Fold-2')
compare_sequential(fold3, weighted, 'weighted Fold-3')
compare_sequential(fold4, weighted, 'weighted Fold-4')

TP:  1380 FP:  1230 TN:  11224 FN:  954
************************ Averaging Fold-1 ***************************
TSS: 0.4925 | HSS: 0.4699



TP:  1354 FP:  5343 TN:  8512 FN:  258
************************ Averaging Fold-2 ***************************
TSS: 0.4543 | HSS: 0.1898



TP:  1140 FP:  1788 TN:  12520 FN:  1224
************************ Averaging Fold-3 ***************************
TSS: 0.3573 | HSS: 0.3249



TP:  2436 FP:  5035 TN:  8997 FN:  254
************************ Averaging Fold-4 ***************************
TSS: 0.5468 | HSS: 0.3182



TP:  1359 FP:  1196 TN:  11258 FN:  975
************************ max_voting Fold-1 ***************************
TSS: 0.4862 | HSS: 0.4682



TP:  1348 FP:  5265 TN:  8590 FN:  264
************************ max_voting Fold-2 ***************************
TSS: 0.4562 | HSS: 0.1924



TP:  1118 FP:  1722 TN:  12586 FN:  1246
************************ max_voting Fold-3 ***************************
TSS: 0.3526 | HSS: 0.3252



TP:  2424 FP:  4994 TN:  