In [1]:
import torch
import json
from PIL import Image
from model import Custom_AlexNet, Custom_VGG16, Custom_ResNet34
from torchvision.transforms import Compose, Resize, ToTensor
from torch.nn.functional import softmax
import warnings
warnings.simplefilter("ignore", Warning)
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import os
from sklearn.metrics import confusion_matrix
from datetime import timedelta
# from evaluation import sklearn_Compatible_preds_and_targets

In [2]:
class MyJP2Dataset(Dataset):
    """Image dataset driven by a CSV annotation file.

    The CSV is read with ``pd.read_csv``. For each row, column 0 is joined
    onto ``root_dir`` to form the image path and column 1 is converted to an
    integer label tensor.

    Args:
        csv_file: Path of the annotation CSV.
        root_dir: Directory prefix joined onto every image path from the CSV.
        transform: Optional callable applied to the opened image. When it is
            ``None`` the opened image is returned unchanged.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label, image_path)`` for row ``index``."""
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = Image.open(img_path)

        # Previously `image` was only assigned inside this branch, so a
        # dataset built without a transform raised UnboundLocalError here.
        if self.transform:
            image = self.transform(image)

        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))

        return (image, y_label, img_path)

    def __len__(self):
        """Number of rows in the annotation table."""
        return len(self.annotations)

In [3]:
# Validation data: image root plus one annotation CSV per fold
im_size = 512
datapath = '/scratch/cpandey1/hmi_jpgs_512/'
partition1_path = '../data_labeling/data_labels/Fold1_val.csv'
partition2_path = '../data_labeling/data_labels/Fold2_val.csv'
partition3_path = '../data_labeling/data_labels/Fold3_val.csv'
partition4_path = '../data_labeling/data_labels/Fold4_val.csv'

# Preprocessing shared by every fold: Resize followed by ToTensor
transformations = Compose([Resize(im_size), ToTensor()])

# One dataset per fold; all share the same image root and transform
part1, part2, part3, part4 = (
    MyJP2Dataset(csv_file=fold_csv, root_dir=datapath, transform=transformations)
    for fold_csv in (partition1_path, partition2_path, partition3_path, partition4_path)
)

In [4]:
# Settings common to every fold's loader (shuffle is off for all of them)
loader_kwargs = dict(batch_size=48, num_workers=4, shuffle=False)

part1_loader = DataLoader(dataset=part1, **loader_kwargs)
part2_loader = DataLoader(dataset=part2, **loader_kwargs)
part3_loader = DataLoader(dataset=part3, **loader_kwargs)
part4_loader = DataLoader(dataset=part4, **loader_kwargs)

In [5]:
# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook does not crash at `.to(device)` on a CPU-only machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Saved checkpoint paths, one per architecture and fold
model_PATH1_alex = '../../tsas_models/Model_alex_Epoch_16_fold1.pth'
model_PATH2_alex = '../../tsas_models/Model_alex_Epoch_12_fold2.pth'
model_PATH3_alex = '../../tsas_models/Model_alex_Epoch_14_fold3.pth'
model_PATH4_alex = '../../tsas_models/Model_alex_Epoch_16_fold4.pth'

model_PATH1_vgg = '../../tsas_models/Model_vgg_Epoch_40_fold1.pth'
model_PATH2_vgg = '../../tsas_models/Model_vgg_Epoch_21_fold2.pth'
model_PATH3_vgg = '../../tsas_models/Model_vgg_Epoch_17_fold3.pth'
model_PATH4_vgg = '../../tsas_models/Model_vgg_Epoch_18_fold4.pth'

model_PATH1_resnet = '../../tsas_models/Model_resnet_Epoch_27_fold1.pth'
model_PATH2_resnet = '../../tsas_models/Model_resnet_Epoch_12_fold2.pth'
model_PATH3_resnet = '../../tsas_models/Model_resnet_Epoch_30_fold3.pth'
model_PATH4_resnet = '../../tsas_models/Model_resnet_Epoch_22_fold4.pth'

# Load the checkpoints. `map_location=device` remaps the stored tensors onto
# the device selected above, so a checkpoint saved on a GPU that is not
# available here can still be read.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
weights1_alex = torch.load(model_PATH1_alex, map_location=device)
weights2_alex = torch.load(model_PATH2_alex, map_location=device)
weights3_alex = torch.load(model_PATH3_alex, map_location=device)
weights4_alex = torch.load(model_PATH4_alex, map_location=device)

weights1_vgg = torch.load(model_PATH1_vgg, map_location=device)
weights2_vgg = torch.load(model_PATH2_vgg, map_location=device)
weights3_vgg = torch.load(model_PATH3_vgg, map_location=device)
weights4_vgg = torch.load(model_PATH4_vgg, map_location=device)

weights1_resnet = torch.load(model_PATH1_resnet, map_location=device)
weights2_resnet = torch.load(model_PATH2_resnet, map_location=device)
weights3_resnet = torch.load(model_PATH3_resnet, map_location=device)
weights4_resnet = torch.load(model_PATH4_resnet, map_location=device)

# One model instance per architecture; the per-fold weights are loaded into
# these instances later.
test_model_alex = Custom_AlexNet(ipt_size=(512, 512), train=False).to(device)
test_model_vgg = Custom_VGG16(ipt_size=(512, 512), train=False).to(device)
test_model_resnet = Custom_ResNet34(ipt_size=(512, 512), train=False).to(device)

In [6]:
def sklearn_Compatible_preds_and_targets(model_prediction_list, model_target_list, model_path_list):
    """Flatten per-batch predictions, targets and paths into flat Python lists.

    Args:
        model_prediction_list: Iterable of tensors, one per batch.
        model_target_list: Iterable of tensors, one per batch.
        model_path_list: Iterable of per-batch iterables of paths.

    Returns:
        ``(preds, tgts, path)``: three flat lists in batch order.
    """
    # reshape(-1) always yields a 1-D tensor. The previous squeeze() collapsed
    # a single-sample batch to a 0-d tensor, whose tolist() is a bare scalar
    # that cannot be iterated when flattening.
    preds = [value for batch in model_prediction_list for value in batch.reshape(-1).tolist()]
    tgts = [value for batch in model_target_list for value in batch.reshape(-1).tolist()]
    path = [item for batch in model_path_list for item in batch]
    return preds, tgts, path


def accuracy_score(prediction, target):
    """Compute the TSS and HSS skill scores for binary predictions.

    Args:
        prediction: Predicted labels (0 or 1).
        target: Ground-truth labels (0 or 1).

    Returns:
        ``(TSS, HSS)`` as floats. A rate or score whose denominator is zero
        is reported as 0.0.
    """
    # labels=[0, 1] forces a 2x2 matrix even when only one class occurs;
    # without it ravel() yields a single value and the unpacking fails.
    TN, FP, FN, TP = (int(v) for v in confusion_matrix(target, prediction, labels=[0, 1]).ravel())
    print("TP: ", TP, "FP: ", FP, "TN: ", TN, "FN: ", FN)

    # TSS = true-positive rate (recall) minus false-positive rate
    tp_rate = TP / float(TP + FN) if TP > 0 else 0.0
    fp_rate = FP / float(FP + TN) if FP > 0 else 0.0
    TSS = tp_rate - fp_rate

    # HSS2 computation
    N = TN + FP
    P = TP + FN
    hss_denominator = float(P * (FN + TN) + (TP + FP) * N)
    # The denominator is zero when every sample is a true negative or every
    # sample is a true positive; report 0.0 instead of dividing by zero.
    HSS = (2 * (TP * TN - FN * FP)) / hss_denominator if hss_denominator else 0.0

    return TSS, HSS

In [7]:
def predict(checkpoint, test_model, test_loader, desc ):
    """Run one checkpoint over a loader and collect per-image probabilities.

    Loads ``checkpoint['model_state_dict']`` into ``test_model``, switches it
    to eval mode and iterates ``test_loader`` without gradient tracking. The
    softmax score of class index 1 is kept for every sample, thresholded at
    0.5 and passed to ``accuracy_score``, whose result is printed.

    Args:
        checkpoint: Mapping containing a ``'model_state_dict'`` entry.
        test_model: Model instance that receives the weights.
        test_loader: Iterable yielding ``(data, target, path)`` batches.
        desc: Label printed in the banner line.

    Returns:
        DataFrame with columns ``timestamp`` (the paths yielded by the
        loader), ``flare_prob`` and ``target``.
    """
    test_model.load_state_dict(checkpoint['model_state_dict'])
    test_model.eval()
    print('***********************', desc, '*************************')

    batch_probs, batch_targets, batch_paths = [], [], []
    with torch.no_grad():
        for images, labels, paths in test_loader:
            # Move the batch to the configured device
            images = images.to(device=device)
            labels = labels.to(device=device)
            batch_targets.append(labels)
            batch_paths.append(list(paths))

            # Forward pass, then class probabilities along dim 1
            scores = test_model(images)
            probs = softmax(scores, dim=1)
            batch_probs.append(probs[:, 1])

            del images, labels, scores, probs

    flare_probs, targets, image_paths = sklearn_Compatible_preds_and_targets(
        batch_probs, batch_targets, batch_paths
    )
    hard_preds = [int(prob >= 0.5) for prob in flare_probs]
    print(accuracy_score(hard_preds, targets))

    prob_list = pd.DataFrame(
        {
            'timestamp': image_paths,
            'flare_prob': flare_probs,
            'target': targets,
        }
    )
    print(prob_list['target'].value_counts())
    return prob_list

In [8]:
# Loaders and banner labels shared by the three architectures, in fold order
val_loaders = (part1_loader, part2_loader, part3_loader, part4_loader)
fold_descs = ('Fold-1 Results', 'Fold-2 Results', 'Fold-3 Results', 'Fold-4 Results')

print('*********************** ALEXNET *************************')
fold1_alex, fold2_alex, fold3_alex, fold4_alex = [
    predict(ckpt, test_model_alex, loader, desc)
    for ckpt, loader, desc in zip(
        (weights1_alex, weights2_alex, weights3_alex, weights4_alex), val_loaders, fold_descs
    )
]

print('*********************** VGG16 *************************')
fold1_vgg, fold2_vgg, fold3_vgg, fold4_vgg = [
    predict(ckpt, test_model_vgg, loader, desc)
    for ckpt, loader, desc in zip(
        (weights1_vgg, weights2_vgg, weights3_vgg, weights4_vgg), val_loaders, fold_descs
    )
]

print('*********************** ResNet34 *************************')
fold1_resnet, fold2_resnet, fold3_resnet, fold4_resnet = [
    predict(ckpt, test_model_resnet, loader, desc)
    for ckpt, loader, desc in zip(
        (weights1_resnet, weights2_resnet, weights3_resnet, weights4_resnet), val_loaders, fold_descs
    )
]

*********************** ALEXNET *************************
*********************** Fold-1 Results *************************
TP:  1729 FP:  2225 TN:  10229 FN:  605
(0.5621308867360248, 0.43847814062565577)
0    12454
1     2334
Name: target, dtype: int64
*********************** Fold-2 Results *************************
TP:  1075 FP:  2298 TN:  11557 FN:  537
(0.5010127490232495, 0.3379136966876905)
0    13855
1     1612
Name: target, dtype: int64
*********************** Fold-3 Results *************************
TP:  1660 FP:  3291 TN:  11017 FN:  704
(0.47218847903531064, 0.3241356703323698)
0    14308
1     2364
Name: target, dtype: int64
*********************** Fold-4 Results *************************
TP:  2209 FP:  3549 TN:  10483 FN:  481
(0.5682676982616472, 0.3889743690364631)
0    14032
1     2690
Name: target, dtype: int64
*********************** VGG16 *************************
*********************** Fold-1 Results *************************
TP:  1704 FP:  2067 TN:  10387 FN:  630

In [10]:
def save_results(fold, fold_name, model_name):
    """Write one fold's predictions to ``results/{fold_name}_{model_name}.csv``.

    Args:
        fold: DataFrame with ``timestamp``, ``flare_prob`` and ``target`` columns.
        fold_name: Fold label used in the file name.
        model_name: Model label used in the file name.
    """
    # to_csv does not create missing directories, so make sure the output
    # folder exists before writing.
    os.makedirs('results', exist_ok=True)
    fold.to_csv(f'results/{fold_name}_{model_name}.csv', index=False, header=True, columns=['timestamp', 'flare_prob', 'target'])

# Persist every fold of every model, in the order alex, vgg, resnet
fold_labels = ('fold1', 'fold2', 'fold3', 'fold4')
for model_label, fold_frames in (
    ('alex', (fold1_alex, fold2_alex, fold3_alex, fold4_alex)),
    ('vgg', (fold1_vgg, fold2_vgg, fold3_vgg, fold4_vgg)),
    ('resnet', (fold1_resnet, fold2_resnet, fold3_resnet, fold4_resnet)),
):
    for fold_label, fold_frame in zip(fold_labels, fold_frames):
        save_results(fold_frame, fold_label, model_label)

In [15]:
# Flare details table: derive a datetime `timestamp` from characters
# [16:-4] of each `label` value, then drop the `label` column.
details = (
    pd.read_csv('M_full_dataset_cleaned_1_hours_with_loc_and_time_new.csv')
    .assign(
        timestamp=lambda frame: pd.to_datetime(
            frame['label'].apply(lambda row: row[16:-4]),
            format='%Y.%m.%d_%H.%M.%S',
        )
    )
    .drop(columns=['label'])
)

def aggregateAndAddLocation(fold_name1,fold_name2,fold_name3,fold_name4, model_name):
    """Concatenate four saved fold CSVs and left-join the flare details.

    Reads ``results/{fold_name}_{model_name}.csv`` for each fold name, stacks
    the frames, converts the ``timestamp`` column to datetimes and merges the
    module-level ``details`` table on ``timestamp``.

    Returns:
        The merged DataFrame. It carries an ``index`` column holding each
        row's position within its original fold file.
    """
    fold_frames = [
        pd.read_csv(f'results/{name}_{model_name}.csv')
        for name in (fold_name1, fold_name2, fold_name3, fold_name4)
    ]
    total = pd.concat(fold_frames)

    # NOTE(review): the 47-character offset presumably skips the image root
    # directory plus the date sub-folders and file prefix; confirm it still
    # matches if the data path changes.
    stamp_text = total['timestamp'].apply(lambda row: row[47:-4])
    total['timestamp'] = pd.to_datetime(stamp_text, format='%Y.%m.%d_%H.%M.%S')

    total = total.reset_index()
    return total.merge(details, how='left', on='timestamp')

# Build one merged results frame per architecture
alex_results, vgg_results, resnet_results = (
    aggregateAndAddLocation('fold1', 'fold2', 'fold3', 'fold4', arch)
    for arch in ('alex', 'vgg', 'resnet')
)
# resnet_results

In [18]:
def location_analysis(df, flareclass, limb_threshold=70):
    """Print detection counts for one flare class, split by location.

    Rows whose ``goes_class`` starts with ``flareclass`` are selected and
    their ``fl_location`` strings of the form ``"(x, y)"`` are parsed. A row
    is central when both ``|x|`` and ``|y|`` are within ``limb_threshold``
    and limb when either exceeds it. Rows with ``flare_prob >= 0.5`` count as
    TP, the rest as FN.

    Args:
        df: DataFrame with ``goes_class``, ``fl_location`` and ``flare_prob``.
        flareclass: Prefix of the GOES class to analyse, e.g. ``'X'``.
        limb_threshold: Absolute coordinate bound separating central from
            limb locations. Defaults to 70, the value previously hard-coded.
    """
    # na=False keeps rows with a missing goes_class out of the selection
    # instead of producing an invalid (NaN-containing) boolean mask.
    X = df.loc[df.goes_class.str.startswith(flareclass, na=False)].copy()

    # reindex guarantees both coordinate columns exist even when the
    # selection is empty and the split yields no columns at all.
    coords = (
        X["fl_location"]
        .str.strip(r"[()]")
        .str.split(",", expand=True)
        .reindex(columns=[0, 1])
        .astype(str)
    )
    X["x"] = pd.to_numeric(coords[0]).round(decimals=2).astype(float)
    X["y"] = coords[1].astype(float)

    detected = X.flare_prob >= 0.5
    missed = X.flare_prob < 0.5
    in_center = X.x.abs().le(limb_threshold) & X.y.abs().le(limb_threshold)
    # A row is on the limb when EITHER coordinate is out of range. The old
    # code added the x-out-of-range and y-out-of-range counts, which counted
    # rows that are out of range in both coordinates twice.
    on_limb = X.x.abs().gt(limb_threshold) | X.y.abs().gt(limb_threshold)

    pos_center = int((detected & in_center).sum())
    neg_center = int((missed & in_center).sum())
    pos_limb = int((detected & on_limb).sum())
    neg_limb = int((missed & on_limb).sum())

    print(f'*************{flareclass} Class flares locations***************')
    print("Total Instances: ", len(X))
    print('With in Central Locations')
    print('TP: ', pos_center, 'FN: ', neg_center)
    print('Beyond Central Locations (Limb Locations)')
    print('TP: ', pos_limb, 'FN: ', neg_limb, '\n')
    
# Report X- and M-class location statistics for each architecture
for banner, model_results in (
    ('\n*****************************AlexNet*****************************', alex_results),
    ('\n*****************************VGG16*****************************', vgg_results),
    ('\n*****************************ResNet*****************************', resnet_results),
):
    print(banner)
    for flare_letter in ('X', 'M'):
        location_analysis(model_results.copy(), flare_letter)



*****************************AlexNet*****************************
*************X Class flares locations***************
Total Instances:  880
With in Central Locations
TP:  614 FN:  54
Beyond Central Locations (Limb Locations)
TP:  138 FN:  74 

*************M Class flares locations***************
Total Instances:  8120
With in Central Locations
TP:  4645 FN:  1185
Beyond Central Locations (Limb Locations)
TP:  1276 FN:  1014 


*****************************VGG16*****************************
*************X Class flares locations***************
Total Instances:  880
With in Central Locations
TP:  560 FN:  108
Beyond Central Locations (Limb Locations)
TP:  165 FN:  47 

*************M Class flares locations***************
Total Instances:  8120
With in Central Locations
TP:  4473 FN:  1357
Beyond Central Locations (Limb Locations)
TP:  1273 FN:  1017 


*****************************ResNet*****************************
*************X Class flares locations***************
Total Instances:  

In [19]:
def date_to_filename(df):
    """Replace the ``timestamp`` column with relative image file names.

    Each timestamp is parsed with the format ``%Y-%m-%d %H:%M:%S`` and
    rewritten as ``YYYY/MM/DD/HMI.mYYYY.MM.DD_HH.MM.SS.jpg``.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame with a ``timestamp`` column.

    Returns:
        The same DataFrame object with ``timestamp`` rewritten. Missing
        timestamps stay missing.
    """
    cols = ['timestamp']
    for col in cols:
        stamps = pd.to_datetime(df[col], format='%Y-%m-%d %H:%M:%S')
        # strftime formats each row independently. Building the name from
        # dt.year / dt.month etc. broke for the whole column as soon as one
        # value was NaT, because those fields then become floats ("2010.0").
        df[col] = stamps.dt.strftime('%Y/%m/%d/HMI.m%Y.%m.%d_%H.%M.%S.jpg')

    return df

In [25]:
def save_results_final(df, modelname):
    """Write one CSV per GOES class (X, M, C) for a model's merged results.

    Files are written to ``results/{modelname}_{x|m|c}_class.csv`` with the
    columns ``timestamp``, ``flare_prob``, ``goes_class``, ``fl_location``
    and ``flare_start``. The input frame is not modified.

    Args:
        df: Merged results DataFrame containing the columns listed above.
        modelname: Model label used in the file names.
    """
    # to_csv does not create missing directories.
    os.makedirs('results', exist_ok=True)
    cols = ['timestamp', 'flare_prob', 'goes_class', 'fl_location', 'flare_start']
    for class_prefix, file_tag in (('X', 'x'), ('M', 'm'), ('C', 'c')):
        # na=False: rows with a missing goes_class are left out of every
        # file rather than producing an invalid boolean mask.
        subset = df.loc[df.goes_class.str.startswith(class_prefix, na=False)]
        subset.to_csv(f'results/{modelname}_{file_tag}_class.csv', index=False, header=True, columns=cols)
# Export the per-class CSVs for each architecture
for merged_results, model_label in (
    (alex_results, 'alex'),
    (vgg_results, 'vgg'),
    (resnet_results, 'resnet'),
):
    save_results_final(merged_results, model_label)

In [None]:

# NOTE(review): `df` is not assigned at module level by any visible cell, so
# this cell presumably relies on a frame defined elsewhere; confirm which
# results frame is intended before running on a fresh kernel.
new_df = date_to_filename(df)
cols = ['timestamp', 'flare_prob', 'goes_class', 'fl_location', 'flare_start']

# X and M rows are exported in full; C rows only when flare_prob >= 0.8
goes_prefix = new_df.goes_class.str
df_x = new_df.loc[goes_prefix.startswith('X')]
df_m = new_df.loc[goes_prefix.startswith('M')]
df_c = new_df.loc[goes_prefix.startswith('C') & (new_df.flare_prob >= 0.8)]

for class_frame, out_name in ((df_x, r'x_class.csv'), (df_m, r'm_class.csv'), (df_c, r'c_class.csv')):
    class_frame.to_csv(out_name, index=False, header=True, columns=cols)