In [3]:
import os
import pandas as pd
import math
from sklearn.metrics import confusion_matrix

def print_filenames(directory):
    """Print the name of every regular file found directly inside ``directory``.

    Sub-directories (and anything else that is not a file) are skipped; the
    listing is not recursive and follows the order returned by ``os.listdir``.

    Args:
        directory: Path of the directory to list.
    """
    for entry in os.listdir(directory):
        candidate = os.path.join(directory, entry)
        # Guard clause: ignore anything that is not a regular file
        if not os.path.isfile(candidate):
            continue
        print(entry)

# Directory containing the prediction CSV files (relative to the working directory)
directory_path = "preds/"

# List the files available in that directory
print_filenames(directory_path)

val_bce_resnet.csv
val_bcesf_resnet.csv
test_bce_resnet.csv
test_bcesf_resnet.csv


In [11]:
def accuracy_score(prediction, target):
    """Compute TSS, HSS and their geometric mean for binary predictions.

    Args:
        prediction: Predicted class labels (assumed to be encoded as 0/1).
        target: Ground-truth class labels (assumed to be encoded as 0/1).

    Returns:
        tuple: ``(TSS, HSS, geomean)`` where

        * ``TSS`` is the true-positive rate minus the false-positive rate,
        * ``HSS`` is ``2*(TP*TN - FN*FP) / (P*(FN+TN) + (TP+FP)*N)`` with
          ``P = TP + FN`` and ``N = TN + FP`` (0.0 when the denominator is 0),
        * ``geomean`` is ``sqrt(|TSS| * |HSS|)``.
    """
    # Pin the label set so the matrix is always 2x2; without it a sample that
    # contains a single class yields a 1x1 matrix and the unpacking below fails.
    matrix = confusion_matrix(target, prediction, labels=[0, 1])
    # Plain Python ints: no fixed-width overflow in the products below
    TN, FP, FN, TP = (int(count) for count in matrix.ravel())

    # TSS = true-positive rate (recall) - false-positive rate
    tp_rate = TP / float(TP + FN) if TP > 0 else 0
    fp_rate = FP / float(FP + TN) if FP > 0 else 0
    TSS = tp_rate - fp_rate

    # HSS2 Computation
    N = TN + FP
    P = TP + FN
    hss_denominator = float(P * (FN + TN) + (TP + FP) * N)
    # A zero denominator (e.g. empty or single-class input) means no measurable skill
    HSS = (2 * (TP * TN - FN * FP)) / hss_denominator if hss_denominator else 0.0

    geomean = math.sqrt(abs(TSS) * abs(HSS))
#     geomean = (2 * TSS * HSS)/(TSS+HSS)

    return TSS, HSS, geomean

def find_optimal_threshold(file_path):
    """Find the probability threshold that maximises the TSS/HSS geometric mean.

    Reads the CSV at ``file_path`` (columns ``flare_prob`` and ``target`` are used),
    evaluates the thresholds 0.00, 0.01, ..., 1.00 and keeps the first one that
    reaches the highest geomean as reported by ``accuracy_score``.

    Args:
        file_path: Path of the predictions CSV file.

    Returns:
        tuple: ``(best_threshold, best_TSS, best_HSS, best_geomean)``.
    """
    data = pd.read_csv(file_path)
    thresholds = [i / 100 for i in range(101)]

    # Start from -inf rather than 0: with a strict ">" comparison a starting
    # value of 0 would leave the threshold as None whenever every geomean is 0,
    # and callers comparing probabilities against None would then fail.
    best_geomean = -float('inf')
    best_threshold = None
    best_TSS = None
    best_HSS = None

    for threshold in thresholds:
        prediction = (data['flare_prob'] > threshold).astype(int)
        TSS, HSS, geomean = accuracy_score(prediction, data['target'])
        if geomean > best_geomean:
            best_geomean = geomean
            best_threshold = threshold
            best_TSS = TSS
            best_HSS = HSS

    return best_threshold, best_TSS, best_HSS, best_geomean

def main():
    """Evaluate each model/loss-function pair on its validation and test predictions.

    For every pair, the threshold is selected on the validation CSV with
    ``find_optimal_threshold`` and then applied unchanged to the test CSV.

    Returns:
        pandas.DataFrame: one row per (model, loss function) holding the selected
        threshold and the TSS / HSS / geomean values for validation and test.
    """
    preds_dir = 'preds/'
    loss_functions = ['bce', 'bcesf']
    models = ['resnet']
    data = []

    for model in models:
        for loss_function in loss_functions:
            val_file_path = os.path.join(preds_dir, f'val_{loss_function}_{model}.csv')
            test_file_path = os.path.join(preds_dir, f'test_{loss_function}_{model}.csv')

            val_optimal_threshold, val_TSS, val_HSS, val_geomean = find_optimal_threshold(val_file_path)

            # Read the test predictions once instead of parsing the same file twice
            test_data = pd.read_csv(test_file_path)
            test_prediction = (test_data['flare_prob'] > val_optimal_threshold).astype(int)
            test_TSS, test_HSS, test_geomean = accuracy_score(test_prediction, test_data['target'])

            data.append({
                'Model': model,
                'Loss Function': loss_function,
                'Val Optimal Threshold': val_optimal_threshold,
                'Val TSS': val_TSS,
                'Val HSS': val_HSS,
                'Val Geomean': val_geomean,
                'Test TSS': test_TSS,
                'Test HSS': test_HSS,
                'Test Geomean': test_geomean
            })

    df = pd.DataFrame(data)
    return df
#     df.to_csv('evaluation_results.csv', index=False)
#     print("Evaluation results saved to evaluation_results.csv")

if __name__ == "__main__":
    # Build the summary table. A bare `df` expression nested inside this `if`
    # body is not echoed by the notebook (only a trailing top-level expression
    # is), so the no-op statement was dropped; evaluate `df` at the top level
    # of a cell to display the table.
    df = main()

In [12]:
 # Display the summary table returned by main()
 df

Unnamed: 0,Model,Loss Function,Val Optimal Threshold,Val TSS,Val HSS,Val Geomean,Test TSS,Test HSS,Test Geomean
0,resnet,bce,0.69,0.587022,0.304834,0.423018,0.504311,0.315022,0.398584
1,resnet,bcesf,0.69,0.592184,0.289324,0.413924,0.565432,0.326887,0.429921


In [15]:
import os
import pandas as pd
import math
from sklearn.metrics import confusion_matrix

def accuracy_score(prediction, target):
    """Compute TSS, HSS and their geometric mean for binary predictions.

    Args:
        prediction: Predicted class labels (assumed to be encoded as 0/1).
        target: Ground-truth class labels (assumed to be encoded as 0/1).

    Returns:
        tuple: ``(TSS, HSS, geomean)`` where

        * ``TSS`` is the true-positive rate minus the false-positive rate,
        * ``HSS`` is ``2*(TP*TN - FN*FP) / (P*(FN+TN) + (TP+FP)*N)`` with
          ``P = TP + FN`` and ``N = TN + FP`` (0.0 when the denominator is 0),
        * ``geomean`` is ``sqrt(|TSS| * |HSS|)``.
    """
    # Pin the label set so the matrix is always 2x2; a longitude subset that
    # contains a single class would otherwise yield a 1x1 matrix and the
    # four-way unpacking below would raise.
    matrix = confusion_matrix(target, prediction, labels=[0, 1])
    # Plain Python ints: no fixed-width overflow in the products below
    TN, FP, FN, TP = (int(count) for count in matrix.ravel())

    # TSS = true-positive rate (recall) - false-positive rate
    tp_rate = TP / float(TP + FN) if TP > 0 else 0
    fp_rate = FP / float(FP + TN) if FP > 0 else 0
    TSS = tp_rate - fp_rate

    # HSS2 Computation
    N = TN + FP
    P = TP + FN
    hss_denominator = float(P * (FN + TN) + (TP + FP) * N)
    # A zero denominator (e.g. empty or single-class input) means no measurable skill
    HSS = (2 * (TP * TN - FN * FP)) / hss_denominator if hss_denominator else 0.0

    geomean = math.sqrt(abs(TSS) * abs(HSS))

    return TSS, HSS, geomean

def find_optimal_threshold(data, threshold_range):
    """Return the threshold from ``threshold_range`` with the highest geomean.

    Each candidate is applied to ``data['flare_prob']`` (strictly greater than the
    candidate counts as a positive) and scored against ``data['target']`` with
    ``accuracy_score``. Ties keep the earliest candidate; ``None`` is returned
    when no candidate beats the initial ``-inf`` score (e.g. an empty range).

    Args:
        data: Table providing the ``flare_prob`` and ``target`` columns.
        threshold_range: Iterable of candidate thresholds, tried in order.

    Returns:
        The winning threshold, or ``None``.
    """
    probabilities = data['flare_prob']
    labels = data['target']

    winner, top_score = None, -float('inf')
    for cutoff in threshold_range:
        # Only the third element (geomean) drives the selection
        _, _, score = accuracy_score((probabilities > cutoff).astype(int), labels)
        if score > top_score:
            winner, top_score = cutoff, score

    return winner

def main():
    """Score each model/loss-function pair over cumulative ``lon_fwt`` windows.

    For every pair, the validation and test prediction CSVs are read from
    ``preds/``, one threshold is selected on the full validation set with
    ``find_optimal_threshold``, and that threshold is applied to the rows of both
    splits whose ``lon_fwt`` lies inside each window (bounds inclusive).

    Returns:
        pandas.DataFrame: one row per (model, loss function, window) holding the
        threshold and the TSS / HSS / geomean values for validation and test.
    """
    preds_dir = 'preds/'
    loss_functions = ['bce', 'bcesf']
    models = ['resnet']
    # Candidate thresholds 0.00, 0.01, ..., 1.00 (step of 0.01)
    threshold_range = [step / 100 for step in range(101)]
    lon_windows = [(-30, 30), (-60, 60), (-90, 90)]

    def within(frame, window):
        # Rows whose lon_fwt lies in the closed interval [lower, upper]
        lower, upper = window
        return frame[(frame['lon_fwt'] >= lower) & (frame['lon_fwt'] <= upper)]

    def score(frame, cutoff):
        # Threshold the probabilities and evaluate them against the targets
        return accuracy_score((frame['flare_prob'] > cutoff).astype(int), frame['target'])

    rows = []
    for model in models:
        for loss_function in loss_functions:
            val_data = pd.read_csv(os.path.join(preds_dir, f'val_{loss_function}_{model}.csv'))
            test_data = pd.read_csv(os.path.join(preds_dir, f'test_{loss_function}_{model}.csv'))

            # The threshold is tuned once, on the whole validation set
            val_optimal_threshold = find_optimal_threshold(val_data, threshold_range)

            # ...and reused unchanged for every window of both splits
            for lon_range in lon_windows:
                val_TSS, val_HSS, val_geomean = score(within(val_data, lon_range), val_optimal_threshold)
                test_TSS, test_HSS, test_geomean = score(within(test_data, lon_range), val_optimal_threshold)

                rows.append({
                    'Model': model,
                    'Loss Function': loss_function,
                    'lon_fwt Range': lon_range,
                    'Optimal Validation Threshold': val_optimal_threshold,
                    'TSS (Validation)': val_TSS,
                    'HSS (Validation)': val_HSS,
                    'Geomean (Validation)': val_geomean,
                    'TSS (Test)': test_TSS,
                    'HSS (Test)': test_HSS,
                    'Geomean (Test)': test_geomean
                })

    return pd.DataFrame(rows)

if __name__ == "__main__":
    # Keep the cumulative-window results in their own variable for the export cell
    n_results_df = main()
    


In [19]:
# Write the results table held in n_results_df to CSV (header row, no index column)
n_results_df.to_csv('overall_Result.csv', index=False, header=True)

In [17]:
import os
import pandas as pd
import math
from sklearn.metrics import confusion_matrix

def accuracy_score(prediction, target):
    """Compute TSS, HSS and their geometric mean for binary predictions.

    Args:
        prediction: Predicted class labels (assumed to be encoded as 0/1).
        target: Ground-truth class labels (assumed to be encoded as 0/1).

    Returns:
        tuple: ``(TSS, HSS, geomean)`` where

        * ``TSS`` is the true-positive rate minus the false-positive rate,
        * ``HSS`` is ``2*(TP*TN - FN*FP) / (P*(FN+TN) + (TP+FP)*N)`` with
          ``P = TP + FN`` and ``N = TN + FP`` (0.0 when the denominator is 0),
        * ``geomean`` is ``sqrt(|TSS| * |HSS|)``.
    """
    # Pin the label set so the matrix is always 2x2; a longitude zone that
    # contains a single class would otherwise yield a 1x1 matrix and the
    # four-way unpacking below would raise.
    matrix = confusion_matrix(target, prediction, labels=[0, 1])
    # Plain Python ints: no fixed-width overflow in the products below
    TN, FP, FN, TP = (int(count) for count in matrix.ravel())

    # TSS = true-positive rate (recall) - false-positive rate
    tp_rate = TP / float(TP + FN) if TP > 0 else 0
    fp_rate = FP / float(FP + TN) if FP > 0 else 0
    TSS = tp_rate - fp_rate

    # HSS2 Computation
    N = TN + FP
    P = TP + FN
    hss_denominator = float(P * (FN + TN) + (TP + FP) * N)
    # A zero denominator (e.g. empty or single-class input) means no measurable skill
    HSS = (2 * (TP * TN - FN * FP)) / hss_denominator if hss_denominator else 0.0

    geomean = math.sqrt(abs(TSS) * abs(HSS))

    return TSS, HSS, geomean

def find_optimal_threshold(data, threshold_range):
    """Pick the candidate threshold whose predictions score the highest geomean.

    A row is predicted positive when its ``flare_prob`` is strictly greater than
    the candidate; predictions are scored against ``data['target']`` through
    ``accuracy_score``. The first candidate reaching the best score wins, and
    ``None`` is returned if no candidate improves on the initial ``-inf``.

    Args:
        data: Table providing the ``flare_prob`` and ``target`` columns.
        threshold_range: Iterable of candidate thresholds, tried in order.

    Returns:
        The selected threshold, or ``None``.
    """
    flare_probability = data['flare_prob']
    truth = data['target']

    chosen = None
    highest = -float('inf')
    for candidate in threshold_range:
        predicted = (flare_probability > candidate).astype(int)
        # accuracy_score returns (TSS, HSS, geomean); only geomean is compared
        current = accuracy_score(predicted, truth)[2]
        if current > highest:
            highest = current
            chosen = candidate

    return chosen

def main():
    """Score each model/loss-function pair per ``lon_fwt`` zone.

    For every pair, the validation and test prediction CSVs are read from
    ``preds/``, one threshold is selected on the full validation set with
    ``find_optimal_threshold``, and that threshold is applied to each zone of
    both splits. A zone is a list of ``(lower, upper)`` intervals whose matching
    rows are concatenated before scoring.

    Returns:
        pandas.DataFrame: one row per (model, loss function, zone) holding the
        threshold and the TSS / HSS / geomean values for validation and test.
    """
    preds_dir = 'preds/'
    loss_functions = ['bce', 'bcesf']
    models = ['resnet']
    # Candidate thresholds 0.00, 0.01, ..., 1.00 (step of 0.01)
    threshold_range = [step / 100 for step in range(101)]

    # NOTE(review): interval bounds are inclusive on both ends, so a row with
    # lon_fwt exactly equal to +/-30 or +/-60 is counted in two adjacent zones
    # -- confirm this is intended.
    lon_ranges = [[(-30, 30)], [(-60, -30), (30, 60)], [(-90, -60), (60, 90)]]

    def select_zone(frame, zone):
        # Stack the rows of every closed interval [lower, upper] in the zone
        pieces = []
        for lower, upper in zone:
            pieces.append(frame[(frame['lon_fwt'] >= lower) & (frame['lon_fwt'] <= upper)])
        return pd.concat(pieces)

    def score(frame, cutoff):
        # Threshold the probabilities and evaluate them against the targets
        return accuracy_score((frame['flare_prob'] > cutoff).astype(int), frame['target'])

    rows = []
    for model in models:
        for loss_function in loss_functions:
            val_data = pd.read_csv(os.path.join(preds_dir, f'val_{loss_function}_{model}.csv'))
            test_data = pd.read_csv(os.path.join(preds_dir, f'test_{loss_function}_{model}.csv'))

            # The threshold is tuned once, on the whole validation set
            val_optimal_threshold = find_optimal_threshold(val_data, threshold_range)

            # ...and reused unchanged for every zone of both splits
            for lon_range_group in lon_ranges:
                val_subset = select_zone(val_data, lon_range_group)
                test_subset = select_zone(test_data, lon_range_group)

                val_TSS, val_HSS, val_geomean = score(val_subset, val_optimal_threshold)
                test_TSS, test_HSS, test_geomean = score(test_subset, val_optimal_threshold)

                rows.append({
                    'Model': model,
                    'Loss Function': loss_function,
                    'lon_fwt Range': lon_range_group,
                    'Optimal Validation Threshold': val_optimal_threshold,
                    'TSS (Validation)': val_TSS,
                    'HSS (Validation)': val_HSS,
                    'Geomean (Validation)': val_geomean,
                    'TSS (Test)': test_TSS,
                    'HSS (Test)': test_HSS,
                    'Geomean (Test)': test_geomean
                })

    return pd.DataFrame(rows)

if __name__ == "__main__":
    # Keep the per-zone results in their own variable for the export cell
    Zone_df = main()
#     print(n_results_df)


In [20]:
# Write the per-zone results table to CSV (header row, no index column)
Zone_df.to_csv('zone.csv', index=False, header=True)