In [None]:
from datetime import timedelta
import dateutil
import glob
from intervaltree import IntervalTree
import pandas as pd
import matplotlib.cm as cm
import numpy as np
import pylab as plt
import os

%matplotlib inline

# Load Hand-Picked Events

In [None]:
def _image_start_time(image_name):
    """Parse the start time encoded in a validation image file name.

    The name is split on '_' at most five times and the last piece is kept;
    its extension and final three characters are dropped, and the remaining
    underscores become colons before the text is handed to dateutil.
    """
    stamp = image_name.split('_', 5)[-1]
    stamp = stamp.split('.')[0][:-3]
    return dateutil.parser.parse(stamp.replace('_', ':'))


# One row per image found under ./data/validation_selections/<sat>/:
# (satellite directory name, start time, start time + 5 minutes, file name).
df_rows = []

for sat in os.listdir('./data/validation_selections/'):
    for img in os.listdir(f'./data/validation_selections/{sat}'):
        start_time = _image_start_time(img)
        df_rows.append((sat, start_time, start_time + timedelta(minutes=5), img))

df_hand = pd.DataFrame(df_rows, columns=['sat', 'start_time', 'end_time', 'image_path'])
df_hand

# Load Algorithm Output

In [None]:
# Window sizes whose result files should be evaluated.
WINDOW_SIZES = [60]

# Maps each window size to the sorted, de-duplicated threshold values found
# in the names of the matching result CSVs.
thresholds = {}

for window_size in WINDOW_SIZES:
    matches = glob.glob(f'data/Dec20_2015_storm_F1?_3.0_{window_size}_*.csv')

    found = set()
    for path in matches:
        # The threshold is the last '_'-separated field, minus the
        # 4-character '.csv' suffix.
        found.add(float(path.split('_')[-1][:-4]))

    thresholds[window_size] = sorted(found)

thresholds[60]

In [None]:
# Load the algorithm output for every (window size, threshold) pair.
# df_alg maps (window_size, threshold) -> one DataFrame holding the rows of
# all matching CSVs, with a leading 'sat' column taken from the file name.
df_alg = {}

for window_size in WINDOW_SIZES:
    for threshold in thresholds[window_size]:
        pattern = f'data/Dec20_2015_storm_F1?_3.0_{window_size}_{threshold:.1f}.csv'
        files = sorted(glob.glob(pattern))

        if not files:
            # Nothing matched (e.g. the threshold in the file name is not
            # written with exactly one decimal). Report it and store an empty
            # frame: pd.concat([]) would raise ValueError, and later cells
            # already cope with a threshold that has zero detections.
            print(window_size, threshold, pattern)
            df_alg[window_size, threshold] = pd.DataFrame(
                columns=['sat', 'start_time', 'end_time'])
            continue

        dataframes = []
        for f in files:
            # Fourth '_'-separated field of the path, i.e. the part matched
            # by the 'F1?' wildcard in the pattern.
            sat = f.split("_")[3]
            df = pd.read_csv(f, parse_dates=['start_time', 'end_time'])
            df.insert(0, 'sat', sat)  # scalar is broadcast to every row
            dataframes.append(df)

        df_alg[window_size, threshold] = pd.concat(dataframes).reset_index(drop=True)

df_alg[60,.8]

# Compute True Positives (TP) and False Positives (FP)

In [None]:
# Index the hand-picked events by time interval so overlap queries are cheap.
# Each interval spans [start_time, end_time) and carries the satellite name
# as its data payload.
tree_hand = IntervalTree()

for event in df_hand.itertuples(index=False):
    tree_hand.addi(event.start_time, event.end_time, event.sat)

tree_hand

In [None]:
# Score every threshold against the hand-picked events.
# df_roc maps window_size -> DataFrame with one row per threshold holding the
# true/false positive rates and the raw counts behind them.
df_roc = {}

# Denominator of the false positive rate: the number of 5-minute slots in a
# 5-day span minus the number of hand-picked intervals. It does not depend on
# the threshold, so it is computed once.
# NOTE(review): the 5-day span is hard-coded; presumably it is the length of
# the analysed storm period - confirm.
total_false = timedelta(hours=24*5)/timedelta(minutes=5) - len(tree_hand)

for window_size in WINDOW_SIZES:
    df_rows = []

    for threshold in thresholds[window_size]:
        TP = 0
        FP = 0

        for _, row in df_alg[window_size, threshold].iterrows():
            # Hand-picked intervals overlapping this detection in time.
            results = tree_hand[row.start_time:row.end_time]

            # A detection is a true positive only when an overlapping
            # hand-picked event comes from the same satellite.
            if any(result.data == row.sat for result in results):
                TP += 1
            else:
                FP += 1

        total = TP + FP

        if total == 0:
            # No detections at this threshold: the rates are left undefined.
            df_rows.append((threshold, np.nan, np.nan, TP, FP, total))
        else:
            # NOTE(review): TP counts detections, not distinct hand-picked
            # events, so TPR can exceed 1 when several detections overlap the
            # same event - confirm this is intended.
            df_rows.append((threshold, TP/len(tree_hand), FP/total_false, TP, FP, total))

    # Build the frame once per window size, after all thresholds are scored,
    # so the entry exists even when the window size has no thresholds.
    df_roc[window_size] = pd.DataFrame(df_rows, columns=['threshold', 'TPR', 'FPR', 'TP', 'FP', 'Total_Count'])
df_roc[60]

In [None]:
# ROC curve: one line per window size, colored by the window size's position
# on the 'cool' colormap (each size divided by the largest size).
fig, ax = plt.subplots(figsize=(8, 6))
colors = cm.cool(np.asarray(WINDOW_SIZES) / np.max(WINDOW_SIZES))

for window_size, color in zip(WINDOW_SIZES, colors):
    roc = df_roc[window_size]
    ax.plot(roc.FPR, roc.TPR, color=color, label='Window = %d sec' % window_size)

ax.set_xlabel('False Positive Rate', fontsize=20)
ax.set_ylabel('True Positive Rate', fontsize=20)
# Fixed axis limits: the x-range is narrowed to 0-0.06.
ax.set_xlim([0, 0.06])
ax.set_ylim([0, 1.01])
ax.grid(linestyle='dashed', color='#cccc')
ax.legend()

In [None]:
# For every window size and threshold, list the hand-picked events that no
# detection from the same satellite overlaps in time.
# Iterating `thresholds` directly would yield its keys (the window sizes), so
# walk the window sizes explicitly and then that window's threshold list.
for window_size in WINDOW_SIZES:
    for threshold in thresholds[window_size]:
        # Interval index of this threshold's detections, tagged by satellite.
        tree_alg = IntervalTree()
        for _, row in df_alg[window_size, threshold].iterrows():
            tree_alg[row.start_time:row.end_time] = row.sat

        print(f'Threshold {threshold:.1f} Missing')
        for _, row in df_hand.iterrows():
            results = tree_alg[row.start_time:row.end_time]
            if not any(result.data == row.sat for result in results):
                print(row.start_time, row.end_time, row.image_path)
        print()