In [1]:
import pandas as pd
import urllib.request
import datetime as dt
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from haversine import haversine, Unit
from shapely.geometry import Point
import geopandas as gpd
from geopandas import GeoDataFrame
import pytz
from sklearn.metrics import accuracy_score
import numpy as np
from prettytable import PrettyTable
from tqdm import tqdm
pd.set_option('display.max_columns', None)
p = print

### Initial Setup, Dataframe Creation

In [2]:
# rounds timestamps to nearest minute on the dot
def round_secs(x):
    x = x + timedelta(minutes = 1)
    x = x.replace(second=0)
    return x

# determines if a WFABBA detection is within the same direction as the camera
def is_in_camera_direction(camera_geometry_pt, direction, wfabba_geometry_pt):
    if direction == "north":
        # Has to be true for the image to be in front of the camera
        return wfabba_geometry_pt.y >= camera_geometry_pt.y
    elif direction == "south":
        return wfabba_geometry_pt.y <= camera_geometry_pt.y
    elif direction == "east":
        return wfabba_geometry_pt.x >= camera_geometry_pt.x
    elif direction == "west":
        return wfabba_geometry_pt.x <= camera_geometry_pt.x
    else:
        # unknown or something else
        pass
    
# finds any matches with specified WFABBA dataset based off of 
# whether distance to camera is within specified radius & camera direction
def matches_distance_prox(camera_geometry, direction, radius_miles, wfabba_df):    
    wfabba_df["distance_m"] = wfabba_df["geometry"].distance(camera_geometry)
    wfabba_df["distance_mi"] = wfabba_df["distance_m"]/1609.344        
    match_results_df = wfabba_df[(wfabba_df["distance_mi"] <= radius_miles)].copy()
    
    #filter for detections within same direction
    match_results_df["is_in_direction"] = match_results_df.apply(
        lambda row: is_in_camera_direction(camera_geometry, direction, row["geometry"]), axis=1
    )
    match_results_df = match_results_df[match_results_df["is_in_direction"] == True]

    return match_results_df

In [3]:
# definte the processed and raw data directories
processed_data_dir = "../../data/processed/wfabba/"
raw_data_dir = "../../data/raw/"
# read in GOES 16 inputs
wfabba_goes_16_2019_df = pd.read_csv(processed_data_dir + "GOES-16-2019.csv")
wfabba_goes_16_2020_df = pd.read_csv(processed_data_dir + "GOES-16-2020.csv")
wfabba_goes_16_jan_2021_df = pd.read_csv(processed_data_dir + "GOES-16-Jan-2021.csv")
wfabba_goes_16_2021_df = pd.read_csv(processed_data_dir + "GOES-16-2021.csv")
wfabba_goes_16_2022_df = pd.read_csv(processed_data_dir + "GOES-16-2022.csv")
# get rid of unnecessary columns including ones which contain the same values or all NaN
wfabba_goes_16_2019_df = wfabba_goes_16_2019_df.drop(columns = ["Unnamed: 0", "Algorithm","Instrument","DataSource", "DataCreationTimestamp", "NavProjSubPtLong", "ActualSatSubPtLong", "NumFire", "Line", "Element"])
wfabba_goes_16_2020_df = wfabba_goes_16_2020_df.drop(columns = ["Unnamed: 0", "Algorithm","Instrument","DataSource", "DataCreationTimestamp", "NavProjSubPtLong", "ActualSatSubPtLong", "NumFire", "Line", "Element"])
wfabba_goes_16_jan_2021_df = wfabba_goes_16_jan_2021_df.drop(columns = ["Unnamed: 0", "Algorithm","Instrument","DataSource", "DataCreationTimestamp", "NavProjSubPtLong", "ActualSatSubPtLong", "NumFire", "Line", "Element"])
wfabba_goes_16_2021_df = wfabba_goes_16_2021_df.drop(columns = ["Unnamed: 0", "Algorithm","Instrument","DataSource", "DataCreationTimestamp", "NavProjSubPtLong", "ActualSatSubPtLong", "NumFire", "Line", "Element"]) #2021 detections
wfabba_goes_16_2022_df = wfabba_goes_16_2022_df.drop(columns = ["Unnamed: 0", "Algorithm","Instrument","DataSource", "DataCreationTimestamp", "NavProjSubPtLong", "ActualSatSubPtLong", "NumFire", "Line", "Element"]) #2022 detections
# filter out any January data in wfabba_goes_16_2021_df since it already exists in wfabba_goes_16_jan_2021_df
print(len(wfabba_goes_16_2021_df))
wfabba_goes_16_2021_df = wfabba_goes_16_2021_df[wfabba_goes_16_2021_df["Timestamp"] >= "2021-02-01"]
wfabba_goes_16_2021_df = wfabba_goes_16_2021_df.reset_index()
wfabba_goes_16_2021_df = wfabba_goes_16_2021_df.drop(columns=["index"])
print(len(wfabba_goes_16_2021_df))
# wfabba_goes_16_2021_df
# join all GOES-16 dataframes into unified wfabba_goes_16_df
wfabba_goes_16_df = pd.concat([wfabba_goes_16_2019_df, wfabba_goes_16_2020_df, wfabba_goes_16_jan_2021_df, wfabba_goes_16_2021_df, wfabba_goes_16_2022_df])
wfabba_goes_16_df["timestamp_converted"] = pd.to_datetime(wfabba_goes_16_df["Timestamp"], infer_datetime_format=True, origin="unix", utc=True)
wfabba_goes_16_df = wfabba_goes_16_df.reset_index()
wfabba_goes_16_df = wfabba_goes_16_df.drop(columns=["index"])
# wfabba_goes_16_df
#read in GOES 17 inputs
wfabba_goes_17_2019_df = pd.read_csv(processed_data_dir + "GOES-17-2019.csv")
wfabba_goes_17_2020_df = pd.read_csv(processed_data_dir + "GOES-17-2020.csv")
wfabba_goes_17_jan_2021_df = pd.read_csv(processed_data_dir + "GOES-17-Jan-2021.csv")
wfabba_goes_17_2021_df = pd.read_csv(processed_data_dir + "GOES-17-2021.csv")
wfabba_goes_17_2022_df = pd.read_csv(processed_data_dir + "GOES-17-2022.csv")
#get rid of unnecessary columns including ones which contain the same values or all NaN
wfabba_goes_17_2019_df = wfabba_goes_17_2019_df.drop(columns = ["Unnamed: 0", "Algorithm","Instrument","DataSource", "DataCreationTimestamp", "NavProjSubPtLong", "ActualSatSubPtLong", "NumFire", "Line", "Element"])
wfabba_goes_17_2020_df = wfabba_goes_17_2020_df.drop(columns = ["Unnamed: 0", "Algorithm","Instrument","DataSource", "DataCreationTimestamp", "NavProjSubPtLong", "ActualSatSubPtLong", "NumFire", "Line", "Element"])
wfabba_goes_17_jan_2021_df = wfabba_goes_17_jan_2021_df.drop(columns = ["Unnamed: 0", "Algorithm","Instrument","DataSource", "DataCreationTimestamp", "NavProjSubPtLong", "ActualSatSubPtLong", "NumFire", "Line", "Element"])
wfabba_goes_17_2021_df = wfabba_goes_17_2021_df.drop(columns = ["Unnamed: 0", "Algorithm","Instrument","DataSource", "DataCreationTimestamp", "NavProjSubPtLong", "ActualSatSubPtLong", "NumFire", "Line", "Element"]) #2021 detections
wfabba_goes_17_2022_df = wfabba_goes_17_2022_df.drop(columns = ["Unnamed: 0", "Algorithm","Instrument","DataSource", "DataCreationTimestamp", "NavProjSubPtLong", "ActualSatSubPtLong", "NumFire", "Line", "Element"]) #2022 detections
# filter out any January data in wfabba_goes_17_2021_df since it already exists in wfabba_goes_17_jan_2021_df
print(len(wfabba_goes_17_2021_df))
wfabba_goes_17_2021_df = wfabba_goes_17_2021_df[wfabba_goes_17_2021_df["Timestamp"] >= "2021-02-01"]
wfabba_goes_17_2021_df = wfabba_goes_17_2021_df.reset_index()
wfabba_goes_17_2021_df = wfabba_goes_17_2021_df.drop(columns=["index"])
print(len(wfabba_goes_17_2021_df))
# wfabba_goes_17_2021_df
# join all GOES-17 dataframes into unified wfabba_goes_17_df
wfabba_goes_17_df = pd.concat([wfabba_goes_17_2019_df, wfabba_goes_17_2020_df, wfabba_goes_17_jan_2021_df, wfabba_goes_17_2021_df, wfabba_goes_17_2022_df])
wfabba_goes_17_df["timestamp_converted"] = pd.to_datetime(wfabba_goes_17_df["Timestamp"], infer_datetime_format=True, origin="unix", utc=True)
wfabba_goes_17_df = wfabba_goes_17_df.reset_index()
wfabba_goes_17_df = wfabba_goes_17_df.drop(columns=["index"])
# wfabba_goes_17_df
#convert WFABBA GOES 16 coordinates from EPSG 4326 to EPSG 3310
coords = [Point(xy) for xy in zip(wfabba_goes_16_df['Longitude'], wfabba_goes_16_df['Latitude'])]
wfabba_goes_16_df = GeoDataFrame(wfabba_goes_16_df, crs = "EPSG:4326", geometry = coords) 
wfabba_goes_16_df = wfabba_goes_16_df.to_crs('EPSG:3310')
# wfabba_goes_16_df[["Latitude","Longitude","geometry"]]
#convert WFABBA GOES 17 coordinates from EPSG 4326 to EPSG 3310
coords = [Point(xy) for xy in zip(wfabba_goes_17_df['Longitude'], wfabba_goes_17_df['Latitude'])]
wfabba_goes_17_df = GeoDataFrame(wfabba_goes_17_df, crs = "EPSG:4326", geometry = coords) 
wfabba_goes_17_df = wfabba_goes_17_df.to_crs('EPSG:3310')
# wfabba_goes_17_df[["Latitude","Longitude","geometry"]]
# read in camera metadata
camera_metadata_df = pd.read_csv("../../data/processed/camera_metadata_hpwren.csv")
# camera_metadata_df
# read in camera metadata
camera_metadata_df = pd.read_csv("../../data/processed/camera_image_id_mappings.csv")
# camera_metadata_df
#Create dataframe for every minute of specified time period
times = []
start = datetime(2019, 6 , 1, 0, 0, 0, 0, pytz.UTC)
end = datetime(2021, 7, 11, 23, 59, 0, 0, pytz.UTC)

while start <= end:
    times.append(start)
    start = start + timedelta(minutes = 1)

minutes_df = pd.DataFrame(times, columns = ["timestamp"])
# minutes_df
# Create testing SmokeyNet df
df_test = pd.read_json(raw_data_dir + "smokeynet_test.json", orient="index").reset_index().rename(columns={"index":"filepath"})
df_test["type"] = "test"
# df_test
#Create validating SmokeyNet df
df_valid = pd.read_json(raw_data_dir + "smokeynet_valid.json", orient="index").reset_index().rename(columns={"index":"filepath"})
df_valid["type"] = "valid"
# df_valid
#Join the SmokeyNet DFs together. For now just joining validation and test DFs
df_labels = pd.concat([df_test, df_valid]).reset_index().drop(columns = ["index"])
# df_labels
# set the date and year columns
df_labels["date"] = df_labels["camera_name"].str.split("_", n=1, expand=True)[0]
df_labels["year"] = df_labels["date"].str[:4]
df_labels["date"] = pd.to_datetime(df_labels["date"])
# df_labels
# keeping only entries from 2019-06-01 onwards
df_labels_filtered = df_labels[df_labels["date"] >= "2019-06-01"].reset_index().drop(columns=["index"])
# df_labels_filtered
#create time, datetime, event_name, camera_name attributes
df_labels_filtered["time"] = df_labels_filtered["filepath"].str.split("/").str[1]
df_labels_filtered["time"] = df_labels_filtered["time"].str.split("_").str[0]
df_labels_filtered["datetime"] = pd.to_datetime(df_labels_filtered["time"], unit="s", origin="unix", utc=True)
df_labels_filtered["event_name"] = df_labels_filtered["filepath"].str.split("/").str[0]
df_labels_filtered["camera_name"] = df_labels_filtered["camera_name"].str.split("_").str[-1]
# df_labels_filtered
# join SmokeyNet data with camera metadata
df_labels_filtered = df_labels_filtered.merge(camera_metadata_df, left_on="camera_name", right_on="image_id", how="left")
# df_labels_filtered
# convert joined SmokeyNet-camera metadata dataframe's coordinates from EPSG 4326 to EPSG 3310
coords = [Point(xy) for xy in zip(df_labels_filtered['long'], df_labels_filtered['lat'])]
df_labels_filtered = GeoDataFrame(df_labels_filtered, crs = "EPSG:4326", geometry = coords) 
df_labels_filtered = df_labels_filtered.to_crs('EPSG:3310')
# df_labels_filtered
# round the SmokeyNet timestamps to nearest minute on the dot
df_labels_filtered["datetime_rounded"] = df_labels_filtered["datetime"].apply(lambda x: round_secs(x))
# df_labels_filtered
# get all unique cameras being considered
unique_cameras = df_labels_filtered["camera_name"].unique()
# unique_cameras
# if there are cameras that don't have associated directions, filter them out
unusable_cameras = df_labels_filtered[df_labels_filtered["direction"].isna()]["camera_name"].unique()
unique_cameras = np.setdiff1d(unique_cameras, unusable_cameras)
unique_cameras 

93784
92739
376689
334881


array(['69bravo-e-mobo-c', 'bh-w-mobo-c', 'bl-n-mobo-c', 'bl-s-mobo-c',
       'bm-e-mobo-c', 'bm-w-mobo-c', 'cp-s-mobo-c', 'dwpgm-n-mobo-c',
       'hp-e-mobo-c', 'hp-n-mobo-c', 'hp-s-mobo-c', 'hp-w-mobo-c',
       'lo-s-mobo-c', 'lp-e-mobo-c', 'lp-n-mobo-c', 'lp-s-mobo',
       'lp-s-mobo-c', 'lp-w-mobo-c', 'marconi-n-mobo-c', 'mg-n-mobo-c',
       'ml-s-mobo-c', 'ml-w-mobo-c', 'mlo-n-mobo-c', 'mlo-s-mobo-c',
       'om-e-mobo-c', 'om-n-mobo-c', 'om-s-mobo', 'om-s-mobo-c',
       'om-w-mobo', 'om-w-mobo-c', 'pi-e-mobo-c', 'pi-n-mobo-c',
       'pi-s-mobo', 'pi-s-mobo-c', 'pi-w-mobo-c', 'rm-e-mobo-c',
       'rm-n-mobo-c', 'rm-w-mobo-c', 'sclm-e-mobo-c', 'sjh-n-mobo-c',
       'sm-e-mobo-c', 'sm-n-mobo-c', 'sm-s-mobo-c', 'sm-w-mobo-c',
       'smer-tcs8-mobo-c', 'smer-tcs9-mobo-c', 'so-w-mobo-c',
       'sp-n-mobo-c', 'syp-w-mobo-c', 'tp-s-mobo-c', 'tp-w-mobo-c',
       'vo-n-mobo-c', 'wc-e-mobo-c', 'wc-n-mobo-c', 'wc-s-mobo-c'],
      dtype=object)

In [5]:
wfabba_goes_16_df.columns

Index(['Version', 'Timestamp', 'Satellite', 'FlightModel', 'ScanMode',
       'ProductType', 'FileName', 'MissingValueCode', 'Latitude', 'Longitude',
       'Code', 'FRP', 'Fire Size', 'Fire Temp', 'Pixel Size', 'Obs BT4',
       'Obs BT11', 'Bkg BT4', 'Bkg BT11', 'SolZen', 'SatZen', 'RelAzi', 'Eco',
       'timestamp_converted', 'geometry'],
      dtype='object')

### At what point is there a recorded positive GOES observation given there is a fire

In [None]:
# Example 1

df_labels_filtered.head()
distance_miles = 35
# camera_metadata_df.head()
# wfabba_goes_16_df.head()
wfabba_goes_16_df.Code.value_counts()
wfabba_goes_16_df.loc[(wfabba_goes_16_df.Code == 11) & (wfabba_goes_16_df["Fire Size"] == 0.0) & (wfabba_goes_16_df["Fire Temp"] == 0.0)].head()
wfabba_goes_16_df.loc[(wfabba_goes_16_df["Fire Temp"] < 0)]["Code"].value_counts() # [12, 13, 14, 15]
wfabba_goes_16_df.loc[~wfabba_goes_16_df.geometry.isna()].shape[0], wfabba_goes_16_df.shape[0]
wfabba_goes_16_df[(wfabba_goes_16_df["Timestamp"] >= "2019-08-13 21:00:00+00:00") 
    & (wfabba_goes_16_df["Timestamp"] <= "2019-08-13 21:50:00+00:00") 
    & (wfabba_goes_16_df["distance_mi"] <= distance_miles)].head()      # Fire observed at 20th minute
# wfabba_goes_16_df
# wfabba_goes_16_df.columns

In [None]:
# Example 2
# Timestamp = 2020-10-13 23:45:00+00:00
wfabba_goes_16_df[(wfabba_goes_16_df["Timestamp"] >= "2020-10-13 23:15:00+00:00") 
    & (wfabba_goes_16_df["Timestamp"] <= "2020-10-14 00:15:00+00:00") 
    & (wfabba_goes_16_df["distance_mi"] <= 60)].head()      # Fire observed at 45th minute

#### The provided wfabba data contains fires with codes 10-15 only
#### No observation has been tagged with the high confidence temporally filtered fire codes (30-35)

In [None]:
# Random
camera = "69bravo-e-mobo-c"
distance_miles = 35
camera_df = df_labels_filtered[df_labels_filtered["camera_name"].str.contains(camera)].copy()
camera_instance = camera_df.iloc[0]
goes_16_dist_match_df = matches_distance_prox(camera_instance["geometry"], camera_instance["direction"], distance_miles, wfabba_goes_16_df)
print ("$", goes_16_dist_match_df.shape[0], wfabba_goes_16_df.shape[0], camera_df.shape[0])
goes_16_dist_match_df["timestamp_converted_rounded"] = goes_16_dist_match_df["timestamp_converted"].apply(lambda x: round_secs(x))
print ("#", goes_16_dist_match_df[goes_16_dist_match_df.duplicated("timestamp_converted_rounded")].shape[0])
print ("*", goes_16_dist_match_df.drop_duplicates(subset = ["timestamp_converted_rounded"]).shape[0])
goes_16_dist_match_df[goes_16_dist_match_df.duplicated("timestamp_converted_rounded")].sort_values("timestamp_converted_rounded")["timestamp_converted_rounded"].value_counts()

### Join smokeynet predictions with the minute_df for a given camera

In [4]:
df_labels_filtered.head()
wfabba_goes_16_df.head()

distance_miles = 35
# camera = 'lp-e-mobo-c' 
camera = unique_cameras[0]
camera_df = df_labels_filtered[df_labels_filtered["camera_name"].str.contains(camera)].copy()
camera_instance = camera_df.iloc[0]
#Find GOES-16 matches
goes_16_dist_match_df = matches_distance_prox(camera_instance["geometry"], camera_instance["direction"], distance_miles, wfabba_goes_16_df)
goes_16_dist_match_df["timestamp_converted_rounded"] = goes_16_dist_match_df["timestamp_converted"].apply(lambda x: round_secs(x))
goes_16_dist_match_df = goes_16_dist_match_df.drop_duplicates(subset = ["timestamp_converted_rounded"], keep="last")

#Find GOES-17 matches
goes_17_dist_match_df = matches_distance_prox(camera_instance["geometry"], camera_instance["direction"], distance_miles, wfabba_goes_17_df)
goes_17_dist_match_df["timestamp_converted_rounded"] = goes_17_dist_match_df["timestamp_converted"].apply(lambda x: round_secs(x))
goes_17_dist_match_df = goes_17_dist_match_df.drop_duplicates(subset = ["timestamp_converted_rounded"], keep="last")

#SmokeyNet_join --> changed join type to right so as to only keep non null values --> change back to left after testing
test_df = minutes_df.merge(camera_df, left_on = "timestamp", right_on = "datetime_rounded",how="right")
test_df = test_df.rename(columns = {"geometry":"HPWREN_Station_geometry", "lat":"HPWREN_lat", "long":"HPWREN_long", "datetime_rounded":"SmokeyNet_datetime_rounded"})
# print("joined SmokeyNet")


In [None]:
print (test_df.date.value_counts())
df_labels_filtered.date.value_counts()
print (min(df_labels_filtered.date), max(df_labels_filtered.date))
print (camera_df.date.unique())
print (min(df_labels_filtered.datetime_rounded.dt.date), max(df_labels_filtered.datetime_rounded.dt.date))
test_df.date.value_counts()
'2019-08-13' in goes_16_dist_match_df.timestamp_converted_rounded.dt.date
blah = [x for x in goes_16_dist_match_df.timestamp_converted_rounded if datetime.strptime('2019-08-13', '%Y-%m-%d').date() == x.date()]
print (blah)
# datetime.strptime('2022', '%Y').year == datetime.now().year
# datetime.strptime('2022-07-09', '%Y-%m-%d').date() == datetime.now().date()
test_df[(test_df.image_gt == 1) & (test_df.timestamp == '2019-08-13 21:43:00+0000')].tail()
goes_16_dist_match_df.timestamp_converted_rounded
print ([x for x in test_df.timestamp if x in goes_16_dist_match_df.timestamp_converted_rounded])
print (min(test_df.timestamp), min(goes_16_dist_match_df.timestamp_converted_rounded))
print (max(test_df.timestamp), max(goes_16_dist_match_df.timestamp_converted_rounded))
goes_16_dist_match_df.timestamp_converted_rounded
[x for x in test_df.date if x in goes_16_dist_match_df.timestamp_converted_rounded.dt.date]


### SmokeyNet Observations per camera
### Extract number of unique dates when smokeynet predictions were generated per camera

In [5]:
def extract_date(e):
    dates = e.datetime_rounded.dt.date
    # camera_name = e.camera_name.iloc[0]
    return pd.DataFrame({'camera_name': e.camera_name, 'no_of_unique_dates': len(dates.unique())})

In [6]:
max_min_per_camera_df = pd.DataFrame(df_labels_filtered['camera_name'].unique())
max_min_per_camera_df.columns = ['camera_name']
max_min_per_camera_df = max_min_per_camera_df.merge(
    df_labels_filtered[['camera_name', 'datetime_rounded']].groupby(by='camera_name').min(),
    left_on='camera_name', right_on = 'camera_name',
    how='left'
).rename(columns = {'datetime_rounded':'min_time_for_camera'})
max_min_per_camera_df = max_min_per_camera_df.merge(
    df_labels_filtered[['camera_name', 'datetime_rounded']].groupby(by='camera_name').max(),
    left_on='camera_name', right_on = 'camera_name',
    how='left'
).rename(columns = {'datetime_rounded':'max_time_for_camera'})
max_min_per_camera_df = max_min_per_camera_df.merge(
    df_labels_filtered[['camera_name', 'datetime_rounded']].groupby(by='camera_name').apply(extract_date).drop_duplicates(),
    left_on='camera_name', right_on = 'camera_name',
    how='left'
)
max_min_per_camera_df.head(55)

Unnamed: 0,camera_name,min_time_for_camera,max_time_for_camera,no_of_unique_dates
0,lp-s-mobo-c,2019-07-12 13:43:00+00:00,2021-01-10 19:39:00+00:00,8
1,pi-s-mobo-c,2019-07-14 16:05:00+00:00,2021-01-13 21:51:00+00:00,6
2,pi-n-mobo-c,2020-03-06 17:04:00+00:00,2020-03-06 18:23:00+00:00,1
3,ml-w-mobo-c,2019-09-22 19:46:00+00:00,2019-10-06 18:28:00+00:00,3
4,lo-s-mobo-c,2019-10-06 17:37:00+00:00,2019-10-06 18:56:00+00:00,1
5,om-e-mobo-c,2019-08-14 21:16:00+00:00,2020-11-05 22:57:00+00:00,5
6,lp-n-mobo-c,2019-09-13 23:32:00+00:00,2020-09-05 22:00:00+00:00,8
7,mlo-s-mobo-c,2020-08-29 17:13:00+00:00,2021-01-13 21:49:00+00:00,2
8,bh-w-mobo-c,2019-06-10 19:44:00+00:00,2019-10-01 21:20:00+00:00,2
9,sm-e-mobo-c,2020-08-07 01:01:00+00:00,2020-09-05 22:00:00+00:00,2


### Unique dates for goes observations after proximity matching per camera

In [95]:
df_labels_filtered.columns

Index(['filepath', 'camera_name', 'image_gt', 'tile_gt', 'image_pred',
       'tile_pred', 'type', 'date', 'year', 'time', 'datetime', 'event_name',
       'camera_id', 'image_id', 'lat', 'long', 'direction', 'geometry',
       'datetime_rounded'],
      dtype='object')

In [19]:
'''GOES columns : 'Version', 'Timestamp', 'Satellite', 'FlightModel', 'ScanMode',
       'ProductType', 'FileName', 'MissingValueCode', 'Latitude', 'Longitude',
       'Code', 'FRP', 'Fire Size', 'Fire Temp', 'Pixel Size', 'Obs BT4',
       'Obs BT11', 'Bkg BT4', 'Bkg BT11', 'SolZen', 'SatZen', 'RelAzi', 'Eco',
       'timestamp_converted', 'geometry', 'distance_m', 'distance_mi',
       'is_in_direction'
'''
'''df_labels_filtered columns: 'filepath', 'camera_name', 'image_gt', 'tile_gt', 'image_pred',
       'tile_pred', 'type', 'date', 'year', 'time', 'datetime', 'event_name',
       'camera_id', 'image_id', 'lat', 'long', 'direction', 'geometry',
       'datetime_rounded'
'''
distance_miles = 35

smokey_net_camera_date = []
goes_16_camera_date = []
goes_17_camera_date = []

for camera in tqdm(unique_cameras):
    camera_df = df_labels_filtered[df_labels_filtered["camera_name"].str.contains(camera)].copy()
    camera_instance = camera_df.iloc[0]
    unique_dates = camera_df.date.unique()
    smokey_net_camera_date += list(zip([camera]*len(unique_dates), unique_dates))
    #Find GOES-16 matches
    goes_16_dist_match_df = matches_distance_prox(camera_instance["geometry"], camera_instance["direction"], distance_miles, wfabba_goes_16_df)
    goes_16_dist_match_df['date_val'] = pd.to_datetime(goes_16_dist_match_df.Timestamp).dt.date
    unique_dates = goes_16_dist_match_df.date_val.unique()
    goes_16_camera_date += list(zip([camera]*len(unique_dates), unique_dates))
    #Find GOES-17 matches
    goes_17_dist_match_df = matches_distance_prox(camera_instance["geometry"], camera_instance["direction"], distance_miles, wfabba_goes_17_df)
    goes_17_dist_match_df['date_val'] = pd.to_datetime(goes_17_dist_match_df.Timestamp).dt.date
    unique_dates = goes_17_dist_match_df.date_val.unique()
    goes_17_camera_date += list(zip([camera]*len(unique_dates), unique_dates))

smokey_net_camera_date_df = pd.DataFrame(smokey_net_camera_date, columns=['camera_name', 'smokey_net_dates'])
goes_16_camera_date_df = pd.DataFrame(goes_16_camera_date, columns=['camera_name', 'goes16_dates'])
goes_16_camera_date_df.goes16_dates = goes_16_camera_date_df.goes16_dates.astype('datetime64[ns]')
goes_17_camera_date_df = pd.DataFrame(goes_17_camera_date, columns=['camera_name', 'goes17_dates'])
goes_17_camera_date_df.goes17_dates = goes_17_camera_date_df.goes17_dates.astype('datetime64[ns]')

agg_camera_date_df = smokey_net_camera_date_df.copy()
agg_camera_date_df = agg_camera_date_df.merge(
       goes_16_camera_date_df,
       left_on=['camera_name', 'smokey_net_dates'],
       right_on=['camera_name', 'goes16_dates'],
       how='outer'
)
agg_camera_date_df = agg_camera_date_df.merge(
       goes_17_camera_date_df,
       left_on=['camera_name', 'smokey_net_dates'],
       right_on=['camera_name', 'goes17_dates'],
       how='outer'
)

100%|██████████| 55/55 [10:21<00:00, 11.30s/it]


In [22]:
agg_camera_date_df.to_csv('../../data/processed/agg_camera_date_mapping.csv')

## Experiment to test sliding window merge using pandas merge_asof

In [6]:
print (wfabba_goes_16_df.columns)
print (df_labels_filtered.columns)

Index(['Version', 'Timestamp', 'Satellite', 'FlightModel', 'ScanMode',
       'ProductType', 'FileName', 'MissingValueCode', 'Latitude', 'Longitude',
       'Code', 'FRP', 'Fire Size', 'Fire Temp', 'Pixel Size', 'Obs BT4',
       'Obs BT11', 'Bkg BT4', 'Bkg BT11', 'SolZen', 'SatZen', 'RelAzi', 'Eco',
       'timestamp_converted', 'geometry'],
      dtype='object')
Index(['filepath', 'camera_name', 'image_gt', 'tile_gt', 'image_pred',
       'tile_pred', 'type', 'date', 'year', 'time', 'datetime', 'event_name',
       'camera_id', 'image_id', 'lat', 'long', 'direction', 'geometry',
       'datetime_rounded'],
      dtype='object')


In [16]:
# Original method, for comparison
for camera in tqdm(unique_cameras):
    
    # print("Camera:",camera)
    camera_df = df_labels_filtered[df_labels_filtered["camera_name"].str.contains(camera)].copy()

    camera_instance = camera_df.iloc[0]
    
    #Find GOES-16 matches
    goes_16_dist_match_df = matches_distance_prox(camera_instance["geometry"], camera_instance["direction"], distance_miles, wfabba_goes_16_df)
    goes_16_dist_match_df["timestamp_converted_rounded"] = goes_16_dist_match_df["timestamp_converted"].apply(lambda x: round_secs(x))
    goes_16_dist_match_df = goes_16_dist_match_df.drop_duplicates(subset = ["timestamp_converted_rounded"], keep="last")

    #Find GOES-17 matches
    goes_17_dist_match_df = matches_distance_prox(camera_instance["geometry"], camera_instance["direction"], distance_miles, wfabba_goes_17_df)
    goes_17_dist_match_df["timestamp_converted_rounded"] = goes_17_dist_match_df["timestamp_converted"].apply(lambda x: round_secs(x))
    goes_17_dist_match_df = goes_17_dist_match_df.drop_duplicates(subset = ["timestamp_converted_rounded"], keep="last")

    #SmokeyNet_join
    test_df = minutes_df.merge(camera_df, left_on = "timestamp", right_on = "datetime_rounded",how="left")
    test_df = test_df.rename(columns = {"geometry":"HPWREN_Station_geometry", "lat":"HPWREN_lat", "long":"HPWREN_long", "datetime_rounded":"SmokeyNet_datetime_rounded"})
    # print("joined SmokeyNet")
    
    #GOES-16 Join
    test_df = test_df.merge(goes_16_dist_match_df[["timestamp_converted_rounded", "geometry", "Code"]], left_on = "timestamp", right_on = "timestamp_converted_rounded",how="left")
    test_df = test_df.rename(columns = {"geometry":"WFABBA_GOES16_geometry", "timestamp_converted_rounded":"WFABBA_GOES16_timestamp_converted_rounded", "Code":"WFABBA_GOES16_Code"})
    test_df = test_df[["timestamp","camera_name", "image_gt", "image_pred", "type", "WFABBA_GOES16_geometry", "WFABBA_GOES16_Code"]]
    test_df.loc[test_df["WFABBA_GOES16_geometry"] != None,'goes16_pred'] = 1
    test_df.loc[test_df["WFABBA_GOES16_geometry"] == None,'goes16_pred'] = 0
    # print("joined GOES16")

    #GOES-17 Join
    test_df = test_df.merge(goes_17_dist_match_df[["timestamp_converted_rounded", "geometry", "Code"]], left_on = "timestamp", right_on = "timestamp_converted_rounded",how="left")
    test_df = test_df.rename(columns = {"geometry":"WFABBA_GOES17_geometry", "timestamp_converted_rounded":"WFABBA_GOES17_timestamp_converted_rounded", "Code":"WFABBA_GOES17_Code"})
    test_df = test_df[["timestamp","camera_name", "image_gt", "image_pred", "type", "WFABBA_GOES16_geometry", "goes16_pred", "WFABBA_GOES17_geometry", "WFABBA_GOES16_Code", "WFABBA_GOES17_Code"]]
    test_df.loc[test_df["WFABBA_GOES17_geometry"] != None,'goes17_pred'] = 1
    test_df.loc[test_df["WFABBA_GOES17_geometry"] == None,'goes17_pred'] = 0
    # print("joined GOES17")

    #Get all votes and determine if smoke was detected by majority rule
    test_df["final_vote"] = test_df["image_pred"] + test_df["goes16_pred"] + test_df["goes17_pred"]
    test_df.loc[test_df["final_vote"] >= 2,'final_pred'] = 1
    test_df.loc[test_df["final_vote"] < 2,'final_pred'] = 0
    # break

  0%|          | 0/55 [00:17<?, ?it/s]


In [23]:
print (sum(test_df.loc[test_df['camera_name'] == unique_cameras[0]].goes16_pred == 0))

79


In [36]:
distance_miles = 35

for camera in unique_cameras:
    camera_df = df_labels_filtered[df_labels_filtered["camera_name"].str.contains(camera)].copy()
    camera_instance = camera_df.iloc[0]
    
    #Find GOES-16 matches
    goes_16_dist_match_df = matches_distance_prox(camera_instance["geometry"], camera_instance["direction"], distance_miles, wfabba_goes_16_df)
    goes_16_dist_match_df["timestamp_converted_rounded"] = goes_16_dist_match_df["timestamp_converted"].apply(lambda x: round_secs(x))
    goes_16_dist_match_df = goes_16_dist_match_df.drop_duplicates(subset = ["timestamp_converted_rounded"], keep="last")

    camera_df.sort_values(by=['datetime_rounded'], inplace=True)
    goes_16_dist_match_df.sort_values(by=['timestamp_converted_rounded'], inplace=True)

    temp_df = pd.merge_asof(
        left=camera_df,
        right=goes_16_dist_match_df,
        left_on='datetime_rounded',
        right_on='timestamp_converted_rounded',
        tolerance=pd.Timedelta(minutes=60),
        direction='forward'
    )

    temp_df.loc[temp_df["geometry_y"] != None,'goes16_pred'] = 1
    temp_df.loc[temp_df["geometry_y"] == None,'goes16_pred'] = 0
    temp_df.head()
    break

In [38]:
print (sum(temp_df.goes16_pred == 0))
temp_df.head(80)
temp_df.columns
temp_df['time_diff'] = temp_df['timestamp_converted_rounded'] - temp_df['datetime_rounded']
temp_df[['datetime_rounded', 'image_gt', 'image_pred', 'timestamp_converted_rounded', 'time_diff', 'goes16_pred']].loc[(temp_df.goes16_pred == 0) & (temp_df.image_gt == 1)].head(80)
# temp_df[['datetime_rounded', 'image_gt', 'image_pred', 'timestamp_converted_rounded', 'time_diff', 'goes16_pred']].loc[(temp_df.goes16_pred != 0)].head(80)


42


Unnamed: 0,datetime_rounded,image_gt,image_pred,timestamp_converted_rounded,time_diff,goes16_pred
39,2019-08-13 21:20:00+00:00,1,0,NaT,NaT,0.0
40,2019-08-13 21:21:00+00:00,1,1,NaT,NaT,0.0
41,2019-08-13 21:22:00+00:00,1,1,NaT,NaT,0.0


In [None]:
# manual join
print (test_df.shape)
print (goes_16_dist_match_df.shape)
test_df.head()
# max(list(camera_df.datetime_rounded)), max(list(minutes_df.timestamp))
# df_labels_filtered[df_labels_filtered["camera_name"].str.contains(camera)].shape
# setting the offset to 10 minutes before and after the current time
offset = 20
for index, a_row in test_df.iterrows():
    curr_time = a_row['timestamp']
    temp_df = goes_16_dist_match_df[
        (goes_16_dist_match_df.timestamp_converted_rounded >= (curr_time - timedelta(minutes=offset)))
        &
        (goes_16_dist_match_df.timestamp_converted_rounded <= (curr_time + timedelta(minutes=offset)))
    ]
    print (temp_df.head())
    break

### Time difference between first positive ground truth and first positive goes detection

In [6]:
df_labels_filtered[df_labels_filtered["camera_name"].str.contains("lp-s-mobo-c")].columns

Index(['filepath', 'camera_name', 'image_gt', 'tile_gt', 'image_pred',
       'tile_pred', 'type', 'date', 'year', 'time', 'datetime', 'event_name',
       'camera_id', 'image_id', 'lat', 'long', 'direction', 'geometry',
       'datetime_rounded'],
      dtype='object')

In [8]:
def fetch_first_positive_obs(e):
    temp = e.loc[(e.image_gt == 1)].iloc[0][['event_name', 'datetime_rounded']].to_list()
    return pd.Series({'event_name': temp[0], 'datetime_rounded': temp[1]})

distance_miles = 35

obs_list = []

for camera in tqdm(unique_cameras):
    # camera = 'lp-s-mobo-c'
    camera_df = df_labels_filtered[df_labels_filtered["camera_name"].str.contains(camera)].copy()
    camera_instance = camera_df.iloc[0]
    
    #Find GOES-16 matches
    goes_16_dist_match_df = matches_distance_prox(camera_instance["geometry"], camera_instance["direction"], distance_miles, wfabba_goes_16_df)
    goes_16_dist_match_df["timestamp_converted_rounded"] = goes_16_dist_match_df["timestamp_converted"].apply(lambda x: round_secs(x))
    goes_16_dist_match_df = goes_16_dist_match_df.drop_duplicates(subset = ["timestamp_converted_rounded"], keep="last")

    #Find GOES-17 matches
    goes_17_dist_match_df = matches_distance_prox(camera_instance["geometry"], camera_instance["direction"], distance_miles, wfabba_goes_17_df)
    goes_17_dist_match_df["timestamp_converted_rounded"] = goes_17_dist_match_df["timestamp_converted"].apply(lambda x: round_secs(x))
    goes_17_dist_match_df = goes_17_dist_match_df.drop_duplicates(subset = ["timestamp_converted_rounded"], keep="last")

    # Sort by event_name as well --> since not doing so could mess up order of images in the sequence
    camera_df.sort_values(by=['event_name', 'datetime_rounded'], inplace=True)
    goes_16_dist_match_df.sort_values(by=['timestamp_converted_rounded'], inplace=True)
    goes_17_dist_match_df.sort_values(by=['timestamp_converted_rounded'], inplace=True)

    first_positive_gt_obs_arr = camera_df.groupby(by='event_name').apply(fetch_first_positive_obs)
    
    for _, obs in first_positive_gt_obs_arr.iterrows():
        first_smokeynet_obs = camera_df.loc[(camera_df.datetime_rounded >= obs.datetime_rounded) & (camera_df.image_pred == 1) & (camera_df.event_name == obs.event_name)]
        if len(first_smokeynet_obs) != 0: first_smokeynet_obs = first_smokeynet_obs.iloc[0]['datetime_rounded']
        else: first_smokeynet_obs = None
        first_goes16_record = goes_16_dist_match_df.loc[goes_16_dist_match_df.timestamp_converted_rounded >= obs.datetime_rounded].iloc[0]
        first_goes16_obs = first_goes16_record['timestamp_converted_rounded']
        first_goes16_code = first_goes16_record['Code']
        first_goes17_record = goes_17_dist_match_df.loc[goes_17_dist_match_df.timestamp_converted_rounded >= obs.datetime_rounded].iloc[0]
        first_goes17_obs = first_goes16_record['timestamp_converted_rounded']
        first_goes17_code = first_goes17_record['Code']
        obs_list.append([camera, obs.event_name, obs.datetime_rounded, first_smokeynet_obs, first_goes16_obs, first_goes17_obs, first_goes16_code, first_goes17_code])

    # break

100%|██████████| 55/55 [10:32<00:00, 11.50s/it]


In [52]:
# Test Cell --> No useful obs
camera_df.tail()
df_labels_filtered[df_labels_filtered["camera_name"].str.contains(unique_cameras[10])].tail()
# Camera df actually contains both the filter_first_probabiity blocks
for _, obs in camera_df.groupby(by='event_name').apply(fetch_first_positive_obs).iterrows():
    # p (obs.datetime_rounded)
    p (camera_df.loc[(camera_df.datetime_rounded >= obs.datetime_rounded)
        & (camera_df.image_pred == 1) 
        & (camera_df.event_name == obs.event_name)].iloc[0]['datetime_rounded'])
    # [['datetime_rounded', 'image_gt', 'image_pred']]
    # p (camera_df.columns)

# camera_df.image_pred.value_counts()
# obs_list

# Add code for first obs!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

2019-10-06 18:32:00+00:00
2020-08-29 17:52:00+00:00
2021-01-13 21:14:00+00:00


In [10]:
with open('../../data/processed/obs_list_times.tsv', 'w') as ol:
    ol.write("Mapping of first smokey net gt = 1, and first goes observation after that timestamp\n")
    header = "\t".join(['camera', 'event_name', 'gt_ts', 'smokey_ts', 'goes_16_ts', 'goes_17_ts', 'goes16_code', 'goes17_code', '(smokey-gt) min', '(goes16-gt) min', '(goes17-gt) min'])
    ol.write(f"{header}\n")
    for obs in obs_list:
        smokey_temp = [str((obs[3] - obs[2]).seconds/60) if obs[3] is not None else '']
        temp = [str((obs[4] - obs[2]).seconds/60), str((obs[5] - obs[2]).seconds/60)]
        x = obs[:2] + [x.ctime() if x is not None else '' for x in obs[2:-2]] + [str(x) for x in obs[-2:]] + smokey_temp + temp
        out_str = "\t".join(x)
        ol.write(f"{out_str}\n")

In [115]:
# Smokey vs GOES 16
obs_list_time_diff = [(x[3] - x[2]).seconds/60 for x in obs_list]
sorted(obs_list_time_diff, reverse=True)
print (sum([1 if x < 60 else 0 for x in obs_list_time_diff]), len(obs_list_time_diff))

# Smokey vs GOES 17
obs_list_time_diff = [(x[4] - x[2]).seconds/60 for x in obs_list]
sorted(obs_list_time_diff, reverse=True)
print (sum([1 if x < 60 else 0 for x in obs_list_time_diff]), len(obs_list_time_diff))

52 136
69 136


### Average time to detection for each of the three methods wrt first ground truth positive observation

In [58]:
from numpy import mean

smokey_avg = mean([(obs[3] - obs[2]).seconds/60 for obs in obs_list if obs[3] is not None])
goes16_avg = mean([(obs[4] - obs[2]).seconds/60 for obs in obs_list])
goes17_avg = mean([(obs[5] - obs[2]).seconds/60 for obs in obs_list])

p (f"Average time to detection for smokeynet = {smokey_avg} mins")
p (f"Average time to detection for goes16 = {goes16_avg} mins")
p (f"Average time to detection for goes17 = {goes17_avg} mins")

Average time to detection for smokeynet = 3.593984962406015 mins
Average time to detection for goes16 = 409.5882352941176 mins
Average time to detection for goes17 = 373.0735294117647 mins


In [61]:
from numpy import mean, std

smokey_std = std([(obs[3] - obs[2]).seconds/60 for obs in obs_list if obs[3] is not None])
goes16_std = std([(obs[4] - obs[2]).seconds/60 for obs in obs_list])
goes17_std = std([(obs[5] - obs[2]).seconds/60 for obs in obs_list])

p (f"Time to detection std dev for smokeynet = {smokey_std} mins")
p (f"Time to detection std dev for goes16 = {goes16_std} mins")
p (f"Time to detection std dev for goes17 = {goes17_std} mins")

Time to detection std dev for smokeynet = 6.079718031135962 mins
Time to detection std dev for goes16 = 521.9363758637895 mins
Time to detection std dev for goes17 = 530.4913875271534 mins


## Experiment Result Generator

In [59]:
from prettytable import PrettyTable

header = ['experiment', 'smokey', 'goes16', 'goes17', 'ensemble']
result_table = PrettyTable(header)
data = [
    ['Original join methodology with distance radius 35 (baseline)', 0.7833, 0.5179, 0.5666, 0.5652],
    ['Sliding Window Forward Join with Offset = 60 min', 0.7833, 0.4941, 0.4799, 0.5181],
    ['Sliding Window Forward Join with Offset = 120 min', 0.7833, 0.5, 0.5015, 0.5296],
    ['Sliding Window Nearest Join with Offset = 120 min', 0.7833, 0.5236, 0.5310, 0.5634],
    ['Sliding Window Nearest Join with Offset = 60 min', 0.7833, 0.5236, 0.5310, 0.5634],
    ['Sliding Window Nearest Join with Offset = 60 min without code 15', 0.7833, 0.5054, 0.5283, 0.5509],
    ['Sliding Window Nearest Join with Offset = 30 min', 0.7833, 0.5698, 0.5634, 0.6031],
    ['Sliding Window Nearest Join with Offset = 20 min', 0.7833, 0.5971, 0.5884, 0.6264],
    ['Sliding Window Nearest Join with Offset = 20 min without code 15 - same as prev', 0.7833, 0.5971, 0.5884, 0.6264],
    ['Sliding Window Nearest Join with Offset = 10 min', 0.7833, 0.6055, 0.6066, 0.6304],
    ['Sliding Window Nearest Join with Offset = 5 min', 0.7833, 0.5959, 0.6089, 0.6198],
]
result_table.add_rows(data)
print (result_table)

+---------------------------------------------------------------------------------+--------+--------+--------+----------+
|                                    experiment                                   | smokey | goes16 | goes17 | ensemble |
+---------------------------------------------------------------------------------+--------+--------+--------+----------+
|           Original join methodology with distance radius 35 (baseline)          | 0.7833 | 0.5179 | 0.5666 |  0.5652  |
|                 Sliding Window Forward Join with Offset = 60 min                | 0.7833 | 0.4941 | 0.4799 |  0.5181  |
|                Sliding Window Forward Join with Offset = 120 min                | 0.7833 |  0.5   | 0.5015 |  0.5296  |
|                Sliding Window Nearest Join with Offset = 120 min                | 0.7833 | 0.5236 | 0.531  |  0.5634  |
|                 Sliding Window Nearest Join with Offset = 60 min                | 0.7833 | 0.5236 | 0.531  |  0.5634  |
|         Sliding Window