In [1]:
# Generate Sinkhole Attributes

import numpy as np
import pandas as pd
import os
import datetime
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import math
import warnings
warnings.filterwarnings("ignore")

from w210_attribute_library import haversine_distance, getdate, findname, findAttr, shAttributes, sh_attr2

# Directory locations, all relative to the notebook's working directory.
# Every path keeps its trailing slash: file names are appended to these
# strings with plain `+` concatenation.
datdir = "../data/"
datdirsat = "../data/satellite_data/"
attrs = "../attrs/"    # generated attribute CSVs are written here
modeld = "../model/"

## Load Sinkhole Data for Florida (State)

In [10]:
# Read the first sheet of the Florida subsidence incident report workbook.
df_sh = pd.read_excel(datdir + "Florida_Subsidence_Incident_Reports.xlsx", sheet_name=0)

# Drop reports without an event date *before* deriving DateD, so getdate is
# never called on a missing value. `.copy()` makes the filtered frame
# independent, so adding a column below does not raise SettingWithCopyWarning.
df_sh = df_sh[df_sh["EVENT_DATE"].notna()].copy()

# Derive DateD from EVENT_DATE using the project helper.
df_sh["DateD"] = df_sh["EVENT_DATE"].apply(getdate)

# Number of incident reports that have an event date.
print(len(df_sh))

3944


## Load 365-Day Satellite Attribute Data

In [12]:
# Load the satellite attribute table for the 365-day window.
dfevent365 = pd.read_csv(datdir + "model_satel_attr_365.csv")

# Parse the "YYYY-MM-DD" strings in a single vectorized call instead of a
# row-by-row strptime. The explicit format keeps parsing strict: a value that
# does not match raises instead of being guessed.
dfevent365["DateD"] = pd.to_datetime(dfevent365["DateD"], format="%Y-%m-%d")

# Show the first row as a quick sanity check of the loaded columns.
dfevent365.head(1)

Unnamed: 0,name,imgnum,label,ID,lon,lat,start_date,geometry,AnnualCrop,Forest,...,Pasture,PermanentCrop,Residential,River,SeaLake,prediction,prediction_name,Group,DateD,Key
0,2012-R-2019-11-25-2020-01-24-6.03-0.tif,0,0,2012,-81.399778,30.24471,2019-11-25,POLYGON ((-81.4028157641155 30.241694208355277...,9.091525e-11,5.9e-05,...,5.95627e-07,2.493491e-07,0.878485,0.005469,1.6e-05,7,Residential,1,2020-11-24,2012_0_1


In [13]:
# Count the non-null `prediction` values for every (label, Group) pair;
# combinations with no rows show up as NaN.
pivot = (
    dfevent365
    .pivot_table(values="prediction", index="label", columns="Group", aggfunc="count")
    .round(2)
)
pivot

Group,0,1,2
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,,111.0,113.0
1,113.0,,


## Generate Sinkhole Attributes (Using Tile Level Calculations)

In [14]:
# Build the sinkhole attributes for the 365-day window and save them as CSV.
fields = ["lon", "lat", "ID"]
daysdelta = timedelta(days=365)

sh365 = shAttributes(df_sh, dfevent365, daysdelta, fields)
sh365.to_csv(attrs + "model_sat_sh_attr_365.csv", index=False)

26


In [15]:
# Preview the first two rows of the generated 365-day attribute table.
sh365.head(n=2)

Unnamed: 0,name,imgnum,label,ID,lon,lat,start_date,geometry,AnnualCrop,Forest,...,Y100,Y150,Y200,Y250,Y300,Y500,Y750,Y1000,Y1000plus,Ycoloc
0,2012-R-2019-11-25-2020-01-24-6.03-0.tif,0,0,2012,-81.399778,30.24471,2019-11-25,POLYGON ((-81.4028157641155 30.241694208355277...,9.091525e-11,5.9e-05,...,0,0,0,0,0,0,0,15,91876,0
1,2463-R-2021-01-14-2021-03-15-6.03-0.tif,0,0,2463,-80.355721,26.067671,2021-01-14,POLYGON ((-80.35879374091179 26.06461796872791...,5.209795e-09,4.1e-05,...,0,0,5,5,5,5,5,10,96257,0


### Sinkhole Attributes for 60 days

In [16]:
# Load the satellite attribute table for the 60-day window.
dfevent60 = pd.read_csv(datdir + "model_satel_attr_60.csv")

# Vectorized, strict parsing of the "YYYY-MM-DD" strings (replaces a
# row-by-row strptime call).
dfevent60["DateD"] = pd.to_datetime(dfevent60["DateD"], format="%Y-%m-%d")

## Generate Sinkhole Attributes
daysdelta = timedelta(60)
fields = ["lon", "lat", "ID"]

# Bound to `sh60` (previously `sh0`, a copy-paste leftover) so the 60-day
# result is not confused with, or overwritten by, the 0-day result.
sh60 = shAttributes(df_sh, dfevent60, daysdelta, fields)
sh60.to_csv(attrs + "model_sat_sh_attr_60.csv", index=False)

26


In [17]:
# Count the non-null `prediction` values for every (label, Group) pair in the
# 60-day table; combinations with no rows show up as NaN.
pivot = (
    dfevent60
    .pivot_table(values="prediction", index="label", columns="Group", aggfunc="count")
    .round(2)
)
pivot

Group,0,1,2
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,,111.0,113.0
1,113.0,,


### Sinkhole Attributes for 0 days

In [18]:
# Load the satellite attribute table for the 0-day window.
dfevent0 = pd.read_csv(datdir + "model_satel_attr_0.csv")

# Vectorized, strict parsing of the "YYYY-MM-DD" strings (replaces a
# row-by-row strptime call).
dfevent0["DateD"] = pd.to_datetime(dfevent0["DateD"], format="%Y-%m-%d")

## Generate Sinkhole Attributes
daysdelta = timedelta(0)
fields = ["lon", "lat", "ID"]
sh0 = shAttributes(df_sh, dfevent0, daysdelta, fields)
sh0.to_csv(attrs + "model_sat_sh_attr_0.csv", index=False)

26
