# Clean Data

## Import Libraries

### External Libraries

In [1]:
import geopandas as gpd

### Internal Libraries

## Define Variables

In [2]:
# File locations used by this notebook:
#   nyc_street_flooding_input  - raw street-flooding complaints GeoJSON that is read in.
#   nyc_street_flooding_output - destination of the cleaned GeoJSON that is written out.
nyc_street_flooding_input, nyc_street_flooding_output = (
    'data/street-flood-complaints_rows-all.geojson',
    'data/clean_street-flood-complaints_rows-all.geojson',
)


## Get Original Data

In [3]:
# Load the raw street-flooding complaints file from the configured input path.
street_flooding_gdf = gpd.read_file(nyc_street_flooding_input)

## Convert `datetime64` data type to string

In [4]:
# Replace each timestamp column with its 'YYYY-MM-DD HH:MM:SS' text form.
# NOTE(review): presumably done so the GeoJSON writer can serialize these
# columns when the dataset is saved — confirm.
timestamp_format = '%Y-%m-%d %H:%M:%S'

for date_column in ('created_date', 'resolution_action_updated_date', 'closed_date'):
    street_flooding_gdf[date_column] = street_flooding_gdf[date_column].dt.strftime(timestamp_format)


## Set `unique_key` as Index

In [5]:
# Index the complaints by `unique_key`.
# The guard keeps this cell safe to re-run: once `unique_key` has become the
# index it is no longer a column, so calling set_index a second time would
# raise KeyError. On a fresh kernel the guard is always true, and a missing
# `unique_key` column still raises as before.
if street_flooding_gdf.index.name != 'unique_key':
    street_flooding_gdf = street_flooding_gdf.set_index('unique_key')

## Remove Rows With Missing `geometry`

In [6]:
# Keep only the complaints that have a geometry; rows whose `geometry` value
# is missing are discarded.
street_flooding_gdf = street_flooding_gdf.dropna(subset=['geometry'])

## Preview Street Flooding Data

In [7]:
# Display the first ten rows, restricted to a handful of key columns.
street_flooding_gdf.head(10)[['created_date', 'borough', 'bbl', 'geometry']]

Unnamed: 0_level_0,created_date,borough,bbl,geometry
unique_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
15639934,2010-01-02 08:26:00,BROOKLYN,3089000064.0,POINT (-73.92178 40.58778)
15640572,2010-01-02 12:00:00,STATEN ISLAND,,POINT (-74.14329 40.63866)
15640664,2010-01-02 17:45:00,QUEENS,4120050012.0,POINT (-73.79530 40.68140)
15655327,2010-01-04 16:47:00,QUEENS,4106210008.0,POINT (-73.73843 40.72006)
15668560,2010-01-05 10:37:00,BROOKLYN,3086550021.0,POINT (-73.90969 40.61250)
15674300,2010-01-06 19:26:00,BROOKLYN,3029270015.0,POINT (-73.93297 40.71584)
15674896,2010-01-06 08:24:00,QUEENS,4119960122.0,POINT (-73.80255 40.67925)
15674924,2010-01-06 09:17:00,STATEN ISLAND,5040740044.0,POINT (-74.10646 40.55866)
15675505,2010-01-06 06:00:00,QUEENS,4030030044.0,POINT (-73.87694 40.71804)
15683503,2010-01-07 10:16:00,STATEN ISLAND,5014850078.0,POINT (-74.14943 40.61979)


In [8]:
# Display the last ten rows, restricted to a handful of key columns.
street_flooding_gdf.tail(10)[['created_date', 'borough', 'bbl', 'geometry']]

Unnamed: 0_level_0,created_date,borough,bbl,geometry
unique_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
56861586,2023-02-21 18:59:00,BROOKLYN,3068850023.0,POINT (-73.98458 40.59320)
56866123,2023-02-21 14:02:00,QUEENS,4160350046.0,POINT (-73.79850 40.59643)
56867763,2023-02-22 16:50:00,STATEN ISLAND,,POINT (-74.12709 40.56090)
56869243,2023-02-22 14:29:00,BROOKLYN,,POINT (-73.91557 40.65839)
56870344,2023-02-22 23:11:00,BROOKLYN,3071180035.0,POINT (-73.97940 40.59505)
56871278,2023-02-22 09:44:00,BROOKLYN,3013940054.0,POINT (-73.93773 40.66820)
56871467,2023-02-22 22:59:00,QUEENS,4021770036.0,POINT (-73.84646 40.72775)
56872479,2023-02-22 19:29:00,BROOKLYN,,POINT (-73.94631 40.70021)
56872594,2023-02-22 07:26:00,STATEN ISLAND,5002250061.0,POINT (-74.12005 40.62831)
56874606,2023-02-22 19:02:00,BRONX,2057520275.0,POINT (-73.91923 40.88493)


## Save Clean Dataset

In [9]:
# Write the cleaned data to the configured output path using the GeoJSON driver.
street_flooding_gdf.to_file(nyc_street_flooding_output, driver='GeoJSON')