In [88]:
import geopandas as gpd
import holoviews as hv
import hvplot.pandas
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import panel as pn
import cenpy
import pygris

import dask.dataframe as da
import intake

import requests

pd.options.display.max_columns = 999
pd.options.display.max_rows = 999


In [41]:
from sklearn.cluster import KMeans

from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()


import altair as alt
from vega_datasets import data as vega_data

In [42]:
claims_clustering = pd.read_csv("data/claims_NJ.csv").dropna(subset=['buildingDamageAmount', 'buildingPropertyValue'])

claims_2020_2023 = claims_clustering[claims_clustering['yearOfLoss'] >= 2022]


In [49]:
kmeans = KMeans(n_clusters=6, n_init=10)

In [50]:
kmeans.fit(claims_clustering[['buildingPropertyValue']])

In [51]:
claims_scaled = scaler.fit_transform(claims_clustering[['buildingPropertyValue', 'buildingDamageAmount']])
kmeans.fit(claims_scaled)

In [52]:
claims_clustering['label'] = kmeans.labels_

In [53]:
claims_clustering.groupby('label').size()

label
0    97125
1       23
2      616
3       47
4    30786
5     5241
dtype: int64

In [48]:
# Number of clusters to try out
n_clusters = list(range(2, 10))

# Run kmeans for each value of k
inertias = []
for k in n_clusters:
    
    # Initialize and run
    kmeans = KMeans(n_clusters=k, n_init=10)
    kmeans.fit(claims_clustering)
    
    # Save the "inertia"
    inertias.append(kmeans.inertia_)
    
# Plot it!
plt.plot(n_clusters, inertias, marker='o', ms=10, mfc='white', lw=4, mew=3);

ValueError: could not convert string to float: '2012-10-29T00:00:00.000Z'

In [83]:
NJ_counties = pygris.counties(
    state="NJ", year=2021
)

Using FIPS code '34' for input 'NJ'


In [95]:
NJ_counties.head()

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,GEOID,NAME,NAMELSAD,LSAD,CLASSFP,MTFCC,CSAFP,CBSAFP,METDIVFP,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
111,34,37,882236,34037,Sussex,Sussex County,6,H1,G4020,408,35620,35084.0,A,1343330731,43707483,41.1374609,-74.6919141,"POLYGON ((-74.48244 41.10358, -74.48263 41.103..."
212,34,7,882273,34007,Camden,Camden County,6,H1,G4020,428,37980,15804.0,A,573325497,15688953,39.8024071,-74.9612493,"POLYGON ((-74.90237 39.79104, -74.90207 39.790..."
476,34,39,882235,34039,Union,Union County,6,H1,G4020,408,35620,35084.0,A,266163567,6847407,40.6598707,-74.3086957,"POLYGON ((-74.20067 40.63290, -74.20240 40.631..."
839,34,9,882274,34009,Cape May,Cape May County,6,H1,G4020,428,36140,,A,651437935,955149083,39.0858411,-74.8463541,"POLYGON ((-74.54044 39.29945, -74.54012 39.299..."
914,34,5,882272,34005,Burlington,Burlington County,6,H1,G4020,428,37980,15804.0,A,2070137466,54133685,39.8757858,-74.6630063,"POLYGON ((-74.47024 39.92253, -74.47010 39.922..."


In [93]:
NJ_tracts = pygris.tracts(
    state="NJ", year=2021
)

Using FIPS code '34' for input 'NJ'


In [94]:
NJ_block_groups = pygris.block_groups(
    state="NJ", year=2021
)

Using FIPS code '34' for input 'NJ'


In [113]:
claims_NJ = pd.read_csv("data/claims_NJ.csv")

claims_NJ['countyCode'] = claims_NJ['countyCode'].astype(str).str.rstrip('.0')
claims_NJ['censusTract'] = claims_NJ['censusTract'].astype(str).str.rstrip('.0')
claims_NJ['censusBlockGroupFips'] = claims_NJ['censusBlockGroupFips'].astype(str).str.rstrip('.0')

claims_NJ['observation'] = 1


In [114]:
agg_functions = {
    'amountPaidOnBuildingClaim': ['sum', 'mean'], 
    'amountPaidOnContentsClaim': ['sum', 'mean'], 
    'observation': 'sum',
}

claims_by_county = claims_NJ.groupby(["countyCode", "yearOfLoss"]).agg(agg_functions).reset_index()

claims_by_county.columns = ['countyCode', 'yearOfLoss', 'sum_amountPaidOnBuildingClaim', 'mean_amountPaidOnBuildingClaim', 'sum_amountPaidOnContentsClaim', 'mean_amountPaidOnContentsClaim', 'observations']

claims_by_county.head()


Unnamed: 0,countyCode,yearOfLoss,sum_amountPaidOnBuildingClaim,mean_amountPaidOnBuildingClaim,sum_amountPaidOnContentsClaim,mean_amountPaidOnContentsClaim,observations
0,34001,1995,297625.8,14172.657,101918.72,4853.272,38
1,34001,1996,665885.08,10740.082,251316.48,4053.492,109
2,34001,1997,1306108.12,10203.97,401252.81,3134.788,171
3,34001,1998,1894420.24,6839.062,449247.51,1621.832,399
4,34001,1999,214271.23,9316.14,21865.93,950.693,39


In [115]:
NJ_counties_join = claims_by_county.merge(
    NJ_counties,
    left_on=["countyCode"],
    right_on=["GEOID"]
)

NJ_counties_join.head()

Unnamed: 0,countyCode,yearOfLoss,sum_amountPaidOnBuildingClaim,mean_amountPaidOnBuildingClaim,sum_amountPaidOnContentsClaim,mean_amountPaidOnContentsClaim,observations,STATEFP,COUNTYFP,COUNTYNS,GEOID,NAME,NAMELSAD,LSAD,CLASSFP,MTFCC,CSAFP,CBSAFP,METDIVFP,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,34001,1995,297625.8,14172.657,101918.72,4853.272,38,34,1,882270,34001,Atlantic,Atlantic County,6,H1,G4020,428,12100,,A,1438775279,301270067,39.4693555,-74.6337591,"POLYGON ((-74.58640 39.30989, -74.58665 39.309..."
1,34001,1996,665885.08,10740.082,251316.48,4053.492,109,34,1,882270,34001,Atlantic,Atlantic County,6,H1,G4020,428,12100,,A,1438775279,301270067,39.4693555,-74.6337591,"POLYGON ((-74.58640 39.30989, -74.58665 39.309..."
2,34001,1997,1306108.12,10203.97,401252.81,3134.788,171,34,1,882270,34001,Atlantic,Atlantic County,6,H1,G4020,428,12100,,A,1438775279,301270067,39.4693555,-74.6337591,"POLYGON ((-74.58640 39.30989, -74.58665 39.309..."
3,34001,1998,1894420.24,6839.062,449247.51,1621.832,399,34,1,882270,34001,Atlantic,Atlantic County,6,H1,G4020,428,12100,,A,1438775279,301270067,39.4693555,-74.6337591,"POLYGON ((-74.58640 39.30989, -74.58665 39.309..."
4,34001,1999,214271.23,9316.14,21865.93,950.693,39,34,1,882270,34001,Atlantic,Atlantic County,6,H1,G4020,428,12100,,A,1438775279,301270067,39.4693555,-74.6337591,"POLYGON ((-74.58640 39.30989, -74.58665 39.309..."
