# Prepare clusters for web visualization

In [4]:
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [13]:
# Input GeoJSON; judging by the file name it already carries the cluster
# labels (including the DBSCAN ones) — TODO confirm provenance.
path_data = "./data/data_with_labels_and_DBScan_22clusters.geojson"

# Load all features into a GeoDataFrame.
data = gpd.read_file(path_data)

In [14]:
# Sanity check of what was loaded: container type, coordinate reference system and column names.
type(data), data.crs, data.columns

(geopandas.geodataframe.GeoDataFrame,
 {'init': 'epsg:6933'},
 Index(['string_id', 'bws_raw', 'bwd_raw', 'iav_raw', 'sev_raw', 'gtd_raw',
        'rfr_raw', 'cfr_raw', 'drr_raw', 'ucw_raw', 'udw_raw', 'usa_raw',
        'mean_infant_mort', 'mean_light', 'pop_density', 'PR_labels',
        'deplete_labels', 'floodrought_labels', 'DBScan_labels', 'geometry'],
       dtype='object'))

In [15]:
# Swap the terse indicator codes for human-readable labels and expose the
# DBSCAN label column under the shorter name 'Cluster'.
data = data.rename(columns={
    'bws_raw': 'Baseline Water Stress',
    'bwd_raw': 'Baseline Water Withdrawals',
    'iav_raw': 'Interannual Variation',
    'sev_raw': 'Seasonal Variation',
    'gtd_raw': 'Groundwater Table Decline',
    'rfr_raw': 'Riverine Flood Risk',
    'cfr_raw': 'Coastal Flood Risk',
    'drr_raw': 'Drought Risk',
    'ucw_raw': 'Untreated Connected Wastewater',
    'udw_raw': 'Unimproved Drinking Water',
    'usa_raw': 'Unimproved Sanitation',
    'mean_infant_mort': 'Infant Mortality Rate',
    'mean_light': 'Nighttime Light',
    'pop_density': 'Population Density',
    'DBScan_labels': 'Cluster',
})

# Discard every feature whose geometry fails the validity check.
data = data[data['geometry'].is_valid]

In [16]:
# Per-cluster summary table for the dashboard: the mean of every indicator,
# the number of features in each cluster ('Count') and the rank of that count
# ('Rank', ascending, ties averaged — the pandas default).

# Number of features per cluster. The column is still called 'int' so that
# `group_count` keeps the same layout (Cluster, int, Rank) as before.
group_count = data.groupby('Cluster').size().rename('int').reset_index()
group_count["Rank"] = group_count["int"].rank()

# Average the indicator columns per cluster.
# 'geometry' is dropped explicitly and the result is turned into a plain
# DataFrame: the previous version relied on groupby().mean() silently
# discarding the non-numeric geometry column, which newer pandas versions
# refuse to do (they raise instead).
# Count/Rank are joined on the 'Cluster' key rather than by row position, so
# the result stays correct even if the two group-bys ever produce a
# different row order.
data_grouped = (
    pd.DataFrame(
        data.drop(columns=['string_id', 'PR_labels', 'deplete_labels',
                           'floodrought_labels', 'geometry'])
    )
    .groupby('Cluster')
    .mean()
    .reset_index()
    .merge(group_count.rename(columns={'int': 'Count'}), on='Cluster', how='left')
)

In [19]:
# Confirm the summary is a plain DataFrame and list the columns that will be exported.
type(data_grouped), data_grouped.columns

(pandas.core.frame.DataFrame,
 Index(['Cluster', 'Baseline Water Stress', 'Baseline Water Withdrawals',
        'Interannual Variation', 'Seasonal Variation',
        'Groundwater Table Decline', 'Riverine Flood Risk',
        'Coastal Flood Risk', 'Drought Risk', 'Untreated Connected Wastewater',
        'Unimproved Drinking Water', 'Unimproved Sanitation',
        'Infant Mortality Rate', 'Nighttime Light', 'Population Density',
        'Count', 'Rank'],
       dtype='object'))

In [20]:
# Export the per-cluster summary for the web app. The row index is written as
# an unnamed first column (pandas default, index=True).
# NOTE(review): confirm the dashboard expects that extra index column.
data_grouped.to_csv("./app_data/dashboard_data_grouped.csv")

# Dissolved Polygons

In [86]:
# Start the polygon-dissolve pipeline from the features with valid geometry.
# .copy() makes this an independent frame: adding a column to a bare .loc
# slice of `data` triggers pandas' SettingWithCopyWarning and leaves it
# ambiguous whether `data` itself is modified.
data_dissolved = data.loc[data['geometry'].is_valid].copy()

# Duplicate the cluster id into a regular column so that it is still available
# as an attribute after the dissolve step groups by 'Cluster'.
data_dissolved['Cluster_label'] = data_dissolved['Cluster']

In [87]:
# Merge all features of a cluster into a single geometry and average the
# remaining attribute columns.
# 'string_id' is removed first: it is presumably a text identifier that cannot
# be averaged. Older pandas dropped it silently (it is absent from the
# 19-column result recorded for this notebook); newer versions raise instead.
# errors='ignore' keeps the cell working if the column is already gone.
data_dissolved = (
    data_dissolved
    .drop(columns=['string_id'], errors='ignore')
    .dissolve(by='Cluster', aggfunc='mean')
)
type(data_dissolved)

geopandas.geodataframe.GeoDataFrame

In [88]:
# data_dissolved1.head()

In [89]:
# Reduce vertex count of the dissolved shapes. The tolerance is expressed in
# the units of the current CRS; preserve_topology=True asks for a
# simplification that does not produce invalid geometries.
data_dissolved['geometry'] = data_dissolved['geometry'].simplify(
    tolerance=10000,
    preserve_topology=True,
)

In [90]:
# Rows = dissolved clusters, columns = averaged attributes plus geometry.
data_dissolved.shape

(23, 19)

In [91]:
# Reproject to EPSG:3857 (Web Mercator) for the web map.
# The `epsg=` keyword replaces the {'init': 'epsg:3857'} dictionary, which is
# the deprecated PROJ "init" syntax and emits warnings with current pyproj.
# The keyword form is accepted by both old and new geopandas releases.
data_dissolved = data_dissolved.to_crs(epsg=3857)

In [92]:
# Zero-width buffer: rebuilds each geometry without changing its extent, a
# common way to repair self-intersections left by simplification/reprojection.
data_dissolved['geometry'] = data_dissolved['geometry'].buffer(0)

In [93]:
# data_dissolved2

In [94]:
# Write the dissolved cluster polygons for the web app (to_file's default
# driver is ESRI Shapefile).
# NOTE(review): Shapefile field names are limited to 10 characters, so the long
# column labels (e.g. 'Baseline Water Stress' / 'Baseline Water Withdrawals')
# will be truncated and may collide — confirm the names the app actually reads.
data_dissolved.to_file("./app_data/clusters.shp")