# Delhi Colonies Public Services Index (updated 29 Aug 2021)

## Compute the following indices:
* Index with bounding box neighbors [effective service count divided by population]
* Index with bounding box neighbors [effective service count divided by population/area]

### How to compute indices
* Load in colonies dataset (bounding box only) from Pickle **[done]**
* Merge New Population Estimates **[done]**
* Remove Rural Villages **[done]**
* Import services shapefiles **[done]**
    * Make sure correct file paths exist
    * Ensure that all shapefiles are valid using `check_shapefile` function
    * Reproject shapefiles to EPSG 7760 (if needed)
* Compute all Services indices (turn into a function) **[done]**
    * bbox neighbors, Population Size
    * bbox neighbors, Population Density

## Import modules and set constants

In [1]:
import os
import pickle
from importlib import reload
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, box
import spatial_index_utils

In [2]:
reload(spatial_index_utils)

<module 'spatial_index_utils' from 'C:\\Users\\bwbel\\Google Drive\\slum_project\\spatial_index_python\\spatial_index_utils.py'>

In [3]:
# WGS 84 / Delhi
epsg_code = 7760

## Import Colonies Dataset (bounding box only)

In [4]:
with open('colonies_bbox_nbrs29Aug2021.pkl', 'rb') as f:
    colonies_bbox_nbrs = pickle.load(f)
    
colonies_bbox_nbrs.head()

Unnamed: 0,AREA,USO_AREA_U,HOUSETAX_C,USO_FINAL,geometry,area_km2,canal,railway,drain,barrier,centroid,ndmc_dist_km,nbrs_bbox,nbrs_dist_bbox,index
0,NEW DELHI 36,5584,,Planned,"POLYGON Z ((1020282.788 996796.773 0.000, 1020...",1.966739,False,True,True,True,POINT (1020123.175 995898.851),5.159809,"[5598, 5599, 5602, 5603, 3508, 3776, 4011, 349...","[(5598, 1.074790368771482), (5599, 1.015506410...",0
1,NEW DELHI 35,5585,,Planned,"POLYGON Z ((1019724.475 994932.797 0.000, 1019...",0.036429,False,False,False,False,POINT (1019673.024 994869.699),6.273149,"[5594, 4336, 2679, 1256, 4373, 5585, 1697, 180...","[(5594, 0.6299162683011635), (4336, 14.7374799...",1
2,NEW DELHI 34,5586,,Planned,"POLYGON Z ((1019571.955 994876.019 0.000, 1019...",0.230739,False,False,True,True,POINT (1019485.484 994565.783),6.618792,"[5594, 5587, 5585, 5596]","[(5594, 0.4532645992822079), (5587, 0.31380549...",2
3,NEW DELHI 33,5587,,Planned,"POLYGON Z ((1019352.702 994352.546 0.000, 1019...",0.281195,False,False,False,False,POINT (1019171.868 994576.688),6.709542,"[5596, 5587]","[(5596, 0.5564745103109905), (5587, 0.0)]",3
4,NEW DELHI 32,5588,,Planned,"POLYGON Z ((1018793.292 994224.182 0.000, 1018...",0.301253,False,False,True,True,POINT (1018785.675 994590.275),6.839299,"[5621, 5587, 5620, 5596]","[(5621, 0.5779093230804585), (5587, 0.38643264...",4


In [5]:
len(colonies_bbox_nbrs)

4357

## Update/Merge Population Estimates

In [6]:
popfile_2020 = os.path.join("population_data", "pop_colony_wp_2020.csv")
popfile_2019 = os.path.join("population_data", "pop_colony_wp_2019.csv")
for popfile in [popfile_2020, popfile_2019]:
    print(f"{popfile} exists: {os.path.exists(popfile)}")

population_data\pop_colony_wp_2020.csv exists: True
population_data\pop_colony_wp_2019.csv exists: True


In [7]:
# Import updated population data (2019 and 2020)
updated_pop = pd.read_csv(popfile_2020)
updated_pop.head()

Unnamed: 0.1,Unnamed: 0,ID,layer,area,uso_area_u,housetax_c,uso_final
0,1,1,3570.060984,NEW DELHI 36,5584,,Planned
1,2,2,320.568629,NEW DELHI 35,5585,,Planned
2,3,3,2215.206473,NEW DELHI 34,5586,,Planned
3,4,4,3956.166944,NEW DELHI 33,5587,,Planned
4,5,5,3961.943378,NEW DELHI 32,5588,,Planned


In [8]:
# rename population column to make distinct in upcoming merge
updated_pop = updated_pop.rename(columns={"layer":"population_new"})
updated_pop.head()

Unnamed: 0.1,Unnamed: 0,ID,population_new,area,uso_area_u,housetax_c,uso_final
0,1,1,3570.060984,NEW DELHI 36,5584,,Planned
1,2,2,320.568629,NEW DELHI 35,5585,,Planned
2,3,3,2215.206473,NEW DELHI 34,5586,,Planned
3,4,4,3956.166944,NEW DELHI 33,5587,,Planned
4,5,5,3961.943378,NEW DELHI 32,5588,,Planned


In [9]:
# Restrict dataframe to only two columns:
# layer: population data
# uso_area_u: unique id for colonies
updated_pop = updated_pop[['population_new', 'uso_area_u']]
updated_pop.head()

Unnamed: 0,population_new,uso_area_u
0,3570.060984,5584
1,320.568629,5585
2,2215.206473,5586
3,3956.166944,5587
4,3961.943378,5588


In [10]:
# Left merge updated population data with colonies data
colonies_bbox_nbrs = colonies_bbox_nbrs.merge(updated_pop, how='left', 
                          left_on="USO_AREA_U", right_on='uso_area_u')
colonies_bbox_nbrs.head()

Unnamed: 0,AREA,USO_AREA_U,HOUSETAX_C,USO_FINAL,geometry,area_km2,canal,railway,drain,barrier,centroid,ndmc_dist_km,nbrs_bbox,nbrs_dist_bbox,index,population_new,uso_area_u
0,NEW DELHI 36,5584,,Planned,"POLYGON Z ((1020282.788 996796.773 0.000, 1020...",1.966739,False,True,True,True,POINT (1020123.175 995898.851),5.159809,"[5598, 5599, 5602, 5603, 3508, 3776, 4011, 349...","[(5598, 1.074790368771482), (5599, 1.015506410...",0,3570.060984,5584
1,NEW DELHI 36,5584,,Planned,"POLYGON Z ((1020282.788 996796.773 0.000, 1020...",1.966739,False,True,True,True,POINT (1020123.175 995898.851),5.159809,"[5598, 5599, 5602, 5603, 3508, 3776, 4011, 349...","[(5598, 1.074790368771482), (5599, 1.015506410...",0,3640.877478,5584
2,NEW DELHI 35,5585,,Planned,"POLYGON Z ((1019724.475 994932.797 0.000, 1019...",0.036429,False,False,False,False,POINT (1019673.024 994869.699),6.273149,"[5594, 4336, 2679, 1256, 4373, 5585, 1697, 180...","[(5594, 0.6299162683011635), (4336, 14.7374799...",1,320.568629,5585
3,NEW DELHI 35,5585,,Planned,"POLYGON Z ((1019724.475 994932.797 0.000, 1019...",0.036429,False,False,False,False,POINT (1019673.024 994869.699),6.273149,"[5594, 4336, 2679, 1256, 4373, 5585, 1697, 180...","[(5594, 0.6299162683011635), (4336, 14.7374799...",1,2613.571587,5585
4,NEW DELHI 34,5586,,Planned,"POLYGON Z ((1019571.955 994876.019 0.000, 1019...",0.230739,False,False,True,True,POINT (1019485.484 994565.783),6.618792,"[5594, 5587, 5585, 5596]","[(5594, 0.4532645992822079), (5587, 0.31380549...",2,2215.206473,5586


In [11]:
# Identify and save colonies with missing data for updated population estimates
#colonies_bbox_nbrs[colonies_bbox_nbrs['population_new'].isna()]#.to_csv("missing_colonies.csv")

In [12]:
# Create new column that replaces colonies with missing updated population estimates with older estimates
#colonies_bbox_nbrs['population_updated'] = colonies_bbox_nbrs['population_new'].fillna(colonies_bbox_nbrs['population'])

In [13]:
#colonies_bbox_nbrs.head()

In [14]:
[col for col in colonies_bbox_nbrs if "pop" in col]

['population_new']

In [15]:
# Remove extraneous columns
colonies_bbox_nbrs = colonies_bbox_nbrs.drop(columns=['uso_area_u'])

# Rename 'population_new' column as 'population'
colonies_bbox_nbrs = colonies_bbox_nbrs.rename(columns={'population_new': 'population'})

colonies_bbox_nbrs.head()

Unnamed: 0,AREA,USO_AREA_U,HOUSETAX_C,USO_FINAL,geometry,area_km2,canal,railway,drain,barrier,centroid,ndmc_dist_km,nbrs_bbox,nbrs_dist_bbox,index,population
0,NEW DELHI 36,5584,,Planned,"POLYGON Z ((1020282.788 996796.773 0.000, 1020...",1.966739,False,True,True,True,POINT (1020123.175 995898.851),5.159809,"[5598, 5599, 5602, 5603, 3508, 3776, 4011, 349...","[(5598, 1.074790368771482), (5599, 1.015506410...",0,3570.060984
1,NEW DELHI 36,5584,,Planned,"POLYGON Z ((1020282.788 996796.773 0.000, 1020...",1.966739,False,True,True,True,POINT (1020123.175 995898.851),5.159809,"[5598, 5599, 5602, 5603, 3508, 3776, 4011, 349...","[(5598, 1.074790368771482), (5599, 1.015506410...",0,3640.877478
2,NEW DELHI 35,5585,,Planned,"POLYGON Z ((1019724.475 994932.797 0.000, 1019...",0.036429,False,False,False,False,POINT (1019673.024 994869.699),6.273149,"[5594, 4336, 2679, 1256, 4373, 5585, 1697, 180...","[(5594, 0.6299162683011635), (4336, 14.7374799...",1,320.568629
3,NEW DELHI 35,5585,,Planned,"POLYGON Z ((1019724.475 994932.797 0.000, 1019...",0.036429,False,False,False,False,POINT (1019673.024 994869.699),6.273149,"[5594, 4336, 2679, 1256, 4373, 5585, 1697, 180...","[(5594, 0.6299162683011635), (4336, 14.7374799...",1,2613.571587
4,NEW DELHI 34,5586,,Planned,"POLYGON Z ((1019571.955 994876.019 0.000, 1019...",0.230739,False,False,True,True,POINT (1019485.484 994565.783),6.618792,"[5594, 5587, 5585, 5596]","[(5594, 0.4532645992822079), (5587, 0.31380549...",2,2215.206473


In [16]:
# Check that we have same number of colonies
len(colonies_bbox_nbrs)

4367

In [17]:
# Confirm no population estimates are missing
sum(colonies_bbox_nbrs['population'].isna())

0

## Remove "Rural Villages"

In [18]:
colonies_bbox_nbrs = colonies_bbox_nbrs[colonies_bbox_nbrs['USO_FINAL'] != 'RV']

In [19]:
# Check that number of colonies
len(colonies_bbox_nbrs)

4156

## Import services shapefiles

In [20]:
# Define filepaths

services_dir = os.path.join('shapefiles', 'Spatial_Index_GIS', 'Public Services')

bank_fp = os.path.join(services_dir, 'Banking', 'Banking.shp')
health_fp = os.path.join(services_dir, 'Health', 'Health.shp')
road_fp = os.path.join(services_dir, 'Major Road', 'Road.shp')
police_fp = os.path.join(services_dir, 'Police', 'Police Station.shp')
ration_fp = os.path.join(services_dir, 'Ration', 'Ration.shp')
school_fp = os.path.join(services_dir, 'School', 'schools7760.shp')
transport_fp = os.path.join(services_dir, 'Transport', 'Transport.shp')

# boundary of Delhi
delhi_bounds_filepath = os.path.join('shapefiles', 'delhi_bounds_buffer.shp')

# Check that all filepaths exist
filepath_list = [bank_fp, health_fp, road_fp, police_fp, ration_fp, school_fp, transport_fp, delhi_bounds_filepath]

for filepath in filepath_list:
    if not os.path.exists(filepath):
        print('{} does not exist'.format(filepath))

In [21]:
# Import services
bank = gpd.read_file(bank_fp)
health = gpd.read_file(health_fp)
road = gpd.read_file(road_fp)
police = gpd.read_file(police_fp)
ration = gpd.read_file(ration_fp)
school = gpd.read_file(school_fp)
transport = gpd.read_file(transport_fp)

## Check validity of services shapefiles
* Duplicate rows are okay for ATMs (I assume that ATM locations for the same bank in a similar location will seem to be counted twice)
* Look specifically for invalid geometries and whether shapefile is fully contained within Delhi

In [22]:
spatial_index_utils.check_shapefile(gdf=bank, gdf_name='bank', 
                                    geom_type='Point', 
                                    delhi_bounds_filepath=delhi_bounds_filepath)

bank has duplicate rows: True
----------------------------------------------------
rows with invalid geometries 

----------------------------------------------------
all geometries in bank are of type Point: True
----------------------------------------------------
Rows with None value in geometry column are below
Empty GeoDataFrame
Columns: [bank_name, Latitude, Longitude, Type, geometry, geom_type]
Index: []
----------------------------------------------------
bank shapefile is contained within Delhi: True
----------------------------------------------------
Done with shapefile evaluation


In [23]:
spatial_index_utils.check_shapefile(gdf=health, gdf_name='health', 
                                    geom_type='Point', 
                                    delhi_bounds_filepath=delhi_bounds_filepath)

health has duplicate rows: False
----------------------------------------------------
rows with invalid geometries 

----------------------------------------------------
all geometries in health are of type Point: True
----------------------------------------------------
Rows with None value in geometry column are below
Empty GeoDataFrame
Columns: [Hospital_C, ADDRESS, X, Y, geometry, geom_type]
Index: []
----------------------------------------------------
health shapefile is contained within Delhi: True
----------------------------------------------------
Done with shapefile evaluation


In [24]:
spatial_index_utils.check_shapefile(gdf=road, gdf_name='road', 
                                    geom_type='Line', 
                                    delhi_bounds_filepath=delhi_bounds_filepath)

road has duplicate rows: False
----------------------------------------------------
rows with invalid geometries 

----------------------------------------------------
all geometries in road are of type Line: True
----------------------------------------------------
Rows with None value in geometry column are below
Empty GeoDataFrame
Columns: [FID, RD_NM, RD_CLS, RD_LANES, RD_TP_SRF, RD_MB, RD_ONEWAY, EL_GND, DIST_NM, ONEWAY, Speed_kmph, geometry, geom_type]
Index: []
----------------------------------------------------
road shapefile is contained within Delhi: True
----------------------------------------------------
Done with shapefile evaluation


In [25]:
spatial_index_utils.check_shapefile(gdf=police, gdf_name='police', 
                                    geom_type='Point', 
                                    delhi_bounds_filepath=delhi_bounds_filepath)

police has duplicate rows: False
----------------------------------------------------
rows with invalid geometries 

----------------------------------------------------
all geometries in police are of type Point: True
----------------------------------------------------
Rows with None value in geometry column are below
Empty GeoDataFrame
Columns: [NAME, POLICE_STA, DISTRICT, x, y, geometry, geom_type]
Index: []
----------------------------------------------------
police shapefile is contained within Delhi: True
----------------------------------------------------
Done with shapefile evaluation


In [26]:
spatial_index_utils.check_shapefile(gdf=ration, gdf_name='ration', 
                                    geom_type='Point', 
                                    delhi_bounds_filepath=delhi_bounds_filepath)

ration has duplicate rows: False
----------------------------------------------------
rows with invalid geometries 

----------------------------------------------------
all geometries in ration are of type Point: True
----------------------------------------------------
Rows with None value in geometry column are below
Empty GeoDataFrame
Columns: [S No., License No, FPS ID, Circle, FPS Shop N, Address Of, Latitude, Longitude, Source, geometry, geom_type]
Index: []
----------------------------------------------------
ration shapefile is contained within Delhi: True
----------------------------------------------------
Done with shapefile evaluation


In [27]:
spatial_index_utils.check_shapefile(gdf=school, gdf_name='school', 
                                    geom_type='Point', 
                                    delhi_bounds_filepath=delhi_bounds_filepath)

school has duplicate rows: False
----------------------------------------------------
rows with invalid geometries 

----------------------------------------------------
all geometries in school are of type Point: True
----------------------------------------------------
Rows with None value in geometry column are below
Empty GeoDataFrame
Columns: [objectid_1, objectid, vilname, schname, schcd, schcat, school_cat, pincode, rururb, location, schtype, school_typ, schmgt, management, dtname, stname, stcode11, dtcode11, sdtcode11, sdtname, geometry, geom_type]
Index: []

[0 rows x 22 columns]
----------------------------------------------------
school shapefile is contained within Delhi: True
----------------------------------------------------
Done with shapefile evaluation


In [28]:
spatial_index_utils.check_shapefile(gdf=transport, gdf_name='transport', 
                                    geom_type='Point', 
                                    delhi_bounds_filepath=delhi_bounds_filepath)

transport has duplicate rows: False
----------------------------------------------------
rows with invalid geometries 

----------------------------------------------------
all geometries in transport are of type Point: True
----------------------------------------------------
Rows with None value in geometry column are below
Empty GeoDataFrame
Columns: [stop_id, stop_name, stop_lat, stop_lon, Type, geometry, geom_type]
Index: []
----------------------------------------------------
transport shapefile is contained within Delhi: True
----------------------------------------------------
Done with shapefile evaluation


## Check CRS (all shapefiles should be in EPSG: 7760)

In [29]:
bank.crs == health.crs == road.crs == police.crs == ration.crs == school.crs == transport.crs

True

In [30]:
bank.crs

<Projected CRS: EPSG:7760>
Name: WGS 84 / Delhi
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: India - Delhi national capital territory.
- bounds: (76.83, 28.4, 77.34, 28.89)
Coordinate Operation:
- name: Delhi NSF LCC
- method: Lambert Conic Conformal (2SP)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [31]:
colonies_bbox_nbrs.crs == bank.crs

True

## Define Point and Line Services

In [32]:
# Define all point services as dictionary
# makes it easier to calculate all point
# services with one function
point_services = {'bank': bank,
                  'health': health,
                  'police': police,
                  'ration': ration,
                  'school': school,
                  'transport': transport}

line_services = {'road': road}

## Calculate all service indices in one function

In [33]:
from spatial_index_utils import calc_all_services

### Calculate PSI for bbox neighbors using Population Size

In [34]:
colonies_bbox_psi_popsize = calc_all_services(polygon_gdf = colonies_bbox_nbrs, 
                                       point_services = point_services, 
                                       line_services = line_services, 
                                       epsg_code = epsg_code, 
                                       pcen_denom = "pop",
                                       nbr_dist_colname = 'nbrs_dist_bbox')

colonies_bbox_psi_popsize = colonies_bbox_psi_popsize.rename(columns={'road_count':'road_length'})

GeoDataFrame now has the following CRS:

epsg:7760
bank service index is completed
--------------------------------------------------------
GeoDataFrame now has the following CRS:

epsg:7760
health service index is completed
--------------------------------------------------------
GeoDataFrame now has the following CRS:

epsg:7760
police service index is completed
--------------------------------------------------------
GeoDataFrame now has the following CRS:

epsg:7760
ration service index is completed
--------------------------------------------------------
GeoDataFrame now has the following CRS:

epsg:7760
school service index is completed
--------------------------------------------------------
GeoDataFrame now has the following CRS:

epsg:7760
transport service index is completed
--------------------------------------------------------
all point services completed
GeoDataFrame now has the following CRS:

epsg:7760
road service is completed


In [35]:
colonies_bbox_psi_popsize.to_csv('./psi_2020_results/delhi_psi_bbox_popsize2020_norv_29Aug2021.csv')
with open('./psi_2020_results/colonies_bbox_psi_popsize2020_norv_29Aug2021.pkl', 'wb') as f:
    pickle.dump(colonies_bbox_psi_popsize, f)

colonies_bbox_psi_popsize.head()

Unnamed: 0,AREA,USO_AREA_U,HOUSETAX_C,USO_FINAL,geometry,area_km2,canal,railway,drain,barrier,...,school_pcen,school_idx,transport_count,transport_pcen,transport_idx,road_length,road_pcen,road_idx,unnorm_psi,norm_psi
0,NEW DELHI 36,5584,,Planned,"POLYGON Z ((1020282.788 996796.773 0.000, 1020...",1.966739,False,True,True,True,...,0.00464,0.018139,8,0.008235,0.004956,3.903855,0.003717,0.005086,0.009798,0.017123
1,NEW DELHI 36,5584,,Planned,"POLYGON Z ((1020282.788 996796.773 0.000, 1020...",1.966739,False,True,True,True,...,0.00455,0.017786,8,0.008074,0.00486,0.0,0.002573,0.00352,0.009398,0.016423
2,NEW DELHI 35,5585,,Planned,"POLYGON Z ((1019724.475 994932.797 0.000, 1019...",0.036429,False,False,False,False,...,0.025344,0.099066,0,0.001914,0.001152,0.0,0.001126,0.001541,0.046083,0.080534
3,NEW DELHI 35,5585,,Planned,"POLYGON Z ((1019724.475 994932.797 0.000, 1019...",0.036429,False,False,False,False,...,0.003109,0.012151,0,0.000235,0.000141,0.0,0.000138,0.000189,0.005652,0.009878
4,NEW DELHI 34,5586,,Planned,"POLYGON Z ((1019571.955 994876.019 0.000, 1019...",0.230739,False,False,True,True,...,0.009387,0.036693,18,0.009512,0.005725,0.0,0.000681,0.000932,0.015968,0.027905


### Calculate PSI for bbox neighbors using Population Density

In [36]:
colonies_bbox_psi_popdensity = calc_all_services(polygon_gdf = colonies_bbox_nbrs, 
                                       point_services = point_services, 
                                       line_services = line_services, 
                                       epsg_code = epsg_code, 
                                       pcen_denom = "popdensity",
                                       nbr_dist_colname = 'nbrs_dist_bbox')

colonies_bbox_psi_popdensity = colonies_bbox_psi_popdensity.rename(columns={'road_count':'road_length'})

# Provisional Save
#colonies_bbox_psi_popdensity.to_file('./psi_2019_results/delhi_psi_bbox_popdensity2019_norv_29Aug2021.shp')
colonies_bbox_psi_popdensity.to_csv('./psi_2020_results/delhi_psi_bbox_popdensity2020_norv_29Aug2021.csv')
with open('./psi_2020_results/colonies_bbox_psi_popdensity2020_norv_29Aug2021.pkl', 'wb') as f:
    pickle.dump(colonies_bbox_psi_popdensity, f)
    
colonies_bbox_psi_popdensity.head()

GeoDataFrame now has the following CRS:

epsg:7760
bank service index is completed
--------------------------------------------------------
GeoDataFrame now has the following CRS:

epsg:7760
health service index is completed
--------------------------------------------------------
GeoDataFrame now has the following CRS:

epsg:7760
police service index is completed
--------------------------------------------------------
GeoDataFrame now has the following CRS:

epsg:7760
ration service index is completed
--------------------------------------------------------
GeoDataFrame now has the following CRS:

epsg:7760
school service index is completed
--------------------------------------------------------
GeoDataFrame now has the following CRS:

epsg:7760
transport service index is completed
--------------------------------------------------------
all point services completed
GeoDataFrame now has the following CRS:

epsg:7760
road service is completed


Unnamed: 0,AREA,USO_AREA_U,HOUSETAX_C,USO_FINAL,geometry,area_km2,canal,railway,drain,barrier,...,school_pcen,school_idx,transport_count,transport_pcen,transport_idx,road_length,road_pcen,road_idx,unnorm_psi,norm_psi
0,NEW DELHI 36,5584,,Planned,"POLYGON Z ((1020282.788 996796.773 0.000, 1020...",1.966739,False,True,True,True,...,0.009126,0.499906,8,0.016195,0.496241,3.903855,0.007311,0.690912,0.612867,1.0
1,NEW DELHI 36,5584,,Planned,"POLYGON Z ((1020282.788 996796.773 0.000, 1020...",1.966739,False,True,True,True,...,0.008949,0.490182,8,0.01588,0.486589,0.0,0.00506,0.478183,0.572476,0.934096
2,NEW DELHI 35,5585,,Planned,"POLYGON Z ((1019724.475 994932.797 0.000, 1019...",0.036429,False,False,False,False,...,0.000923,0.050572,0,7e-05,0.002136,0.0,4.1e-05,0.003877,0.030854,0.050343
3,NEW DELHI 35,5585,,Planned,"POLYGON Z ((1019724.475 994932.797 0.000, 1019...",0.036429,False,False,False,False,...,0.000113,0.006203,0,9e-06,0.000262,0.0,5e-06,0.000475,0.003784,0.006175
4,NEW DELHI 34,5586,,Planned,"POLYGON Z ((1019571.955 994876.019 0.000, 1019...",0.230739,False,False,True,True,...,0.002166,0.118642,18,0.002195,0.067247,0.0,0.000157,0.014846,0.123561,0.201612


## Inspecting PSI results

In [37]:
count_cols = [colname for colname in colonies_bbox_psi_popdensity.columns if colname.endswith('_count')]
for count_col in count_cols:
    print('There are', len(colonies_bbox_psi_popdensity[colonies_bbox_psi_popdensity[count_col] < 0]), 'negative values in', count_col, 'column')

There are 0 negative values in bank_count column
There are 0 negative values in health_count column
There are 0 negative values in police_count column
There are 0 negative values in ration_count column
There are 0 negative values in school_count column
There are 0 negative values in transport_count column


In [38]:
pcen_cols = [colname for colname in colonies_bbox_psi_popdensity.columns if colname.endswith('_pcen')]
for pcen_col in pcen_cols:
    print('There are', len(colonies_bbox_psi_popdensity[colonies_bbox_psi_popdensity[pcen_col] < 0]), 'negative values in', pcen_col, 'column')

There are 0 negative values in bank_pcen column
There are 0 negative values in health_pcen column
There are 0 negative values in police_pcen column
There are 0 negative values in ration_pcen column
There are 0 negative values in school_pcen column
There are 0 negative values in transport_pcen column
There are 0 negative values in road_pcen column


In [39]:
idx_cols = [colname for colname in colonies_bbox_psi_popdensity.columns if colname.endswith('_idx')]
for idx_col in idx_cols:
    print('There are', len(colonies_bbox_psi_popdensity[colonies_bbox_psi_popdensity[idx_col] < 0]), 'negative values in', idx_col, 'column')

There are 0 negative values in bank_idx column
There are 0 negative values in health_idx column
There are 0 negative values in police_idx column
There are 0 negative values in ration_idx column
There are 0 negative values in school_idx column
There are 0 negative values in transport_idx column
There are 0 negative values in road_idx column


In [40]:
colonies_bbox_psi_popdensity['bank_idx'].describe()

count    4156.000000
mean        0.014509
std         0.051665
min         0.000000
25%         0.000309
50%         0.002748
75%         0.011349
max         1.000000
Name: bank_idx, dtype: float64

In [41]:
colonies_bbox_psi_popdensity['unnorm_psi'].describe()

count    4156.000000
mean        0.014445
std         0.037375
min         0.000000
25%         0.000815
50%         0.004628
75%         0.014989
max         0.612867
Name: unnorm_psi, dtype: float64

In [42]:
colonies_bbox_psi_popdensity['norm_psi'].describe()

count    4156.000000
mean        0.023569
std         0.060983
min         0.000000
25%         0.001330
50%         0.007551
75%         0.024457
max         1.000000
Name: norm_psi, dtype: float64

## Save Files

In [43]:
bbox_drop_columns = ['nbrs_bbox', 'nbrs_dist_bbox', 'centroid']

In [44]:
colonies_bbox_psi_popsize.drop(columns=bbox_drop_columns).to_file('./psi_2020_results/delhi_psi_bbox_popsize2020_norv_29Aug2021.shp')
colonies_bbox_psi_popdensity.drop(columns=bbox_drop_columns).to_file('./psi_2020_results/delhi_psi_bbox_popdensity2020_norv_29Aug2021.shp')

  colonies_bbox_psi_popsize.drop(columns=bbox_drop_columns).to_file('./psi_2020_results/delhi_psi_bbox_popsize2020_norv_29Aug2021.shp')
  colonies_bbox_psi_popdensity.drop(columns=bbox_drop_columns).to_file('./psi_2020_results/delhi_psi_bbox_popdensity2020_norv_29Aug2021.shp')


In [45]:
colonies_bbox_psi_popsize.to_csv('./psi_2020_results/delhi_psi_bbox_popsize2020_norv_29Aug2021.csv')
colonies_bbox_psi_popdensity.to_csv('./psi_2020_results/delhi_psi_bbox_popdensity2020_norv_29Aug2021.csv')

In [46]:
with open('./psi_2020_results/colonies_bbox_psi_popsize2020_norv_29Aug2021.pkl', 'wb') as f:
    pickle.dump(colonies_bbox_psi_popsize, f)
    
with open('./psi_2020_results/colonies_bbox_psi_popdensity2020_norv_29Aug2021.pkl', 'wb') as f:
    pickle.dump(colonies_bbox_psi_popdensity, f)