In [1]:
import json
import os
import random
from io import StringIO
from random import choice, uniform

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from shapely.geometry import Point, MultiPolygon

# City key used to build every input/output file name under data/.
city = "banglore"

# Seed the standard-library RNG once, up front: the houses, schools and
# workspaces generated below are placed with random.uniform, so without a
# fixed seed every run of the notebook would produce different locations.
SEED = 42
random.seed(SEED)

## Geospatial data of each ward in Bangalore

In [2]:
# Census attributes the simulator does not need; they are removed from every
# ward feature before the trimmed city map is written out.
unusedProperties = (
    'ASS_CONST_',
    'ASS_CONST1',
    'POP_M',
    'POP_F',
    'POP_SC',
    'POP_ST',
    'POP_TOTAL',
    'AREA_SQ_KM',
    'RESERVATIO',
)

with open("data/base/blr_populationDistribution_by_area.json", "r") as jsonData:
    data = json.load(jsonData)

for feature in data['features']:
    properties = feature['properties']
    # Ward numbers are converted to int so they can be joined on later.
    properties['WARD_NO'] = int(properties['WARD_NO'])
    for key in unusedProperties:
        del properties[key]

# Write the trimmed GeoJSON, then read it back as a GeoDataFrame.
cityMapPath = "data/"+city+"_citymap.geojson"
with open(cityMapPath, "w") as mapData:
    json.dump(data, mapData)

blrDF = gpd.read_file(cityMapPath).rename(columns={"WARD_NO": "Ward No."})

## Find all neighbouring wards

In [3]:
# For every ward, collect the numbers of all wards whose geometry is not
# disjoint from it (i.e. touches or overlaps it), leaving out the ward itself.
neighborStrings = []

for _, wardRow in blrDF.iterrows():
    notDisjoint = ~blrDF.geometry.disjoint(wardRow.geometry)
    adjacentWards = [
        wardNumber
        for wardNumber in blrDF.loc[notDisjoint, 'Ward No.'].tolist()
        if wardNumber != wardRow['Ward No.']
    ]
    # Neighbours are stored as one comma-separated string per ward.
    neighborStrings.append(", ".join(map(str, adjacentWards)))

blrDF["Neighbors"] = neighborStrings

wardNeighbors = blrDF[['Ward No.', 'Neighbors']]

## Demographic data of Bangalore

The data for the city of Bengaluru was collected from the [smartcities.data.gov.in](https://smartcities.data.gov.in/catalogsv2?format=json&offset=0&limit=9&query=bengaluru&sort%5B_score%5D=desc) portal. It dates from the 2011 Census and may therefore no longer be accurate today because of factors such as migration and mortality.

To download the data for other cities you will need to sign up for an account. The data is available in several formats; we use JSON because it is the easiest to work with in the prospective JS simulator.

The data we are interested in is the demographic data, which classifies the population by age group and by area.

In [4]:
with open("data/base/blr_demographics_2011.json", "r") as jsonData:
    data = json.load(jsonData)

# The export stores the column labels ('fields') separately from the rows ('data').
colNames = [field['label'] for field in data['fields']]

# Parse the rows with read_json so its dtype inference is kept. The JSON text
# is wrapped in StringIO because passing a literal JSON string to read_json is
# deprecated in recent pandas releases.
demographics = pd.read_json(StringIO(json.dumps(data['data'])))

# Replace the positional column labels with the field labels, then drop the
# columns that are not used further down.
demographics = demographics.rename(columns=dict(zip(demographics.columns, colNames)))
demographics = demographics.drop(columns=[
    'City Name',
    'Zone Name',
    'Population - Male (in thousands)',
    'Population - female (in thousands)',
    'population - children aged 0-14 (in thousands)',
    'Population - youth aged 15-24 (in thousands)',
    'Population - adults aged 25-60 (in thousands)',
    'Population - Senior citizens aged 60+ (in thousands)',
])

# Population density per ward, computed column-wise instead of row by row.
demographics['Population Density'] = (
    demographics['Total Population (in thousands)'] / demographics['Area (in sq km)']
)

# Bounding box (minx, miny, maxx, maxy) of every ward. `.bounds` exists on
# every shapely geometry, so wards stored as a plain Polygon work as well as
# MultiPolygon ones; no MultiPolygon wrapper is needed.
blrDF['Ward Bound'] = blrDF['geometry'].apply(lambda geom: geom.bounds)

# Attach each ward's bounding box to its demographic record.
wardBounds = blrDF[['Ward No.', 'Ward Bound']]
demographics = pd.merge(demographics, wardBounds, on="Ward No.", how="left")
del wardBounds

## Adding Households based on area

We add the data on the number of households based on the 2011 census data.


The average household size for [Bangalore](http://censusindia.gov.in/2011census/hh-series/HH-1/DDW-HH01-2900-2011.XLS) based on the Census data is $4.0$. To get fine-grained household sizes per ward, we compute the mean household size from the data as $$\text{Mean Household Size} = \frac{\text{Total Population per Ward}}{\text{Total Households per Ward}}$$

The mean of the computed household sizes, $4.078576$, matches the average household size given in the census data.

In [5]:
with open("data/base/blr_numberOfHouseholds_2011.json", "r") as jsonData:
    data = json.load(jsonData)

# The export stores the column labels ('fields') separately from the rows ('data').
colNames = [field['label'] for field in data['fields']]

# Parse the rows with read_json so its dtype inference is kept. The JSON text
# is wrapped in StringIO because passing a literal JSON string to read_json is
# deprecated in recent pandas releases.
households = pd.read_json(StringIO(json.dumps(data['data'])))

# Replace the positional column labels with the field labels and drop the
# descriptive columns that demographics already carries or does not need.
households = households.rename(columns=dict(zip(households.columns, colNames)))
households = households.drop(columns=['City Name', 'Zone Name', 'Ward Name'])

# "Ward No." stays a regular column because the merge below joins on it.
# (An earlier set_index("Ward No.") call here discarded its result and so
# never had any effect; it has been removed.)
demographics = pd.merge(demographics, households, on="Ward No.", how="left")
del households

# Average household size per ward, computed column-wise.
# NOTE(review): the population column is labelled "in thousands", but the
# notebook reports a mean household size of about 4.08 from this ratio, which
# suggests the values are plain head counts -- confirm against the source data.
demographics['Mean Household Size'] = (
    demographics['Total Population (in thousands)'] / demographics['Total no. of Households']
)

## Household and Age Distribution for Proportionated Population per Ward

The 2011 Census data for India gives the age distribution state-wise, so we use the data for Karnataka. The data has 5-year age bins, but the Imperial College paper uses 10-year bins, so we create 10-year bins by adding pairs of adjacent 5-year bins to get the % of the population belonging to each age group. These percentages are assumed to stay constant from 2011 to 2020. Ideally, we would apply a decade growth rate; that is left for the next iteration. The household size distribution data is for [Bangalore](http://censusindia.gov.in/2011census/hh-series/HH-1/DDW-HH01CITY-2900-2011.XLS).

<table class="waffle" cellspacing="0" cellpadding="0"><tbody><tr style="height:20px;"><th id="708093901R0" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">1</div></th><td class="s0" dir="ltr">Age group</td><td class="s1" dir="ltr">2011 pop</td><td class="s1" dir="ltr">age-10 bins</td></tr><tr style="height:20px;"><th id="708093901R1" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">2</div></th><td class="s0" dir="ltr">0-4</td><td class="s1" dir="ltr">8.3</td><td class="s1" dir="ltr" rowspan="2">16.9</td></tr><tr style="height:20px;"><th id="708093901R2" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">3</div></th><td class="s0" dir="ltr">5-9</td><td class="s1" dir="ltr">8.6</td></tr><tr style="height:20px;"><th id="708093901R3" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">4</div></th><td class="s0" dir="ltr">10-14</td><td class="s1" dir="ltr">9.4</td><td class="s1" dir="ltr" rowspan="2">18.9</td></tr><tr style="height:20px;"><th id="708093901R4" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">5</div></th><td class="s0" dir="ltr">15-19</td><td class="s1" dir="ltr">9.5</td></tr><tr style="height:20px;"><th id="708093901R5" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">6</div></th><td class="s0" dir="ltr">20-24</td><td class="s1" dir="ltr">9.9</td><td class="s1" dir="ltr" rowspan="2">19.3</td></tr><tr style="height:20px;"><th id="708093901R6" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">7</div></th><td class="s0" dir="ltr">25-29</td><td class="s1" dir="ltr">9.4</td></tr><tr style="height:20px;"><th id="708093901R7" 
style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">8</div></th><td class="s0" dir="ltr">30-34</td><td class="s1" dir="ltr">7.7</td><td class="s1" dir="ltr" rowspan="2">15.4</td></tr><tr style="height:20px;"><th id="708093901R8" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">9</div></th><td class="s0" dir="ltr">35-39</td><td class="s1" dir="ltr">7.7</td></tr><tr style="height:20px;"><th id="708093901R9" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">10</div></th><td class="s0" dir="ltr">40-44</td><td class="s1" dir="ltr">6.3</td><td class="s1" dir="ltr" rowspan="2">12.1</td></tr><tr style="height:20px;"><th id="708093901R10" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">11</div></th><td class="s0" dir="ltr">45-49</td><td class="s1" dir="ltr">5.8</td></tr><tr style="height:20px;"><th id="708093901R11" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">12</div></th><td class="s0" dir="ltr">50-54</td><td class="s1" dir="ltr">4.4</td><td class="s1" dir="ltr" rowspan="2">7.9</td></tr><tr style="height:20px;"><th id="708093901R12" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">13</div></th><td class="s0" dir="ltr">55-59</td><td class="s1" dir="ltr">3.5</td></tr><tr style="height:20px;"><th id="708093901R13" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">14</div></th><td class="s0" dir="ltr">60-64</td><td class="s1" dir="ltr">3.4</td><td class="s1" dir="ltr" rowspan="2">5.9</td></tr><tr style="height:20px;"><th id="708093901R14" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 
20px;">15</div></th><td class="s0" dir="ltr">65-69</td><td class="s1" dir="ltr">2.5</td></tr><tr style="height:20px;"><th id="708093901R15" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">16</div></th><td class="s0" dir="ltr">70-74</td><td class="s1" dir="ltr">1.7</td><td class="s1" dir="ltr" rowspan="2">2.6</td></tr><tr style="height:20px;"><th id="708093901R16" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">17</div></th><td class="s0" dir="ltr">75-79</td><td class="s1" dir="ltr">0.9</td></tr><tr style="height:20px;"><th id="708093901R17" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">18</div></th><td class="s0" dir="ltr">80-84</td><td class="s1" dir="ltr">0.6</td><td class="s1" dir="ltr" rowspan="6">1</td></tr><tr style="height:20px;"><th id="708093901R18" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">19</div></th><td class="s0" dir="ltr">85-89</td><td class="s1" dir="ltr">0.2</td></tr><tr style="height:20px;"><th id="708093901R19" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">20</div></th><td class="s0" dir="ltr">90-94</td><td class="s1" dir="ltr">0.1</td></tr><tr style="height:20px;"><th id="708093901R20" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">21</div></th><td class="s0" dir="ltr">95-99</td><td class="s1" dir="ltr">0.1</td></tr><tr style="height:20px;"><th id="708093901R21" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">22</div></th><td class="s0" dir="ltr">100+</td><td class="s1" dir="ltr">0</td></tr><tr style="height:20px;"><th id="708093901R22" style="height: 20px;" class="row-headers-background"><div 
class="row-header-wrapper" style="line-height: 20px;">23</div></th><td class="s0" dir="ltr">not stated</td><td class="s1" dir="ltr">0.1</td></tr></tbody></table>

In [6]:
# Size of the synthetic population to instantiate.
target_population = 100000

# Factor that shrinks every ward's census population so the wards add up to
# target_population.
population_2011 = sum(demographics['Total Population (in thousands)'].values)
scalingFactor = target_population/population_2011

demographics['new Population'] = demographics['Total Population (in thousands)'] * scalingFactor

# Share of the population (in %) that falls into each 10-year age band.
ageBandPercent = {
    'age 0-9': 16.9,
    'age 10-19': 18.9,
    'age 20-29': 19.3,
    'age 30-39': 15.4,
    'age 40-49': 12.1,
    'age 50-59': 7.9,
    'age 60-69': 5.9,
    'age 70-79': 2.6,
    'age 80+': 1,
}
for ageColumn, percent in ageBandPercent.items():
    demographics[ageColumn] = (demographics['new Population'] * percent)/100

# Scaled number of households per ward: scaled population divided by the mean
# household size across all wards. The mean is computed once here rather than
# once per row.
meanHouseholdSize = demographics['Mean Household Size'].mean()
demographics['Scaled Number of Households'] = demographics['new Population'] / meanHouseholdSize

number_of_houses = sum(demographics['Scaled Number of Households'].values)

# Ratio of census households to scaled households (greater than 1 whenever the
# synthetic city is smaller than the real one).
scalingFactor1 = sum(demographics['Total no. of Households'].values) / number_of_houses
householdSizes = ["1", "2", "3", "4", "5", "6", "7-10", "11-14", "15+"]
householdDistributions = [87638, 274556, 467839, 646184, 321320, 152359, 135412, 11346, 3088]

# Household counts per size class, scaled down to the synthetic city. The
# counts are divided by scalingFactor1: multiplying by a ratio above 1 would
# inflate the census counts instead of shrinking them.
newDistribution = [count / scalingFactor1 for count in householdDistributions]

# Probability of each household-size class. Normalising by the total of the
# census counts makes the weights sum to 1; dividing the unscaled counts by the
# scaled house total did not.
totalCensusHouseholds = sum(householdDistributions)
weights = [count / totalCensusHouseholds for count in householdDistributions]

## Instantiate Community Centre for Each Ward

In [7]:
def assignCC(row):
    """Return the community-centre location for one ward.

    Parameters
    ----------
    row : a row of blrDF; only its 'geometry' entry is read.

    Returns
    -------
    tuple
        (lon, lat) of the centroid of the ward geometry. The centroid is taken
        from the geometry directly, so plain Polygon wards work as well as
        MultiPolygon ones.
    """
    centroid = row['geometry'].centroid
    return (centroid.x, centroid.y)


# One community centre per ward, built in a single step instead of appending
# to module-level lists from inside DataFrame.apply.
cc = pd.DataFrame({
    'Ward No.': blrDF['Ward No.'].tolist(),
    'location': [assignCC(wardRow) for _, wardRow in blrDF.iterrows()],
})

## Instantiate houses with location in ward

In [8]:
# Accumulators filled by assignHouses: one entry per generated house.
location = []
ward = []


def assignHouses(row):
    """Append the houses of one ward to the `ward` and `location` lists.

    The ward in `row` receives int(row['Scaled Number of Households']) houses.
    Every house gets the ward number and a (lon, lat) position drawn uniformly
    from the ward's bounding box in row['Ward Bound'].
    """
    bounds = row['Ward Bound']
    households = int(row['Scaled Number of Households'])
    lon1, lat1, lon2, lat2 = bounds

    ward.extend([row['Ward No.']] * households)
    location.extend(
        (uniform(lon1, lon2), uniform(lat1, lat2)) for _ in range(households)
    )


demographics.apply(assignHouses, axis=1)

# Collect the generated houses into a table; the row position is the house id.
houses = pd.DataFrame()
houses['ward'] = ward
houses['location'] = location

del ward, location

## Instantiate schools in each ward

In [9]:
# Number of students one school is sized for.
averageStudents = 300


def addSchools(row):
    """Return randomly placed school locations for one ward.

    The ward's student estimate is all of 'age 10-19' plus one sixth of
    'age 0-9' and one quarter of 'age 20-29'; it is divided by averageStudents
    and rounded up to get the number of schools.

    Parameters
    ----------
    row : a row of demographics; reads 'Ward Bound' and the three age columns.

    Returns
    -------
    list of tuple
        One (lon, lat) per school, drawn uniformly from the ward's bounding
        box (positions are not clipped to the ward polygon).
    """
    lon1, lat1, lon2, lat2 = row['Ward Bound']
    students = (row['age 0-9']/6) + (row['age 20-29']/4) + row['age 10-19']
    totalSchools = int(np.ceil(students / averageStudents))
    return [(uniform(lon1, lon2), uniform(lat1, lat2)) for _ in range(totalSchools)]


# Build the table directly from the per-ward results. The previous version
# appended to module-level lists from inside DataFrame.apply and also created
# an unused local named `schools` that shadowed this DataFrame.
ward = []
location = []
for _, wardRow in demographics.iterrows():
    wardSchools = addSchools(wardRow)
    ward.extend([wardRow['Ward No.']] * len(wardSchools))
    location.extend(wardSchools)

schools = pd.DataFrame({'ward': ward, 'location': location})

del ward, location

## Adding Workplaces - Offices and Community Space on GeoMap

Number of community spaces = 1 per ward

In [10]:
# Number of workers one workspace is sized for.
averageWorking = 50

allPolygons = blrDF['geometry'].values
wardNo = blrDF['Ward No.'].values

# Working population: the 30-59 age bands plus a quarter of the 20-29 band.
# Each column is summed as a Series (single brackets) so the result is already
# a scalar; calling float() on the one-element Series produced by
# double-bracket selection is deprecated in recent pandas releases.
totalWorkingPopulation = (float(demographics['age 20-29'].sum())/4)+\
                         float(demographics['age 30-39'].sum())+\
                         float(demographics['age 40-49'].sum())+\
                         float(demographics['age 50-59'].sum())

workspaceNeeded = totalWorkingPopulation/ averageWorking

# Bounding box (minx, miny, maxx, maxy) of every ward, computed once up front
# rather than once per workspace. `.bounds` is available on any shapely
# geometry, so no MultiPolygon wrapper is needed.
polygonBounds = [polygon.bounds for polygon in allPolygons]
lastIndex = len(polygonBounds) - 1

ward = []
location = []

for _ in range(int(workspaceNeeded)):
    # Pick a ward at random. uniform(a, b) may return b itself because of
    # floating-point rounding, so the truncated draw is clamped to the last
    # valid index instead of risking an IndexError.
    boundIndex = min(int(uniform(0, len(polygonBounds))), lastIndex)
    lon1, lat1, lon2, lat2 = polygonBounds[boundIndex]

    # Position is drawn uniformly from the chosen ward's bounding box.
    ward.append(wardNo[boundIndex])
    location.append((uniform(lon1, lon2), uniform(lat1, lat2)))

workspaces = pd.DataFrame({'ward': ward, 'location': location})

del ward, location

## Instantiate people in the city by household 

In [11]:
# Pre-generated individuals for this city; the file name is built from `city`
# like every other path in the notebook instead of hard-coding the city key.
ppl = pd.read_json("data/"+city+"_generated_individuals.json")

# Each person's 'household' value is the row position of their house in
# `houses`. The house position and ward are looked up for all people in one
# indexed step; nothing is assigned at random here.
householdIndex = ppl['household'].to_numpy()
housePositions = houses['location'].to_numpy()[householdIndex]

# House locations are stored as (lon, lat) tuples.
ppl['lat'] = [position[1] for position in housePositions]
ppl['lon'] = [position[0] for position in housePositions]
ppl['ward'] = houses['ward'].to_numpy()[householdIndex]

ppl

Unnamed: 0,id,age,household,lat,lon,ward
0,0,18,21569,12.918518,77.578316,179
1,1,18,19944,12.938781,77.558273,164
2,2,11,8090,13.011115,77.606527,61
3,3,12,22204,12.908909,77.544485,184
4,4,18,203,13.117118,77.591395,2
...,...,...,...,...,...,...
99995,99995,64,22424,12.900901,77.573354,186
99996,99996,21,20401,12.926321,77.594961,170
99997,99997,36,757,13.064987,77.628086,6
99998,99998,25,12659,12.990684,77.549344,99


## Dump of static data as json

In [12]:
# Ward adjacency is written as a list of records ...
wardNeighbors.to_json("data/"+city+"_wardNeighbors.json", orient='records')

# ... while every other table is written keyed by its row index.
indexedTables = {
    "_houses.json": houses,
    "_schools.json": schools,
    "_workspaces.json": workspaces,
    "_cc.json": cc,
    "_individuals.json": ppl,
}
for fileSuffix, table in indexedTables.items():
    table.to_json("data/"+city+fileSuffix, orient='index')