In [1]:
import geopandas as gpd
import pandas as pd
from requests.auth import HTTPBasicAuth
import requests
import json
import plotly.express as px
import os
import rtree
import numpy as np
from shapely.geometry import Polygon


In [3]:
# Load the population data
pop = gpd.read_file("data/population/kontur_population_20220630.gpkg")
pop = pop.to_crs("EPSG:4326")


In [5]:
world_grid = gpd.read_file("data/join_experiments/world_template_grid.geojson")


In [55]:
# add a column to world grid with the string "gid"+a number
world_grid["grid-id"] = "gid-" + world_grid.index.astype(str)
world_grid

Unnamed: 0,geometry,grid-id
0,"POLYGON ((-92.24149 -26.86868, -92.14149 -26.8...",gid-0
1,"POLYGON ((-92.24149 -26.76868, -92.14149 -26.7...",gid-1
2,"POLYGON ((-92.24149 -26.66868, -92.14149 -26.6...",gid-2
3,"POLYGON ((-92.24149 -26.56868, -92.14149 -26.5...",gid-3
4,"POLYGON ((-92.24149 -26.46868, -92.14149 -26.4...",gid-4
...,...,...
1093483,"POLYGON ((74.85851 38.03132, 74.95851 38.03132...",gid-1093483
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",gid-1093484
1093485,"POLYGON ((74.85851 38.23132, 74.95851 38.23132...",gid-1093485
1093486,"POLYGON ((74.85851 38.33132, 74.95851 38.33132...",gid-1093486


In [56]:
# write world grid to GeoJSON
world_grid.to_file("data/join_experiments/world_template_grid.geojson", driver="GeoJSON")

In [4]:
fewsnet_grid = gpd.read_file(
    "data/fewsnet_grid/all_fewsnet_template_grid.geojson")


In [7]:
pop_grid = world_grid.sjoin(pop, how="left")


In [6]:
pop


Unnamed: 0,h3,population,geometry
0,88f3a6db3bfffff,1.0,"POLYGON ((167.11548 -77.85778, 167.09603 -77.8..."
1,88f3a6db17fffff,1.0,"POLYGON ((167.14891 -77.85546, 167.12947 -77.8..."
2,88f2a40257fffff,1.0,"POLYGON ((-167.46486 -83.64520, -167.47532 -83..."
3,88f1b4575dfffff,1.0,"POLYGON ((123.34808 -75.10658, 123.33761 -75.1..."
4,88f1b45755fffff,2.0,"POLYGON ((123.31830 -75.10299, 123.30784 -75.0..."
...,...,...,...
32589548,8800189467fffff,8.0,"POLYGON ((47.53472 80.79723, 47.56455 80.79766..."
32589549,8800189465fffff,3.0,"POLYGON ((47.54202 80.78878, 47.57183 80.78921..."
32589550,880018942dfffff,1.0,"POLYGON ((47.57583 80.80210, 47.60568 80.80253..."
32589551,8800189425fffff,2.0,"POLYGON ((47.61699 80.80697, 47.64686 80.80740..."


In [8]:
# save pop_grid to geojson
pop_grid.to_file("data/population/world_pop_grid.geojson", driver="GeoJSON")


In [9]:
pop_grid


Unnamed: 0,geometry,index_right,h3,population
0,"POLYGON ((-92.24149 -26.86868, -92.14149 -26.8...",,,
1,"POLYGON ((-92.24149 -26.76868, -92.14149 -26.7...",,,
2,"POLYGON ((-92.24149 -26.66868, -92.14149 -26.6...",,,
3,"POLYGON ((-92.24149 -26.56868, -92.14149 -26.5...",,,
4,"POLYGON ((-92.24149 -26.46868, -92.14149 -26.4...",,,
...,...,...,...,...
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418448.0,88208801cbfffff,4.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418454.0,8820880159fffff,1.0
1093485,"POLYGON ((74.85851 38.23132, 74.95851 38.23132...",27418446.0,882088022bfffff,1.0
1093486,"POLYGON ((74.85851 38.33132, 74.95851 38.33132...",,,


In [35]:
# remove nan rows
pop_grid_clean = pop_grid.dropna(subset=["population"])


In [13]:
pop_grid_clean.to_file(
    "data/population/world_pop_grid_clean.geojson", driver="GeoJSON")


In [14]:
fewsnet_grid


Unnamed: 0,geometry
0,"POLYGON ((-90.84149 14.83132, -90.74149 14.831..."
1,"POLYGON ((-90.44149 14.93132, -90.34149 14.931..."
2,"POLYGON ((-73.34149 18.43132, -73.24149 18.431..."
3,"POLYGON ((-91.74149 15.83132, -91.64149 15.831..."
4,"POLYGON ((-89.74149 14.23132, -89.64149 14.231..."
...,...
152039,"POLYGON ((54.15851 12.43132, 54.25851 12.43132..."
152040,"POLYGON ((54.15851 12.53132, 54.25851 12.53132..."
152041,"POLYGON ((54.25851 12.43132, 54.35851 12.43132..."
152042,"POLYGON ((54.25851 12.53132, 54.35851 12.53132..."


In [15]:
all_fewsnet_grid = gpd.read_file(
    "data/fewsnet_grid/all_fewsnet_grid_ffill.geojson")


In [16]:
all_fewsnet_centroid = gpd.read_file(
    "data/fewsnet_grid/all_fewsnet_centroid_ffill.geojson")


In [17]:
all_fewsnet_centroid


Unnamed: 0,ADMIN0-2009-10,ADMIN0-2011-07,ADMIN0-2012-04,ADMIN0-2013-04,ADMIN0-2014-10,ADMIN0-2021-10_left,ADMIN0-2021-10_left_1,ADMIN0-2021-10_right,ADMIN0-2021-10_right_1,ADMIN0_lef-2010-07,...,index_righ-2014-10,report_mon-2021-10_left,report_mon-2021-10_left_1,report_mon-2021-10_right,report_mon-2021-10_right_1,unit_name-2021-10_left,unit_name-2021-10_left_1,unit_name-2021-10_right,unit_name-2021-10_right_1,geometry
0,,,,,,,,Uganda,,,...,,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (30.20851 -0.11868)
1,,,,,,,,Uganda,,,...,,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (31.90851 2.28132)
2,,,,,,,,Uganda,,,...,,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (32.00851 2.08132)
3,,,,,,,,Uganda,,,...,,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (33.90851 3.88132)
4,,,,,,,,Uganda,,,...,,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (31.80851 2.58132)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152039,,,,,,,,,,,...,,,,,,,,,,POINT (29.80851 -8.51868)
152040,,,,,,,,,,,...,,,,,,,,,,POINT (30.60851 -8.31868)
152041,,,,,,,,,,,...,,,,,,,,,,POINT (30.90851 -8.61868)
152042,,,,,,,,,,,...,,,,,,,,,,POINT (29.10851 -8.61868)


In [18]:
# # join the population data to the fewsnet grid
# fewsnet_pop_grid = fewsnet_grid.sjoin(pop_grid_clean, how="left")
pop_grid_clean


Unnamed: 0,geometry,index_right,h3,population
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080575.0,886d2a31b5fffff,36.0
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080577.0,886d2a31b1fffff,37.0
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080572.0,886d2a31bbfffff,25.0
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080209.0,886d2a3abbfffff,1.0
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080215.0,886d2a3a97fffff,167.0
...,...,...,...,...
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418449.0,88208801bdfffff,1.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418456.0,8820880103fffff,1.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418448.0,88208801cbfffff,4.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418454.0,8820880159fffff,1.0


In [23]:
# sum of population colum of rows with index values that match 413
test1 = pop_grid_clean.loc[pop_grid_clean.index == 413, "population"].sum()
test2 = pop_grid_clean.loc[pop_grid_clean.index == 1093484, "population"].sum()
print(test1, test2)


16101.0 108.0


In [30]:
pop_grid_clean.loc[pop_grid_clean.index == 1093484]


Unnamed: 0_level_0,geometry,index_right,h3,population
index_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418458.0,8820880035fffff,84.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418457.0,882088003dfffff,1.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418459.0,8820880031fffff,1.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418453.0,8820880163fffff,4.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418450.0,88208801abfffff,10.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418455.0,8820880105fffff,1.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418449.0,88208801bdfffff,1.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418456.0,8820880103fffff,1.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418448.0,88208801cbfffff,4.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418454.0,8820880159fffff,1.0


In [36]:
pop_grid_clean.index.name = 'index_id'


In [37]:
# assign epsg to pop_grid_clean
pop_grid_clean = pop_grid_clean.to_crs("EPSG:4326")
all_fewsnet_centroid = all_fewsnet_centroid.to_crs("EPSG:4326")
all_fewsnet_grid = all_fewsnet_grid.to_crs("EPSG:4326")


I need to do what's in the cell below, but shouldnt' reset_index, and need to make a new df so i can re add the geometry column


In [38]:
# in pop_grid_clean combine rows with same geometry
pop_grid_summed = pop_grid_clean.groupby('index_id').sum()


In [42]:
# join pop_grid_summed and pop_grid_clean
result = pd.merge(pop_grid_clean, pop_grid_summed, on='index_id', how='left')
result


Unnamed: 0_level_0,geometry,index_right_x,h3,population_x,index_right_y,population_y
index_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080575.0,886d2a31b5fffff,36.0,420198882.0,16101.0
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080577.0,886d2a31b1fffff,37.0,420198882.0,16101.0
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080572.0,886d2a31bbfffff,25.0,420198882.0,16101.0
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080209.0,886d2a3abbfffff,1.0,420198882.0,16101.0
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080215.0,886d2a3a97fffff,167.0,420198882.0,16101.0
...,...,...,...,...,...,...
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418449.0,88208801bdfffff,1.0,274184539.0,108.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418456.0,8820880103fffff,1.0,274184539.0,108.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418448.0,88208801cbfffff,4.0,274184539.0,108.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418454.0,8820880159fffff,1.0,274184539.0,108.0


In [48]:
# keep the first row of each index_id value in pop_grid_clean
pop_grid_nodup = pop_grid_clean.drop_duplicates(
    subset=['geometry'], keep='first')


In [49]:
pop_grid_nodup


Unnamed: 0_level_0,geometry,index_right,h3,population
index_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
413,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",8080575.0,886d2a31b5fffff,36.0
414,"POLYGON ((-92.24149 14.53132, -92.14149 14.531...",8082000.0,886d2a04b1fffff,15.0
415,"POLYGON ((-92.24149 14.63132, -92.14149 14.631...",8077599.0,886d2aa9e9fffff,30.0
416,"POLYGON ((-92.24149 14.73132, -92.14149 14.731...",8081290.0,886d2a14e9fffff,169.0
417,"POLYGON ((-92.24149 14.83132, -92.14149 14.831...",8076539.0,886d2ab965fffff,370.0
...,...,...,...,...
1093481,"POLYGON ((74.85851 37.83132, 74.95851 37.83132...",27418355.0,88208846e5fffff,2.0
1093482,"POLYGON ((74.85851 37.93132, 74.95851 37.93132...",27418437.0,88208809d5fffff,1.0
1093483,"POLYGON ((74.85851 38.03132, 74.95851 38.03132...",27418442.0,8820880803fffff,2.0
1093484,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",27418458.0,8820880035fffff,84.0


In [50]:
# join pop_grid_summed and pop_grid_nodup
result = pd.merge(pop_grid_summed, pop_grid_nodup, on='index_id', how='left')

In [52]:
# remove index_right_x column from result
result = result.drop(columns=['index_right_x'])
result = result.drop(columns=['index_right_y'])
result = result.drop(columns=['population_y'])

In [57]:
grid_id_result = pd.merge(result, world_grid, on='geometry', how='left')

In [59]:
result

Unnamed: 0_level_0,population_x,geometry,h3
index_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
413,16101.0,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",886d2a31b5fffff
414,40366.0,"POLYGON ((-92.24149 14.53132, -92.14149 14.531...",886d2a04b1fffff
415,42143.0,"POLYGON ((-92.24149 14.63132, -92.14149 14.631...",886d2aa9e9fffff
416,26070.0,"POLYGON ((-92.24149 14.73132, -92.14149 14.731...",886d2a14e9fffff
417,53624.0,"POLYGON ((-92.24149 14.83132, -92.14149 14.831...",886d2ab965fffff
...,...,...,...
1093481,20.0,"POLYGON ((74.85851 37.83132, 74.95851 37.83132...",88208846e5fffff
1093482,3.0,"POLYGON ((74.85851 37.93132, 74.95851 37.93132...",88208809d5fffff
1093483,4.0,"POLYGON ((74.85851 38.03132, 74.95851 38.03132...",8820880803fffff
1093484,108.0,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",8820880035fffff


In [60]:
grid_id_result = grid_id_result.drop(columns=['h3'])
# change name of population_x to population
grid_id_result = grid_id_result.rename(columns={'population_x': 'population'})

In [61]:
grid_id_result

Unnamed: 0,population,geometry,grid-id
0,16101.0,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",gid-413
1,40366.0,"POLYGON ((-92.24149 14.53132, -92.14149 14.531...",gid-414
2,42143.0,"POLYGON ((-92.24149 14.63132, -92.14149 14.631...",gid-415
3,26070.0,"POLYGON ((-92.24149 14.73132, -92.14149 14.731...",gid-416
4,53624.0,"POLYGON ((-92.24149 14.83132, -92.14149 14.831...",gid-417
...,...,...,...
320405,20.0,"POLYGON ((74.85851 37.83132, 74.95851 37.83132...",gid-1093481
320406,3.0,"POLYGON ((74.85851 37.93132, 74.95851 37.93132...",gid-1093482
320407,4.0,"POLYGON ((74.85851 38.03132, 74.95851 38.03132...",gid-1093483
320408,108.0,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",gid-1093484


In [79]:
# print how many rows results has
print(len(grid_id_result))
print(len(result))
print(len(all_fewsnet_centroid))
print(len(all_fewsnet_grid))

320410
320410
152044
152044


In [70]:

all_fewsnet_grid = pd.merge(all_fewsnet_grid, world_grid, on='geometry', how='left')

In [74]:
# return all_fewsnet_grid geometry column where grid-id column matches gid-793484
print (all_fewsnet_grid.loc[all_fewsnet_grid['grid-id'] == 'gid-793484', 'geometry'])
print (grid_id_result.loc[grid_id_result['grid-id'] == 'gid-793484', 'geometry'])

152042    POLYGON ((29.05851 -8.66868, 29.15851 -8.66868...
Name: geometry, dtype: geometry
212834    POLYGON ((29.05851 -8.66868, 29.15851 -8.66868...
Name: geometry, dtype: geometry


In [75]:
all_fewsnet_grid


Unnamed: 0,ADMIN0-2009-10,ADMIN0-2011-07,ADMIN0-2012-04,ADMIN0-2013-04,ADMIN0-2014-10,ADMIN0-2021-10_left,ADMIN0-2021-10_left_1,ADMIN0-2021-10_right,ADMIN0-2021-10_right_1,ADMIN0_lef-2010-07,...,report_mon-2021-10_left,report_mon-2021-10_left_1,report_mon-2021-10_right,report_mon-2021-10_right_1,unit_name-2021-10_left,unit_name-2021-10_left_1,unit_name-2021-10_right,unit_name-2021-10_right_1,geometry,grid-id
0,,,,,,,,Uganda,,,...,,,10-2021,,,,"National Park and Reserves, Uganda",,"POLYGON ((30.15851 -0.16868, 30.25851 -0.16868...",gid-800763
1,,,,,,,,Uganda,,,...,,,10-2021,,,,"National Park and Reserves, Uganda",,"POLYGON ((31.85851 2.23132, 31.95851 2.23132, ...",gid-811905
2,,,,,,,,Uganda,,,...,,,10-2021,,,,"National Park and Reserves, Uganda",,"POLYGON ((31.95851 2.03132, 32.05851 2.03132, ...",gid-812557
3,,,,,,,,Uganda,,,...,,,10-2021,,,,"National Park and Reserves, Uganda",,"POLYGON ((33.85851 3.83132, 33.95851 3.83132, ...",gid-825001
4,,,,,,,,Uganda,,,...,,,10-2021,,,,"National Park and Reserves, Uganda",,"POLYGON ((31.75851 2.53132, 31.85851 2.53132, ...",gid-811254
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152039,,,,,,,,,,,...,,,,,,,,,"POLYGON ((29.75851 -8.56868, 29.85851 -8.56868...",gid-798063
152040,,,,,,,,,,,...,,,,,,,,,"POLYGON ((30.55851 -8.36868, 30.65851 -8.36868...",gid-803297
152041,,,,,,,,,,,...,,,,,,,,,"POLYGON ((30.85851 -8.66868, 30.95851 -8.66868...",gid-805256
152042,,,,,,,,,,,...,,,,,,,,,"POLYGON ((29.05851 -8.66868, 29.15851 -8.66868...",gid-793484


In [78]:
# return all column names of all_fewsnet_grid
for x in all_fewsnet_grid.columns:
    print(x)

ADMIN0-2009-10
ADMIN0-2011-07
ADMIN0-2012-04
ADMIN0-2013-04
ADMIN0-2014-10
ADMIN0-2021-10_left
ADMIN0-2021-10_left_1
ADMIN0-2021-10_right
ADMIN0-2021-10_right_1
ADMIN0_lef-2010-07
ADMIN0_rig-2010-07
ADMIN1-2009-10
ADMIN1-2011-07
ADMIN1-2012-04
ADMIN1-2013-04
ADMIN1-2014-10
ADMIN1-2021-10_left
ADMIN1-2021-10_left_1
ADMIN1-2021-10_right
ADMIN1-2021-10_right_1
ADMIN1FR-2009-10
ADMIN1FR-2011-07
ADMIN1FR-2012-04
ADMIN1FR-2013-04
ADMIN1FR-2014-10
ADMIN1FR_l-2010-07
ADMIN1FR_r-2010-07
ADMIN1PT-2009-10
ADMIN1PT-2011-07
ADMIN1PT-2012-04
ADMIN1PT-2013-04
ADMIN1PT-2014-10
ADMIN1PT_l-2010-07
ADMIN1PT_r-2010-07
ADMIN1SP-2009-10
ADMIN1SP-2011-07
ADMIN1SP-2012-04
ADMIN1SP-2013-04
ADMIN1SP-2014-10
ADMIN1SP_l-2010-07
ADMIN1SP_r-2010-07
ADMIN1_lef-2010-07
ADMIN1_rig-2010-07
ADMIN2-2009-10
ADMIN2-2010-07
ADMIN2-2011-07
ADMIN2-2012-04
ADMIN2-2013-04
ADMIN2-2014-10
ADMIN2-2021-10_left
ADMIN2-2021-10_left_1
ADMIN2-2021-10_right
ADMIN2-2021-10_right_1
ADMIN2FR-2009-10
ADMIN2FR-2010-07
ADMIN2FR-2011-07
ADMIN2

In [80]:
grid_id_result

Unnamed: 0,population,geometry,grid-id
0,16101.0,"POLYGON ((-92.24149 14.43132, -92.14149 14.431...",gid-413
1,40366.0,"POLYGON ((-92.24149 14.53132, -92.14149 14.531...",gid-414
2,42143.0,"POLYGON ((-92.24149 14.63132, -92.14149 14.631...",gid-415
3,26070.0,"POLYGON ((-92.24149 14.73132, -92.14149 14.731...",gid-416
4,53624.0,"POLYGON ((-92.24149 14.83132, -92.14149 14.831...",gid-417
...,...,...,...
320405,20.0,"POLYGON ((74.85851 37.83132, 74.95851 37.83132...",gid-1093481
320406,3.0,"POLYGON ((74.85851 37.93132, 74.95851 37.93132...",gid-1093482
320407,4.0,"POLYGON ((74.85851 38.03132, 74.95851 38.03132...",gid-1093483
320408,108.0,"POLYGON ((74.85851 38.13132, 74.95851 38.13132...",gid-1093484


In [None]:
# join population column of grid_id_result to all_fewsnet_grid based on grid-id column
all_fewsnet_grid_pop = pd.merge(all_fewsnet_grid, grid_id_result, on='grid-id', how='left')
all_fewsnet_grid_pop

In [83]:
# remove geometry_y column
all_fewsnet_grid_pop = all_fewsnet_grid_pop.drop(columns=['geometry_y'])
# rename geometry_x to geometry
all_fewsnet_grid_pop = all_fewsnet_grid_pop.rename(columns={'geometry_x': 'geometry'})

all_fewsnet_grid_pop

Unnamed: 0,ADMIN0-2009-10,ADMIN0-2011-07,ADMIN0-2012-04,ADMIN0-2013-04,ADMIN0-2014-10,ADMIN0-2021-10_left,ADMIN0-2021-10_left_1,ADMIN0-2021-10_right,ADMIN0-2021-10_right_1,ADMIN0_lef-2010-07,...,report_mon-2021-10_left_1,report_mon-2021-10_right,report_mon-2021-10_right_1,unit_name-2021-10_left,unit_name-2021-10_left_1,unit_name-2021-10_right,unit_name-2021-10_right_1,geometry,grid-id,population
0,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,"POLYGON ((30.15851 -0.16868, 30.25851 -0.16868...",gid-800763,23195.0
1,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,"POLYGON ((31.85851 2.23132, 31.95851 2.23132, ...",gid-811905,57.0
2,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,"POLYGON ((31.95851 2.03132, 32.05851 2.03132, ...",gid-812557,10.0
3,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,"POLYGON ((33.85851 3.83132, 33.95851 3.83132, ...",gid-825001,13.0
4,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,"POLYGON ((31.75851 2.53132, 31.85851 2.53132, ...",gid-811254,21579.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152039,,,,,,,,,,,...,,,,,,,,"POLYGON ((29.75851 -8.56868, 29.85851 -8.56868...",gid-798063,202.0
152040,,,,,,,,,,,...,,,,,,,,"POLYGON ((30.55851 -8.36868, 30.65851 -8.36868...",gid-803297,1154.0
152041,,,,,,,,,,,...,,,,,,,,"POLYGON ((30.85851 -8.66868, 30.95851 -8.66868...",gid-805256,164.0
152042,,,,,,,,,,,...,,,,,,,,"POLYGON ((29.05851 -8.66868, 29.15851 -8.66868...",gid-793484,4900.0


In [85]:
# assign projection to all_fewsnet_grid_pop
all_fewsnet_grid_pop = all_fewsnet_grid_pop.to_crs("EPSG:4326")


In [86]:
# write all_fewsnet_grid_pop to geojson
all_fewsnet_grid_pop.to_file("data/fewsnet_grid/all_fewsnet_grid.geojson", driver="GeoJSON")

In [89]:
# read fewsnet-experiments/data/fewsnet_grid/all_fewsnet_template_centroid.geojson as geodataframe
world_centroids = gpd.read_file("./data/join_experiments/world_template_centroids.geojson")
world_centroids["grid-id"] = "gid-" + world_centroids.index.astype(str)
world_centroids

Unnamed: 0,geometry,grid-id
0,POINT (-92.19149 -26.81868),gid-0
1,POINT (-92.19149 -26.71868),gid-1
2,POINT (-92.19149 -26.61868),gid-2
3,POINT (-92.19149 -26.51868),gid-3
4,POINT (-92.19149 -26.41868),gid-4
...,...,...
1093483,POINT (74.90851 38.08132),gid-1093483
1093484,POINT (74.90851 38.18132),gid-1093484
1093485,POINT (74.90851 38.28132),gid-1093485
1093486,POINT (74.90851 38.38132),gid-1093486


In [90]:
all_fewsnet_centroid = pd.merge(all_fewsnet_centroid, world_centroids, on='geometry', how='left')
all_fewsnet_centroid

Unnamed: 0,ADMIN0-2009-10,ADMIN0-2011-07,ADMIN0-2012-04,ADMIN0-2013-04,ADMIN0-2014-10,ADMIN0-2021-10_left,ADMIN0-2021-10_left_1,ADMIN0-2021-10_right,ADMIN0-2021-10_right_1,ADMIN0_lef-2010-07,...,report_mon-2021-10_left,report_mon-2021-10_left_1,report_mon-2021-10_right,report_mon-2021-10_right_1,unit_name-2021-10_left,unit_name-2021-10_left_1,unit_name-2021-10_right,unit_name-2021-10_right_1,geometry,grid-id
0,,,,,,,,Uganda,,,...,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (30.20851 -0.11868),gid-800763
1,,,,,,,,Uganda,,,...,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (31.90851 2.28132),gid-811905
2,,,,,,,,Uganda,,,...,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (32.00851 2.08132),gid-812557
3,,,,,,,,Uganda,,,...,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (33.90851 3.88132),
4,,,,,,,,Uganda,,,...,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (31.80851 2.58132),
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152039,,,,,,,,,,,...,,,,,,,,,POINT (29.80851 -8.51868),gid-798063
152040,,,,,,,,,,,...,,,,,,,,,POINT (30.60851 -8.31868),gid-803297
152041,,,,,,,,,,,...,,,,,,,,,POINT (30.90851 -8.61868),gid-805256
152042,,,,,,,,,,,...,,,,,,,,,POINT (29.10851 -8.61868),gid-793484


In [92]:
all_fewsnet_centroid_pop = pd.merge(all_fewsnet_centroid, grid_id_result, on='grid-id', how='left')
all_fewsnet_centroid_pop

Unnamed: 0,ADMIN0-2009-10,ADMIN0-2011-07,ADMIN0-2012-04,ADMIN0-2013-04,ADMIN0-2014-10,ADMIN0-2021-10_left,ADMIN0-2021-10_left_1,ADMIN0-2021-10_right,ADMIN0-2021-10_right_1,ADMIN0_lef-2010-07,...,report_mon-2021-10_right,report_mon-2021-10_right_1,unit_name-2021-10_left,unit_name-2021-10_left_1,unit_name-2021-10_right,unit_name-2021-10_right_1,geometry_x,grid-id,population,geometry_y
0,,,,,,,,Uganda,,,...,10-2021,,,,"National Park and Reserves, Uganda",,POINT (30.20851 -0.11868),gid-800763,23195.0,"POLYGON ((30.15851 -0.16868, 30.25851 -0.16868..."
1,,,,,,,,Uganda,,,...,10-2021,,,,"National Park and Reserves, Uganda",,POINT (31.90851 2.28132),gid-811905,57.0,"POLYGON ((31.85851 2.23132, 31.95851 2.23132, ..."
2,,,,,,,,Uganda,,,...,10-2021,,,,"National Park and Reserves, Uganda",,POINT (32.00851 2.08132),gid-812557,10.0,"POLYGON ((31.95851 2.03132, 32.05851 2.03132, ..."
3,,,,,,,,Uganda,,,...,10-2021,,,,"National Park and Reserves, Uganda",,POINT (33.90851 3.88132),,,
4,,,,,,,,Uganda,,,...,10-2021,,,,"National Park and Reserves, Uganda",,POINT (31.80851 2.58132),,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152039,,,,,,,,,,,...,,,,,,,POINT (29.80851 -8.51868),gid-798063,202.0,"POLYGON ((29.75851 -8.56868, 29.85851 -8.56868..."
152040,,,,,,,,,,,...,,,,,,,POINT (30.60851 -8.31868),gid-803297,1154.0,"POLYGON ((30.55851 -8.36868, 30.65851 -8.36868..."
152041,,,,,,,,,,,...,,,,,,,POINT (30.90851 -8.61868),gid-805256,164.0,"POLYGON ((30.85851 -8.66868, 30.95851 -8.66868..."
152042,,,,,,,,,,,...,,,,,,,POINT (29.10851 -8.61868),gid-793484,4900.0,"POLYGON ((29.05851 -8.66868, 29.15851 -8.66868..."


In [95]:
all_fewsnet_centroid_pop = all_fewsnet_centroid_pop.drop(columns=['geometry_y'])
# rename geometry_x to geometry
all_fewsnet_centroid_pop = all_fewsnet_centroid_pop.rename(columns={'geometry_x': 'geometry'})
all_fewsnet_centroid_pop

Unnamed: 0,ADMIN0-2009-10,ADMIN0-2011-07,ADMIN0-2012-04,ADMIN0-2013-04,ADMIN0-2014-10,ADMIN0-2021-10_left,ADMIN0-2021-10_left_1,ADMIN0-2021-10_right,ADMIN0-2021-10_right_1,ADMIN0_lef-2010-07,...,report_mon-2021-10_left_1,report_mon-2021-10_right,report_mon-2021-10_right_1,unit_name-2021-10_left,unit_name-2021-10_left_1,unit_name-2021-10_right,unit_name-2021-10_right_1,geometry,grid-id,population
0,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (30.20851 -0.11868),gid-800763,23195.0
1,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (31.90851 2.28132),gid-811905,57.0
2,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (32.00851 2.08132),gid-812557,10.0
3,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (33.90851 3.88132),,
4,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (31.80851 2.58132),,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152039,,,,,,,,,,,...,,,,,,,,POINT (29.80851 -8.51868),gid-798063,202.0
152040,,,,,,,,,,,...,,,,,,,,POINT (30.60851 -8.31868),gid-803297,1154.0
152041,,,,,,,,,,,...,,,,,,,,POINT (30.90851 -8.61868),gid-805256,164.0
152042,,,,,,,,,,,...,,,,,,,,POINT (29.10851 -8.61868),gid-793484,4900.0


In [96]:
# write all_fewsnet_centroid_pop to GeoJSON
all_fewsnet_centroid_pop.to_file("data/fewsnet_grid/all_fewsnet_centroid.geojson", driver="GeoJSON")


# Exploring missing data in the pop centroids set

In [2]:
pop_centroids = gpd.read_file("./data/fewsnet_grid/all_fewsnet_centroid_pop_ffill.geojson")
pop_centroids

Unnamed: 0,ADMIN0-2009-10,ADMIN0-2011-07,ADMIN0-2012-04,ADMIN0-2013-04,ADMIN0-2014-10,ADMIN0-2021-10_left,ADMIN0-2021-10_left_1,ADMIN0-2021-10_right,ADMIN0-2021-10_right_1,ADMIN0_lef-2010-07,...,report_mon-2021-10_left_1,report_mon-2021-10_right,report_mon-2021-10_right_1,unit_name-2021-10_left,unit_name-2021-10_left_1,unit_name-2021-10_right,unit_name-2021-10_right_1,grid-id,population,geometry
0,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,gid-800763,23195.0,POINT (30.20851 -0.11868)
1,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,gid-811905,57.0,POINT (31.90851 2.28132)
2,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,gid-812557,10.0,POINT (32.00851 2.08132)
3,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,,,POINT (33.90851 3.88132)
4,,,,,,,,Uganda,,,...,,10-2021,,,,"National Park and Reserves, Uganda",,,,POINT (31.80851 2.58132)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152039,,,,,,,,,,,...,,,,,,,,gid-798063,202.0,POINT (29.80851 -8.51868)
152040,,,,,,,,,,,...,,,,,,,,gid-803297,1154.0,POINT (30.60851 -8.31868)
152041,,,,,,,,,,,...,,,,,,,,gid-805256,164.0,POINT (30.90851 -8.61868)
152042,,,,,,,,,,,...,,,,,,,,gid-793484,4900.0,POINT (29.10851 -8.61868)


In [3]:
centroids = gpd.read_file("./data/fewsnet_grid/all_fewsnet_centroid_ffill.geojson")
centroids

Unnamed: 0,ADMIN0-2009-10,ADMIN0-2011-07,ADMIN0-2012-04,ADMIN0-2013-04,ADMIN0-2014-10,ADMIN0-2021-10_left,ADMIN0-2021-10_left_1,ADMIN0-2021-10_right,ADMIN0-2021-10_right_1,ADMIN0_lef-2010-07,...,index_righ-2014-10,report_mon-2021-10_left,report_mon-2021-10_left_1,report_mon-2021-10_right,report_mon-2021-10_right_1,unit_name-2021-10_left,unit_name-2021-10_left_1,unit_name-2021-10_right,unit_name-2021-10_right_1,geometry
0,,,,,,,,Uganda,,,...,,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (30.20851 -0.11868)
1,,,,,,,,Uganda,,,...,,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (31.90851 2.28132)
2,,,,,,,,Uganda,,,...,,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (32.00851 2.08132)
3,,,,,,,,Uganda,,,...,,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (33.90851 3.88132)
4,,,,,,,,Uganda,,,...,,,,10-2021,,,,"National Park and Reserves, Uganda",,POINT (31.80851 2.58132)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152039,,,,,,,,,,,...,,,,,,,,,,POINT (29.80851 -8.51868)
152040,,,,,,,,,,,...,,,,,,,,,,POINT (30.60851 -8.31868)
152041,,,,,,,,,,,...,,,,,,,,,,POINT (30.90851 -8.61868)
152042,,,,,,,,,,,...,,,,,,,,,,POINT (29.10851 -8.61868)


In [8]:
# If this is 0 there are no differences. This would imply that something went wrong in the geojson-mbtiles process as the geojson didn't have any differences between the original file and this one.
differences = centroids["CS-2018-10"] - pop_centroids["CS-2018-10"]
print(differences.max())


0.0


^Because that value is 0, it shows that data was dropped in mbtiling process.

## Single date data
Creating a new dataframe with only one date's worth of data to experiment on map styling and binning of the population data

In [11]:
# create a dataframe with the "CS-2018-10" and "population" column from pop_centroids
pop_centroids_pop = pop_centroids[["CS-2018-10", "population", "geometry"]]
pop_centroids_pop


Unnamed: 0,CS-2018-10,population,geometry
0,99.0,23195.0,POINT (30.20851 -0.11868)
1,99.0,57.0,POINT (31.90851 2.28132)
2,99.0,10.0,POINT (32.00851 2.08132)
3,99.0,,POINT (33.90851 3.88132)
4,99.0,,POINT (31.80851 2.58132)
...,...,...,...
152039,1.0,202.0,POINT (29.80851 -8.51868)
152040,1.0,1154.0,POINT (30.60851 -8.31868)
152041,1.0,164.0,POINT (30.90851 -8.61868)
152042,1.0,4900.0,POINT (29.10851 -8.61868)


In [12]:
# write pop_centroids_pop to GeoJSON
pop_centroids_pop.to_file("data/10-2018.geojson", driver="GeoJSON")

In [14]:
# get rid of nan columns in pop_centroids_pop
pop_centroids_pop = pop_centroids_pop.dropna()


In [20]:
# create histogram of population column using plotly
fig = px.histogram(pop_centroids_pop, x="bins")
fig.show()

In [19]:
pop_centroids_pop["bins"] = pd.cut(pop_centroids_pop["population"], bins=10, labels=False)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [21]:
# how many of each value are there in the bins column
pop_centroids_pop["bins"].value_counts()

0    89859
1      131
2       36
3       19
5        5
6        4
4        2
7        2
9        2
8        1
Name: bins, dtype: int64

In [31]:
# how many rows are value 1 in pop_centroids_pop['population']
# for values 1-10
valueslist = []
for i in range(1000, 2000):
    print(len(pop_centroids_pop.loc[pop_centroids_pop['population'] == i]))
    valueslist.append(i)
# sum of values list
print(sum(valueslist))


14
12
6
14
13
18
12
13
16
12
11
15
13
9
10
11
16
15
10
17
14
12
13
16
13
13
22
12
17
8
6
9
9
13
11
9
11
12
10
13
14
9
15
5
21
14
10
15
11
6
9
16
8
10
9
15
12
8
12
11
13
17
7
16
14
7
7
11
10
12
5
12
8
6
15
11
12
11
12
14
7
12
9
14
9
9
8
9
11
13
15
6
11
12
15
13
7
11
13
17
13
15
13
6
9
18
12
12
14
12
13
11
16
12
18
7
18
9
15
10
7
10
12
15
9
11
10
10
13
11
12
13
11
4
12
16
13
8
13
11
14
17
7
12
15
10
21
6
15
5
15
8
11
9
8
9
17
11
14
16
12
7
12
5
9
12
7
12
12
11
10
10
6
7
16
15
4
10
14
9
7
10
13
12
8
16
9
7
16
11
11
9
10
11
8
10
9
8
11
11
10
16
9
11
12
6
12
15
11
11
9
9
14
13
10
10
5
11
15
9
13
13
14
9
10
10
13
12
16
12
14
8
10
7
15
13
7
10
7
15
16
12
12
13
18
8
13
10
10
6
6
9
12
11
12
8
12
8
14
13
10
8
8
12
7
13
14
12
8
8
12
6
7
11
12
12
7
7
10
7
9
13
6
11
14
12
14
14
16
10
14
8
10
15
10
12
9
6
4
9
10
5
15
6
10
7
9
10
7
9
10
10
12
11
13
14
9
14
8
11
11
8
6
12
9
14
14
13
10
9
4
17
10
12
11
9
11
13
9
9
9
9
9
10
6
11
13
16
4
6
6
9
8
15
8
6
6
10
11
6
13
10
8
2
14
12
11
5
10
14
9
8
9
10
14
12


In [24]:
# how many rows are there in pop_centroids_pop['population']
print(len(pop_centroids_pop))

90061


In [33]:
np.set_printoptions(suppress=True)

# Compute the bin edges
bin_edges = np.histogram_bin_edges(pop_centroids_pop['population'], bins=10)

# Divide the data into bins using the computed bin edges
binsnp = np.digitize(pop_centroids_pop['population'], bin_edges )

print(bin_edges)

[      1.   316952.9  633904.8  950856.7 1267808.6 1584760.5 1901712.4
 2218664.3 2535616.2 2852568.1 3169520. ]


In [36]:
# how many values are in each bin
print(np.bincount(binsnp))

[    0 89859   131    36    19     2     5     4     2     1     1     1]
