In [33]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
pd.set_option('display.max_columns', None)
from shapely.geometry import LineString, Point, Polygon
import h3

input_folder_path = "/Volumes/samsung-4tb/b2p/impact-model/no_order_1_less_than_500m_with_top_sites/model_outputs/"
output_folder_path ="/Volumes/samsung-4tb/b2p/impact-model/cleaned_data/destination_data/"


### education destinations

In [28]:
drop_columns = [
    "connectivity_status",
    "coverage_status",
    "is_verified",
    "coordinates",
    "og_name",
    "coords",
    "province",
    "district",
    "sector",
    "cell",
    "village",
    "is_preschool",
    "is_primary",
    "is_o_level",
    "is_a_level",
    "ownership",
    "type",
    "orig_name",
    "source",
    "prov_code",
    "distr_code",
    "lat",
    "long",
    "data_year",
]

In [29]:
eth_all_edu = gpd.read_parquet(os.path.join(input_folder_path, "ethiopia/destinations_with_index/all_education_facilities.parquet"))
civ_all_edu = gpd.read_parquet(os.path.join(input_folder_path, "ivory_coast/destinations_with_index/all_education_facilities.parquet"))
ken_all_edu = gpd.read_parquet(os.path.join(input_folder_path, "kenya/destinations_with_index/all_education_facilities.parquet"))
rwa_all_edu = gpd.read_parquet(os.path.join(input_folder_path, "rwanda/destinations_with_index/all_education_facilities.parquet"))
tza_all_edu = gpd.read_parquet(os.path.join(input_folder_path, "tanzania/destinations_with_index/all_education_facilities.parquet"))
uga_all_edu = gpd.read_parquet(os.path.join(input_folder_path, "uganda/destinations_with_index/all_education_facilities.parquet"))
zmb_all_edu = gpd.read_parquet(os.path.join(input_folder_path, "zambia/destinations_with_index/all_education_facilities.parquet"))

# create list of all education facilities paths
all_edu_dfs = [ eth_all_edu, civ_all_edu, ken_all_edu, rwa_all_edu, tza_all_edu, uga_all_edu, zmb_all_edu ]

all_edu_dfs = [df.drop(columns=drop_columns, errors='ignore') for df in all_edu_dfs]

# join all dataframes into one
all_edu_df = pd.concat(all_edu_dfs, ignore_index=True)
# reset index
all_edu_df.reset_index(drop=True, inplace=True)



In [30]:
all_edu_df

Unnamed: 0,id,name,geometry,category,all_education_facilities_index,subregion_index,fid,globalid
0,4527811.0,unity university,POINT (38.80434 9.00017),other,1,412081,,
1,4527812.0,cpu college,POINT (38.80249 9.00068),college,2,412081,,
2,4527813.0,ትምህርት ቤት,POINT (38.52361 9.77877),,3,378792,,
3,4527814.0,walya primary,POINT (37.44922 12.59572),primary,4,269325,,
4,4527815.0,further training institute university adama,POINT (39.28836 8.56323),other,5,466166,,
...,...,...,...,...,...,...,...,...
104781,,Chikuse Primary School,POINT (28.67089 -14.50118),Primary School,7128,123849,8489.0,35LPD800961_002
104782,,Chimpempe Secondary School,POINT (29.43418 -9.54560),Secondary School,7129,144698,8490.0,35LQK672438_001
104783,,Kafubu Basic School,POINT (28.58740 -13.02607),Basic School,7130,120543,8502.0,35LPF721594_001
104784,,Namiyanga School,POINT (26.50011 -17.07299),School,7131,68657,8506.0,35KMB468123_001


note, i'm not sure how to handle the destination ID's exactly... ideally there would be one that matches up with the paths down the road, but not really sure how that will work as there are many nan's in the id column and a few other id columns

In [34]:
# write to all_edu_allcountries.geojson
all_edu_df.to_file(os.path.join(output_folder_path, "all_education_facilities.geojson"), driver='GeoJSON')

### health destinations

In [35]:
eth_all_health = gpd.read_parquet(os.path.join(input_folder_path, "ethiopia/destinations_with_index/all_health_facilities.parquet"))
civ_all_health = gpd.read_parquet(os.path.join(input_folder_path, "ivory_coast/destinations_with_index/all_health_facilities.parquet"))
ken_all_health = gpd.read_parquet(os.path.join(input_folder_path, "kenya/destinations_with_index/all_health_facilities.parquet"))
rwa_all_health = gpd.read_parquet(os.path.join(input_folder_path, "rwanda/destinations_with_index/all_health_facilities.parquet"))
tza_all_health = gpd.read_parquet(os.path.join(input_folder_path, "tanzania/destinations_with_index/all_health_facilities.parquet"))
uga_all_health = gpd.read_parquet(os.path.join(input_folder_path, "uganda/destinations_with_index/all_health_facilities.parquet"))
zmb_all_health = gpd.read_parquet(os.path.join(input_folder_path, "zambia/destinations_with_index/all_health_facilities.parquet"))

# create list of all health facilities paths
all_health_dfs = [eth_all_health, civ_all_health, ken_all_health, rwa_all_health, tza_all_health, uga_all_health, zmb_all_health]
# join all dataframes into one
all_health_df = pd.concat(all_health_dfs, ignore_index=True)
# reset index
all_health_df.reset_index(drop=True, inplace=True)

In [36]:
all_health_df

Unnamed: 0,country,admin1,facility_name,facility_type,ownership,y,x,ll_source,geometry,coords,all_health_facilities_index,subregion_index,district,name,sector,cell,village,fid,type,source,globalid,label,province,prov_code,distr_code,lat,long,data_year
0,ethiopia,Addis Ababa,Addis Ketema Clinic 1,clinic,MoH,9.02970,38.7186,Digitized from online map,POINT (38.71860 9.02970),"[38.7186, 9.0297]",1,402987,,,,,,,,,,,,,,,,
1,ethiopia,Addis Ababa,Addis Ketema Clinic 10,clinic,MoH,9.02922,38.7386,Digitized from online map,POINT (38.73860 9.02922),"[38.7386, 9.02922]",2,404694,,,,,,,,,,,,,,,,
2,ethiopia,Addis Ababa,Addis Ketema Clinic 11,clinic,MoH,9.02868,38.7362,Digitized from online map,POINT (38.73620 9.02868),"[38.7362, 9.02868]",3,403784,,,,,,,,,,,,,,,,
3,ethiopia,Addis Ababa,Addis Ketema Clinic 12,clinic,MoH,9.02816,38.7347,Digitized from online map,POINT (38.73470 9.02816),"[38.7347, 9.02816]",4,403784,,,,,,,,,,,,,,,,
4,ethiopia,Addis Ababa,Addis Ketema Clinic 13,clinic,MoH,9.02942,38.7340,Digitized from online map,POINT (38.73400 9.02942),"[38.734, 9.02942]",5,403784,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26642,,,,rural health post,,,,,POINT (23.58512 -16.68015),"[23.58512, -16.68015]",2601,27133,Sioma,Sankandi RHP,,,,2604.0,Health Facility,MOH,{F2987FF1-FEB8-437F-BE54-386D551B385C},Sankandi RHP,Western,110.0,110016.0,-16.680146,23.585124,2017-2018
26643,,,,rural health center,,,,,POINT (23.15679 -16.19463),"[23.15679, -16.19463]",2602,21382,Sioma,Silowana RHC,,,,2605.0,Health Facility,MOH,{B2A502DC-2FD2-4BB5-9564-489FC20BFA8D},Silowana RHC,Western,110.0,110016.0,-16.194630,23.156791,2017-2018
26644,,,,rural health center,,,,,POINT (23.02583 -17.31588),"[23.02583, -17.31588]",2603,19064,Sioma,Sinjembela RHC,,,,2606.0,Health Facility,MOH,{C80AE0FC-E7A1-4E5A-8AF2-BD191C211B4B},Sinjembela RHC,Western,110.0,110016.0,-17.315875,23.025835,2017-2018
26645,,,,rural health center,,,,,POINT (23.50325 -16.60110),"[23.50325, -16.6011]",2604,25914,Sioma,Sioma RHC,,,,2607.0,Health Facility,MOH,{B61042BB-4AC6-4D0E-AC37-C75DBC8CFD92},Sioma RHC,Western,110.0,110016.0,-16.601098,23.503246,2017-2018


In [38]:
keep_columns = [ 'facility_name', 'facility_type', 'geometry', 'all_health_facilities_index', 'subregion_index', 'name', 'fid', 'globalid' ]
# drop everything not in keep_columns
all_health_df = all_health_df[keep_columns]
# join name and facility_name into one column called name
all_health_df['name'] = all_health_df['facility_name'].fillna('') + ' ' + all_health_df['name'].fillna('')
# drop facility_name
all_health_df.drop(columns=['facility_name'], inplace=True)
# write to all_health_allcountries.geojson
all_health_df.to_file(os.path.join(output_folder_path, "all_health_facilities.geojson"), driver='GeoJSON')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_health_df.drop(columns=['facility_name'], inplace=True)


In [4]:
# read ethiopia/destinations_with_index/all_education_facilities.parquet
et_all_edu = gpd.read_parquet(input_folder_path + "ethiopia/destinations_with_index/all_education_facilities.parquet")
et_all_edu

Unnamed: 0,id,name,connectivity_status,coverage_status,is_verified,coordinates,geometry,og_name,category,coords,all_education_facilities_index,subregion_index
0,4527811,unity university,,,False,"[38.804337, 9.0001673]",POINT (38.80434 9.00017),unity university,other,"[38.80434, 9.00017]",1,412081
1,4527812,cpu college,,,False,"[38.8024916, 9.0006759]",POINT (38.80249 9.00068),cpu college,college,"[38.80249, 9.00068]",2,412081
2,4527813,ትምህርት ቤት,,,False,"[38.523605, 9.778773]",POINT (38.52361 9.77877),ትምህርት ቤት,,"[38.52361, 9.77877]",3,378792
3,4527814,walya primary,,,False,"[37.4492157, 12.5957207]",POINT (37.44922 12.59572),walya primery,primary,"[37.44922, 12.59572]",4,269325
4,4527815,further training institute university adama,,,False,"[39.2883622, 8.5632251]",POINT (39.28836 8.56323),further training institute university adama,other,"[39.28836, 8.56323]",5,466166
...,...,...,...,...,...,...,...,...,...,...,...,...
2949,4530760,name unknown,,,False,"[36.6324748, 10.8841774]",POINT (36.63247 10.88418),name unknown,unknown,"[36.63247, 10.88418]",2950,178379
2950,4530761,name unknown,,,False,"[36.4982114, 10.9679689]",POINT (36.49821 10.96797),name unknown,unknown,"[36.49821, 10.96797]",2951,162663
2951,4530762,name unknown,,,False,"[36.801116, 10.9002112]",POINT (36.80112 10.90021),name unknown,unknown,"[36.80112, 10.90021]",2952,198755
2952,4530763,name unknown,,,False,"[36.7206055, 10.892006]",POINT (36.72061 10.89201),name unknown,unknown,"[36.72061, 10.89201]",2953,189674


### Major Roads

In [39]:
eth_all_roads = gpd.read_parquet(os.path.join(input_folder_path, "ethiopia/destinations_with_index/major_roads.parquet"))
civ_all_roads = gpd.read_parquet(os.path.join(input_folder_path, "ivory_coast/destinations_with_index/major_roads.parquet"))
ken_all_roads = gpd.read_parquet(os.path.join(input_folder_path, "kenya/destinations_with_index/major_roads.parquet"))
rwa_all_roads = gpd.read_parquet(os.path.join(input_folder_path, "rwanda/destinations_with_index/major_roads.parquet"))
tza_all_roads = gpd.read_parquet(os.path.join(input_folder_path, "tanzania/destinations_with_index/major_roads.parquet"))
uga_all_roads = gpd.read_parquet(os.path.join(input_folder_path, "uganda/destinations_with_index/major_roads.parquet"))
zmb_all_roads = gpd.read_parquet(os.path.join(input_folder_path, "zambia/destinations_with_index/major_roads.parquet"))

# create list of all major roads paths
all_roads_dfs = [eth_all_roads, civ_all_roads, ken_all_roads, rwa_all_roads, tza_all_roads, uga_all_roads, zmb_all_roads]
# join all dataframes into one
all_roads_df = pd.concat(all_roads_dfs, ignore_index=True)
# reset index
all_roads_df.reset_index(drop=True, inplace=True)
all_roads_df

Unnamed: 0,osm_type,name,type,ascent,descent,incline,distance,route,network,highway,surface,smoothness,timestamp,ele,geometry,coords,major_roads_index,subregion_index
0,way,Equatorial Guinea Street,,,,,,,,primary,,,2019-12-23 18:55:23+00:00,,POINT (38.79534 9.01767),"[38.79534, 9.01767]",1,411149
1,way,Equatorial Guinea Street,,,,,,,,primary,,,2019-12-23 18:55:23+00:00,,POINT (38.79630 9.01795),"[38.7963, 9.01795]",2,411149
2,way,Equatorial Guinea Street,,,,,,,,primary,,,2019-12-23 18:55:23+00:00,,POINT (38.79726 9.01825),"[38.79726, 9.01825]",3,411594
3,way,Equatorial Guinea Street,,,,,,,,primary,,,2019-12-23 18:55:23+00:00,,POINT (38.79820 9.01858),"[38.7982, 9.01858]",4,411594
4,way,Ras Abebe Aregay Street,,,,,,,,primary,,,2019-12-23 20:27:49+00:00,,POINT (38.75185 9.01666),"[38.75185, 9.01666]",5,404906
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1206531,way,Great East Road,,,,,,,,trunk,asphalt,,2023-05-13 04:56:41+00:00,,POINT (30.54977 -14.85298),"[30.54977, -14.85298]",114487,188821
1206532,way,Great East Road,,,,,,,,trunk,asphalt,,2023-05-13 04:56:41+00:00,,POINT (30.54890 -14.85348),"[30.5489, -14.85348]",114488,188821
1206533,way,Great East Road,,,,,,,,trunk,asphalt,,2023-05-13 04:56:41+00:00,,POINT (30.54804 -14.85399),"[30.54804, -14.85399]",114489,188821
1206534,way,Great East Road,,,,,,,,trunk,asphalt,,2023-05-13 04:56:41+00:00,,POINT (30.54765 -14.85421),"[30.54765, -14.85421]",114490,188821


In [43]:
keep_columns = ['name', 'highway', 'surface', 'geometry', 'major_roads_index', 'subregion_index']
# keep only the relevant columns
all_roads_df = all_roads_df[keep_columns]
# write to all_roads_allcountries.geojson
all_roads_df.to_file(os.path.join(output_folder_path, "all_major_roads.geojson"), driver='GeoJSON')

### Urban Areas

suburban files include other categories (urban centers etc.)

In [48]:
eth_all_urban = gpd.read_parquet(os.path.join(input_folder_path, "ethiopia/destinations_with_index/urban_center.parquet"))
civ_all_urban = gpd.read_parquet(os.path.join(input_folder_path, "ivory_coast/destinations_with_index/urban_center.parquet"))
ken_all_urban = gpd.read_parquet(os.path.join(input_folder_path, "kenya/destinations_with_index/urban_center.parquet"))
rwa_all_urban = gpd.read_parquet(os.path.join(input_folder_path, "rwanda/destinations_with_index/urban_center.parquet"))
tza_all_urban = gpd.read_parquet(os.path.join(input_folder_path, "tanzania/destinations_with_index/urban_center.parquet"))
uga_all_urban = gpd.read_parquet(os.path.join(input_folder_path, "uganda/destinations_with_index/urban_center.parquet"))
zmb_all_urban = gpd.read_parquet(os.path.join(input_folder_path, "zambia/destinations_with_index/urban_center.parquet"))

# create list of all urban center paths
all_urban_dfs = [eth_all_urban, civ_all_urban, ken_all_urban, rwa_all_urban, tza_all_urban, uga_all_urban, zmb_all_urban]
# join all dataframes into one
all_urban_df = pd.concat(all_urban_dfs, ignore_index=True)
# reset index
all_urban_df.reset_index(drop=True, inplace=True)

In [49]:
all_urban_df

Unnamed: 0,row,col,category_value,geometry,category,coords,urban_center_index,subregion_index
0,643,543,30,POINT (42.05792 4.17458),urban_center,"[42.05792, 4.17458]",1,719554
1,580,550,30,POINT (42.17458 5.22458),urban_center,"[42.17458, 5.22458]",2,726112
2,573,533,30,POINT (41.89125 5.34125),urban_center,"[41.89125, 5.34125]",3,710027
3,537,633,30,POINT (43.55792 5.94125),urban_center,"[43.55792, 5.94125]",4,774776
4,537,632,30,POINT (43.54125 5.94125),urban_center,"[43.54125, 5.94125]",5,774776
...,...,...,...,...,...,...,...,...
3498,33,546,30,POINT (31.10792 -8.78292),urban_center,"[31.10792, -8.78292]",409,12829
3499,33,547,30,POINT (31.12458 -8.78292),urban_center,"[31.12458, -8.78292]",410,204118
3500,32,546,30,POINT (31.10792 -8.76625),urban_center,"[31.10792, -8.76625]",411,12847
3501,32,547,30,POINT (31.12458 -8.76625),urban_center,"[31.12458, -8.76625]",412,203857


In [51]:
# drop row, col,
all_urban_df = all_urban_df.drop(columns=['row', 'col', 'coords'], errors='ignore')
# write to all_urban_allcountries.geojson
all_urban_df.to_file(os.path.join(output_folder_path, "all_urban_centers.geojson"), driver='GeoJSON')

### Experimenting

In [None]:
drop_columns = ['connectivity_status', 'coverage_status', 'is_verified', 'coordinates', 'og_name', 'coords', 'all_education_facilities_index']
et_all_edu = et_all_edu.drop(columns=drop_columns)

In [9]:
et_all_edu['category'].unique()

array(['other', 'college', '', 'primary', 'secondary',
       'college,secondary', 'primary,secondary', 'vocational,college',
       'unknown', 'preschool', 'preschool,primary'], dtype=object)

In [13]:
civ_paths_edu = pd.read_parquet(input_folder_path + "ivory_coast/joined_data/travel_paths_to_all_education_facilities_fixed.parquet")

In [14]:
civ_paths_edu

Unnamed: 0,row,col,subregion,coords,exit_point_used,exit_point_index,destination_index,destination_coords,exit_point_index_path,h3_index,exit_point_path,path_to_exit_point,exit_point_used_no_sites,exit_point_path_no_sites,path_to_exit_point_no_sites
0,3653,1262,21093,"[-7.5474, 7.69195]","[-7.5474, 7.69195]",230524,2095,"[-7.47986, 7.6695]","[230524, 83597, 83598, 83599, 83600, 83601, 83...",8875a19995fffff,"[[-7.5474, 7.69195], [-7.5409, 7.69699], [-7.5...","[[-7.5474, 7.69195]]",,,
1,4463,1478,27651,"[-7.3675, 7.016667]","[-7.3744, 7.01404]",83875,1485,"[-7.39222, 7.0519]","[83875, 82970, 82971, 82972, 82973]",8875a12459fffff,"[[-7.3744, 7.01404], [-7.40107, 7.04269], [-7....","[[-7.3675, 7.016667], [-7.3683333, 7.0158334],...",,,
2,4464,1477,27651,"[-7.3683333, 7.0158334]","[-7.3744, 7.01404]",83875,1485,"[-7.39222, 7.0519]","[83875, 82970, 82971, 82972, 82973]",8875a12459fffff,"[[-7.3744, 7.01404], [-7.40107, 7.04269], [-7....","[[-7.3683333, 7.0158334], [-7.3683333, 7.01500...",,,
3,4465,1477,27651,"[-7.3683333, 7.0150003]","[-7.3744, 7.01404]",83875,1485,"[-7.39222, 7.0519]","[83875, 82970, 82971, 82972, 82973]",8875a12459fffff,"[[-7.3744, 7.01404], [-7.40107, 7.04269], [-7....","[[-7.3683333, 7.0150003], [-7.3683333, 7.01416...",,,
4,4466,1471,27651,"[-7.373462, 7.0139494]","[-7.3744, 7.01404]",83875,1485,"[-7.39222, 7.0519]","[83875, 82970, 82971, 82972, 82973]",8875a12451fffff,"[[-7.3744, 7.01404], [-7.40107, 7.04269], [-7....","[[-7.373462, 7.0139494], [-7.374401, 7.0140433...",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4964078,6807,2094,45547,"[-6.8541665, 5.0633335]","[-6.85284, 5.06473]",104919,170,"[-6.61776, 5.16575]","[104919, 261139, 104917, 104918, 208015, 10492...",8875a96f0dfffff,"[[-6.85284, 5.06473], [-6.84043, 5.06062], [-6...","[[-6.8541665, 5.0633335], [-6.8533335, 5.06416...",,"[[-6.86037, 5.05999], [-6.86074, 5.05988], [-6...","[[-6.8541665, 5.0633335], [-6.855, 5.0633335],..."
4964079,6808,2090,45547,"[-6.8575, 5.0625]","[-6.85284, 5.06473]",104919,170,"[-6.61776, 5.16575]","[104919, 261139, 104917, 104918, 208015, 10492...",8875a96f0dfffff,"[[-6.85284, 5.06473], [-6.84043, 5.06062], [-6...","[[-6.8575, 5.0625], [-6.8575, 5.0633335], [-6....",,"[[-6.86037, 5.05999], [-6.86074, 5.05988], [-6...","[[-6.8575, 5.0625], [-6.858333, 5.061667], [-6..."
4964080,6809,2089,45547,"[-6.858333, 5.061667]","[-6.85284, 5.06473]",104919,170,"[-6.61776, 5.16575]","[104919, 261139, 104917, 104918, 208015, 10492...",8875a96f0dfffff,"[[-6.85284, 5.06473], [-6.84043, 5.06062], [-6...","[[-6.858333, 5.061667], [-6.8575, 5.0625], [-6...",,"[[-6.86037, 5.05999], [-6.86074, 5.05988], [-6...","[[-6.858333, 5.061667], [-6.8591666, 5.0608335]]"
4964081,6810,2088,45547,"[-6.8591666, 5.0608335]","[-6.85284, 5.06473]",104919,170,"[-6.61776, 5.16575]","[104919, 261139, 104917, 104918, 208015, 10492...",8875a96f0dfffff,"[[-6.85284, 5.06473], [-6.84043, 5.06062], [-6...","[[-6.8591666, 5.0608335], [-6.858333, 5.061667...",,"[[-6.86037, 5.05999], [-6.86074, 5.05988], [-6...","[[-6.8591666, 5.0608335]]"
