In [101]:
import pandas as pd
import geopandas as gpd
import folium
import os
from shapely.geometry import Point
import webbrowser
import numpy as np
from flask import Flask
import requests

In [102]:
# Master people-groups table; later cells filter it per country with .query().
people_groups = pd.read_excel('People_Groups.xlsx')

***

# Indonesia

In [103]:
# Population estimates workbook; header=3 makes the fourth sheet row the column header.
f = "./ID_pop_estimates/Indonesia_5yr_age_sex_2000-2020_508_uscb_mar2019.xlsx"
id_pop_estimates = pd.read_excel(f, sheet_name='2000-2020', header=3)

# Admin-2 boundary shapefile that shares the GEO_CONCAT key with the workbook.
file = "./ID_pop_estimates/Indonesia_adm2_uscb_2019.shp"
id_adm2 = gpd.read_file(file)

# Join estimates to polygons, give the columns display names, keep only rows
# that actually matched a polygon, and drop the bookkeeping columns.
id_map_data = gpd.GeoDataFrame(
    id_pop_estimates[['AREA_NAME', 'GEO_CONCAT', 'ADM1_NAME', 'ADM2_NAME', 'ADM_LEVEL', 'BTOTL_2020']]
    .merge(id_adm2[['GEO_CONCAT', 'geometry']], on='GEO_CONCAT', how='left')
    .rename(columns={
        'ADM1_NAME': 'Province',
        'ADM2_NAME': 'Regency',
        'BTOTL_2020': 'Regency Population (2020)',
    })
    .query('geometry != None')
    .drop(columns=['AREA_NAME', 'GEO_CONCAT', 'ADM_LEVEL']),
    geometry='geometry',
)

Alternative merge — use this for the other map (it keeps the yearly 2015–2020 population columns):
id_map_data = pd.merge(id_pop_estimates[['GEO_CONCAT', 'ADM1_NAME', 'ADM2_NAME', 'BTOTL_2015', 'BTOTL_2016',
                                         'BTOTL_2017', 'BTOTL_2018', 'BTOTL_2019', 'BTOTL_2020']],
                      id_adm2[['GEO_CONCAT', 'geometry']], on='GEO_CONCAT', how='left')

id_map_data.columns = ['GEO_CONCAT', 'Province', 'Regency', 'Regency Population (2015)', 'Regency Population (2016)',
                                         'Regency Population (2017)', 'Regency Population (2018)', 'Regency Population (2019)', 'Regency Population (2020)', 'geometry']

In [105]:
# Inspect the merged table: province, regency, 2020 population and boundary geometry.
id_map_data

Unnamed: 0,Province,Regency,Regency Population (2020),geometry
2,ACEH,KABUPATEN SIMEULUE,108586,"MULTIPOLYGON (((96.66509 2.12018, 96.66765 2.1..."
3,ACEH,KABUPATEN ACEH SINGKIL,137678,"MULTIPOLYGON (((97.39711 2.03835, 97.39588 2.0..."
4,ACEH,KABUPATEN ACEH SELATAN,207677,"MULTIPOLYGON (((97.59461 2.80777, 97.59365 2.8..."
5,ACEH,KABUPATEN ACEH TENGGARA,211427,"POLYGON ((97.82406 3.74896, 97.82177 3.74658, ..."
6,ACEH,KABUPATEN ACEH TIMUR,396175,"POLYGON ((97.50049 5.24908, 97.50838 5.24523, ..."
...,...,...,...,...
527,PAPUA,KABUPATEN PUNCAK,220441,"POLYGON ((137.38573 -2.96489, 137.38681 -2.968..."
528,PAPUA,KABUPATEN DOGIYAI,137190,"POLYGON ((136.06136 -3.72440, 136.06944 -3.727..."
529,PAPUA,KABUPATEN INTAN JAYA,102617,"POLYGON ((136.30807 -2.90939, 136.30963 -2.911..."
530,PAPUA,KABUPATEN DEIYAI,157417,"POLYGON ((136.24767 -3.99286, 136.27673 -3.993..."


In [106]:
# Indonesian people groups, excluding the Chinese, Persian and Deaf clusters;
# keep only the columns used later and give the population column an explicit name.
id_groups = (
    people_groups
    .query("(Country == 'Indonesia') & (`People Cluster` not in ['Chinese', 'Persian', 'Deaf'])")
    [['Country', 'People Group', 'Population', 'Language', 'Religion', 'People Name', 'Latitude', 'Longitude']]
    .rename(columns={'Population': 'People Group Population'})
)

In [107]:
# Boundary polygons indexed by province name (province labels repeat, one per regency row).
geo_prov = id_map_data[['geometry', 'Province']].set_index('Province')

def province():
    """Find the province containing each people group in ``id_groups``.

    Reads the module-level ``id_groups`` and ``geo_prov``. Returns a list with
    one entry per row of ``id_groups``, in row order: the first province label
    whose polygon contains the group's (Longitude, Latitude) point, or
    ``'MISSING'`` when no polygon contains it.
    """
    def containing_province(lon, lat):
        location = Point(float(lon), float(lat))
        matches = geo_prov[geo_prov['geometry'].contains(location)].index
        return matches[0] if len(matches) else 'MISSING'

    return [
        containing_province(lon, lat)
        for lon, lat in zip(id_groups['Longitude'], id_groups['Latitude'])
    ]

id_groups['Province'] = province()

In [108]:
# Boundary polygons indexed by regency name.
geo_reg = id_map_data[['geometry', 'Regency']].set_index('Regency')

def regency():
    """Find the regency containing each people group in ``id_groups``.

    Reads the module-level ``id_groups`` and ``geo_reg``. Returns a list with
    one entry per row of ``id_groups``, in row order: the first regency label
    whose polygon contains the group's (Longitude, Latitude) point, or
    ``'MISSING'`` when no polygon contains it.
    """
    def containing_regency(lon, lat):
        location = Point(float(lon), float(lat))
        matches = geo_reg[geo_reg['geometry'].contains(location)].index
        return matches[0] if len(matches) else 'MISSING'

    return [
        containing_regency(lon, lat)
        for lon, lat in zip(id_groups['Longitude'], id_groups['Latitude'])
    ]

id_groups['Regency'] = regency()

In [109]:
# One row per regency, holding the array of distinct people-group names found there.
group_by_regency = (
    id_groups
    .groupby('Regency')['People Group']
    .unique()
    .rename('People List')
    .reset_index()
)
group_by_regency.head()

Unnamed: 0,Regency,People List
0,KABUPATEN ACEH SELATAN,"[Aneuk Jamee, Kluet]"
1,KABUPATEN ACEH SINGKIL,"[Batak Dairi, Pakpak Boang]"
2,KABUPATEN ACEH TAMIANG,[Tamiang]
3,KABUPATEN ACEH TENGAH,[Gayo]
4,KABUPATEN ACEH TENGGARA,[Alas]


In [110]:
def prettylist(lst, width=40):
    """Join names into one comma-separated string, wrapped with ``<br>`` tags.

    Items are appended left to right. Once the names already placed on the
    current line total ``width`` characters or more (the ``', '`` separators
    are not counted), a ``<br>`` is inserted before the next item and the
    count restarts from that item.

    Args:
        lst: Sequence of strings (a list or array) to join.
        width: Character threshold after which the next item starts a new
            line. Defaults to 40.

    Returns:
        The joined string; ``''`` for an empty sequence.
    """
    pieces = []
    line_len = 0
    last = len(lst) - 1
    for pos, item in enumerate(lst):
        if line_len >= width:
            # Current line is full: break before this item, the final one included.
            pieces.append("<br>")
            line_len = 0
        pieces.append(item if pos == last else item + ', ')
        # Count the item on whichever line it landed, so a freshly wrapped
        # line does not start with an undercounted length.
        line_len += len(item)
    return ''.join(pieces)

# Collapse each regency's array of names into a single HTML-wrapped string.
group_by_regency['People List'] = group_by_regency['People List'].apply(prettylist)

# Left join keeps every regency; any missing value (e.g. a regency with no
# matched people groups) becomes a single space.
indonesia = pd.merge(id_map_data, group_by_regency, on='Regency', how='left').fillna(' ')

In [111]:
# Add a point geometry per people group, then wrap the table as a GeoDataFrame.
id_groups['geometry'] = gpd.points_from_xy(x=id_groups['Longitude'], y=id_groups['Latitude'])
id_points = gpd.GeoDataFrame(id_groups, geometry='geometry')

***

# Papua New Guinea

In [112]:
# Population estimates workbook; header=3 makes the fourth sheet row the column header.
f = "./PNG_pop_estimates/Papua New Guinea_5yr_age_sex_2015-2030_508_uscb_mar2022.xlsx"
png_pop_estimates = pd.read_excel(f, sheet_name='2015 - 2030', header=3)

# Admin-2 boundary shapefile that shares the GEO_CONCAT key with the workbook.
file = "./PNG_pop_estimates/Papua_New_Guinea_adm2_uscb_2022.shp"
png_adm2 = gpd.read_file(file)

# Join estimates to polygons, give the columns display names, keep only rows
# that actually matched a polygon, and drop the bookkeeping columns.
png_map_data = gpd.GeoDataFrame(
    png_pop_estimates[['AREA_NAME', 'GEO_CONCAT', 'ADM1_NAME', 'ADM2_NAME', 'ADM_LEVEL', 'BTOTL_2020']]
    .merge(png_adm2[['GEO_CONCAT', 'geometry']], on='GEO_CONCAT', how='left')
    .rename(columns={
        'ADM1_NAME': 'Province',
        'ADM2_NAME': 'District',
        'BTOTL_2020': 'District Population (2020)',
    })
    .query('geometry != None')
    .drop(columns=['AREA_NAME', 'GEO_CONCAT', 'ADM_LEVEL']),
    geometry='geometry',
)

Alternative merge — use this for the other map (it keeps the yearly 2015–2020 population columns):

png_map_data = pd.merge(png_pop_estimates[['GEO_CONCAT', 'ADM1_NAME', 'ADM2_NAME', 'BTOTL_2015', 'BTOTL_2016',
                                         'BTOTL_2017', 'BTOTL_2018', 'BTOTL_2019', 'BTOTL_2020']],
                      png_adm2[['GEO_CONCAT', 'geometry']], on='GEO_CONCAT', how='left')

png_map_data.columns = ['GEO_CONCAT', 'Province', 'District', 'District Population (2015)', 'District Population (2016)',
                                         'District Population (2017)', 'District Population (2018)', 'District Population (2019)', 'District Population (2020)', 'geometry']

png_map_data = gpd.GeoDataFrame(png_map_data.query('geometry != None').drop('GEO_CONCAT', axis=1), geometry='geometry')

In [113]:
# Preview the merged table: province, district, 2020 population and boundary geometry.
png_map_data.head()

Unnamed: 0,Province,District,District Population (2020),geometry
2,WESTERN,MIDDLE FLY,97838,"MULTIPOLYGON (((142.92813 -8.39891, 142.92432 ..."
8,WESTERN,NORTH FLY,69023,"POLYGON ((142.15489 -5.64145, 142.15369 -5.647..."
14,WESTERN,SOUTH FLY,66584,"MULTIPOLYGON (((142.72210 -9.31019, 142.71991 ..."
20,GULF,KEREMA,164123,"MULTIPOLYGON (((146.15963 -8.21856, 146.15882 ..."
27,GULF,KIKORI,55790,"MULTIPOLYGON (((145.42429 -7.94389, 145.42141 ..."


In [114]:
# Papua New Guinea people groups in the Pacific Islanders affinity bloc;
# keep only the columns used later and give the population column an explicit name.
png_groups = (
    people_groups
    .query("(`Affinity Bloc` == 'Pacific Islanders') & (`Country` == 'Papua New Guinea')")
    [['Country', 'People Group', 'Population', 'Language', 'Religion', 'People Name', 'Latitude', 'Longitude']]
    .rename(columns={'Population': 'People Group Population'})
)

In [115]:
# Boundary polygons indexed by province name (province labels repeat, one per district row).
geo_provPNG = png_map_data[['geometry', 'Province']].set_index('Province')

def provincePNG():
    """Find the province containing each people group in ``png_groups``.

    Reads the module-level ``png_groups`` and ``geo_provPNG``. Returns a list
    with one entry per row of ``png_groups``, in row order: the first province
    label whose polygon contains the group's (Longitude, Latitude) point, or
    ``'MISSING'`` when no polygon contains it.
    """
    def containing_province(lon, lat):
        location = Point(float(lon), float(lat))
        matches = geo_provPNG[geo_provPNG['geometry'].contains(location)].index
        return matches[0] if len(matches) else 'MISSING'

    return [
        containing_province(lon, lat)
        for lon, lat in zip(png_groups['Longitude'], png_groups['Latitude'])
    ]

png_groups['Province'] = provincePNG()

In [116]:
# Boundary polygons indexed by district name.
geo_distPNG = png_map_data[['geometry', 'District']].set_index('District')

def districtPNG():
    """Find the district containing each people group in ``png_groups``.

    Reads the module-level ``png_groups`` and ``geo_distPNG``. Returns a list
    with one entry per row of ``png_groups``, in row order: the first district
    label whose polygon contains the group's (Longitude, Latitude) point, or
    ``'MISSING'`` when no polygon contains it.
    """
    def containing_district(lon, lat):
        location = Point(float(lon), float(lat))
        matches = geo_distPNG[geo_distPNG['geometry'].contains(location)].index
        return matches[0] if len(matches) else 'MISSING'

    return [
        containing_district(lon, lat)
        for lon, lat in zip(png_groups['Longitude'], png_groups['Latitude'])
    ]

png_groups['District'] = districtPNG()

In [117]:
# One row per district, holding the array of distinct people-group names found there.
group_by_district = (
    png_groups
    .groupby('District')['People Group']
    .unique()
    .rename('People List')
    .reset_index()
)
group_by_district.head()

Unnamed: 0,District,People List
0,ABAU,"[Binahari, Daga, Domu, Keopara, Magori, Mailu,..."
1,AITAPE-LUMI,"[Ak, Aruek, Aunalei, Bouye, Dia, Elkei, Kamnum..."
2,ALOTAU,"[Anuki, Boanaki, Bohutu, Dawawa, Doga, Garuwah..."
3,AMBUNTI-DREIKIKIR,"[Ama, Bahinemo, Bitara, Bo, Bongos, Chenapian,..."
4,ANGALIMP-SOUTH WAHGI,"[Chimbu, Kumai, Nii, Wahgi]"


In [118]:
# Collapse each district's array of names into a single HTML-wrapped string.
group_by_district['People List'] = group_by_district['People List'].apply(prettylist)

# Left join keeps every district; any missing value (e.g. a district with no
# matched people groups) becomes a single space.
papua_new_guinea = pd.merge(png_map_data, group_by_district, on='District', how='left').fillna(' ')

In [119]:
# Add a point geometry per people group, then wrap the table as a GeoDataFrame.
png_groups['geometry'] = gpd.points_from_xy(x=png_groups['Longitude'], y=png_groups['Latitude'])
png_points = gpd.GeoDataFrame(png_groups, geometry='geometry')

***

# Philippines

# Population estimates workbook; header=3 makes the fourth sheet row the column header.
f = "./PH_pop_estimates/Philippines_5yr_age_sex_2000-2040_508_uscb_apr2022.xlsx"
ph_pop_estimates = pd.read_excel(f, sheet_name='2000 - 2040', header=3)

# Admin-2 boundary shapefile that shares the GEO_CONCAT key with the workbook.
file = "./PH_pop_estimates/Philippines_adm2_uscb_2022.shp"
ph_adm2 = gpd.read_file(file)

# Join estimates to polygons, give the columns display names, keep only rows
# that actually matched a polygon, and drop the bookkeeping columns.
ph_map_data = gpd.GeoDataFrame(
    ph_pop_estimates[['AREA_NAME', 'GEO_CONCAT', 'ADM1_NAME', 'ADM2_NAME', 'ADM_LEVEL', 'BTOTL_2020']]
    .merge(ph_adm2[['GEO_CONCAT', 'geometry']], on='GEO_CONCAT', how='left')
    .rename(columns={
        'ADM1_NAME': 'Region',
        'ADM2_NAME': 'District',
        'BTOTL_2020': 'District Population (2020)',
    })
    .query('geometry != None')
    .drop(columns=['AREA_NAME', 'GEO_CONCAT', 'ADM_LEVEL']),
    geometry='geometry',
)

In [120]:
# Philippine people groups whose People Cluster mentions 'Filipino';
# keep only the columns used later and give the population column an explicit name.
ph_groups = (
    people_groups
    .query("(`Country` == 'Philippines') & (`People Cluster`.str.contains('Filipino'))")
    [['Country', 'People Group', 'Population', 'Language', 'Religion', 'People Name', 'Latitude', 'Longitude']]
    .rename(columns={'Population': 'People Group Population'})
)
ph_groups.head()

Unnamed: 0,Country,People Group,People Group Population,Language,Religion,People Name,Latitude,Longitude
3203,Philippines,Abaknon Sama,31000,Inabaknon - (abx),Christianity - Roman Catholic,Abaknon Sama,12.42636,124.16645
3204,Philippines,Aklano,830000,Aklanon - (akl),Christianity - Roman Catholic,Aklano,11.7111,122.2784
3205,Philippines,Capisano,945000,Capiznon - (cps),Christianity - Roman Catholic,Capisano,11.41215,122.95359
3206,Philippines,Cebuano,22150000,Cebuano - (ceb),Christianity - Roman Catholic,Visayan,7.071867,125.600776
3207,Philippines,Chabakano Creole,690000,Chavacano - (cbk),Christianity - Roman Catholic,Chabakano Creole,7.11655,122.16942


# Boundary polygons indexed by region name.
geo_regPH = ph_map_data[['geometry', 'Region']].set_index('Region')

def regionPH():
    """Find the region containing each people group in ``ph_groups``.

    Reads the module-level ``ph_groups`` and ``geo_regPH``. Returns a list
    with one entry per row of ``ph_groups``, in row order: the first region
    label whose polygon contains the group's (Longitude, Latitude) point, or
    ``'MISSING'`` when no polygon contains it.
    """
    def containing_region(lon, lat):
        location = Point(float(lon), float(lat))
        matches = geo_regPH[geo_regPH['geometry'].contains(location)].index
        return matches[0] if len(matches) else 'MISSING'

    return [
        containing_region(lon, lat)
        for lon, lat in zip(ph_groups['Longitude'], ph_groups['Latitude'])
    ]

ph_groups['Region'] = regionPH()

# Boundary polygons indexed by district name.
geo_distPH = ph_map_data[['geometry', 'District']].set_index('District')

def districtPH():
    """Find the district containing each people group in ``ph_groups``.

    Reads the module-level ``ph_groups`` and ``geo_distPH``. Returns a list
    with one entry per row of ``ph_groups``, in row order: the first district
    label whose polygon contains the group's (Longitude, Latitude) point, or
    ``'MISSING'`` when no polygon contains it.
    """
    def containing_district(lon, lat):
        location = Point(float(lon), float(lat))
        matches = geo_distPH[geo_distPH['geometry'].contains(location)].index
        return matches[0] if len(matches) else 'MISSING'

    return [
        containing_district(lon, lat)
        for lon, lat in zip(ph_groups['Longitude'], ph_groups['Latitude'])
    ]

ph_groups['District'] = districtPH()

# One row per district, holding the array of distinct people-group names found there.
group_by_districtPH = (
    ph_groups
    .groupby('District')['People Group']
    .unique()
    .rename('People List')
    .reset_index()
)
group_by_districtPH.head()

# Collapse each district's array of names into a single HTML-wrapped string.
group_by_districtPH['People List'] = group_by_districtPH['People List'].apply(prettylist)

# Left join keeps every district; any missing value becomes a single space.
philippines = pd.merge(ph_map_data, group_by_districtPH, on='District', how='left').fillna(' ')

# Add a point geometry per people group, then wrap the table as a GeoDataFrame.
ph_groups['geometry'] = gpd.points_from_xy(x=ph_groups['Longitude'], y=ph_groups['Latitude'])
ph_points = gpd.GeoDataFrame(ph_groups, geometry='geometry')

***

# Map Code

In [121]:
# Rebuild the Indonesia point layer from the people-group coordinates.
id_groups['geometry'] = gpd.points_from_xy(x=id_groups['Longitude'], y=id_groups['Latitude'])
id_points = gpd.GeoDataFrame(id_groups, geometry='geometry')

# Draw each country's polygon layer followed by its people-group point layer.
# The first explore() call receives m=None and therefore creates the map;
# every later call draws onto that same map.
nusantara = None
for polygons, polygon_label, points, point_label in (
    (indonesia, 'ID Regency Data (2020)', id_points, 'ID People Groups'),
    (papua_new_guinea, 'PNG District Data (2020)', png_points, 'PNG People Groups'),
):
    nusantara = polygons.explore(
        m=nusantara,
        name=polygon_label,
        prefer_canvas=True,
        color='blue',
    )
    points.explore(
        m=nusantara,
        color='red',
        prefer_canvas=True,
        marker_kwds=dict(radius=3, fill=True),
        name=point_label,
    )

# Layer toggle control; the trailing semicolon suppresses the cell's output.
folium.LayerControl().add_to(nusantara);

***

In [123]:
# nusantara

***

In [22]:
# output_file = "nusantara.html"
# curr_map = nusantara
# curr_map.save(output_file)
# webbrowser.open(output_file, new=2)  # open in new tab

***