In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

# functions to process PeopleGroups datasets
from features import *

### link to google drive
* https://drive.google.com/drive/folders/1_4R9ut87eemnxWH53VN8QCSyRugit27s?usp=sharing

### Note:
* It is important to run the cells of this jupyter notebook sequentially. If you run into an error, or an output isn't what you would expect, please rerun the cells from top to bottom.

<hr style="border: 5px solid #003262;" />
<hr style="border: 1px solid #fdb515;" />

# Subnational Population Data

### Countries that can be Validated (ADM1)

In [2]:
# List the countries that can be validated at ADM1 level
# (countries_with_data is not defined in this notebook; presumably it comes from `from features import *` -- TODO confirm).
countries_with_data()

['Afghanistan',
 'Bangladesh',
 'Brunei',
 'Burkina Faso',
 'Burundi',
 'Cambodia',
 'Chad',
 'Comoros',
 'Congo, Republic of',
 'Cuba',
 'Indonesia',
 'Jamaica',
 'Kazakhstan',
 'Kyrgyzstan',
 'Laos',
 'Malaysia',
 'Myanmar',
 'Nauru',
 'Papua New Guinea',
 'Philippines',
 'Singapore',
 'Suriname',
 'Thailand',
 'Timor-Leste',
 'Tonga',
 'Turkmenistan',
 'Vietnam']

<hr style="border: 5px solid #003262;" />
<hr style="border: 1px solid #fdb515;" />

# Validation Code 

* only change the country variable
* If you get an error, it is either because the country is spelled incorrectly/differently or there is no ADM1 population data available for the country (work in progress)
* takes roughly 2 minutes to run (potentially up to 5 minutes)

In [4]:
# Name of the country to validate; this is the only value meant to be edited.
country = "Philippines"

# no need to edit anything below this line
# Run the validation for the chosen country. validate_country is not defined in
# this notebook (presumably it comes from `from features import *` -- TODO confirm),
# and the meaning of the second positional argument (True) is defined there.
results = validate_country(country, True)
# Bare last expression so the notebook renders the results table as the cell output.
results

The Caluyanhon people group did not intersect with a CGAZ ADM1 boundary. They may be valid.
The Abaknon Sama people group did not intersect with a CGAZ ADM1 boundary. They may be valid.


Unnamed: 0,People Group,Alpha-3 Code,People Group Population,Country,geometry,ADM1 Boundaries Present,Total Boundary Population,Valid People Group
119,Western Subanon,PHL,139000,Philippines,"MULTIPOLYGON (((122.67391 8.14023, 122.67646 8...",[Zamboanga Peninsula],3.804251e+06,True
165,Gadang,PHL,9000,Philippines,"MULTIPOLYGON (((121.60453 17.39660, 121.59203 ...","[Cagayan Valley, CAR]",6.097376e+06,True
174,Madukayang Kalinga,PHL,4000,Philippines,"MULTIPOLYGON (((121.50057 17.10707, 121.50037 ...",[CAR],2.057254e+06,True
223,Iwaak,PHL,3200,Philippines,"MULTIPOLYGON (((120.89145 16.40731, 120.91391 ...","[Cagayan Valley, CAR]",6.097376e+06,True
233,Southern Atta,PHL,800,Philippines,"MULTIPOLYGON (((121.47756 18.04127, 121.47782 ...",[Cagayan Valley],4.040122e+06,True
...,...,...,...,...,...,...,...,...
10173,Kaagan Kalagan,PHL,3700,Philippines,"MULTIPOLYGON (((125.34932 6.81387, 125.35327 6...",[Davao Region],6.054097e+06,True
10210,Cuyonon,PHL,213000,Philippines,"MULTIPOLYGON (((119.48023 11.42052, 119.48786 ...",[Mimaropa],2.910419e+06,True
10214,Kagayanen,PHL,37000,Philippines,"MULTIPOLYGON (((119.83139 10.64961, 119.84064 ...",[Mimaropa],2.910419e+06,True
10311,"Han Chinese, Min Nan",PHL,1210000,Philippines,"MULTIPOLYGON (((123.49109 8.64528, 123.51193 8...","[ARMM, Calabarzon, Caraga, Central Luzon, Cent...",9.109527e+07,True


<hr style="border: 5px solid #003262;" />
<hr style="border: 1px solid #fdb515;" />

# Map

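* This section displays an interactive map of the people groups that did not intersect any ADM1 boundary (drawn in red). If every people group intersected a boundary, a message is printed instead of a map.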

In [22]:
def map_results(df, query=None):
    '''
    description: outputs an interactive map of people groups that did not
        intersect any ADM1 boundaries, i.e. the rows whose
        `ADM1 Boundaries Present` value is the string "NONE".
    input:
        df: validation results table. It must have an
            `ADM1 Boundaries Present` column and its filtered rows must offer
            an `.explore()` method (assumes a geopandas GeoDataFrame such as
            the `results` produced earlier in this notebook -- TODO confirm).
        query: currently unused. It is kept in the signature so existing calls
            that pass it keep working.
    output: folium map of the non-intersecting people groups drawn in red, or
        None (after printing a message) when every people group intersected
        at least one ADM1 boundary.
    '''

    # Filter once; the original re-ran the same query on the already-filtered
    # frame just to count its rows.
    unmatched = df.query('`ADM1 Boundaries Present` == "NONE"')

    # Nothing to draw: tell the user instead of rendering an empty map.
    if unmatched.empty:
        print('This country has no people groups that did not intersect with ADM1 boundaries.')
        return None

    return unmatched.explore(color='red')
In [23]:
# Map the people groups in `results` whose `ADM1 Boundaries Present` value is "NONE".
map_results(results)

<hr style="border: 5px solid #003262;" />
<hr style="border: 1px solid #fdb515;" />

# Saving Results

* To save the results, uncomment the line in the cell for the file format you want (delete the leading `# `) and run that cell.

### CSV file

In [None]:
# results.to_csv(f'./output/{country}_validated.csv')

### Excel

In [None]:
# results.to_excel(f'./output/{country}_validated.xlsx')