In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from pandasgui import show

In [62]:
# Read interpolated population series
df_gpw = pd.read_excel('gpw_series.xlsx')

# Fill region ID with unique values (1-based group number of each region)
df_gpw['region_id'] = df_gpw.groupby('region').ngroup() + 1

# Keep only relevant columns and rows (years up to and including 2018)
df_gpw = df_gpw.loc[
    df_gpw['year'] <= 2018,
    ['province', 'province_id', 'region', 'region_id', 'year', 'gpw_sum_ipo'],
]

# Generate region-level gridded population.
# Only the population column is summed: a blanket .sum() would also add up the
# ID columns and, on pandas >= 2.0, concatenate the province names.
df_gpw_reg = df_gpw.groupby(['region', 'year'])[['gpw_sum_ipo']].sum()

# Attach each region-year total to its provinces. The right-hand column shares
# the name 'gpw_sum_ipo', so rsuffix turns it into 'gpw_sum_ipo_reg'.
df_gpw = df_gpw.join(df_gpw_reg, on=['region', 'year'], rsuffix='_reg')

# Calculate each province's share of its region's population; these shares are
# used as weights for inequality
df_gpw = df_gpw.assign(gpw_sum_ipo_share=lambda x: x.gpw_sum_ipo / x.gpw_sum_ipo_reg)

# Rearrange order of columns and sort rows
df_gpw = df_gpw[[
    'province', 'province_id', 'region', 'region_id', 'year',
    'gpw_sum_ipo', 'gpw_sum_ipo_reg', 'gpw_sum_ipo_share',
]]
df_gpw = df_gpw.sort_values(by=['province_id', 'year']).reset_index(drop=True)

# Export to excel
df_gpw.to_excel('gpw_province_clean.xlsx', index=False)

In [63]:
# Generate table of top 20 most populous provinces (by mean population over the years kept).
# numeric_only=True averages only the numeric columns: pandas >= 2.0 no longer
# drops text columns such as 'region' silently and would raise a TypeError.
(
    df_gpw
    .groupby('province')
    .mean(numeric_only=True)
    .sort_values(by='gpw_sum_ipo', ascending=False)
    .head(20)
)

Unnamed: 0_level_0,province_id,region_id,year,gpw_sum_ipo
province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Cebu,25,12,2009,3845270.0
Cavite,24,7,2009,2992313.0
Bulacan,17,6,2009,2988431.0
Negros Occidental,51,11,2009,2798382.0
Quezon City,4717,3,2009,2712927.0
Laguna,40,7,2009,2703477.0
Pangasinan,61,4,2009,2694468.0
Rizal,64,7,2009,2439229.0
Pampanga,60,6,2009,2314359.0
Batangas,12,7,2009,2292547.0


In [64]:
# Generate table of the 20 least populous provinces (by mean population over
# the years kept), listed from most to least populous.
# numeric_only=True averages only the numeric columns: pandas >= 2.0 no longer
# drops text columns such as 'region' silently and would raise a TypeError.
(
    df_gpw
    .groupby('province')
    .mean(numeric_only=True)
    .sort_values(by='gpw_sum_ipo', ascending=False)
    .tail(20)
)

Unnamed: 0_level_0,province_id,region_id,year,gpw_sum_ipo
province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
City of Mandaluyong,475,3,2009,326935.382906
City of Navotas,4710,3,2009,292780.301742
Romblon,65,8,2009,263437.463153
Abra,1,2,2009,235023.84346
Catanduanes,23,10,2009,229336.807721
Marinduque,45,8,2009,218320.984031
Kalinga,38,2,2009,201200.70686
Aurora,8,6,2009,193214.088197
Ifugao,33,2,2009,190773.345793
Quirino,63,5,2009,175086.169354
