In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import warnings
warnings.filterwarnings("ignore")

# Directory that holds the barangay-level Excel workbooks read by the cells below.
TABULAR_PATH = 'data/tabular/'
# os.listdir() returns entries in arbitrary (filesystem-dependent) order; sort them so
# the displayed listing is identical on every run and platform.
data_files = sorted(os.listdir(TABULAR_PATH))
data_files

['Condition(State of Repair)oftheBuilding(ARMM, 2010)Brgy.xlsx',
 'Construction Materials of the Roof (ARMM 2015)Brgy.xlsx',
 'Construction Materials of the Walls(ARMM 2015).xlsx',
 'Floor Area (ARMM, 2010) Brgy.xlsx',
 'Household Population by Ethnicity (ARMM, 2010) Brgy.xlsx',
 'Main Source of Water Supply for Cooking (ARMM 2015) Brgy.xlsx',
 'Main Source of Water Supply for Drinking (ARMM 2015) Brgy.xlsx',
 'Number of Households by Kind of Fuel for Lighting (ARMM 2015) Brgy.xlsx',
 'Number of schools per brgy.xlsx',
 'Population 2010, 2015 ARMM,BCT.xlsx',
 'Tenure Status of the Lot (ARMM 2010) Brgy.xlsx',
 'Type of Building (ARMM 2015) Brgy.xlsx',
 'Year Built (ARMM 2010) Brgy.xlsx']

In [2]:
def get_sex_age_cols(sex="bot",min_age=7,max_age=25,columns=None):
    """Return the single-year-of-age column names for one sex code within an age range.

    A column is selected when, after splitting its name on "_", the second-to-last
    token equals ``sex`` and the last token is an integer between ``min_age`` and
    ``max_age`` (both inclusive).

    Parameters
    ----------
    sex : str
        Sex code to match against the second-to-last token. This notebook calls the
        function with "bot", "mal" and "fem".
    min_age, max_age : int
        Inclusive bounds on the age parsed from the last token.
    columns : iterable, optional
        Column labels to scan. When omitted, the columns of the notebook-level
        ``population_df`` are used, so ``population_df`` must already be loaded.

    Returns
    -------
    list
        Matching column labels, in their original order.
    """
    if columns is None:
        # Backward-compatible default: fall back to the global frame loaded by the notebook.
        columns = population_df.columns

    selected_cols = []
    for col in columns:
        # Non-string labels (e.g. integer headers) cannot follow the naming scheme.
        if not isinstance(col, str):
            continue
        tokens = col.split("_")
        if len(tokens) < 2 or tokens[-2] != sex:
            continue
        try:
            age = int(tokens[-1])
        except ValueError:
            # Matching sex code but a non-numeric suffix: not a single-age column.
            continue
        if min_age <= age <= max_age:
            selected_cols.append(col)
    return selected_cols

In [3]:
# Load the population workbook; its columns are scanned by get_sex_age_cols().
population_df = pd.read_excel(
    TABULAR_PATH + 'Population 2010, 2015 ARMM,BCT.xlsx'
)

In [4]:
# Single-year age columns (ages 7 to 25, the function defaults) for each sex code.
both_student_age_cols = get_sex_age_cols()
male_student_age_cols = get_sex_age_cols(sex="mal")
female_student_age_cols = get_sex_age_cols(sex="fem")

# Build the per-barangay totals in one step. Summing straight from population_df and
# attaching the results with .assign() avoids writing into a frame obtained by indexing
# (which raises SettingWithCopyWarning) and removes the need for an in-place drop of
# the intermediate single-age columns.
student_population_df = population_df[["PSGC_BRGY"]].assign(
    both_student_age_7_to_25=population_df[both_student_age_cols].sum(axis=1),
    male_student_age_7_to_25=population_df[male_student_age_cols].sum(axis=1),
    female_student_age_7_to_25=population_df[female_student_age_cols].sum(axis=1),
)
print(student_population_df.shape)
student_population_df.head()

(2721, 4)


Unnamed: 0,PSGC_BRGY,both_student_age_7_to_25,male_student_age_7_to_25,female_student_age_7_to_25
0,PH150702001,250.0,134.0,116.0
1,PH150702002,497.0,247.0,250.0
2,PH150702004,237.0,112.0,125.0
3,PH150702005,953.0,489.0,464.0
4,PH150702006,1277.0,700.0,577.0


In [5]:
# Read only the barangay code and the school count, then give the count a
# code-friendly column name.
schools_df = pd.read_excel(
    TABULAR_PATH+'Number of schools per brgy.xlsx',
    usecols=["PSGC_BRGY","Number of School"],
).rename(columns={'Number of School':'No_of_Schools'})
print(schools_df.shape)
schools_df.head()

(1570, 2)


Unnamed: 0,PSGC_BRGY,No_of_Schools
0,PH150702001,1
1,PH150702002,1
2,PH150702005,1
3,PH150702006,1
4,PH150702007,1


In [6]:
# Left join keeps every row of student_population_df; barangays with no match in
# schools_df get NaN in No_of_Schools.
education_df = student_population_df.merge(
    schools_df,
    how="left",
    on="PSGC_BRGY",
)
print(education_df.shape)
education_df.head()

(2721, 5)


Unnamed: 0,PSGC_BRGY,both_student_age_7_to_25,male_student_age_7_to_25,female_student_age_7_to_25,No_of_Schools
0,PH150702001,250.0,134.0,116.0,1.0
1,PH150702002,497.0,247.0,250.0,1.0
2,PH150702004,237.0,112.0,125.0,
3,PH150702005,953.0,489.0,464.0,1.0
4,PH150702006,1277.0,700.0,577.0,1.0


In [7]:
# Share of households per lighting fuel. Despite the "Perc_" prefix these are
# fractions (count / number of households), not values scaled to 0-100.
source_light_df = (
    pd.read_excel(TABULAR_PATH+'Number of Households by Kind of Fuel for Lighting (ARMM 2015) Brgy.xlsx')
    .assign(
        Perc_Households_Electricity_Light=lambda hh: hh["Electricity"]/hh["Number of HH"],
        Perc_Households_Kerosene_Gass_Light=lambda hh: hh["Kerosene (Gaas)"]/hh["Number of HH"],
    )
    .loc[:, ["PSGC","Perc_Households_Electricity_Light","Perc_Households_Kerosene_Gass_Light"]]
)
print(source_light_df.shape)
source_light_df.head()

(2490, 3)


Unnamed: 0,PSGC,Perc_Households_Electricity_Light,Perc_Households_Kerosene_Gass_Light
0,PH150702001,0.65625,0.34375
1,PH150702002,0.792208,0.207792
2,PH150702004,0.39604,0.594059
3,PH150702005,0.870968,0.126728
4,PH150702006,0.71978,0.236264


In [8]:
# Share of households per drinking-water source. Despite the "Perc_" prefix these are
# fractions (count / number of households), not values scaled to 0-100.
source_drinking_water_df = (
    pd.read_excel(TABULAR_PATH+'Main Source of Water Supply for Drinking (ARMM 2015) Brgy.xlsx')
    .assign(
        Perc_Households_Drinking_Owned_Faucet=lambda hh: hh["Own use faucet community water system"]/hh["Number of Households"],
        Perc_Households_Drinking_Shared_Faucet=lambda hh: hh["Shared faucet community water system"]/hh["Number of Households"],
    )
    .loc[:, ["PSGC","Perc_Households_Drinking_Owned_Faucet","Perc_Households_Drinking_Shared_Faucet"]]
)
print(source_drinking_water_df.shape)
source_drinking_water_df.head()

(2490, 3)


Unnamed: 0,PSGC,Perc_Households_Drinking_Owned_Faucet,Perc_Households_Drinking_Shared_Faucet
0,PH150702001,0.05,0.41875
1,PH150702002,0.051948,0.008658
2,PH150702004,0.019802,0.138614
3,PH150702005,0.099078,0.730415
4,PH150702006,0.001832,0.012821


In [9]:
# Combine the lighting and drinking-water indicators on the shared PSGC code,
# keeping every row of the lighting table.
sdg_df = source_light_df.merge(
    source_drinking_water_df,
    how="left",
    on="PSGC",
)
print(sdg_df.shape)
sdg_df.head()

(2490, 5)


Unnamed: 0,PSGC,Perc_Households_Electricity_Light,Perc_Households_Kerosene_Gass_Light,Perc_Households_Drinking_Owned_Faucet,Perc_Households_Drinking_Shared_Faucet
0,PH150702001,0.65625,0.34375,0.05,0.41875
1,PH150702002,0.792208,0.207792,0.051948,0.008658
2,PH150702004,0.39604,0.594059,0.019802,0.138614
3,PH150702005,0.870968,0.126728,0.099078,0.730415
4,PH150702006,0.71978,0.236264,0.001832,0.012821


In [10]:
# Attach the household indicators to the education table. The key columns have
# different names on each side, so both PSGC_BRGY and PSGC appear in the result;
# rows without a match get NaN in PSGC and in the indicator columns.
education_sdg_df = education_df.merge(
    sdg_df,
    how="left",
    left_on="PSGC_BRGY",
    right_on="PSGC",
)
print(education_sdg_df.shape)
# Per-column count of missing values after the join.
print(education_sdg_df.isna().sum())
education_sdg_df.head()

(2721, 10)
PSGC_BRGY                                    0
both_student_age_7_to_25                     0
male_student_age_7_to_25                     0
female_student_age_7_to_25                   0
No_of_Schools                             1153
PSGC                                       239
Perc_Households_Electricity_Light          239
Perc_Households_Kerosene_Gass_Light        239
Perc_Households_Drinking_Owned_Faucet      239
Perc_Households_Drinking_Shared_Faucet     239
dtype: int64


Unnamed: 0,PSGC_BRGY,both_student_age_7_to_25,male_student_age_7_to_25,female_student_age_7_to_25,No_of_Schools,PSGC,Perc_Households_Electricity_Light,Perc_Households_Kerosene_Gass_Light,Perc_Households_Drinking_Owned_Faucet,Perc_Households_Drinking_Shared_Faucet
0,PH150702001,250.0,134.0,116.0,1.0,PH150702001,0.65625,0.34375,0.05,0.41875
1,PH150702002,497.0,247.0,250.0,1.0,PH150702002,0.792208,0.207792,0.051948,0.008658
2,PH150702004,237.0,112.0,125.0,,PH150702004,0.39604,0.594059,0.019802,0.138614
3,PH150702005,953.0,489.0,464.0,1.0,PH150702005,0.870968,0.126728,0.099078,0.730415
4,PH150702006,1277.0,700.0,577.0,1.0,PH150702006,0.71978,0.236264,0.001832,0.012821
