## Setup

In [104]:
import pandas as pd 
import os 

from IPython.core.interactiveshell import InteractiveShell
# Render the value of every bare expression in a cell, not only the last one;
# later cells rely on this to show several DataFrames from a single cell.
InteractiveShell.ast_node_interactivity = "all"


# Folder that holds the raw source workbooks, relative to the notebook.
DATA_PATH = 'dataset/raw'

# One path constant per raw workbook loaded in the "Read Data" section.
UNEMPLOYMENT_PATH = os.path.join(
    DATA_PATH, 'Unemployment Rate Province Level 2022.xlsx'
)
CTZ_GROWTH_RATE_PATH = os.path.join(
    DATA_PATH, 'Citizen Growth Rate Province Level 2022from2020.xlsx'
)
POVERTY_PCT_PATH = os.path.join(
    DATA_PATH, 'Percentage of Poor Citizen Province Level 2022.xlsx'
)

In [105]:
# bamboolib is imported only to switch its notebook extension off, so that
# DataFrames are shown with the regular pandas rendering.
import bamboolib as bam 
bam.disable()

The bamboolib extension was disabled. You can enable it again via 'bam.enable()'. In case that bamboolib was not helpful to you, we are sorry and would like to fix this. Please write us a quick mail to info@8080labs.com so that we can serve you better in the future. Best regards, Tobias and Florian


## Read Data

In [106]:
# Load the three raw workbooks, in order, using read_excel's default options.
unemployment_data, ctz_growth, poverty_pct = (
    pd.read_excel(workbook_path)
    for workbook_path in (UNEMPLOYMENT_PATH, CTZ_GROWTH_RATE_PATH, POVERTY_PCT_PATH)
)

In [107]:
# Preview the three raw frames; each bare expression is rendered separately
# because ast_node_interactivity is set to "all" in the setup cell.
unemployment_data

ctz_growth

poverty_pct

Unnamed: 0,PROVINSI,Februari_2022,Agustus_2022
0,ACEH,5.97,6.17
1,SUMATERA UTARA,5.47,6.16
2,SUMATERA BARAT,6.17,6.28
3,RIAU,4.40,4.37
4,JAMBI,4.70,4.59
...,...,...,...
29,SULAWESI BARAT,3.11,2.34
30,MALUKU,6.44,6.88
31,MALUKU UTARA,4.98,3.98
32,PAPUA BARAT,5.78,5.37


Unnamed: 0,PROVINSI,2022
0,ACEH,1.432930
1,SUMATERA UTARA,1.214010
2,SUMATERA BARAT,1.091596
3,RIAU,1.954455
4,JAMBI,1.328590
...,...,...
29,SULAWESI BARAT,1.576145
30,MALUKU,1.010019
31,MALUKU UTARA,1.611598
32,PAPUA BARAT,2.458412


Unnamed: 0,PROVINSI,Semester 1 (Maret)_2022,Semester 2 (September)_2022
0,ACEH,14.64,14.75
1,SUMATERA UTARA,8.42,8.33
2,SUMATERA BARAT,5.92,6.04
3,RIAU,6.78,6.84
4,JAMBI,7.62,7.70
...,...,...,...
29,SULAWESI BARAT,11.75,11.92
30,MALUKU,15.97,16.23
31,MALUKU UTARA,6.23,6.37
32,PAPUA BARAT,21.33,21.43


## Data Manipulation 

### Generate Average of 2022 Poverty

In [108]:
# Average the two 2022 semester readings into one poverty figure per province.
poverty_pct['poverty_pct_avg_2022'] = (
    poverty_pct['Semester 1 (Maret)_2022']
    .add(poverty_pct['Semester 2 (September)_2022'])
    .div(2)
)


In [109]:
# The per-semester columns are no longer needed once the average exists.
poverty_pct = poverty_pct.drop(
    columns=['Semester 1 (Maret)_2022', 'Semester 2 (September)_2022']
)

### Generate Average of 2022 Unemployment

In [110]:
# Check the exact column labels before referencing them in the average below.
unemployment_data.columns

Index(['PROVINSI', 'Februari_2022', 'Agustus_2022'], dtype='object')

In [111]:
# Average the February and August 2022 readings into one rate per province.
unemployment_data['unemployment_avg_2022'] = (
    unemployment_data['Februari_2022']
    .add(unemployment_data['Agustus_2022'])
    .div(2)
)

In [112]:
# Keep only the averaged rate; the two monthly columns are dropped.
unemployment_data = unemployment_data.drop(
    columns=['Februari_2022', 'Agustus_2022']
)

### Merging All Data Based on PROVINSI Key

In [113]:
# Step 1: attach the citizen growth rate to the averaged unemployment figures.
merge_1 = pd.merge(unemployment_data, ctz_growth, how='inner', on='PROVINSI')
# Step 2: add the averaged poverty percentage. Both joins are inner joins, so
# only provinces whose PROVINSI value appears in all three tables are kept.
merged_all = pd.merge(merge_1, poverty_pct, how='inner', on='PROVINSI')

In [114]:
# Inspect the combined table produced by the two merges.
merged_all

Unnamed: 0,PROVINSI,unemployment_avg_2022,2022,poverty_pct_avg_2022
0,ACEH,6.070,1.432930,14.695
1,SUMATERA UTARA,5.815,1.214010,8.375
2,SUMATERA BARAT,6.225,1.091596,5.980
3,RIAU,4.385,1.954455,6.810
4,JAMBI,4.645,1.328590,7.660
...,...,...,...,...
29,SULAWESI BARAT,2.725,1.576145,11.835
30,MALUKU,6.660,1.010019,16.100
31,MALUKU UTARA,4.480,1.611598,6.300
32,PAPUA BARAT,5.575,2.458412,21.380


### Generate West, Middle, and East Indonesia Encoding

First we map each province to its region, and then map each region to a zone of Indonesia (WEST, MIDDLE, or EAST).

In [115]:
# Province -> region lookup. The literal is written grouped by region and then
# flattened so that every province name becomes a key pointing at its region.
regional_mapping = {
    province: region
    for region, provinces in {
        "SUMATERA": [
            "ACEH",
            "BENGKULU",
            "JAMBI",
            "KEP. BANGKA BELITUNG",
            "KEP. RIAU",
            "LAMPUNG",
            "RIAU",
            "SUMATERA BARAT",
            "SUMATERA SELATAN",
            "SUMATERA UTARA",
        ],
        "JAWA": [
            "BANTEN",
            "DI YOGYAKARTA",
            "JAWA BARAT",
            "JAWA TENGAH",
            "JAWA TIMUR",
            "DKI JAKARTA",
        ],
        "BALI DAN NUSA TENGGARA": [
            "BALI",
            "NUSA TENGGARA BARAT",
            "NUSA TENGGARA TIMUR",
        ],
        "KALIMANTAN": [
            "KALIMANTAN BARAT",
            "KALIMANTAN SELATAN",
            "KALIMANTAN TENGAH",
            "KALIMANTAN TIMUR",
            "KALIMANTAN UTARA",
        ],
        "SULAWESI": [
            "GORONTALO",
            "SULAWESI BARAT",
            "SULAWESI SELATAN",
            "SULAWESI TENGAH",
            "SULAWESI TENGGARA",
            "SULAWESI UTARA",
        ],
        "MALUKU DAN PAPUA": [
            "MALUKU",
            "MALUKU UTARA",
            "PAPUA",
            "PAPUA BARAT",
        ],
    }.items()
    for province in provinces
}

# Look up each province's region; a PROVINSI value that is not a key of
# regional_mapping yields a missing value in REGIONAL.
merged_all['REGIONAL'] = merged_all['PROVINSI'].map(regional_mapping)

In [116]:
# Region -> zone lookup used to derive the ZONE column (WEST / MIDDLE / EAST).
west_midle_east_mapping = {
    'SUMATERA': 'WEST',
    'JAWA': 'MIDDLE',
    'KALIMANTAN': 'WEST',
    'SULAWESI': 'EAST',
    'MALUKU DAN PAPUA': 'EAST',
    'BALI DAN NUSA TENGGARA': 'MIDDLE',
}

In [117]:
# Translate each region into its zone; a REGIONAL value without an entry in
# the mapping yields a missing value in ZONE.
merged_all['ZONE'] = merged_all['REGIONAL'].map(west_midle_east_mapping)

In [118]:
# Count missing values per column; non-zero counts in REGIONAL or ZONE would
# mean a province or region was not found in the lookup dictionaries.
merged_all.isnull().sum()

PROVINSI                 0
unemployment_avg_2022    0
2022                     0
poverty_pct_avg_2022     0
REGIONAL                 0
ZONE                     0
dtype: int64

### Saving Result

In [119]:
## Create the output directory dataset/clean.
## NOTE(review): the call is left commented out, presumably because os.mkdir
## raises FileExistsError when the directory already exists - confirm.

#os.mkdir('dataset/clean')

In [120]:
# Ensure the output folder exists so the export also works on a fresh checkout;
# exist_ok=True keeps this cell safe to re-run when the folder is already there.
os.makedirs('dataset/clean', exist_ok=True)
# Write the final table without the positional index column.
merged_all.to_excel('dataset/clean/poverty_analysis.xlsx', index=False)

In [121]:
# Show the table as exported, including the REGIONAL and ZONE columns.
merged_all

Unnamed: 0,PROVINSI,unemployment_avg_2022,2022,poverty_pct_avg_2022,REGIONAL,ZONE
0,ACEH,6.070,1.432930,14.695,SUMATERA,WEST
1,SUMATERA UTARA,5.815,1.214010,8.375,SUMATERA,WEST
2,SUMATERA BARAT,6.225,1.091596,5.980,SUMATERA,WEST
3,RIAU,4.385,1.954455,6.810,SUMATERA,WEST
4,JAMBI,4.645,1.328590,7.660,SUMATERA,WEST
...,...,...,...,...,...,...
29,SULAWESI BARAT,2.725,1.576145,11.835,SULAWESI,EAST
30,MALUKU,6.660,1.010019,16.100,MALUKU DAN PAPUA,EAST
31,MALUKU UTARA,4.480,1.611598,6.300,MALUKU DAN PAPUA,EAST
32,PAPUA BARAT,5.575,2.458412,21.380,MALUKU DAN PAPUA,EAST
