# BNPB Data Manipulation

---

## Import packages

`%pip freeze > requirements.txt`

In [1]:
# Data manipulation
import pandas as pd

# JSON manipulation
import json

# Regular expression
import re

# Translation
from deep_translator import GoogleTranslator

## Load JSONs

### Initial data

In [2]:
# Open the raw BNPB JSON file and parse it into a dictionary.
# The context manager closes the file handle as soon as parsing is done
# (a bare open() would leave it open for the life of the kernel), and the
# encoding is pinned so reading does not depend on the platform default.
with open('../data/raw/bnpb-data-20220125075854.json', encoding = 'utf-8') as initial:
    # Returns JSON object as a dictionary
    data_initial = json.load(initial)

In [3]:
# Inspect the parsed JSON: a dictionary keyed by strings such as '0', where
# each value maps a column name ('No', 'KIB', 'Wilayah', ...) to a list of values
data_initial

{'0': {'No': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
  'KIB': ['3512999202201251',
   '3325999202201231',
   '3325105202201231',
   '3301105202201231',
   '3301105202201232',
   '1610999202201231',
   '3315999202201221',
   '3315999202201222',
   '3309102202201221',
   '3512102202201211'],
  'Wilayah': ['Kab. Situbondo, Jawa Timur',
   'Kab. Batang, Jawa Tengah',
   'Kab. Batang, Jawa Tengah',
   'Kab. Cilacap, Jawa Tengah',
   'Kab. Cilacap, Jawa Tengah',
   'Kab. Ogan Ilir, Sumatera Selatan',
   'Kab. Grobogan, Jawa Tengah',
   'Kab. Grobogan, Jawa Tengah',
   'Kab. Boyolali, Jawa Tengah',
   'Kab. Situbondo, Jawa Timur'],
  'Bencana': ['Lainnya',
   'Lainnya',
   'Puting Beliung',
   'Puting Beliung',
   'Puting Beliung',
   'Lainnya',
   'Lainnya',
   'Lainnya',
   'Tanah Longsor',
   'Tanah Longsor'],
  'Kejadian': ['Kebakaran gudang arsip Diknas',
   'Pohon Tumbang',
   'Angin Puting Beliung',
   'ANGIN KENCANG DI DSN, KUTANGSA, DSN. SIMBAR DAN DSN. DAWUAN, DS. BENER

In [4]:
# Flatten the nested JSON into one list per column

# Start with an empty list for every column name found under key '0'
data_dict = dict((column, []) for column in data_initial['0'])

# Walk the top-level entries in order and append each entry's values to the
# list of the matching column
for chunk in data_initial.values():
    for column, chunk_values in chunk.items():
        data_dict[column].extend(chunk_values)

In [5]:
# Convert data from dictionary to data frame: every key of data_dict becomes a
# column and its list supplies that column's values
df_initial = pd.DataFrame(data_dict)

In [6]:
# Report the size of the frame, then preview its first rows
n_rows, n_cols = df_initial.shape
print('Dimension of data: {} rows and {} columns'.format(n_rows, n_cols))
df_initial.head()

Dimension of data: 39489 rows and 7 columns


Unnamed: 0,No,KIB,Wilayah,Bencana,Kejadian,Detail,Links
0,1,3512999202201251,"Kab. Situbondo, Jawa Timur",Lainnya,Kebakaran gudang arsip Diknas,{'Keterangan': 'Kejadian pada; Hari : ...,https://dibi.bnpb.go.id/xdibi/read/55969//////...
1,2,3325999202201231,"Kab. Batang, Jawa Tengah",Lainnya,Pohon Tumbang,{'Keterangan': 'Pohon Tumbang di Jalan Desa Ke...,https://dibi.bnpb.go.id/xdibi/read/55935//////...
2,3,3325105202201231,"Kab. Batang, Jawa Tengah",Puting Beliung,Angin Puting Beliung,{'Keterangan': 'Angin Puting Beliung melanda 4...,https://dibi.bnpb.go.id/xdibi/read/55936//////...
3,4,3301105202201231,"Kab. Cilacap, Jawa Tengah",Puting Beliung,"ANGIN KENCANG DI DSN, KUTANGSA, DSN. SIMBAR DA...",{'Keterangan': 'Hujan deras disertai angin ken...,https://dibi.bnpb.go.id/xdibi/read/55965//////...
4,5,3301105202201232,"Kab. Cilacap, Jawa Tengah",Puting Beliung,"ANGIN KENCANG DI DSN. CITANGKIL, DS. BOJA, KEC...",{'Keterangan': 'Pada hari minggu tgl. 23 Janua...,https://dibi.bnpb.go.id/xdibi/read/55966//////...


In [7]:
# Function for remapping column(s) with dictionary data type
def _parse_counts(text):
    """Extract ``{label: count}`` pairs from a ``<br>``-delimited string.

    A label is any word that is followed by non-word characters, one more
    character and a literal ``<br>``. For each label the first integer found
    after it (and directly before a ``<br>``) is used as its count; when no
    such integer can be found the count falls back to 0.
    """
    counts = {}
    for label in re.findall(pattern = r'(\w+)\W+.<br>', string = text):
        try:
            count = int(re.findall(pattern = label + r'\W+.(\d+)<br>', string = text)[0])
        except (IndexError, ValueError):
            # No numeric value could be located for this label
            count = 0
        counts[label] = count

    return counts


def mappingValue(x):
    """Split one 'Detail' dictionary into description, victims and damages.

    Parameters
    ----------
    x : dict
        May contain the keys 'Keterangan', 'Korban' and 'Kerusakan'; any
        other key is ignored.

    Returns
    -------
    tuple
        ``(detailed, victims, damages)`` where ``detailed`` is the cleaned
        'Keterangan' text (``None`` when the key is absent) and ``victims`` /
        ``damages`` are ``{label: int}`` dictionaries parsed from 'Korban' /
        'Kerusakan' (empty when the key is absent).
    """
    # Values
    detailed = None
    victims = {}
    damages = {}

    if 'Keterangan' in x:
        # Drop non-ASCII characters, turn newlines into spaces, then collapse
        # runs of spaces into a single space
        value = x['Keterangan'].encode('ascii', 'ignore').decode('utf-8').replace('\n', ' ')
        detailed = re.sub(' +', ' ', value)
    if 'Korban' in x:
        victims = _parse_counts(x['Korban'])
    if 'Kerusakan' in x:
        damages = _parse_counts(x['Kerusakan'])

    return (detailed, victims, damages)

In [8]:
# Split every 'Detail' dictionary into three new columns
detail_parts = df_initial['Detail'].apply(mappingValue)
new_cols = ['Keterangan', 'Korban', 'Kerusakan']
df_initial[new_cols] = detail_parts.tolist()

In [9]:
# Show the data: first rows, now including the 'Keterangan', 'Korban' and
# 'Kerusakan' columns extracted from 'Detail'
df_initial.head()

Unnamed: 0,No,KIB,Wilayah,Bencana,Kejadian,Detail,Links,Keterangan,Korban,Kerusakan
0,1,3512999202201251,"Kab. Situbondo, Jawa Timur",Lainnya,Kebakaran gudang arsip Diknas,{'Keterangan': 'Kejadian pada; Hari : ...,https://dibi.bnpb.go.id/xdibi/read/55969//////...,Kejadian pada; Hari : Minggu Tanggal : 23 Janu...,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ..."
1,2,3325999202201231,"Kab. Batang, Jawa Tengah",Lainnya,Pohon Tumbang,{'Keterangan': 'Pohon Tumbang di Jalan Desa Ke...,https://dibi.bnpb.go.id/xdibi/read/55935//////...,"Pohon Tumbang di Jalan Desa Keconorejo, Kec. T...","{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ..."
2,3,3325105202201231,"Kab. Batang, Jawa Tengah",Puting Beliung,Angin Puting Beliung,{'Keterangan': 'Angin Puting Beliung melanda 4...,https://dibi.bnpb.go.id/xdibi/read/55936//////...,Angin Puting Beliung melanda 4 Desa di Kecamat...,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0}","{'Pendidikan': 1, 'Kesehatan': 0, 'Peribadatan..."
3,4,3301105202201231,"Kab. Cilacap, Jawa Tengah",Puting Beliung,"ANGIN KENCANG DI DSN, KUTANGSA, DSN. SIMBAR DA...",{'Keterangan': 'Hujan deras disertai angin ken...,https://dibi.bnpb.go.id/xdibi/read/55965//////...,Hujan deras disertai angin kencang pada hari M...,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 7, 'Pendidikan': 0, 'Kesehatan': 0, ..."
4,5,3301105202201232,"Kab. Cilacap, Jawa Tengah",Puting Beliung,"ANGIN KENCANG DI DSN. CITANGKIL, DS. BOJA, KEC...",{'Keterangan': 'Pada hari minggu tgl. 23 Janua...,https://dibi.bnpb.go.id/xdibi/read/55966//////...,"Pada hari minggu tgl. 23 Januari 2022, pkl : 1...","{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 1, 'Pendidikan': 0, 'Kesehatan': 0, ..."


### Detailed data

In [10]:
# Open the raw detailed BNPB JSON file and parse it into a dictionary.
# The context manager closes the file handle as soon as parsing is done
# (a bare open() would leave it open for the life of the kernel), and the
# encoding is pinned so reading does not depend on the platform default.
with open('../data/raw/bnpb-data-detailed-20220125154512.json', encoding = 'utf-8') as detailed:
    # Returns JSON object as a dictionary
    data_detailed = json.load(detailed)

In [11]:
# Inspect the parsed JSON: a dictionary keyed by record URL, where each value
# is a dictionary of fields ('latitude', 'longitude', 'keterangan', ...)
data_detailed

{'https://dibi.bnpb.go.id/xdibi/read/55969//////2//1': {'latitude': '-7.711043',
  'longitude': '114.012157',
  'keterangan': 'Kejadian pada;\nHari\xa0\xa0\xa0\xa0\xa0\xa0\xa0 :  Minggu\nTanggal :  23 Januari 2022\nPukul\xa0\xa0\xa0\xa0\xa0:  11.30 WIB\n\nLOKASI;\nDinas Pendidikan dan Kebudayaan\nJl. Madura No.55A, Krajan Mimbaan, \nKelurahan Mimbaan\nRT 03 RW 05\nKec. Panji\nKab. Situbondo',
  'sumber': 'BPBD Kab. SITUBONDO',
  'tgl': '2022-01-25',
  'id_jenis_bencana': '999. Lainnya',
  'prop': '35. JAWA TIMUR',
  'kab': '3512. SITUBONDO'},
 'https://dibi.bnpb.go.id/xdibi/read/55935//////2//2': {'latitude': '-6.94789783665541',
  'longitude': '109.80251186766762',
  'keterangan': 'Pohon Tumbang di Jalan Desa Keconorejo, Kec. Tulis, Kab. Batang',
  'sumber': 'BPBD Kab. BATANG',
  'tgl': '2022-01-23',
  'id_jenis_bencana': '999. Lainnya',
  'prop': '33. JAWA TENGAH',
  'kab': '3325. BATANG'},
 'https://dibi.bnpb.go.id/xdibi/read/55936//////2//3': {'latitude': '-6.960166533552673',
  'l

In [12]:
# JSON restructuring: one flat dictionary per record, with the URL stored
# under 'links'

# Create a list
data_list = []

for key in data_detailed.keys():
    # Initial dictionary for links
    dict_data = {'links': key}

    # Cleanse the 'keterangan': drop non-ASCII characters, turn newlines into
    # spaces and collapse runs of spaces into one
    try:
        value = data_detailed[key]['keterangan'].encode('ascii', 'ignore').decode('utf-8').replace('\n', ' ')
        value = re.sub(' +', ' ', value)
        data_detailed[key]['keterangan'] = value
    except (KeyError, AttributeError):
        # Description missing (KeyError) or not a string (AttributeError):
        # store an empty string instead. Only these two cases are handled so
        # that unrelated errors and interrupts are no longer swallowed.
        data_detailed[key]['keterangan'] = ''

    # Append with initial dictionary
    dict_data = {**dict_data, **data_detailed[key]}

    # Append to list of dictionary
    data_list.append(dict_data)

In [13]:
# Convert data from list of dictionary to data frame: one row per dictionary
# in data_list, one column per key
df_detailed = pd.DataFrame(data_list)

In [14]:
# Report the size of the frame, then preview its first rows
n_rows, n_cols = df_detailed.shape
print('Dimension of data: {} rows and {} columns'.format(n_rows, n_cols))
df_detailed.head()

Dimension of data: 39489 rows and 9 columns


Unnamed: 0,links,latitude,longitude,keterangan,sumber,tgl,id_jenis_bencana,prop,kab
0,https://dibi.bnpb.go.id/xdibi/read/55969//////...,-7.711043,114.012157,Kejadian pada; Hari : Minggu Tanggal : 23 Janu...,BPBD Kab. SITUBONDO,2022-01-25,999. Lainnya,35. JAWA TIMUR,3512. SITUBONDO
1,https://dibi.bnpb.go.id/xdibi/read/55935//////...,-6.94789783665541,109.80251186766762,"Pohon Tumbang di Jalan Desa Keconorejo, Kec. T...",BPBD Kab. BATANG,2022-01-23,999. Lainnya,33. JAWA TENGAH,3325. BATANG
2,https://dibi.bnpb.go.id/xdibi/read/55936//////...,-6.960166533552673,109.83359860768684,Angin Puting Beliung melanda 4 Desa di Kecamat...,BPBD Kab. BATANG,2022-01-23,105. Puting Beliung,33. JAWA TENGAH,3325. BATANG
3,https://dibi.bnpb.go.id/xdibi/read/55965//////...,-7.26602,108.751081,Hujan deras disertai angin kencang pada hari M...,BPBD Kab. CILACAP,2022-01-23,105. Puting Beliung,33. JAWA TENGAH,3301. CILACAP
4,https://dibi.bnpb.go.id/xdibi/read/55966//////...,-7.247742,108.775541,"Pada hari minggu tgl. 23 Januari 2022, pkl : 1...",BPBD Kab. CILACAP,2022-01-23,105. Puting Beliung,33. JAWA TENGAH,3301. CILACAP


### Merge initial and detailed data

In [15]:
# Left-join the detailed records onto the initial records, matching the
# initial 'Links' column against the detailed 'links' column
df_merge = pd.merge(
    df_initial,
    df_detailed,
    how = 'left',
    left_on = 'Links',
    right_on = 'links'
)

In [16]:
# Show the data: first rows of the merged frame (initial columns followed by
# the detailed columns)
df_merge.head()

Unnamed: 0,No,KIB,Wilayah,Bencana,Kejadian,Detail,Links,Keterangan,Korban,Kerusakan,links,latitude,longitude,keterangan,sumber,tgl,id_jenis_bencana,prop,kab
0,1,3512999202201251,"Kab. Situbondo, Jawa Timur",Lainnya,Kebakaran gudang arsip Diknas,{'Keterangan': 'Kejadian pada; Hari : ...,https://dibi.bnpb.go.id/xdibi/read/55969//////...,Kejadian pada; Hari : Minggu Tanggal : 23 Janu...,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55969//////...,-7.711043,114.012157,Kejadian pada; Hari : Minggu Tanggal : 23 Janu...,BPBD Kab. SITUBONDO,2022-01-25,999. Lainnya,35. JAWA TIMUR,3512. SITUBONDO
1,2,3325999202201231,"Kab. Batang, Jawa Tengah",Lainnya,Pohon Tumbang,{'Keterangan': 'Pohon Tumbang di Jalan Desa Ke...,https://dibi.bnpb.go.id/xdibi/read/55935//////...,"Pohon Tumbang di Jalan Desa Keconorejo, Kec. T...","{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55935//////...,-6.94789783665541,109.80251186766762,"Pohon Tumbang di Jalan Desa Keconorejo, Kec. T...",BPBD Kab. BATANG,2022-01-23,999. Lainnya,33. JAWA TENGAH,3325. BATANG
2,3,3325105202201231,"Kab. Batang, Jawa Tengah",Puting Beliung,Angin Puting Beliung,{'Keterangan': 'Angin Puting Beliung melanda 4...,https://dibi.bnpb.go.id/xdibi/read/55936//////...,Angin Puting Beliung melanda 4 Desa di Kecamat...,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0}","{'Pendidikan': 1, 'Kesehatan': 0, 'Peribadatan...",https://dibi.bnpb.go.id/xdibi/read/55936//////...,-6.960166533552673,109.83359860768684,Angin Puting Beliung melanda 4 Desa di Kecamat...,BPBD Kab. BATANG,2022-01-23,105. Puting Beliung,33. JAWA TENGAH,3325. BATANG
3,4,3301105202201231,"Kab. Cilacap, Jawa Tengah",Puting Beliung,"ANGIN KENCANG DI DSN, KUTANGSA, DSN. SIMBAR DA...",{'Keterangan': 'Hujan deras disertai angin ken...,https://dibi.bnpb.go.id/xdibi/read/55965//////...,Hujan deras disertai angin kencang pada hari M...,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 7, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55965//////...,-7.26602,108.751081,Hujan deras disertai angin kencang pada hari M...,BPBD Kab. CILACAP,2022-01-23,105. Puting Beliung,33. JAWA TENGAH,3301. CILACAP
4,5,3301105202201232,"Kab. Cilacap, Jawa Tengah",Puting Beliung,"ANGIN KENCANG DI DSN. CITANGKIL, DS. BOJA, KEC...",{'Keterangan': 'Pada hari minggu tgl. 23 Janua...,https://dibi.bnpb.go.id/xdibi/read/55966//////...,"Pada hari minggu tgl. 23 Januari 2022, pkl : 1...","{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 1, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55966//////...,-7.247742,108.775541,"Pada hari minggu tgl. 23 Januari 2022, pkl : 1...",BPBD Kab. CILACAP,2022-01-23,105. Puting Beliung,33. JAWA TENGAH,3301. CILACAP


## Convert to English

### Initial data

#### 1 Order columns and change column names

In [17]:
# Columns to keep, in output order
cols = ['No', 'KIB', 'Wilayah', 'id_jenis_bencana', 'Korban', 'Kerusakan', 'Links']
# Indonesian -> English column names; entries whose source column is not
# among the selected columns have no effect
column_names_en = {
    'KIB': 'ID',
    'Wilayah': 'Area',
    'id_jenis_bencana': 'Disasters',
    'Kejadian': 'Detailed Disaster',
    'Keterangan': 'Description',
    'Korban': 'Victims',
    'Kerusakan': 'Property Damages',
    'Links': 'URL'
}
# Select the columns and rename them in one step
df_final = df_merge[cols].rename(columns = column_names_en)

In [18]:
# Show the data: first rows with the selected columns under their English names
df_final.head()

Unnamed: 0,No,ID,Area,Disasters,Victims,Property Damages,URL
0,1,3512999202201251,"Kab. Situbondo, Jawa Timur",999. Lainnya,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55969//////...
1,2,3325999202201231,"Kab. Batang, Jawa Tengah",999. Lainnya,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55935//////...
2,3,3325105202201231,"Kab. Batang, Jawa Tengah",105. Puting Beliung,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0}","{'Pendidikan': 1, 'Kesehatan': 0, 'Peribadatan...",https://dibi.bnpb.go.id/xdibi/read/55936//////...
3,4,3301105202201231,"Kab. Cilacap, Jawa Tengah",105. Puting Beliung,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 7, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55965//////...
4,5,3301105202201232,"Kab. Cilacap, Jawa Tengah",105. Puting Beliung,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 1, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55966//////...


#### 2 Replace values in column `Disasters`

In [19]:
# List of unique values, i.e. every label the translation step has to cover
df_final['Disasters'].unique()

array(['999. Lainnya', '105. Puting Beliung', '102. Tanah Longsor',
       '101. Banjir', '103. Banjir dan Tanah longsor',
       '107. Kebakaran Hutan dan Lahan', '108. Gempa Bumi', '104. Abrasi',
       '111. Letusan Gunung Api', '106. Kekeringan', None,
       '110. Gempa Bumi dan Tsunami', '109. Tsunami'], dtype=object)

In [154]:
# Indonesian disaster labels (including their numeric prefix) -> English
# names; a missing label (None) is filed under 'Others'
disaster_names_en = {
    '999. Lainnya': 'Others',
    '105. Puting Beliung': 'Tornado',
    '102. Tanah Longsor': 'Landslide',
    '101. Banjir': 'Flood',
    '103. Banjir dan Tanah longsor': 'Floods and Landslides',
    '107. Kebakaran Hutan dan Lahan': 'Forest and Land Fires',
    '108. Gempa Bumi': 'Earthquake',
    '104. Abrasi': 'Abrasion',
    '111. Letusan Gunung Api': 'Volcanic Eruption',
    '106. Kekeringan': 'Drought',
    '110. Gempa Bumi dan Tsunami': 'Earthquake and Tsunami',
    '109. Tsunami': 'Tsunami',
    None: 'Others'
}
# Replace value in column 'Disasters'
df_final['Disasters'] = df_final['Disasters'].replace(disaster_names_en)

In [155]:
# Show the data: 'Disasters' now holds the English names
df_final.head()

Unnamed: 0,No,ID,Area,Disasters,Victims,Property Damages,URL
0,1,3512999202201251,"Kab. Situbondo, Jawa Timur",Others,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55969//////...
1,2,3325999202201231,"Kab. Batang, Jawa Tengah",Others,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55935//////...
2,3,3325105202201231,"Kab. Batang, Jawa Tengah",Tornado,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0}","{'Pendidikan': 1, 'Kesehatan': 0, 'Peribadatan...",https://dibi.bnpb.go.id/xdibi/read/55936//////...
3,4,3301105202201231,"Kab. Cilacap, Jawa Tengah",Tornado,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 7, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55965//////...
4,5,3301105202201232,"Kab. Cilacap, Jawa Tengah",Tornado,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 1, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55966//////...


#### 3 Replace values in column `Area`

In [156]:
# Remove string 'Kab.' and 'Kota' in column 'Area'.
# The keys are regular expressions (regex = True), so the dot in 'Kab.' is
# escaped to match a literal '.' rather than any character.
df_final['Area'] = df_final['Area'].replace(
    {
        r'Kab\. ': '',
        'Kota ': ''
    },
    regex = True
)

In [157]:
# List of unique values: the province is the non-digit text that follows the
# first comma in 'Area'. The pattern is a raw string (so '\s' and '\D' are not
# treated as invalid string escapes) and is compiled once, not once per row.
province_pattern = re.compile(r',\s*(\D+)')
list_prov = df_final['Area'].apply(lambda x: province_pattern.findall(x)[0])
list_prov.unique()

array(['Jawa Timur', 'Jawa Tengah', 'Sumatera Selatan', 'Sulawesi Barat',
       'Jawa Barat', 'Sulawesi Utara', 'Kep. Bangka Belitung',
       'Kalimantan Tengah', 'Sumatera Utara', 'Sulawesi Tengah',
       'Kalimantan Utara', 'Sumatera Barat', 'Lampung',
       'Kalimantan Selatan', 'Nusa Tenggara Timur', 'Maluku Utara',
       'Kalimantan Timur', 'Kalimantan Barat', 'Sulawesi Tenggara',
       'Aceh', 'Jambi', 'Banten', 'Riau', 'Sulawesi Selatan', 'Gorontalo',
       'Kepulauan Riau', 'Bengkulu', 'Maluku', 'Papua',
       'Kepulauan Bangka Belitung', 'Di Yogyakarta', 'Bali',
       'Dki Jakarta', 'Nusa Tenggara Barat', 'Papua Barat'], dtype=object)

In [158]:
# Replace string of provinces.
# With regex = True every key is searched for inside the 'Area' string, so
# the province part of "District, Province" is rewritten in place.
# 'Sulawesi Barat' is translated as 'West Sulawesi'; mapping it to
# 'West Sumatera' would merge it with 'Sumatera Barat'.
df_final['Area'] = df_final['Area'].replace(
    {
        'Jawa Timur': 'East Java',
        'Jawa Tengah': 'Central Java',
        'Sumatera Selatan': 'South Sumatera',
        'Sulawesi Barat': 'West Sulawesi',
        'Jawa Barat': 'West Java',
        'Sulawesi Utara': 'North Sulawesi',
        'Kep. Bangka Belitung': 'Bangka Belitung Island',
        'Kalimantan Tengah': 'Central Kalimantan',
        'Sumatera Utara': 'North Sumatera',
        'Sulawesi Tengah': 'Central Sulawesi',
        'Kalimantan Utara': 'North Kalimantan',
        'Sumatera Barat': 'West Sumatera',
        'Kalimantan Selatan': 'South Kalimantan',
        'Nusa Tenggara Timur': 'East Nusa Tenggara',
        'Maluku Utara': 'North Maluku',
        'Kalimantan Timur': 'East Kalimantan',
        'Kalimantan Barat': 'West Kalimantan',
        'Sulawesi Tenggara': 'Southeast Sulawesi',
        'Sulawesi Selatan': 'South Sulawesi',
        'Kepulauan Riau': 'Riau Island',
        'Kepulauan Bangka Belitung': 'Bangka Belitung Island',
        'Di Yogyakarta': 'DI Yogyakarta',
        'Dki Jakarta': 'DKI Jakarta',
        'Nusa Tenggara Barat': 'West Nusa Tenggara',
        'Papua Barat': 'West Papua'
    },
    regex = True
)

In [159]:
# Show the data: 'Area' now reads "<district>, <English province name>"
df_final.head()

Unnamed: 0,No,ID,Area,Disasters,Victims,Property Damages,URL
0,1,3512999202201251,"Situbondo, East Java",Others,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55969//////...
1,2,3325999202201231,"Batang, Central Java",Others,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55935//////...
2,3,3325105202201231,"Batang, Central Java",Tornado,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0}","{'Pendidikan': 1, 'Kesehatan': 0, 'Peribadatan...",https://dibi.bnpb.go.id/xdibi/read/55936//////...
3,4,3301105202201231,"Cilacap, Central Java",Tornado,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 7, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55965//////...
4,5,3301105202201232,"Cilacap, Central Java",Tornado,"{'Meninggal': 0, 'Hilang': 0, 'Terluka': 0, 'M...","{'Rumah': 1, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55966//////...


#### 4 Change the dictionary's keys in `Victims`

In [164]:
# Function to change the dictionary's keys
def keys_dict(x, mapping):
    """Return a copy of ``x`` whose keys are translated through ``mapping``.

    Every key of ``x`` must be present in ``mapping``; values are carried
    over unchanged and the original key order is kept.
    """
    return {mapping[old_key]: item for old_key, item in x.items()}

In [166]:
# New keys: Indonesian victim categories -> English labels
new_keys = {
    'Meninggal': 'Died',
    'Hilang': 'Missing',
    'Terluka': 'Injured',
    'Menderita': 'Suffered',
    'Mengungsi': 'Refugees'
}

In [170]:
# Translate the data: rename the keys of every 'Victims' dictionary
df_final['Victims'] = df_final['Victims'].apply(keys_dict, args = (new_keys,))

In [172]:
# Show the data: the 'Victims' dictionaries now use English keys
df_final.head()

Unnamed: 0,No,ID,Area,Disasters,Victims,Property Damages,URL
0,1,3512999202201251,"Situbondo, East Java",Others,"{'Died': 0, 'Missing': 0, 'Injured': 0, 'Suffe...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55969//////...
1,2,3325999202201231,"Batang, Central Java",Others,"{'Died': 0, 'Missing': 0, 'Injured': 0, 'Suffe...","{'Rumah': 0, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55935//////...
2,3,3325105202201231,"Batang, Central Java",Tornado,"{'Died': 0, 'Missing': 0, 'Injured': 0}","{'Pendidikan': 1, 'Kesehatan': 0, 'Peribadatan...",https://dibi.bnpb.go.id/xdibi/read/55936//////...
3,4,3301105202201231,"Cilacap, Central Java",Tornado,"{'Died': 0, 'Missing': 0, 'Injured': 0, 'Suffe...","{'Rumah': 7, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55965//////...
4,5,3301105202201232,"Cilacap, Central Java",Tornado,"{'Died': 0, 'Missing': 0, 'Injured': 0, 'Suffe...","{'Rumah': 1, 'Pendidikan': 0, 'Kesehatan': 0, ...",https://dibi.bnpb.go.id/xdibi/read/55966//////...


#### 5 Change the dictionary's keys in `Property Damages`

In [173]:
# Look at the 'Property Damages' dictionary stored under index label 0
df_final['Property Damages'][0]

{'Rumah': 0,
 'Pendidikan': 0,
 'Kesehatan': 0,
 'Peribadatan': 0,
 'Umum': 0,
 'Perkantoran': 0,
 'Jembatan': 0,
 'Pabrik': 0,
 'Pertokoan': 0}

In [174]:
# New keys: Indonesian property-damage categories -> English labels
new_keys = {
    'Rumah': 'House',
    'Pendidikan': 'School',
    'Kesehatan': 'Health Facility',
    'Peribadatan': 'Places of Worship',
    'Umum': 'Public Facility',
    'Perkantoran': 'Office Building',
    'Jembatan': 'Bridge',
    'Pabrik': 'Factory Building',
    'Pertokoan': 'Store'
}

In [175]:
# Translate the data: rename the keys of every 'Property Damages' dictionary
df_final['Property Damages'] = df_final['Property Damages'].apply(keys_dict, args = (new_keys,))

In [176]:
# Show the data: the 'Property Damages' dictionaries now use English keys
df_final.head()

Unnamed: 0,No,ID,Area,Disasters,Victims,Property Damages,URL
0,1,3512999202201251,"Situbondo, East Java",Others,"{'Died': 0, 'Missing': 0, 'Injured': 0, 'Suffe...","{'House': 0, 'School': 0, 'Health Facility': 0...",https://dibi.bnpb.go.id/xdibi/read/55969//////...
1,2,3325999202201231,"Batang, Central Java",Others,"{'Died': 0, 'Missing': 0, 'Injured': 0, 'Suffe...","{'House': 0, 'School': 0, 'Health Facility': 0...",https://dibi.bnpb.go.id/xdibi/read/55935//////...
2,3,3325105202201231,"Batang, Central Java",Tornado,"{'Died': 0, 'Missing': 0, 'Injured': 0}","{'School': 1, 'Health Facility': 0, 'Places of...",https://dibi.bnpb.go.id/xdibi/read/55936//////...
3,4,3301105202201231,"Cilacap, Central Java",Tornado,"{'Died': 0, 'Missing': 0, 'Injured': 0, 'Suffe...","{'House': 7, 'School': 0, 'Health Facility': 0...",https://dibi.bnpb.go.id/xdibi/read/55965//////...
4,5,3301105202201232,"Cilacap, Central Java",Tornado,"{'Died': 0, 'Missing': 0, 'Injured': 0, 'Suffe...","{'House': 1, 'School': 0, 'Health Facility': 0...",https://dibi.bnpb.go.id/xdibi/read/55966//////...


#### 6 Store the data to JSON format

In [182]:
# Store to JSON format; orient = 'records' writes a list with one object per row
df_final.to_json('../data/raw/new-bnpb-data-20220125075854.json', orient = 'records')

### Detailed data

#### 1 Order columns and change column names

In [187]:
# Columns to keep, in output order
cols = ['No', 'KIB', 'latitude', 'longitude', 'prop', 'kab', 'id_jenis_bencana', 'tgl', 'links']
# Source column names -> English column names
column_names_en = {
    'KIB': 'ID',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'prop': 'Province',
    'kab': 'District',
    'id_jenis_bencana': 'Disasters',
    'tgl': 'Date',
    'links': 'URL'
}
# Select the columns and rename them in one step
df_final = df_merge[cols].rename(columns = column_names_en)

In [188]:
# Show the data: first rows of the detailed selection under English column names
df_final.head()

Unnamed: 0,No,ID,Latitude,Longitude,Province,District,Disasters,Date,URL
0,1,3512999202201251,-7.711043,114.012157,35. JAWA TIMUR,3512. SITUBONDO,999. Lainnya,2022-01-25,https://dibi.bnpb.go.id/xdibi/read/55969//////...
1,2,3325999202201231,-6.94789783665541,109.80251186766762,33. JAWA TENGAH,3325. BATANG,999. Lainnya,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55935//////...
2,3,3325105202201231,-6.960166533552673,109.83359860768684,33. JAWA TENGAH,3325. BATANG,105. Puting Beliung,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55936//////...
3,4,3301105202201231,-7.26602,108.751081,33. JAWA TENGAH,3301. CILACAP,105. Puting Beliung,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55965//////...
4,5,3301105202201232,-7.247742,108.775541,33. JAWA TENGAH,3301. CILACAP,105. Puting Beliung,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55966//////...


#### 2 Replace values in column `Disasters`

In [189]:
# List of unique values, i.e. every label the translation step has to cover
df_final['Disasters'].unique()

array(['999. Lainnya', '105. Puting Beliung', '102. Tanah Longsor',
       '101. Banjir', '103. Banjir dan Tanah longsor',
       '107. Kebakaran Hutan dan Lahan', '108. Gempa Bumi', '104. Abrasi',
       '111. Letusan Gunung Api', '106. Kekeringan', None,
       '110. Gempa Bumi dan Tsunami', '109. Tsunami'], dtype=object)

In [190]:
# Indonesian disaster labels (including their numeric prefix) -> English
# names; a missing label (None) is filed under 'Others'
disaster_names_en = {
    '999. Lainnya': 'Others',
    '105. Puting Beliung': 'Tornado',
    '102. Tanah Longsor': 'Landslide',
    '101. Banjir': 'Flood',
    '103. Banjir dan Tanah longsor': 'Floods and Landslides',
    '107. Kebakaran Hutan dan Lahan': 'Forest and Land Fires',
    '108. Gempa Bumi': 'Earthquake',
    '104. Abrasi': 'Abrasion',
    '111. Letusan Gunung Api': 'Volcanic Eruption',
    '106. Kekeringan': 'Drought',
    '110. Gempa Bumi dan Tsunami': 'Earthquake and Tsunami',
    '109. Tsunami': 'Tsunami',
    None: 'Others'
}
# Replace value in column 'Disasters'
df_final['Disasters'] = df_final['Disasters'].replace(disaster_names_en)

In [191]:
# Show the data: 'Disasters' now holds the English names
df_final.head()

Unnamed: 0,No,ID,Latitude,Longitude,Province,District,Disasters,Date,URL
0,1,3512999202201251,-7.711043,114.012157,35. JAWA TIMUR,3512. SITUBONDO,Others,2022-01-25,https://dibi.bnpb.go.id/xdibi/read/55969//////...
1,2,3325999202201231,-6.94789783665541,109.80251186766762,33. JAWA TENGAH,3325. BATANG,Others,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55935//////...
2,3,3325105202201231,-6.960166533552673,109.83359860768684,33. JAWA TENGAH,3325. BATANG,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55936//////...
3,4,3301105202201231,-7.26602,108.751081,33. JAWA TENGAH,3301. CILACAP,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55965//////...
4,5,3301105202201232,-7.247742,108.775541,33. JAWA TENGAH,3301. CILACAP,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55966//////...


#### 3 Get numbers from `District`

In [214]:
# Get numbers: the first run of digits in 'District' becomes the area code;
# rows without a district (None) stay None.
# The pattern is a raw string so '\d' is not treated as an invalid string escape.
df_final['Area Code'] = df_final['District'].apply(
    lambda x: re.findall(pattern = r'(\d+)', string = x)[0] if x is not None else None
)

In [217]:
# Show the data: the new 'Area Code' column holds the digits taken from 'District'
df_final.head()

Unnamed: 0,No,ID,Latitude,Longitude,Province,District,Disasters,Date,URL,Area Code
0,1,3512999202201251,-7.711043,114.012157,35. JAWA TIMUR,3512. SITUBONDO,Others,2022-01-25,https://dibi.bnpb.go.id/xdibi/read/55969//////...,3512
1,2,3325999202201231,-6.94789783665541,109.80251186766762,33. JAWA TENGAH,3325. BATANG,Others,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55935//////...,3325
2,3,3325105202201231,-6.960166533552673,109.83359860768684,33. JAWA TENGAH,3325. BATANG,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55936//////...,3325
3,4,3301105202201231,-7.26602,108.751081,33. JAWA TENGAH,3301. CILACAP,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55965//////...,3301
4,5,3301105202201232,-7.247742,108.775541,33. JAWA TENGAH,3301. CILACAP,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55966//////...,3301


#### 4 Remove numbers in `Province` and `District`

In [218]:
# Remove numbers in column 'Province': keep the non-digit text that follows
# the leading digits, one separator character and optional whitespace;
# missing values (None) stay None.
# The pattern is a raw string so its backslash escapes reach the regex engine
# without triggering invalid-escape warnings.
df_final['Province'] = df_final['Province'].apply(
    lambda x: re.findall(pattern = r'^\d+.\s*(\D+)', string = x)[0] if x is not None else None
)

In [220]:
# Remove numbers in column 'District': keep the non-digit text that follows
# the leading digits, one separator character and optional whitespace;
# missing values (None) stay None.
# The pattern is a raw string so its backslash escapes reach the regex engine
# without triggering invalid-escape warnings.
df_final['District'] = df_final['District'].apply(
    lambda x: re.findall(pattern = r'^\d+.\s*(\D+)', string = x)[0] if x is not None else None
)

In [239]:
# Show the data: 'Province' and 'District' no longer carry their numeric prefix
df_final.head()

Unnamed: 0,No,ID,Latitude,Longitude,Province,District,Disasters,Date,URL,Area Code
0,1,3512999202201251,-7.711043,114.012157,JAWA TIMUR,SITUBONDO,Others,2022-01-25,https://dibi.bnpb.go.id/xdibi/read/55969//////...,3512
1,2,3325999202201231,-6.94789783665541,109.80251186766762,JAWA TENGAH,BATANG,Others,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55935//////...,3325
2,3,3325105202201231,-6.960166533552673,109.83359860768684,JAWA TENGAH,BATANG,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55936//////...,3325
3,4,3301105202201231,-7.26602,108.751081,JAWA TENGAH,CILACAP,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55965//////...,3301
4,5,3301105202201232,-7.247742,108.775541,JAWA TENGAH,CILACAP,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55966//////...,3301


#### 5 Convert upper-case strings in `Province` and `District` to capitalized words

In [234]:
# Function to decapitalize string(s)
def decapitalize(x, separator):
    """Capitalize every ``separator``-delimited piece of ``x``.

    Each piece is stripped of surrounding whitespace, its first character is
    upper-cased and the rest lower-cased (``str.capitalize``), and the pieces
    are joined again with ``separator``.

    Parameters
    ----------
    x : str or any
        Text to convert. Non-string values (e.g. ``None``) are returned
        unchanged.
    separator : str
        Delimiter used both to split and to re-join the text.

    Returns
    -------
    The converted string, or ``x`` itself when it is not a string or when
    ``separator`` cannot be used to split/join (e.g. an empty separator).
    """
    if not isinstance(x, str):
        return x

    try:
        pieces = x.split(separator)
        return separator.join(piece.strip().capitalize() for piece in pieces)
    except (AttributeError, TypeError, ValueError):
        # Unusable separator: fall back to the untouched input
        return x

In [240]:
# Separator
sep = ' '

# Capitalize each word in column 'Province'.
# args must be a tuple: (sep) is just the string itself, which only worked
# because a one-character string unpacks into a single argument.
df_final['Province'] = df_final['Province'].apply(
    decapitalize,
    args = (sep,)
)

In [241]:
# Capitalize each word in column 'District'.
# args must be a tuple: (sep) is just the string itself, which only worked
# because a one-character string unpacks into a single argument.
df_final['District'] = df_final['District'].apply(
    decapitalize,
    args = (sep,)
)

In [242]:
# Show the data: 'Province' and 'District' are no longer all upper case
df_final.head()

Unnamed: 0,No,ID,Latitude,Longitude,Province,District,Disasters,Date,URL,Area Code
0,1,3512999202201251,-7.711043,114.012157,Jawa Timur,Situbondo,Others,2022-01-25,https://dibi.bnpb.go.id/xdibi/read/55969//////...,3512
1,2,3325999202201231,-6.94789783665541,109.80251186766762,Jawa Tengah,Batang,Others,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55935//////...,3325
2,3,3325105202201231,-6.960166533552673,109.83359860768684,Jawa Tengah,Batang,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55936//////...,3325
3,4,3301105202201231,-7.26602,108.751081,Jawa Tengah,Cilacap,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55965//////...,3301
4,5,3301105202201232,-7.247742,108.775541,Jawa Tengah,Cilacap,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55966//////...,3301


#### 6 Replace string in `Province`

In [244]:
# Replace string of provinces.
# With regex = True every key is searched for inside the 'Province' string.
# 'Sulawesi Barat' is translated as 'West Sulawesi'; mapping it to
# 'West Sumatera' would merge it with 'Sumatera Barat'.
df_final['Province'] = df_final['Province'].replace(
    {
        'Jawa Timur': 'East Java',
        'Jawa Tengah': 'Central Java',
        'Sumatera Selatan': 'South Sumatera',
        'Sulawesi Barat': 'West Sulawesi',
        'Jawa Barat': 'West Java',
        'Sulawesi Utara': 'North Sulawesi',
        'Kep. Bangka Belitung': 'Bangka Belitung Island',
        'Kalimantan Tengah': 'Central Kalimantan',
        'Sumatera Utara': 'North Sumatera',
        'Sulawesi Tengah': 'Central Sulawesi',
        'Kalimantan Utara': 'North Kalimantan',
        'Sumatera Barat': 'West Sumatera',
        'Kalimantan Selatan': 'South Kalimantan',
        'Nusa Tenggara Timur': 'East Nusa Tenggara',
        'Maluku Utara': 'North Maluku',
        'Kalimantan Timur': 'East Kalimantan',
        'Kalimantan Barat': 'West Kalimantan',
        'Sulawesi Tenggara': 'Southeast Sulawesi',
        'Sulawesi Selatan': 'South Sulawesi',
        'Kepulauan Riau': 'Riau Island',
        'Kepulauan Bangka Belitung': 'Bangka Belitung Island',
        'Di Yogyakarta': 'DI Yogyakarta',
        'Dki Jakarta': 'DKI Jakarta',
        'Nusa Tenggara Barat': 'West Nusa Tenggara',
        'Papua Barat': 'West Papua'
    },
    regex = True
)

In [245]:
# Show the data: 'Province' now holds the English province names
df_final.head()

Unnamed: 0,No,ID,Latitude,Longitude,Province,District,Disasters,Date,URL,Area Code
0,1,3512999202201251,-7.711043,114.012157,East Java,Situbondo,Others,2022-01-25,https://dibi.bnpb.go.id/xdibi/read/55969//////...,3512
1,2,3325999202201231,-6.94789783665541,109.80251186766762,Central Java,Batang,Others,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55935//////...,3325
2,3,3325105202201231,-6.960166533552673,109.83359860768684,Central Java,Batang,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55936//////...,3325
3,4,3301105202201231,-7.26602,108.751081,Central Java,Cilacap,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55965//////...,3301
4,5,3301105202201232,-7.247742,108.775541,Central Java,Cilacap,Tornado,2022-01-23,https://dibi.bnpb.go.id/xdibi/read/55966//////...,3301


#### 7 Reorder columns

In [248]:
# Final column order for the detailed data
cols = [
    'No', 'ID', 'Date', 'Latitude', 'Longitude',
    'Area Code', 'Province', 'District', 'Disasters', 'URL'
]
# Keep exactly these columns, in this order
df_final = df_final.loc[:, cols]

In [249]:
# Show the data: first rows in the final column order
df_final.head()

Unnamed: 0,No,ID,Date,Latitude,Longitude,Area Code,Province,District,Disasters,URL
0,1,3512999202201251,2022-01-25,-7.711043,114.012157,3512,East Java,Situbondo,Others,https://dibi.bnpb.go.id/xdibi/read/55969//////...
1,2,3325999202201231,2022-01-23,-6.94789783665541,109.80251186766762,3325,Central Java,Batang,Others,https://dibi.bnpb.go.id/xdibi/read/55935//////...
2,3,3325105202201231,2022-01-23,-6.960166533552673,109.83359860768684,3325,Central Java,Batang,Tornado,https://dibi.bnpb.go.id/xdibi/read/55936//////...
3,4,3301105202201231,2022-01-23,-7.26602,108.751081,3301,Central Java,Cilacap,Tornado,https://dibi.bnpb.go.id/xdibi/read/55965//////...
4,5,3301105202201232,2022-01-23,-7.247742,108.775541,3301,Central Java,Cilacap,Tornado,https://dibi.bnpb.go.id/xdibi/read/55966//////...


#### 8 Store the data to JSON format

In [250]:
# Store to JSON format; orient = 'records' writes a list with one object per row
df_final.to_json(
    path_or_buf = '../data/raw/new-bnpb-data-detailed-20220125154512.json',
    orient = 'records'
)