## Import Library

In [77]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs4

## Scrape data dari Wikipedia

In [78]:
# Download the Wikipedia statistics page. The timeout keeps the cell from
# hanging indefinitely, and raise_for_status() surfaces HTTP errors here
# instead of letting an error page be parsed as if it were the article.
data_covid_page = requests.get(
    "https://en.wikipedia.org/wiki/Statistics_of_the_COVID-19_pandemic_in_Indonesia",
    timeout=30,
)
data_covid_page.raise_for_status()

# Parse the raw response bytes with the lxml parser.
data_covid_soup = bs4(data_covid_page.content, 'lxml')

## Ambil tabelnya

In [79]:
# Grab the first <table> whose class list contains "wikitable".
data_covid_tabel = data_covid_soup.find('table', class_='wikitable')

# find() returns None when nothing matches; stop here with a clear message
# rather than failing later while parsing the table.
if data_covid_tabel is None:
    raise ValueError(
        "No table with class 'wikitable' was found; the page layout may have changed."
    )

data_covid_tabel

<table class="wikitable float sortable" style="text-align:right; font-size:85%; margin:0px 0px 0em 0em;">
<tbody><tr>
<th colspan="9" style="text-align:center;">COVID-19 cases in Indonesia<sup class="reference" id="cite_ref-fn0_1-0"><a href="#cite_note-fn0-1">[a]</a></sup><sup class="reference" id="cite_ref-fn1_2-0"><a href="#cite_note-fn1-2">[b]</a></sup>
</th></tr>
<tr>
<th>Province
</th>
<th>Cases
</th>
<th>Recoveries
</th>
<th>Deaths
</th>
<th>Active<sup class="reference" id="cite_ref-fn2_3-0"><a href="#cite_note-fn2-3">[c]</a></sup>
</th>
<th>Cases per<br/>100,000<br/> population<sup class="reference" id="cite_ref-fn3_4-0"><a href="#cite_note-fn3-4">[d]</a></sup>
</th>
<th>Recovery<br/>rate
</th>
<th>Fatality<br/>rate
</th>
<th>Official website
</th></tr>
<tr>
<th style="text-align:left;"><a class="image" href="/wiki/File:Coat_of_arms_of_Aceh.svg"><img alt="Coat of arms of Aceh.svg" data-file-height="516" data-file-width="506" decoding="async" height="20" src="//upload.wikimedia.o

## Parse tabel

In [80]:
# read_html returns a list of DataFrames (one per <table> it finds), so at this
# point `data_covid_df` is a list despite its name.
# The markup is wrapped in StringIO because passing a literal HTML string to
# read_html is deprecated in recent pandas releases.
data_covid_df = pd.read_html(StringIO(str(data_covid_tabel)))
data_covid_df

[                    COVID-19 cases in Indonesia[a][b]  \
                                              Province   
 0                                                Aceh   
 1                                                Bali   
 2                             Bangka Belitung Islands   
 3                                              Banten   
 4                                            Bengkulu   
 5                                        Central Java   
 6                                  Central Kalimantan   
 7                                    Central Sulawesi   
 8                                           East Java   
 9                                     East Kalimantan   
 10                                 East Nusa Tenggara   
 11                                          Gorontalo   
 12                                            Jakarta   
 13                                              Jambi   
 14                                            Lampung   
 15           

## Akses tabelnya

In [81]:
# Take the first (index 0) DataFrame from the list returned by read_html.
# NOTE: this rebinds `data_covid_df` from a list to a DataFrame, so the cell is
# not safe to re-run on its own; re-run the read_html cell first.
data_covid_df = data_covid_df[0]
data_covid_df

Unnamed: 0_level_0,COVID-19 cases in Indonesia[a][b],COVID-19 cases in Indonesia[a][b],COVID-19 cases in Indonesia[a][b],COVID-19 cases in Indonesia[a][b],COVID-19 cases in Indonesia[a][b],COVID-19 cases in Indonesia[a][b],COVID-19 cases in Indonesia[a][b],COVID-19 cases in Indonesia[a][b],COVID-19 cases in Indonesia[a][b]
Unnamed: 0_level_1,Province,Cases,Recoveries,Deaths,Active[c],"Cases per 100,000 population[d]",Recovery rate,Fatality rate,Official website
0,Aceh,44842,42515,2263,64,850,,,covid19.acehprov.go.id
1,Bali,172387,167356,4866,165,3993,,,infocorona.baliprov.go.id
2,Bangka Belitung Islands,67135,65405,1651,79,4612,,,covid19.babelprov.go.id
3,Banten,364679,361040,2986,653,3063,,,infocorona.bantenprov.go.id
4,Bengkulu,29838,29257,528,53,1484,,,covid19.bengkuluprov.go.id
5,Central Java,655026,617919,33956,3151,1794,,,corona.jatengprov.go.id
6,Central Kalimantan,59291,57683,1555,53,2221,,,corona.kalteng.go.id
7,Central Sulawesi,62894,60983,1752,159,2106,,,dinkes.sultengprov.go.id/category/covid-19
8,East Java,636284,603328,32224,732,1565,,,infocovid19.jatimprov.go.id
9,East Kalimantan,214380,208517,5792,71,5692,,,covid19.kaltimprov.go.id


## Ganti nama header kolom, drop kolom dan baris yang tidak dibutuhkan

In [82]:
# Positional layout of the part of the table that is kept.
PROVINCE_ROW_COUNT = 34  # leading rows kept; presumably one per province - confirm against the table
KEPT_COLUMN_COUNT = 4    # Province, Cases, Recoveries, Deaths

data_covid_df = (
    data_covid_df
    # Rename the top header level so the columns can be reached as `header`.
    .rename(columns={'COVID-19 cases in Indonesia[a][b]': 'header'})
    # Keep the leading rows and columns by position. This replaces passing a
    # DataFrame as `labels` to drop(), which only works through implicit
    # iteration over its column labels.
    .iloc[:PROVINCE_ROW_COUNT, :KEPT_COLUMN_COUNT]
)
data_covid_df

Unnamed: 0_level_0,header,header,header,header
Unnamed: 0_level_1,Province,Cases,Recoveries,Deaths
0,Aceh,44842,42515,2263
1,Bali,172387,167356,4866
2,Bangka Belitung Islands,67135,65405,1651
3,Banten,364679,361040,2986
4,Bengkulu,29838,29257,528
5,Central Java,655026,617919,33956
6,Central Kalimantan,59291,57683,1555
7,Central Sulawesi,62894,60983,1752
8,East Java,636284,603328,32224
9,East Kalimantan,214380,208517,5792


## Buat dataframe baru supaya lebih rapi

In [83]:
# Scraped column name (under the `header` level) -> Indonesian column label.
kolom_baru = {
    'Province': 'Provinsi',
    'Cases': 'Total Kasus',
    'Recoveries': 'Pasien Sembuh',
    'Deaths': 'Pasien Meninggal',
}

# Build a flat, single-level frame from the selected columns.
new_data_covid = pd.DataFrame({
    label: data_covid_df['header'][source]
    for source, label in kolom_baru.items()
})
new_data_covid

Unnamed: 0,Provinsi,Total Kasus,Pasien Sembuh,Pasien Meninggal
0,Aceh,44842,42515,2263
1,Bali,172387,167356,4866
2,Bangka Belitung Islands,67135,65405,1651
3,Banten,364679,361040,2986
4,Bengkulu,29838,29257,528
5,Central Java,655026,617919,33956
6,Central Kalimantan,59291,57683,1555
7,Central Sulawesi,62894,60983,1752
8,East Java,636284,603328,32224
9,East Kalimantan,214380,208517,5792


## Ubah nama provinsi

In [84]:
# Old name -> new name. Presumably these match the spellings used by the
# location data (e.g. 'Bangka Belitung') - confirm against that source.
nama_provinsi_baru = {
    'Bangka Belitung Islands': 'Bangka Belitung',
    'North Sumatra': 'North Sumatera',
    'South Sumatra': 'South Sumatera',
    'Special Region of Yogyakarta': 'Yogyakarta',
}

new_data_covid['Provinsi'] = new_data_covid['Provinsi'].replace(nama_provinsi_baru)
new_data_covid

Unnamed: 0,Provinsi,Total Kasus,Pasien Sembuh,Pasien Meninggal
0,Aceh,44842,42515,2263
1,Bali,172387,167356,4866
2,Bangka Belitung,67135,65405,1651
3,Banten,364679,361040,2986
4,Bengkulu,29838,29257,528
5,Central Java,655026,617919,33956
6,Central Kalimantan,59291,57683,1555
7,Central Sulawesi,62894,60983,1752
8,East Java,636284,603328,32224
9,East Kalimantan,214380,208517,5792


## Get request data lokasi

In [85]:
# Fetch the province location sample (a JSON file hosted on GitHub).
# The timeout keeps the cell from hanging indefinitely.
response = requests.get(
    'https://raw.githubusercontent.com/Singa-dev/testing_dev/main/gps_data_sample.json',
    timeout=30,
)

# Validate via the HTTP status only: raw.githubusercontent.com typically serves
# files as text/plain, so requiring an 'application/json' Content-Type would
# silently skip a valid payload. Any HTTP error is raised here instead.
response.raise_for_status()
response.status_code

In [86]:
# Decode the response body as JSON; the output below shows a list of dicts
# with 'Provinsi', 'latitude', 'longitude' and 'logo' keys.
location_data = response.json()
location_data

[{'Provinsi': 'Aceh',
  'latitude': 4.695135,
  'longitude': 96.7493993,
  'logo': 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/41/Coat_of_arms_of_Aceh.svg/1200px-Coat_of_arms_of_Aceh.svg.png'},
 {'Provinsi': 'Bali',
  'latitude': -8.4095178,
  'longitude': 115.188916,
  'logo': 'https://4.bp.blogspot.com/-ELlrLdH0frM/WSz4AjqIWaI/AAAAAAAAASY/EF5ayA5zXn05TXw53cRUVTJeh6lzUJDDwCLcB/s400/Lambang%2BDaerah%2BProvinsi%2BBali%2B2.png'},
 {'Provinsi': 'Bangka Belitung',
  'latitude': -2.7410513,
  'longitude': 106.4405872,
  'logo': 'https://upload.wikimedia.org/wikipedia/commons/thumb/0/08/Coat_of_arms_of_Bangka_Belitung_Islands.svg/1200px-Coat_of_arms_of_Bangka_Belitung_Islands.svg.png'},
 {'Provinsi': 'Banten',
  'latitude': -6.4058172,
  'longitude': 106.0640179,
  'logo': 'https://upload.wikimedia.org/wikipedia/commons/8/82/Coat_of_arms_of_Banten.png'},
 {'Provinsi': 'Bengkulu',
  'latitude': -3.5778471,
  'longitude': 102.3463875,
  'logo': 'https://upload.wikimedia.org/wikiped

In [87]:
# Inspect the first record to see which fields are available per province.
location_data[0]

{'Provinsi': 'Aceh',
 'latitude': 4.695135,
 'longitude': 96.7493993,
 'logo': 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/41/Coat_of_arms_of_Aceh.svg/1200px-Coat_of_arms_of_Aceh.svg.png'}

## Buat dataframe untuk data titik koordinat provinsi

In [88]:
# Empty frame with the three coordinate columns, ready to be filled.
kolom_kordinat = ['Provinsi', 'Latitude', 'Longitude']
data_kordinat = pd.DataFrame(columns=kolom_kordinat)

In [89]:
# Build the coordinate frame in a single step from the JSON records instead of
# appending one row at a time with .loc (slow, and it leaves the numeric
# columns as generic objects). Only the name and the coordinates are kept.
# Passing `columns` keeps the expected layout even when location_data is empty.
data_kordinat = pd.DataFrame(
    [
        {
            'Provinsi': lokasi['Provinsi'],
            'Latitude': lokasi['latitude'],
            'Longitude': lokasi['longitude'],
        }
        for lokasi in location_data
    ],
    columns=['Provinsi', 'Latitude', 'Longitude'],
)

data_kordinat

Unnamed: 0,Provinsi,Latitude,Longitude
0,Aceh,4.695135,96.749399
1,Bali,-8.409518,115.188916
2,Bangka Belitung,-2.741051,106.440587
3,Banten,-6.405817,106.064018
4,Bengkulu,-3.577847,102.346388
5,Central Java,-7.150975,110.140259
6,Central Kalimantan,-1.681488,113.382355
7,Central Sulawesi,-1.430025,121.445618
8,East Java,-7.536064,112.238402
9,East Kalimantan,1.64063,116.419389


## Gabungkan kedua dataframe

In [90]:
# Normalise the spelling of West Sumatra on BOTH frames before joining.
# Renaming only after the merge would leave that province without coordinates
# whenever the two sources spell it differently; normalising both sides makes
# the join key agree regardless of which spelling the location data uses.
nama_seragam = {'West Sumatra': 'West Sumatera'}

data_covid_19_indonesia = pd.merge(
    new_data_covid.assign(Provinsi=new_data_covid['Provinsi'].replace(nama_seragam)),
    data_kordinat.assign(Provinsi=data_kordinat['Provinsi'].replace(nama_seragam)),
    on='Provinsi',
    how='left',  # keep every province from the COVID table, matched or not
)
data_covid_19_indonesia

Unnamed: 0,Provinsi,Total Kasus,Pasien Sembuh,Pasien Meninggal,Latitude,Longitude
0,Aceh,44842,42515,2263,4.695135,96.749399
1,Bali,172387,167356,4866,-8.409518,115.188916
2,Bangka Belitung,67135,65405,1651,-2.741051,106.440587
3,Banten,364679,361040,2986,-6.405817,106.064018
4,Bengkulu,29838,29257,528,-3.577847,102.346388
5,Central Java,655026,617919,33956,-7.150975,110.140259
6,Central Kalimantan,59291,57683,1555,-1.681488,113.382355
7,Central Sulawesi,62894,60983,1752,-1.430025,121.445618
8,East Java,636284,603328,32224,-7.536064,112.238402
9,East Kalimantan,214380,208517,5792,1.64063,116.419389


## Copy dataframe untuk melakukan modifikasi di nama provinsi

In [91]:
# Work on a copy so the province-name edits below do not touch
# data_covid_19_indonesia.
data = data_covid_19_indonesia.copy()
data

Unnamed: 0,Provinsi,Total Kasus,Pasien Sembuh,Pasien Meninggal,Latitude,Longitude
0,Aceh,44842,42515,2263,4.695135,96.749399
1,Bali,172387,167356,4866,-8.409518,115.188916
2,Bangka Belitung,67135,65405,1651,-2.741051,106.440587
3,Banten,364679,361040,2986,-6.405817,106.064018
4,Bengkulu,29838,29257,528,-3.577847,102.346388
5,Central Java,655026,617919,33956,-7.150975,110.140259
6,Central Kalimantan,59291,57683,1555,-1.681488,113.382355
7,Central Sulawesi,62894,60983,1752,-1.430025,121.445618
8,East Java,636284,603328,32224,-7.536064,112.238402
9,East Kalimantan,214380,208517,5792,1.64063,116.419389


In [92]:
# Swap every space in the province name for an underscore (plain text match,
# not a regular expression).
provinsi_underscore = data['Provinsi'].str.replace(' ', '_', regex=False)
data['Provinsi'] = provinsi_underscore
data

Unnamed: 0,Provinsi,Total Kasus,Pasien Sembuh,Pasien Meninggal,Latitude,Longitude
0,Aceh,44842,42515,2263,4.695135,96.749399
1,Bali,172387,167356,4866,-8.409518,115.188916
2,Bangka_Belitung,67135,65405,1651,-2.741051,106.440587
3,Banten,364679,361040,2986,-6.405817,106.064018
4,Bengkulu,29838,29257,528,-3.577847,102.346388
5,Central_Java,655026,617919,33956,-7.150975,110.140259
6,Central_Kalimantan,59291,57683,1555,-1.681488,113.382355
7,Central_Sulawesi,62894,60983,1752,-1.430025,121.445618
8,East_Java,636284,603328,32224,-7.536064,112.238402
9,East_Kalimantan,214380,208517,5792,1.64063,116.419389
