# Nama tim: FourTwo

In [110]:
import pandas as pd
import numpy as np

# Load the raw inputs. The COVID file is semicolon-separated; the mobility
# file uses the default comma separator.
data_covid = pd.read_csv('data_covid_indo.csv', sep=";")
data_mobilitas = pd.read_csv('data_mobilitas_indo.csv')

### Cleaning Data Mobilitas

In [111]:
# Keep only the Jakarta rows. The explicit .copy() makes the result an
# independent frame, so the column assignment below cannot raise
# SettingWithCopyWarning or write into a slice of the original frame.
data_mobilitas = data_mobilitas.loc[data_mobilitas['sub_region_1'] == 'Jakarta'].copy()

# Parse the dates (day-first) and put the rows in chronological order
data_mobilitas["date"] = pd.to_datetime(data_mobilitas["date"], dayfirst=True)
data_mobilitas = data_mobilitas.sort_values(by="date")

# Drop the unused columns, then drop every row that still has a missing value.
# NOTE(review): 'date' is dropped together with the identifier columns, so after
# dropna() there is no record of which days were removed; the frame can only be
# aligned with other data by row position from here on.
data_mobilitas = data_mobilitas.drop(columns=['date', 'country_region_code', 'country_region', 'sub_region_1', 'sub_region_2', 'metro_area', 'iso_3166_2_code', 'census_fips_code', 'place_id']).dropna()
data_mobilitas = data_mobilitas.astype(int)

# Make the column names readable: underscores become spaces, then title-case
data_mobilitas.columns = data_mobilitas.columns.str.replace('_', ' ')
data_mobilitas.columns = data_mobilitas.columns.str.title()

# Renumber the rows 0..n-1 (reassignment instead of inplace=True)
data_mobilitas = data_mobilitas.reset_index(drop=True)

data_mobilitas

Unnamed: 0,Retail and recreation percent change from baseline,Grocery and pharmacy percent change from baseline,Parks percent change from baseline,Transit stations percent change from baseline,Workplaces percent change from baseline,Residential percent change from baseline
0,-46,-27,-51,-61,-73,22
1,-39,-15,-50,-41,-33,11
2,-40,-19,-58,-34,-15,9
3,-29,-5,-50,-39,-32,13
4,-32,-9,-53,-39,-33,14
...,...,...,...,...,...,...
360,-5,12,-14,-26,-25,8
361,-7,10,-16,-25,-25,8
362,-6,11,-15,-25,-25,8
363,-2,18,-9,-22,-25,7


### Cleaning Data Covid

In [112]:
# Drop the nationwide "(Indonesia)" columns and every row with a missing value,
# then parse the dates (day-first) and sort chronologically.
# assign() builds a new frame instead of writing a column into the result of
# drop().dropna(), which avoids SettingWithCopyWarning.
data_covid = (
    data_covid
    .drop(columns=['Meninggal (Indonesia)', 'Sembuh (Indonesia)', 'Dirawat (Indonesia)',
                   'Positif (Indonesia)', 'Meninggal Harian (Indonesia)', 'Sembuh Harian (Indonesia)',
                   'Dirawat Harian (Indonesia)', 'Positif Harian (Indonesia)'])
    .dropna()
    .assign(Tanggal=lambda frame: pd.to_datetime(frame["Tanggal"], dayfirst=True))
    .sort_values(by="Tanggal")
)

# Keep only the rows dated within 2021 (both bounds inclusive)
data_covid = data_covid[(data_covid['Tanggal'] >= "2021-01-01") & (data_covid['Tanggal'] <= "2021-12-31")]

# Cast every count column to int and replace negative values with their
# absolute value. The date is set aside as a dd/mm/YYYY string first, because
# astype(int)/abs() must only touch the numeric columns, then re-inserted as
# the first column.
data_covid_date = data_covid['Tanggal'].dt.strftime('%d/%m/%Y')
data_covid = data_covid.drop(columns=['Tanggal']).astype(int).abs()
data_covid.insert(0, "Tanggal", data_covid_date)

# Remove the literal "(Jakarta)" suffix from the column names.
# NOTE(review): if the Jakarta columns are named like the Indonesia ones
# (e.g. 'Meninggal (Jakarta)'), the space before the parenthesis remains and
# the names end with a trailing blank - confirm what later cells expect before
# stripping it.
data_covid.columns = data_covid.columns.str.replace('(Jakarta)', '', regex=False)

# Renumber the rows 0..n-1 (reassignment instead of inplace=True)
data_covid = data_covid.reset_index(drop=True)

display(data_covid)

Unnamed: 0,Tanggal,Meninggal,Sembuh,Self-Isolation,Dirawat,Positif,Positif Aktif,Meninggal Harian,Sembuh Harian,Self-Isolation Harian,Dirawat Harian,Positif Harian,Positif Aktif Harian
0,01/01/2021,3308,166512,10082,5789,185691,15871,21,1631,336,32,1956,304
1,02/01/2021,3334,168781,10872,4599,187586,15471,26,2269,790,1190,1895,400
2,03/01/2021,3345,170510,10978,4410,189243,15388,11,1729,106,189,1657,83
3,04/01/2021,3369,173036,10371,4299,191075,14670,24,2526,607,111,1832,718
4,05/01/2021,3392,174131,10897,4479,192899,15376,23,1095,526,180,1824,706
...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,27/12/2021,13584,851096,171,183,865034,354,0,88,5,41,42,46
361,28/12/2021,13585,851138,236,171,865130,407,1,42,65,12,96,53
362,29/12/2021,13587,851167,238,165,865157,403,2,29,2,6,27,4
363,30/12/2021,13587,851201,251,171,865210,422,0,34,13,6,53,19


In [113]:
# Combine the COVID and mobility tables side by side.
# pd.concat(axis=1) pairs rows by index label; both frames were renumbered
# 0..n-1 during cleaning, so row i of one is joined with row i of the other.
# If the row counts differed, concat would silently pad the shorter frame with
# NaN, so fail loudly instead.
assert len(data_covid) == len(data_mobilitas), (
    f"row count mismatch: data_covid has {len(data_covid)} rows, "
    f"data_mobilitas has {len(data_mobilitas)} rows"
)
# NOTE(review): equal lengths do not prove that row i refers to the same
# calendar day in both frames - the mobility frame no longer carries its date
# column at this point. Verify the date ranges match, or merge on the date.
df = pd.concat([data_covid, data_mobilitas], axis=1)

display(df)

Unnamed: 0,Tanggal,Meninggal,Sembuh,Self-Isolation,Dirawat,Positif,Positif Aktif,Meninggal Harian,Sembuh Harian,Self-Isolation Harian,Dirawat Harian,Positif Harian,Positif Aktif Harian,Retail and recreation percent change from baseline,Grocery and pharmacy percent change from baseline,Parks percent change from baseline,Transit stations percent change from baseline,Workplaces percent change from baseline,Residential percent change from baseline
0,01/01/2021,3308,166512,10082,5789,185691,15871,21,1631,336,32,1956,304,-46,-27,-51,-61,-73,22
1,02/01/2021,3334,168781,10872,4599,187586,15471,26,2269,790,1190,1895,400,-39,-15,-50,-41,-33,11
2,03/01/2021,3345,170510,10978,4410,189243,15388,11,1729,106,189,1657,83,-40,-19,-58,-34,-15,9
3,04/01/2021,3369,173036,10371,4299,191075,14670,24,2526,607,111,1832,718,-29,-5,-50,-39,-32,13
4,05/01/2021,3392,174131,10897,4479,192899,15376,23,1095,526,180,1824,706,-32,-9,-53,-39,-33,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,27/12/2021,13584,851096,171,183,865034,354,0,88,5,41,42,46,-5,12,-14,-26,-25,8
361,28/12/2021,13585,851138,236,171,865130,407,1,42,65,12,96,53,-7,10,-16,-25,-25,8
362,29/12/2021,13587,851167,238,165,865157,403,2,29,2,6,27,4,-6,11,-15,-25,-25,8
363,30/12/2021,13587,851201,251,171,865210,422,0,34,13,6,53,19,-2,18,-9,-22,-25,7
