STUDI KASUS PRAKTIKUM 3

    1. Bagaimana kita dapat memprediksi jumlah total kasus COVID-19 di suatu lokasi berdasarkan fitur-fitur seperti jumlah kematian, jumlah yang sembuh, densitas populasi, dan tingkat fatalitas kasus? (supervised learning)

    2. Bagaimana kita dapat mengelompokkan lokasi-lokasi di Indonesia berdasarkan jumlah total kasus, jumlah kematian, jumlah yang sembuh, dan densitas populasi menggunakan metode clustering seperti KMeans? (unsupervised learning)

IMPORT MODUL

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

from sklearn.ensemble import RandomForestRegressor
import streamlit as st
import plotly.express as px

from pyngrok import ngrok
import os
import time


LOAD DATASET

In [6]:

# Load the raw time-series CSV and list the columns it provides.
df = pd.read_csv("covid_19_indonesia_time_series_all.csv")
print(df.columns)

# Working subset: the columns named below, keeping only rows in which
# every one of them is present.
data = df[[
    'Date',
    'Location ISO Code',
    'Location',
    'Total Cases',
    'Total Deaths',
    'Total Recovered',
    'Population Density',
]].dropna()

Index(['Date', 'Location ISO Code', 'Location', 'New Cases', 'New Deaths',
       'New Recovered', 'New Active Cases', 'Total Cases', 'Total Deaths',
       'Total Recovered', 'Total Active Cases', 'Location Level',
       'City or Regency', 'Province', 'Country', 'Continent', 'Island',
       'Time Zone', 'Special Status', 'Total Regencies', 'Total Cities',
       'Total Districts', 'Total Urban Villages', 'Total Rural Villages',
       'Area (km2)', 'Population', 'Population Density', 'Longitude',
       'Latitude', 'New Cases per Million', 'Total Cases per Million',
       'New Deaths per Million', 'Total Deaths per Million',
       'Total Deaths per 100rb', 'Case Fatality Rate', 'Case Recovered Rate',
       'Growth Factor of New Cases', 'Growth Factor of New Deaths'],
      dtype='object')


    1. Bagaimana kita dapat memprediksi jumlah total kasus COVID-19 di suatu lokasi berdasarkan fitur-fitur seperti jumlah kematian, jumlah yang sembuh, densitas populasi, dan tingkat fatalitas kasus? (supervised learning)

In [7]:
# Keep only the columns needed for the regression and drop incomplete rows.
# Chaining .dropna() returns a new frame instead of mutating a column slice
# in place (a pattern pandas may flag with SettingWithCopyWarning).
df = df[['Location', 'Total Cases', 'Total Deaths', 'Total Recovered', 'Population Density']].dropna()

# Per-row fatality rate. The denominator is masked where Total Cases is not
# positive so those rows become NaN rather than inf; NaN is skipped by the
# per-location mean below, whereas a single inf would poison that mean and
# make the regressor reject the input.
df['Fatality Rate'] = df['Total Deaths'] / df['Total Cases'].where(df['Total Cases'] > 0)

# One row per location: the mean of each numeric column over all its records.
data_loc = df.groupby('Location').mean(numeric_only=True)

features = ['Total Deaths', 'Total Recovered', 'Population Density', 'Fatality Rate']
# A location whose every row had an undefined fatality rate still carries NaN
# here; drop it so the model only sees finite inputs.
data_loc = data_loc.dropna(subset=features)
X = data_loc[features]
y = data_loc['Total Cases']
# NOTE(review): Total Deaths and Total Recovered look like components of
# Total Cases, so a high R-squared may mostly reflect that relationship --
# confirm this feature choice is intended.

# Hold out 30% of the locations for evaluation; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Evaluate on the held-out locations.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Supervised Learning - Prediksi Total Cases")
print(f"MSE: {mse:.2f}")
print(f"R-squared: {r2:.2f}")

Supervised Learning - Prediksi Total Cases
MSE: 1027562235.23
R-squared: 0.94


    2. Bagaimana kita dapat mengelompokkan lokasi-lokasi di Indonesia berdasarkan jumlah total kasus, jumlah kematian, jumlah yang sembuh, dan densitas populasi menggunakan metode clustering seperti KMeans? (unsupervised learning)

In [8]:
# Restrict to the clustering inputs and drop incomplete rows. Chaining
# .dropna() returns a new frame instead of mutating a column slice in place.
df = df[['Location', 'Total Cases', 'Total Deaths', 'Total Recovered', 'Population Density']].dropna()

# One row per location: the mean of each numeric column over all its records.
data_loc = df.groupby('Location').mean(numeric_only=True)
features = ['Total Cases', 'Total Deaths', 'Total Recovered', 'Population Density']

# Standardise the features so that columns with large magnitudes do not
# dominate the distances KMeans works with.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(data_loc[features])

# n_init='auto' is the setting the dashboard script in this file already
# passes; stating it here keeps both clusterings configured identically and
# avoids depending on the installed scikit-learn's default.
kmeans = KMeans(n_clusters=4, random_state=42, n_init='auto')
data_loc['Cluster'] = kmeans.fit_predict(X_scaled)

print("Unsupervised Learning - Hasil Clustering Wilayah:")
print(data_loc[features + ['Cluster']].head())

Unsupervised Learning - Hasil Clustering Wilayah:
               Total Cases  Total Deaths  Total Recovered  Population Density  \
Location                                                                        
Aceh          22082.057522   1085.080752     19784.528761               90.54   
Bali          72086.640914   2195.159956     67546.262242              729.43   
Banten       108872.312771   1543.959957    103163.648268             1109.64   
Bengkulu      13734.685206    270.363737     12950.609566              100.38   
DKI Jakarta  587519.964478   8259.145318    566710.544672            16334.31   

             Cluster  
Location              
Aceh               0  
Bali               0  
Banten             0  
Bengkulu           0  
DKI Jakarta        2  


In [9]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import plotly.express as px

# Page setup: browser-tab title, wide layout, then the on-page heading.
st.set_page_config(
    layout="wide",
    page_title="COVID-19 Clustering Dashboard",
)
st.title("Dashboard Klasterisasi COVID-19 - Modul 3")

@st.cache_data
def load_data():
    """Read the COVID-19 Indonesia time-series CSV into a DataFrame.

    The function is wrapped in ``st.cache_data`` so Streamlit can reuse the
    loaded result instead of re-reading the file on every script rerun.

    Returns:
        The full contents of ``covid_19_indonesia_time_series_all.csv``.
    """
    return pd.read_csv("covid_19_indonesia_time_series_all.csv")

df = load_data()

# Keep the columns the dashboard needs and drop incomplete rows. Chaining
# .dropna() returns a new frame instead of mutating a column slice in place.
df = df[['Date', 'Location', 'Total Cases', 'Total Deaths', 'Total Recovered', 'Population Density']].dropna()

# Parse dates so the trend below is ordered chronologically; grouping on the
# raw strings would sort them lexicographically instead.
# NOTE(review): relies on pandas' default date inference matching the CSV's
# date format -- confirm, or pass an explicit format.
df['Date'] = pd.to_datetime(df['Date'])

# Unique locations, offered in the sidebar selector.
unique_locations = df['Location'].unique()
selected_location = st.sidebar.selectbox("Pilih Lokasi", unique_locations)
location_data = df[df['Location'] == selected_location]

# ========================== 📈 TREND LINE CHART =============================
st.subheader(f"📊 Tren Total Kasus di {selected_location}")
fig, ax = plt.subplots(figsize=(10, 4))
daily_cases = location_data.groupby("Date")['Total Cases'].sum()
daily_cases.plot(ax=ax, color='#FF9800')  # orange, to stand out from the default colour
ax.set_ylabel("Total Kasus")
ax.set_xlabel("Tanggal")
st.pyplot(fig)
# Streamlit reruns the whole script on every interaction; close the figure so
# matplotlib does not keep accumulating open figures.
plt.close(fig)

# ========================== 🤖 CLUSTERING =============================
st.subheader("🔬 Klasterisasi Wilayah")
# One row per location: mean of each clustering feature over all its records.
cluster_features = df.groupby("Location")[['Total Cases', 'Total Deaths', 'Total Recovered', 'Population Density']].mean()

# Standardise the features so that columns with large magnitudes do not
# dominate the distances KMeans works with.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(cluster_features)

kmeans = KMeans(n_clusters=4, random_state=42, n_init='auto')
clusters = kmeans.fit_predict(scaled_features)
cluster_features['Cluster'] = clusters

# Per-record frame with each row tagged by its location's cluster label.
df_clustered = df.merge(cluster_features['Cluster'], on='Location')

# ========================== 🗺️ MAP COORDINATES =============================
# Only the eight locations listed here have coordinates; the inner merge below
# therefore leaves every other location off the map.
kordinat = pd.DataFrame({
    'Location': [
        'DKI Jakarta', 'Jawa Barat', 'Jawa Tengah', 'Jawa Timur',
        'Bali', 'Sumatera Utara', 'Kalimantan Timur', 'Sulawesi Selatan'
    ],
    'lat': [
        -6.2088, -6.9039, -7.1500, -7.2504,
        -8.4095, 3.5952, 0.5383, -5.1477
    ],
    'lon': [
        106.8456, 107.6186, 110.1403, 112.7688,
        115.1889, 98.6722, 116.4194, 119.4327
    ]
})

map_df = cluster_features.reset_index().merge(kordinat, on='Location')
# Plotly Express treats a numeric colour column as continuous and then ignores
# color_discrete_sequence; string labels make the clusters categorical so the
# custom palette below is actually applied.
map_df['Cluster'] = map_df['Cluster'].astype(str)

# ========================== 🗺️ SCATTER MAPBOX =============================
st.subheader("🗺️ Peta Klaster Wilayah")
fig_map = px.scatter_mapbox(
    map_df,
    lat="lat", lon="lon",
    hover_name="Location",
    color="Cluster",
    size="Total Cases",
    zoom=4,
    height=500,
    mapbox_style="carto-positron",
    # Fix the label order so each cluster keeps the same colour no matter
    # which locations end up in map_df.
    category_orders={"Cluster": [str(label) for label in range(kmeans.n_clusters)]},
    color_discrete_sequence=["#E91E63", "#3F51B5", "#009688", "#FF9800"]  # custom palette: pink, blue, green, orange
)
st.plotly_chart(fig_map, use_container_width=True)

# ========================== 📋 SUMMARY =============================
st.subheader("📋 Ringkasan Risiko per Klaster")
st.dataframe(cluster_features.sort_values("Cluster"))


Writing covid_dashboard.py
