In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from lazypredict.Supervised import LazyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    confusion_matrix, ConfusionMatrixDisplay,
    classification_report, accuracy_score,
    precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the processed dataset and preview its first rows.
df = pd.read_csv(filepath_or_buffer="../data/processed/final_data.csv")
df.head(n=5)

Unnamed: 0,date,total_precipitation,rain_only,soil_moisture_0_7cm,soil_moisture_7_28cm,soil_moisture_28_100cm,soil_moisture_100_255cm,surface_pressure,humidity,soil_moisture_avg,rain_sum_6h,hydro_saturation_index,rain_lag_1h,soil_lag_1h,rain_lag_3h,soil_lag_3h,rain_lag_6h,soil_lag_6h,target_alert_level
0,2024-01-01 06:00:00,0.0,0.0,0.49,0.48,0.34,0.4,1008.01,97.63,0.43,0.0,0.0,0.0,0.43,0.0,0.43,0.0,0.43,0
1,2024-01-01 07:00:00,0.0,0.0,0.49,0.48,0.34,0.4,1008.82,93.09,0.43,0.0,0.0,0.0,0.43,0.0,0.43,0.0,0.43,0
2,2024-01-01 08:00:00,0.0,0.0,0.49,0.48,0.34,0.4,1008.84,84.49,0.43,0.0,0.0,0.0,0.43,0.0,0.43,0.0,0.43,0
3,2024-01-01 09:00:00,0.1,0.1,0.49,0.48,0.34,0.4,1009.34,74.26,0.43,0.1,0.05,0.0,0.43,0.0,0.43,0.0,0.43,0
4,2024-01-01 10:00:00,0.1,0.1,0.49,0.48,0.34,0.4,1009.25,70.29,0.43,0.2,0.05,0.1,0.43,0.0,0.43,0.0,0.43,0


In [10]:
# Class distribution of the target: number of rows per alert level.
alert_levels = df['target_alert_level']
alert_levels.value_counts()

target_alert_level
0    16805
1      704
2       28
3        1
Name: count, dtype: int64

In [3]:
# 1. Define the target (y) and the features (X).
# The target is the alert level; the features are every remaining column
# except the non-numeric 'date' column and the target itself.
y = df['target_alert_level']
X = df.drop(['target_alert_level', 'date'], axis=1)

In [4]:
# Split the data into a training set and a test set.
# The split is positional (no shuffling): the first 80% of the rows are used
# for training and the remaining 20% for testing, so the original row order
# is preserved on both sides of the cut.
split_index = int(len(X) * 0.8)
X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]

# Show the shapes of both sets to confirm the split.
print("Training set shape:")
print(f"X_train={X_train.shape}")
print(f"y_train={y_train.shape} \n")
print("Test set shape:")
# These two lines report the TEST set, so they must be labelled as such.
print(f"X_test={X_test.shape}")
print(f"y_test={y_test.shape}")

Training set shape:
X_train=(14030, 17)
y_train=(14030,) 

Test set shape:
X_train=(3508, 17)
y_train=(3508,)


In [5]:
# Learn the min/max scaling ranges from the training data only, then apply
# the same fitted scaler to both the training and the test features.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [6]:
def evaluate_model(y_true, y_pred, model_name="Model", train_accuracy=None, target_names=None, labels=None):
    """
    Print a full evaluation of a classifier and plot its confusion matrix.

    The output consists of the support-weighted accuracy / precision /
    recall / F1, the per-class classification report, the train/test
    accuracy summary and finally a confusion-matrix figure.

    Parameters:
    - y_true: array-like, ground-truth labels
    - y_pred: array-like, predicted labels
    - model_name: str, name of the model used in headings (default: "Model")
    - train_accuracy: float, accuracy on the training data (optional)
    - target_names: list of str, display names of the classes, given in the
      same order as `labels` (or, when `labels` is None, in the order of the
      sorted labels that occur in `y_true` / `y_pred`)
    - labels: list, class labels to include in the classification report and
      the confusion matrix (optional). Passing the full label set keeps the
      report and `target_names` aligned even when some classes do not occur
      in `y_true` or `y_pred`. When None, scikit-learn infers the labels
      from the data, as before.
    """
    # Summary metrics (weighted by class support)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    print(f"🛑 EVALUASI MODEL {model_name.upper()}")
    print("=" * 60)
    print(f"Accuracy : {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall   : {recall:.2f}")
    print(f"F1 Score : {f1:.2f}\n")

    print("🛑 CLASSIFICATION REPORT")
    print("=" * 60)
    # zero_division=0 matches the summary metrics above, so classes that are
    # never predicted report 0.0 without emitting a warning.
    print(classification_report(
        y_true, y_pred,
        labels=labels, target_names=target_names, zero_division=0,
    ))
    print("=" * 60)
    if train_accuracy is not None:
        print(f"*{model_name.upper()} - Akurasi pada data latih: {train_accuracy:.4f}")
    print(f"*{model_name.upper()} - Akurasi pada data uji: {accuracy:.4f}")
    print("=" * 60)
    print("\n")
    print("Visualisasi:")

    # Confusion matrix, restricted/ordered by `labels` when it is given
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    # Prefer human-readable names on the axes; fall back to the raw labels.
    display_labels = target_names if target_names is not None else labels
    fig, ax = plt.subplots(figsize=(6, 5))
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
    disp.plot(ax=ax, cmap='Blues', colorbar=True)
    ax.set_title(f"Confusion Matrix - {model_name}")
    fig.tight_layout()
    plt.show()

In [7]:
# 3. Initialise the model.
# n_estimators=100 means the forest is built from 100 decision trees that
# vote on the prediction; class_weight='balanced' asks scikit-learn to weight
# the classes by their frequency in the training labels.
# (RandomForestClassifier is imported in the notebook's import cell.)
model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')

# 4. Train the model.
model.fit(X_train, y_train)

# 5. Predict on the test data.
y_pred = model.predict(X_test)

In [11]:
# Collect the labels that actually occur in y_test or y_pred.
unique_labels = sorted(set(y_test).union(y_pred))

# Map label values to category names so that the names line up with the
# labels that are really present.
all_names = ['Aman', 'Waspada', 'Siaga', 'Darurat']
target_names = [all_names[label] for label in unique_labels]

total_accuracy = accuracy_score(y_test, y_pred)

print("=== EVALUASI MODEL NOTIFIKASI BANJIR ===")
print(f"Akurasi Total: {total_accuracy:.2%}")
print("\nDetail Per Kategori:")
per_class_report = classification_report(
    y_test,
    y_pred,
    labels=unique_labels,
    target_names=target_names,
)
print(per_class_report)

=== EVALUASI MODEL NOTIFIKASI BANJIR ===
Akurasi Total: 96.44%

Detail Per Kategori:
              precision    recall  f1-score   support

        Aman       0.96      1.00      0.98      3383
     Waspada       0.00      0.00      0.00       120
       Siaga       0.00      0.00      0.00         5

    accuracy                           0.96      3508
   macro avg       0.32      0.33      0.33      3508
weighted avg       0.93      0.96      0.95      3508



In [13]:
def generate_notification(data_point, clf=None):
    """
    Predict the alert level for one observation and print its notification.

    Parameters:
    - data_point: 2D array-like holding a single row of features, prepared
      the same way as the data the classifier was trained on
    - clf: fitted classifier exposing `predict()` (optional). Defaults to the
      notebook-level `model`, so existing calls keep working.

    Returns:
    - str, the notification message that was printed

    Raises:
    - ValueError: if the predicted level has no notification message
    """
    # Fall back to the notebook-level model only when no classifier is given.
    classifier = model if clf is None else clf

    # Predicted level (0-3); cast to a plain int so NumPy integer labels are
    # looked up and reported consistently.
    level = int(classifier.predict(data_point)[0])

    messages = {
        0: "🟢 STATUS: AMAN. Kondisi cuaca dan tanah terpantau normal.",
        1: "🟡 STATUS: WASPADA. Hujan sedang terdeteksi, waspadai genangan di titik rendah.",
        2: "🟠 STATUS: SIAGA! Hujan lebat & tanah mulai jenuh. Siapkan perlengkapan banjir.",
        3: "🚨 STATUS: DARURAT!! Potensi banjir besar dalam 3 jam. Segera evakuasi!"
    }

    # An unknown level must not be reported as a blank / "None" alert.
    if level not in messages:
        raise ValueError(f"Unknown alert level predicted: {level!r}")
    message = messages[level]

    print("*"*30)
    print(message)
    print("*"*30)
    return message

# Smoke test with a single row taken from the test set.
# The trailing ';' keeps the cell from echoing the call's return value.
sample_row = X_test[[0]]
generate_notification(sample_row);

******************************
ðŸŸ¢ STATUS: AMAN. Kondisi cuaca dan tanah terpantau normal.
******************************
