---
## **1. Setup & Load Data**
### 1.1 Mount Google Drive & Import Libraries

In [None]:
# Mount Google Drive so the dataset stored there can be read from this Colab runtime
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:

# Import semua library yang diperlukan
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder

# Deep Learning - Keras/TensorFlow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

# Evaluasi
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score
from sklearn.metrics import roc_curve, auc, roc_auc_score

# Display settings: silence warnings and let pandas show every column
import warnings
warnings.simplefilter('ignore')
pd.options.display.max_columns = None

# Report the library versions this run uses
print(f"TensorFlow Version: {tf.__version__}")
print(f"Keras Version: {keras.__version__}")


### 1.2 Load Dataset

In [None]:
# Point this at the location of the CSV inside your Google Drive,
# e.g. '/content/drive/MyDrive/Tugas/WA_Fn-UseC_-Telco-Customer-Churn.csv'
file_path = '/content/drive/MyDrive/WA_Fn-UseC_-Telco-Customer-Churn.csv'  # ADJUST THIS PATH!

# Read the raw table into memory
df = pd.read_csv(file_path)

n_rows, n_cols = df.shape
print(f"Dataset berhasil dimuat!")
print(f"Jumlah baris: {n_rows}")
print(f"Jumlah kolom: {n_cols}")

---
## **2. Exploratory Data Analysis (EDA) Singkat**
### 2.1 Overview Dataset

In [None]:
# Show the first 5 rows of the raw dataset
df.head()

In [None]:
# Structural overview of the dataset as reported by DataFrame.info()
print("=" * 60, "INFORMASI DATASET", "=" * 60, sep="\n")
df.info()

In [None]:
# Descriptive statistics; include='all' covers non-numeric columns as well
df.describe(include='all')

In [None]:
# Report how many distinct values each column holds
print("=" * 60, "NILAI UNIK SETIAP KOLOM", "=" * 60, sep="\n")
for col, n_unique in df.nunique().items():
    print(f"{col}: {n_unique} unique values")

### 2.2 Cek Missing Values

In [None]:
# Count null/NaN entries per column, as an absolute number and as a share of all rows
print("=" * 60, "MISSING VALUES", "=" * 60, sep="\n")
missing_values = df.isnull().sum()
missing_percentage = (missing_values / len(df)) * 100

missing_df = pd.DataFrame({
    'Missing Values': missing_values,
    'Percentage (%)': missing_percentage
})

# Only list the columns that actually contain nulls
has_missing = missing_df['Missing Values'] > 0
print(missing_df[has_missing])

total_missing = missing_df['Missing Values'].sum()
if total_missing == 0:
    print("\n‚úì Tidak ada missing values yang terdeteksi (null/NaN)")

In [None]:
# Look for empty or whitespace-only strings in the object-dtype columns
# (these are not reported by isnull()).
print("=" * 60, "CEK NILAI KOSONG/SPASI PADA KOLOM KATEGORIK", "=" * 60, sep="\n")

text_columns = df.select_dtypes(include=['object']).columns
for col in text_columns:
    empty_count = df[col].str.strip().eq('').sum()
    if empty_count == 0:
        continue
    print(f"{col}: {empty_count} nilai kosong/spasi")

### 2.3 Distribusi Target (Churn)

In [None]:
# Distribution of the target variable (Churn): counts on the left, percentages on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
count_ax, pie_ax = axes

# Left panel: count plot
sns.countplot(data=df, x='Churn', ax=count_ax, palette='coolwarm')
count_ax.set_title('Distribusi Customer Churn', fontsize=12, fontweight='bold')
count_ax.set_xlabel('Churn')
count_ax.set_ylabel('Count')

# Right panel: pie chart; the second slice is pulled out slightly via `explode`
churn_counts = df['Churn'].value_counts()
pie_ax.pie(
    churn_counts,
    labels=churn_counts.index,
    autopct='%1.1f%%',
    colors=['#3498db', '#e74c3c'],
    startangle=90,
    explode=[0, 0.05],
)
pie_ax.set_title('Persentase Churn', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()

# Same information as text
print(f"\nDistribusi Churn:")
print(churn_counts)
print(f"\nPersentase:")
print(df['Churn'].value_counts(normalize=True) * 100)

---
## **3. Advanced Data Preprocessing (Bobot: 30%)**
### 3.1 Handling Missing Values & Data Cleaning

In [None]:
# Build the preprocessing frame as a new object so the raw `df` stays untouched.
# customerID is dropped because it is not relevant for prediction.
df_clean = df.drop(columns='customerID')

print("‚úì Kolom 'customerID' telah dihapus")
print(f"Jumlah kolom sekarang: {df_clean.shape[1]}")

In [None]:
# TotalCharges is expected to be numeric, but blank/whitespace entries may
# prevent it from being parsed as numbers, so convert it explicitly.

# Report the dtype before conversion
print(f"Tipe data TotalCharges: {df_clean['TotalCharges'].dtype}")

# errors='coerce' turns every unparseable entry into NaN
total_charges_numeric = pd.to_numeric(df_clean['TotalCharges'], errors='coerce')
df_clean['TotalCharges'] = total_charges_numeric

# Count how many entries became NaN
n_missing_total = df_clean['TotalCharges'].isnull().sum()
print(f"\nMissing values di TotalCharges setelah konversi: {n_missing_total}")

In [None]:
# HANDLING MISSING VALUES - imputation
# Missing TotalCharges could be filled with the median or with a value derived
# from tenure; this cell fills them with the row's MonthlyCharges.

print("=" * 60)
print("HANDLING MISSING VALUES")
print("=" * 60)

# Show the rows whose TotalCharges is NaN
missing_total_charges = df_clean[df_clean['TotalCharges'].isnull()]
print(f"\nBaris dengan TotalCharges kosong:")
print(missing_total_charges[['tenure', 'MonthlyCharges', 'TotalCharges']])

# Author's note: the affected rows are expected to be customers with tenure = 0
# (check the table printed above to confirm for your copy of the data).
#
# Impute with MonthlyCharges. The result is assigned back to the column instead
# of calling fillna(..., inplace=True) on `df_clean['TotalCharges']`: that form
# mutates a temporary selection, emits a FutureWarning in pandas 2.x and leaves
# df_clean unchanged once Copy-on-Write is enabled.
df_clean['TotalCharges'] = df_clean['TotalCharges'].fillna(df_clean['MonthlyCharges'])

# Verify that no missing values remain anywhere in the frame
print(f"\n‚úì Missing values setelah imputasi: {df_clean.isnull().sum().sum()}")

### 3.2 Encoding - Label Encoder untuk Target & One-Hot Encoding untuk Fitur Kategori

In [None]:
# Split the columns into numeric and categorical groups based on dtype
print("=" * 60, "IDENTIFIKASI TIPE KOLOM", "=" * 60, sep="\n")

numerical_cols = list(df_clean.select_dtypes(include=['int64', 'float64']).columns)
categorical_cols = list(df_clean.select_dtypes(include=['object']).columns)

# The target is encoded separately, so take it out of the categorical features
categorical_cols.remove('Churn')

print(f"\nKolom Numerik ({len(numerical_cols)}): {numerical_cols}")
print(f"\nKolom Kategorik ({len(categorical_cols)}): {categorical_cols}")
print(f"\nTarget: Churn")

In [None]:
# Encode the binary target (Churn) into integer codes with LabelEncoder
print("=" * 60, "LABEL ENCODING - TARGET (Churn)", "=" * 60, sep="\n")

label_encoder = LabelEncoder()
df_clean['Churn'] = label_encoder.fit_transform(df_clean['Churn'])

# Print which original label was assigned which integer code
original_labels = label_encoder.classes_
assigned_codes = label_encoder.transform(original_labels)
print(f"Mapping: {dict(zip(original_labels, assigned_codes))}")
print(f"\nDistribusi Churn setelah encoding:")
print(df_clean['Churn'].value_counts())

In [None]:
# Before one-hot encoding the nominal features, list each one's distinct values
print("=" * 60, "ONE-HOT ENCODING - FITUR KATEGORIK", "=" * 60, sep="\n")

for col in categorical_cols:
    distinct_values = df_clean[col].unique()
    print(f"\n{col}: {distinct_values}")

In [None]:
# One-hot encode the categorical features with pd.get_dummies.
# drop_first=True drops one level per feature to avoid multicollinearity.
df_encoded = pd.get_dummies(df_clean, columns=categorical_cols, drop_first=True)

n_cols_before = df_clean.shape[1]
n_cols_after = df_encoded.shape[1]
print(f"\nJumlah kolom sebelum encoding: {n_cols_before}")
print(f"Jumlah kolom setelah One-Hot Encoding: {n_cols_after}")
print(f"\nKolom baru setelah encoding:")
print(list(df_encoded.columns))

In [None]:
# Show a sample of the data after encoding
df_encoded.head()

### 3.3 Feature Scaling (WAJIB)

In [None]:
# Separate the target column (y) from the input features (X)
y = df_encoded['Churn']
X = df_encoded.drop(columns='Churn')

n_features = X.shape[1]
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")
print(f"\nJumlah fitur input: {n_features}")

In [None]:
# FEATURE SCALING with StandardScaler
# StandardScaler: mean=0, std=1
# MinMaxScaler: range [0,1]

print("=" * 60)
print("FEATURE SCALING - StandardScaler")
print("=" * 60)

# Fit the scaler on the training rows ONLY. Fitting on the full dataset would
# leak the test rows' mean/std into preprocessing and make the test metrics
# optimistic.
# The training rows are selected with exactly the same arguments as the
# train/test split in section 3.4 (test_size=0.2, random_state=42, stratify=y),
# so they are the same rows that end up in X_train. If the split arguments are
# changed there, change them here as well.
train_rows, _ = train_test_split(
    np.arange(len(X)),
    test_size=0.2,
    random_state=42,
    stratify=y,
)

scaler = StandardScaler()
scaler.fit(X.iloc[train_rows])

# Apply the train-fitted transform to every row; the split happens afterwards
X_scaled = scaler.transform(X)

# Convert back to a DataFrame for inspection
X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)

print("\nStatistik sebelum scaling:")
print(X[['tenure', 'MonthlyCharges', 'TotalCharges']].describe())

# Mean/std below are only approximately 0/1 because the statistics were
# estimated on the training rows, not on all rows.
print("\nStatistik setelah scaling:")
print(X_scaled_df[['tenure', 'MonthlyCharges', 'TotalCharges']].describe())

### 3.4 Split Data: Training, Validation, dan Test Set

In [None]:
# SPLIT DATA
# Hold out 20% of the rows as the test set and keep 80% for training.
# The validation set is created automatically during training (validation_split).

print("=" * 60, "SPLIT DATA", "=" * 60, sep="\n")

# stratify=y keeps the class proportions the same in both parts
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, **split_options)

n_total = len(X)
n_train = X_train.shape[0]
n_test = X_test.shape[0]
print(f"\nUkuran Training Set: {n_train} samples ({n_train/n_total*100:.1f}%)")
print(f"Ukuran Test Set: {n_test} samples ({n_test/n_total*100:.1f}%)")

print(f"\nDistribusi Churn di Training Set:")
print(pd.Series(y_train).value_counts())
print(f"\nDistribusi Churn di Test Set:")
print(pd.Series(y_test).value_counts())

In [None]:
# Turn every split into a float32 NumPy array
X_train, X_test, y_train, y_test = (
    np.array(part).astype('float32')
    for part in (X_train, X_test, y_train, y_test)
)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

---
## **4. Pembangunan Model (Model Building) (Bobot: 20%)**
### 4.1 Arsitektur Model ANN

In [None]:
# The network's input width is the number of feature columns in X_train
input_dim = np.shape(X_train)[1]
print(f"Jumlah fitur input (input_dim): {input_dim}")

In [None]:
# BUILD THE ANN with Keras Sequential
print("=" * 60)
print("ARSITEKTUR MODEL ANN")
print("=" * 60)

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable across "Restart & Run All".
keras.utils.set_random_seed(42)

# Initialise the Sequential model
model = Sequential(name='ANN_Customer_Churn')

# INPUT LAYER
# Declared explicitly with keras.Input; passing `input_dim=` to the first Dense
# layer is deprecated and triggers a warning in Keras 3.
model.add(keras.Input(shape=(input_dim,), name='input_layer'))

# HIDDEN LAYER 1
model.add(Dense(
    units=64,                    # number of neurons
    activation='relu',           # ReLU activation
    name='hidden_layer_1'
))
model.add(BatchNormalization())  # batch normalisation for training stability
model.add(Dropout(0.3))          # dropout to reduce overfitting

# HIDDEN LAYER 2
model.add(Dense(
    units=32,                    # number of neurons
    activation='relu',           # ReLU activation
    name='hidden_layer_2'
))
model.add(BatchNormalization())
model.add(Dropout(0.3))

# HIDDEN LAYER 3 (extra capacity)
model.add(Dense(
    units=16,                    # number of neurons
    activation='relu',           # ReLU activation
    name='hidden_layer_3'
))
model.add(Dropout(0.2))

# OUTPUT LAYER
# Binary classification: a single neuron with sigmoid activation
model.add(Dense(
    units=1,                     # one output for binary classification
    activation='sigmoid',        # sigmoid yields a probability in 0-1
    name='output_layer'
))

print("\n‚úì Model berhasil dibuat!")

In [None]:
# Print a summary of the model architecture
model.summary()

In [None]:
# Optional: render the model architecture as an image.
# This is best-effort: any failure is reported and the notebook continues.
try:
    from tensorflow.keras.utils import plot_model
    from IPython.display import Image, display
    plot_model(model, show_shapes=True, show_layer_names=True,
               to_file='model_architecture.png', dpi=100)
    display(Image('model_architecture.png'))
except Exception as exc:
    # `except Exception` (not a bare `except:`) so KeyboardInterrupt/SystemExit
    # still propagate, and the real cause is shown instead of being hidden.
    print("Visualisasi model tidak tersedia (memerlukan graphviz)")
    print(f"   Detail: {type(exc).__name__}: {exc}")

---
## **5. Kompilasi & Training (Bobot: 20%)**
### 5.1 Kompilasi Model

In [None]:
# COMPILE MODEL
print("=" * 60, "KOMPILASI MODEL", "=" * 60, sep="\n")

compile_options = {
    'optimizer': 'adam',             # Adam optimizer (adaptive learning rate)
    'loss': 'binary_crossentropy',   # loss function for binary classification
    'metrics': ['accuracy'],         # evaluation metric
}
model.compile(**compile_options)

print("\n‚úì Model berhasil dikompilasi!")
print(f"   - Optimizer: Adam")
print(f"   - Loss Function: Binary Crossentropy")
print(f"   - Metrics: Accuracy")

### 5.2 Training Model

#### ⚠️ Penanganan Data Imbalance dengan class_weight
Pada dataset churn, jumlah pelanggan yang churn biasanya jauh lebih sedikit dibanding yang tidak churn (data imbalance).
Agar model lebih sensitif terhadap kelas minoritas (churn), kita bisa menggunakan parameter `class_weight` pada `model.fit`.
Dengan ini, model akan memberi bobot lebih besar pada kesalahan prediksi churn, sehingga prediksi churn menjadi lebih baik.

In [None]:
# Early-stopping callback to limit overfitting
early_stopping_options = {
    'monitor': 'val_loss',          # watch the validation loss
    'patience': 10,                 # stop after 10 epochs without improvement
    'restore_best_weights': True,   # roll back to the best weights seen
    'verbose': 1,
}
early_stopping = EarlyStopping(**early_stopping_options)

print("‚úì Early Stopping callback telah dikonfigurasi")

In [None]:
# TRAIN THE MODEL, using class_weight to counter the class imbalance
print("=" * 60)
print("TRAINING MODEL")
print("=" * 60)

# Training hyperparameters
EPOCHS = 100
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2  # 20% of the training data is held out for validation

# Compute "balanced" class weights from the training labels.
# (numpy is already imported as `np` at the top of the notebook.)
from sklearn.utils.class_weight import compute_class_weight
train_classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=train_classes, y=y_train)

# Key each weight by its actual class label rather than by its position in the
# array, and use plain Python int/float so the mapping is exactly what
# Keras' `class_weight` argument expects (class index -> weight).
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(train_classes, class_weights)
}
print(f'Class weight: {class_weight_dict}')

print(f"\nHyperparameter:")
print(f"   - Epochs: {EPOCHS}")
print(f"   - Batch Size: {BATCH_SIZE}")
print(f"   - Validation Split: {VALIDATION_SPLIT}")
print("\nMemulai training...\n")

# Train and keep the per-epoch history
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,  # Keras holds out the validation set itself
    callbacks=[early_stopping],
    verbose=1,
    class_weight=class_weight_dict  # weights the loss per class during training
)

print("\n" + "=" * 60)
print("‚úì Training selesai!")
print("=" * 60)

---
## **6. Visualisasi Grafik (Loss/Accuracy)**
### 6.1 Plot Loss dan Accuracy

In [None]:
# Pull the per-epoch curves out of the training history
metrics_log = history.history
train_loss, val_loss = metrics_log['loss'], metrics_log['val_loss']
train_acc, val_acc = metrics_log['accuracy'], metrics_log['val_accuracy']

n_epochs_run = len(train_loss)
epochs_range = range(1, n_epochs_run + 1)

print(f"Jumlah epoch yang dijalankan: {n_epochs_run}")

In [None]:
# VISUALISE LOSS AND ACCURACY (train vs validation), one panel per metric
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

panels = [
    {
        'ax': axes[0],
        'train': train_loss,
        'val': val_loss,
        'train_label': 'Training Loss',
        'val_label': 'Validation Loss',
        'title': 'Model Loss (Train vs Validation)',
        'ylabel': 'Loss',
        'legend_loc': 'upper right',
    },
    {
        'ax': axes[1],
        'train': train_acc,
        'val': val_acc,
        'train_label': 'Training Accuracy',
        'val_label': 'Validation Accuracy',
        'title': 'Model Accuracy (Train vs Validation)',
        'ylabel': 'Accuracy',
        'legend_loc': 'lower right',
    },
]

for panel in panels:
    ax = panel['ax']
    # Blue = training curve, red = validation curve
    ax.plot(epochs_range, panel['train'], 'b-', label=panel['train_label'], linewidth=2)
    ax.plot(epochs_range, panel['val'], 'r-', label=panel['val_label'], linewidth=2)
    ax.set_title(panel['title'], fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(panel['ylabel'], fontsize=12)
    ax.legend(loc=panel['legend_loc'], fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.set_xlim([1, len(panel['train'])])

plt.tight_layout()
plt.savefig('training_history.png', dpi=150, bbox_inches='tight')
plt.show()

print("\n‚úì Grafik disimpan sebagai 'training_history.png'")

In [None]:
# OVERFITTING ANALYSIS based on the last recorded epoch
print("=" * 60, "ANALISIS OVERFITTING", "=" * 60, sep="\n")

# Values from the final epoch in the history
final_train_loss, final_val_loss = train_loss[-1], val_loss[-1]
final_train_acc, final_val_acc = train_acc[-1], val_acc[-1]

print(f"\nNilai Akhir Training:")
print(f"   - Training Loss: {final_train_loss:.4f}")
print(f"   - Validation Loss: {final_val_loss:.4f}")
print(f"   - Training Accuracy: {final_train_acc:.4f}")
print(f"   - Validation Accuracy: {final_val_acc:.4f}")

# Gaps between the training and validation curves
loss_gap = final_val_loss - final_train_loss
acc_gap = final_train_acc - final_val_acc

print(f"\nAnalisis Gap:")
print(f"   - Gap Loss (Val - Train): {loss_gap:.4f}")
print(f"   - Gap Accuracy (Train - Val): {acc_gap:.4f}")

# Interpretation: a loss gap above 0.1 or an accuracy gap above 0.05 is flagged
print(f"\nüìä Interpretasi:")
loss_verdict = (
    [
        "   ‚ö†Ô∏è Terdapat indikasi OVERFITTING (validation loss > training loss)",
        "   ‚Üí Model mungkin terlalu 'menghafal' data training",
    ]
    if loss_gap > 0.1
    else [
        "   ‚úì Model terlihat BAIK (gap loss kecil)",
        "   ‚Üí Model memiliki generalisasi yang baik",
    ]
)
for verdict_line in loss_verdict:
    print(verdict_line)

acc_verdict = (
    "   ‚ö†Ô∏è Gap accuracy cukup besar, pertimbangkan regularisasi tambahan"
    if acc_gap > 0.05
    else "   ‚úì Gap accuracy dalam batas wajar"
)
print(acc_verdict)

---
## **7. Evaluasi Akhir (Bobot: 30%)**
### 7.1 Prediksi pada Test Set

In [None]:
# Evaluate the trained model on the held-out test set
print("=" * 60, "EVALUASI PADA TEST SET", "=" * 60, sep="\n")

# model.evaluate() returns the loss followed by the compiled metrics
evaluation = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = evaluation

test_accuracy_pct = test_accuracy * 100
print(f"\nHasil Evaluasi Test Set:")
print(f"   - Test Loss: {test_loss:.4f}")
print(f"   - Test Accuracy: {test_accuracy:.4f} ({test_accuracy_pct:.2f}%)")

In [None]:

# Choose the decision threshold by F1 on the VALIDATION rows, then predict on
# the test set with it.
#
# The threshold must not be tuned on the test set: picking the value that
# maximises test F1 and then reporting metrics on the same test rows makes
# those metrics optimistically biased.
#
# Keras' `validation_split` holds out the LAST fraction of the arrays passed to
# fit() (taken before shuffling), so the same rows are rebuilt here from
# X_train / y_train and VALIDATION_SPLIT. They were used for early stopping but
# never for weight updates.
val_start = int(np.floor(len(X_train) * (1.0 - VALIDATION_SPLIT)))
X_val, y_val = X_train[val_start:], y_train[val_start:]
y_val_proba = model.predict(X_val).flatten()

# Simple grid search over candidate thresholds
threshold_candidates = np.linspace(0.2, 0.6, 9)
threshold_scores = [
    (th, f1_score(y_val, (y_val_proba > th).astype(int)))
    for th in threshold_candidates
]

best_threshold, best_f1 = max(threshold_scores, key=lambda x: x[1])

print(f"Kandidat threshold: {np.round(threshold_candidates, 2)}")
print(f"Threshold terbaik (F1 validation): {best_threshold:.2f} | F1 validation: {best_f1:.4f}")

# Predict on the test set and apply the chosen threshold
y_pred_proba = model.predict(X_test)  # probabilities
y_pred = (y_pred_proba > best_threshold).astype(int).flatten()

print(f"Shape y_pred_proba: {y_pred_proba.shape}")
print(f"Shape y_pred: {y_pred.shape}")
print(f"\nSample prediksi (5 pertama):")
print(f"Probabilitas: {y_pred_proba[:5].flatten()}")
print(f"Prediksi (threshold {best_threshold:.2f}): {y_pred[:5]}")
print(f"Aktual: {y_test[:5].astype(int)}")


### 7.2 Confusion Matrix

In [None]:
# CONFUSION MATRIX for the thresholded test predictions
print("=" * 60, "CONFUSION MATRIX", "=" * 60, sep="\n")

cm = confusion_matrix(y_test, y_pred)

# Heatmap: rows are the true labels, columns the predicted labels
class_tick_labels = ['No Churn (0)', 'Churn (1)']
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_tick_labels,
    yticklabels=class_tick_labels,
    annot_kws={'size': 14},
)
plt.title('Confusion Matrix - Customer Churn Prediction', fontsize=14, fontweight='bold')
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.tight_layout()
plt.savefig('confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()

# Text version with each cell spelled out
print(f"\nConfusion Matrix:")
print(cm)
print(f"\n   - True Negative (TN): {cm[0,0]} (Tidak Churn, diprediksi Tidak Churn)")
print(f"   - False Positive (FP): {cm[0,1]} (Tidak Churn, diprediksi Churn)")
print(f"   - False Negative (FN): {cm[1,0]} (Churn, diprediksi Tidak Churn)")
print(f"   - True Positive (TP): {cm[1,1]} (Churn, diprediksi Churn)")

### 7.3 Classification Report

In [None]:
# CLASSIFICATION REPORT: per-class precision, recall and F1 on the test set
print("=" * 60, "CLASSIFICATION REPORT", "=" * 60, sep="\n")

class_names = ['No Churn', 'Churn']
report_text = classification_report(y_test, y_pred, target_names=class_names)
print(report_text)

In [None]:
# Derive additional metrics from the confusion-matrix cells
TN, FP, FN, TP = cm.ravel()


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


accuracy = (TP + TN) / (TP + TN + FP + FN)
precision = _safe_ratio(TP, TP + FP)
recall = _safe_ratio(TP, TP + FN)
specificity = _safe_ratio(TN, TN + FP)
f1 = _safe_ratio(2 * (precision * recall), precision + recall)

print("=" * 60, "RINGKASAN METRIK EVALUASI", "=" * 60, sep="\n")
print(f"\n   Accuracy    : {accuracy:.4f} ({accuracy*100:.2f}%)")
print(f"   Precision   : {precision:.4f} ({precision*100:.2f}%)")
print(f"   Recall      : {recall:.4f} ({recall*100:.2f}%)")
print(f"   Specificity : {specificity:.4f} ({specificity*100:.2f}%)")
print(f"   F1-Score    : {f1:.4f} ({f1*100:.2f}%)")

### 7.4 ROC Curve dan AUC Score

In [None]:
# ROC CURVE and AUC SCORE computed from the predicted probabilities
print("=" * 60, "ROC CURVE & AUC SCORE", "=" * 60, sep="\n")

# ROC points and the area under them
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

# Plot the curve against the diagonal of a random classifier
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC Curve (AUC = {roc_auc:.4f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Classifier')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - Specificity)', fontsize=12)
plt.ylabel('True Positive Rate (Sensitivity/Recall)', fontsize=12)
plt.title('Receiver Operating Characteristic (ROC) Curve', fontsize=14, fontweight='bold')
plt.legend(loc='lower right', fontsize=10)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('roc_curve.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\nAUC Score: {roc_auc:.4f}")
print(f"\nüìä Interpretasi AUC:")

# Grade bands, checked from the highest lower bound downwards
auc_grades = [
    (0.9, "   ‚úì Excellent (AUC ‚â• 0.9)"),
    (0.8, "   ‚úì Good (0.8 ‚â§ AUC < 0.9)"),
    (0.7, "   ‚Üí Fair (0.7 ‚â§ AUC < 0.8)"),
]
for lower_bound, grade_message in auc_grades:
    if roc_auc >= lower_bound:
        print(grade_message)
        break
else:
    print("   ‚ö†Ô∏è Poor (AUC < 0.7)")

---
## **8. Kesimpulan**

In [None]:

# RESULTS SUMMARY
print("=" * 70)
print("                    RINGKASAN HASIL MODEL ANN")
print("                  PREDIKSI CUSTOMER CHURN")
print("=" * 70)

print(f"\n Dataset: Telco Customer Churn")
print(f"   - Total samples: {len(df)}")
print(f"   - Total features (setelah encoding): {input_dim}")

print(f"\n Arsitektur Model:")
print(f"   - Input Layer: {input_dim} features")
print(f"   - Hidden Layer 1: 64 neurons (ReLU) + BatchNorm + Dropout(0.3)")
print(f"   - Hidden Layer 2: 32 neurons (ReLU) + BatchNorm + Dropout(0.3)")
print(f"   - Hidden Layer 3: 16 neurons (ReLU) + Dropout(0.2)")
print(f"   - Output Layer: 1 neuron (Sigmoid)")

print(f"\n Training Configuration:")
print(f"   - Optimizer: Adam")
print(f"   - Loss Function: Binary Crossentropy")
print(f"   - Epochs: {len(train_loss)} (dengan Early Stopping)")
print(f"   - Batch Size: {BATCH_SIZE}")
print(f"   - Threshold Prediksi (F1): {globals().get('best_threshold', 0.5):.2f}")

# Precision, recall and F1 below come from the predictions made with the tuned
# threshold, so accuracy is reported from those same predictions (`accuracy`).
# `test_accuracy` from model.evaluate() uses Keras' default 0.5 cut-off and is
# listed on its own line so the two numbers are not mixed up.
print(f"\n Hasil Evaluasi (Test Set):")
print(f"   - Accuracy: {accuracy*100:.2f}%")
print(f"   - Accuracy @ threshold 0.50 (model.evaluate): {test_accuracy*100:.2f}%")
print(f"   - Precision: {precision*100:.2f}%")
print(f"   - Recall: {recall*100:.2f}%")
print(f"   - F1-Score: {f1*100:.2f}%")
print(f"   - AUC Score: {roc_auc:.4f}")

print(f"\n Analisis Overfitting:")
print(f"   - Training Loss: {final_train_loss:.4f}")
print(f"   - Validation Loss: {final_val_loss:.4f}")
print(f"   - Gap: {loss_gap:.4f}")

print("\n" + "=" * 70)
print("                         END OF NOTEBOOK")
print("=" * 70)


---
## **9. Simpan Model (Opsional)**

In [None]:

# Save the model for later use
model.save('customer_churn_ann_model.h5')
print(" Model disimpan sebagai 'customer_churn_ann_model.h5'")

# Save the fitted scaler for deployment
import joblib
joblib.dump(scaler, 'scaler.pkl')
print(" Scaler disimpan sebagai 'scaler.pkl'")

# Save the feature column names (and their order) for deployment
import json
feature_columns = X.columns.tolist()
with open('feature_columns.json', 'w') as f:
    json.dump(feature_columns, f)
print(" Feature columns disimpan sebagai 'feature_columns.json'")

# Save the distinct values of every categorical column (for dropdowns in Streamlit)
categorical_options = {}
for col in categorical_cols:
    categorical_options[col] = df[col].unique().tolist()

with open('categorical_options.json', 'w') as f:
    json.dump(categorical_options, f)
print(" Categorical options disimpan sebagai 'categorical_options.json'")

# Save the chosen decision threshold for deployment (falls back to 0.5 if the
# threshold cell was not run)
best_threshold_to_save = float(globals().get('best_threshold', 0.5))
with open('threshold.json', 'w') as f:
    json.dump({'threshold': best_threshold_to_save}, f)
print(f" Threshold terbaik ({best_threshold_to_save:.2f}) disimpan sebagai 'threshold.json'")

print("\n" + "="*60)
print("SEMUA FILE UNTUK DEPLOYMENT SUDAH TERSIMPAN!")
print("="*60)
print("\nFile yang dihasilkan:")
print("1. customer_churn_ann_model.h5 - Model ANN")
print("2. scaler.pkl - StandardScaler")
print("3. feature_columns.json - Nama kolom fitur")
print("4. categorical_options.json - Opsi untuk input kategorik")
print("5. threshold.json - Ambang probabilitas terbaik (F1)")

# Download the files (Google Colab only). Best-effort: a failure is reported
# together with manual instructions and does not stop the notebook.
print("\n" + "="*60)
print("DOWNLOAD FILES UNTUK DEPLOYMENT")
print("="*60)
try:
    from google.colab import files
    print("\nMengunduh file...")
    files.download('customer_churn_ann_model.h5')
    files.download('scaler.pkl')
    files.download('feature_columns.json')
    files.download('categorical_options.json')
    files.download('threshold.json')
    print(" Semua file berhasil diunduh!")
except Exception as exc:
    # `except Exception` (not a bare `except:`) so KeyboardInterrupt/SystemExit
    # still propagate; the cause is printed instead of being silently dropped.
    print(f"\nDownload otomatis tidak berhasil ({type(exc).__name__}: {exc})")
    print("\nJika di Google Colab, gunakan kode berikut untuk download:")
    print("from google.colab import files")
    print("files.download('customer_churn_ann_model.h5')")
    print("files.download('scaler.pkl')")
    print("files.download('feature_columns.json')")
    print("files.download('categorical_options.json')")
    print("files.download('threshold.json')")
