## Kriteria 5: Membangun Model Klasifikasi

In [4]:
# --- Kriteria 5: Membangun Model Klasifikasi ---
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

print("\nMemulai Kriteria 5: Membangun Model Klasifikasi")

# 1. Data preparation for classification
# The cluster labels produced by the earlier KMeans step become the target
# variable; every remaining column of df_processed is used as a feature.
# NOTE(review): `kmeans` and `df_processed` are expected to come from earlier
# cells; if either is missing the NameError is reported by the handler below.
try:
    labels_usable = hasattr(kmeans, 'labels_') and len(kmeans.labels_) == len(df_processed)
    if not labels_usable:
        raise ValueError("Label cluster tidak sesuai jumlah data. Pastikan proses clustering berhasil.")

    # Drop any stale 'Target' column, then attach the fresh cluster labels
    # and split them off again as the target Series.
    X = df_processed.drop('Target', axis=1, errors='ignore').copy()
    X['Target'] = kmeans.labels_
    y = X.pop('Target')

    # Stratified 80/20 split with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
        stratify=y,
    )
    print(f"✅ Data telah dibagi menjadi {len(X_train)} data latih dan {len(X_test)} data uji.")
except Exception as e:
    print(f"❌ Error pada persiapan data klasifikasi: {e}")
    # Signal to the following steps that no usable split exists.
    X_train, X_test, y_train, y_test = None, None, None, None

# 2. Train and evaluate several candidate models
# The "best model" bookkeeping is initialised up front, outside the `if`, so
# the tuning step further down can always read these names -- even when data
# preparation failed and no model was trained (previously a NameError on a
# fresh kernel).
best_model_name = ""
best_model_score = 0.0
best_model_instance = None

if X_train is not None and y_train is not None:
    print("\n2. Melatih dan mengevaluasi beberapa model klasifikasi...")
    models = {
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(random_state=42),
        "Logistic Regression": LogisticRegression(random_state=42, max_iter=1000)
    }

    # Loop-invariant: the full set of cluster ids and their display names.
    # Passing `labels` explicitly keeps `target_names` aligned even if a
    # cluster is missing from both y_test and the predictions, which would
    # otherwise raise a length-mismatch ValueError.
    cluster_ids = sorted(set(y))
    cluster_names = [f'Cluster {i}' for i in cluster_ids]

    for name, model in models.items():
        try:
            print(f"\n--- Melatih {name} ---")
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            report = classification_report(
                y_test, y_pred, labels=cluster_ids, target_names=cluster_names
            )
            print(report)
            # Models are ranked by macro-averaged F1 on the test split.
            f1_avg = classification_report(
                y_test, y_pred, labels=cluster_ids, output_dict=True
            )['macro avg']['f1-score']
            if f1_avg > best_model_score:
                best_model_score = f1_avg
                best_model_name = name
                best_model_instance = model
        except Exception as e:
            # A failure in one model must not stop the remaining candidates.
            print(f"❌ Error melatih {name}: {e}")

    # The Decision Tree is always persisted, regardless of which model won.
    # NOTE(review): `joblib` is not imported in the visible part of the file;
    # presumably an earlier cell imports it -- confirm.
    try:
        joblib.dump(models["Decision Tree"], 'decision_tree_model.h5')
        print("\n✅ Model Decision Tree telah disimpan sebagai 'decision_tree_model.h5'.")
    except Exception as e:
        print(f"❌ Gagal menyimpan Decision Tree: {e}")

    # Persist the best-scoring model under a name derived from its label.
    if best_model_instance is not None:
        try:
            explore_model_filename = f'explore_{best_model_name.replace(" ", "")}_classification.h5'
            joblib.dump(best_model_instance, explore_model_filename)
            print(f"✅ Model eksplorasi terbaik ({best_model_name}) telah disimpan sebagai '{explore_model_filename}'.")
        except Exception as e:
            print(f"❌ Gagal menyimpan model terbaik: {e}")
else:
    print("❌ Data latih/tes tidak tersedia, proses klasifikasi dihentikan.")

# 3. Hyperparameter tuning on the best model
# Only Random Forest has a parameter grid defined; any other winner is
# reported and skipped.
if best_model_instance is not None and best_model_name:
    print(f"\n3. Melakukan Hyperparameter Tuning pada {best_model_name}...")
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [10, 20, None],
        'min_samples_leaf': [1, 2],
        'criterion': ['gini', 'entropy']
    } if best_model_name == "Random Forest" else {}
    if param_grid:
        try:
            # 3-fold grid search scored by macro F1, the same metric used to
            # pick the best model.
            grid_search = GridSearchCV(estimator=best_model_instance, param_grid=param_grid, cv=3, n_jobs=-1, verbose=1, scoring='f1_macro')
            grid_search.fit(X_train, y_train)
            print(f"\nParameter terbaik ditemukan: {grid_search.best_params_}")
            tuned_model = grid_search.best_estimator_
            print("\nLaporan klasifikasi untuk model yang sudah di-tuning:")
            y_pred_tuned = tuned_model.predict(X_test)
            # Pass `labels` explicitly so `target_names` stays aligned even if
            # a cluster is absent from both y_test and the predictions;
            # otherwise the report raises and the tuned model is never saved.
            cluster_ids = sorted(set(y))
            print(classification_report(
                y_test,
                y_pred_tuned,
                labels=cluster_ids,
                target_names=[f'Cluster {i}' for i in cluster_ids],
            ))
            # NOTE(review): `joblib` is not imported in the visible part of
            # the file; presumably an earlier cell imports it -- confirm.
            joblib.dump(tuned_model, 'tuning_classification.h5')
            print("✅ Model hasil tuning telah disimpan sebagai 'tuning_classification.h5'.")
        except Exception as e:
            print(f"❌ Error tuning model: {e}")
    else:
        print("(Model terbaik bukan Random Forest, tuning dilewati atau sesuaikan param_grid.)")
else:
    print("❌ Tidak ada model terbaik untuk tuning.")

print("\n--- Proyek Selesai! ---")


Memulai Kriteria 5: Membangun Model Klasifikasi
❌ Error pada persiapan data klasifikasi: name 'kmeans' is not defined
❌ Data latih/tes tidak tersedia, proses klasifikasi dihentikan.
❌ Tidak ada model terbaik untuk tuning.

--- Proyek Selesai! ---
