In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression


class DNN:
    """Feed-forward binary classifier wrapped around a compiled Keras model.

    The network is a ``keras.Sequential`` stack: an input of width
    ``input_dim``, one ``Dense`` layer per entry of ``hidden_layers`` and a
    single-unit ``Dense`` output layer. The model is built and compiled (with
    an accuracy metric) as soon as the object is constructed.
    """

    def __init__(self, input_dim, hidden_layers,
                 activation="relu", output_activation="sigmoid",
                 learning_rate=0.001, alpha=0.5, gamma=2.0, loss=None,
                 optimizer="adam"):
        """Store the hyper-parameters and build the compiled model.

        Args:
            input_dim: Number of input features.
            hidden_layers: Iterable with the unit count of each hidden layer.
            activation: Activation used by every hidden layer.
            output_activation: Activation used by the output layer.
            learning_rate: Learning rate for optimizers selected by name.
            alpha: Constant scale factor used by :meth:`focal_loss`.
            gamma: Focusing exponent used by :meth:`focal_loss`.
            loss: Loss handed to ``model.compile``; when ``None`` the
                instance's :meth:`focal_loss` is used.
            optimizer: Either one of the names ``"adam"``, ``"sgd"``,
                ``"rmsprop"``, ``"adagrad"`` (case-insensitive) or an object
                that is passed to ``model.compile`` unchanged.
        """
        self.input_dim = input_dim
        self.hidden_layers = hidden_layers
        self.activation = activation
        self.output_activation = output_activation
        self.learning_rate = learning_rate
        self.alpha = alpha
        self.gamma = gamma
        self.optimizer = optimizer
        if loss is None:
            loss = self.focal_loss
        self.loss = loss
        # Must come last: building the model reads the attributes set above.
        self.model = self._build_model()

    def focal_loss(self, y_true, y_pred):
        """Return the mean focal loss for binary targets.

        Each sample's binary cross-entropy is scaled by
        ``alpha * (1 - p_t) ** gamma``, where ``p_t`` is the probability the
        model assigned to the true class. ``alpha`` is applied identically to
        both classes.
        """
        targets = tf.cast(y_true, tf.float32)
        # Keep probabilities away from 0 and 1 so both logarithms stay finite.
        probs = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)

        positive_term = targets * tf.math.log(probs)
        negative_term = (1 - targets) * tf.math.log(1 - probs)
        bce = -(positive_term + negative_term)

        # Probability assigned to whichever class is the true one.
        p_true = targets * probs + (1 - targets) * (1 - probs)
        modulator = self.alpha * tf.pow(1 - p_true, self.gamma)
        return tf.reduce_mean(modulator * bce)

    def _get_optimizer(self):
        """Resolve ``self.optimizer`` into something ``compile`` accepts.

        Returns:
            A new Keras optimizer using ``self.learning_rate`` when
            ``self.optimizer`` is a supported name, otherwise
            ``self.optimizer`` itself.

        Raises:
            ValueError: If ``self.optimizer`` is a string that is not one of
                the supported names.
        """
        if not isinstance(self.optimizer, str):
            return self.optimizer

        # Factories are lazy so only the requested optimizer is instantiated.
        factories = {
            "adam": lambda: keras.optimizers.Adam(
                learning_rate=self.learning_rate),
            "sgd": lambda: keras.optimizers.SGD(
                learning_rate=self.learning_rate, momentum=0.9),
            "rmsprop": lambda: keras.optimizers.RMSprop(
                learning_rate=self.learning_rate),
            "adagrad": lambda: keras.optimizers.Adagrad(
                learning_rate=self.learning_rate),
        }
        make_optimizer = factories.get(self.optimizer.lower())
        if make_optimizer is None:
            raise ValueError(f"Optimizer '{self.optimizer}' không được hỗ trợ!")
        return make_optimizer()

    def _build_model(self):
        """Assemble and compile the sequential network."""
        net = keras.Sequential()
        net.add(layers.Input(shape=(self.input_dim,)))

        # Hidden layers followed by the single-unit output layer.
        dense_specs = [(width, self.activation) for width in self.hidden_layers]
        dense_specs.append((1, self.output_activation))
        for width, act in dense_specs:
            net.add(layers.Dense(width, activation=act))

        net.compile(
            optimizer=self._get_optimizer(),
            loss=self.loss,
            metrics=["accuracy"],
        )
        return net

    def fit(self, X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=0):
        """Train the model with shuffling enabled and return Keras' fit result.

        Args:
            X_train: Training features.
            y_train: Training targets.
            epochs: Number of training epochs.
            batch_size: Mini-batch size.
            validation_split: Fraction forwarded to Keras as its
                ``validation_split`` argument.
            verbose: Keras verbosity level.
        """
        fit_options = {
            "epochs": epochs,
            "batch_size": batch_size,
            "validation_split": validation_split,
            "verbose": verbose,
            "shuffle": True,
        }
        return self.model.fit(X_train, y_train, **fit_options)

    def predict(self, X):
        """Return the model's raw outputs for ``X`` without progress output."""
        outputs = self.model.predict(X, verbose=0)
        return outputs


class DNNStacking:
    """Stacking ensemble of two ``DNN`` base models and a logistic meta-learner.

    Both base models are trained on the same data. Their predictions are
    placed side by side as a two-column feature matrix, on which a
    ``LogisticRegression`` meta-learner is fitted and later queried.

    NOTE(review): the meta-learner is fitted on base-model predictions for the
    same samples the base models were trained on (no out-of-fold predictions),
    so its training inputs may be optimistically biased.
    """

    def __init__(self, model1_params, model2_params):
        """Create the two base models and the meta-learner.

        Args:
            model1_params: Keyword arguments for the first ``DNN``.
            model2_params: Keyword arguments for the second ``DNN``.
        """
        self.model1 = DNN(**model1_params)
        self.model2 = DNN(**model2_params)
        self.meta = LogisticRegression(max_iter=1000)

    def _base_predictions(self, X):
        """Return the base models' predictions for ``X`` as adjacent columns."""
        columns = [
            base.predict(X).reshape(-1, 1)
            for base in (self.model1, self.model2)
        ]
        return np.hstack(columns)

    def fit(self, X_train, y_train, epochs=30, batch_size=32, verbose=0,
            validation_split=0.0):
        """Train both base models, then fit the meta-learner on their outputs.

        Args:
            X_train: Training features.
            y_train: Training targets.
            epochs: Epochs used for each base model.
            batch_size: Mini-batch size used for each base model.
            verbose: Verbosity forwarded to each base model.
            validation_split: Fraction each base model holds out during
                training. Defaults to 0.0: the training history is discarded
                here, so a hold-out would only remove samples from training.
                Keras takes the hold-out from the end of the arrays, which
                after oversampling is typically where the synthetic samples
                sit.
        """
        for base in (self.model1, self.model2):
            base.fit(X_train, y_train,
                     epochs=epochs,
                     batch_size=batch_size,
                     validation_split=validation_split,
                     verbose=verbose)

        self.meta.fit(self._base_predictions(X_train), y_train)

    def predict(self, X):
        """Return the meta-learner's predicted class labels for ``X``."""
        return self.meta.predict(self._base_predictions(X))

# Dataset names; each one is read from "C:/NASA/<name>.csv".
NASAfile = ['CM1', 'KC1', 'KC3', 'MC1', 'MC2', 'MW1',
            'PC1', 'PC3', 'PC4', 'PC5']

# Mean F1-score over the cross-validation folds, keyed by dataset name.
results = {}

for file in NASAfile:
    data = pd.read_csv(f"C:/NASA/{file}.csv")
    # Encode the label column: 'N' -> 0, 'Y' -> 1.
    data['Defective'] = data['Defective'].map({'N': 0, 'Y': 1})

    X = data.drop(['Defective'], axis=1).values
    y = data['Defective'].values

    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    f1_scores = []

    for train_idx, test_idx in skf.split(X, y):
        # Two new Keras models are built per fold; release the global state
        # left by the previous fold so memory does not grow across the
        # folds and datasets.
        keras.backend.clear_session()

        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # Normalisation: fit the scaler on the training fold only, so that
        # test-fold minima/maxima do not leak into training.
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Keep the 10 best features (or all of them if fewer are available).
        n_selected = min(10, X_train.shape[1])
        selector = SelectKBest(score_func=mutual_info_classif, k=n_selected)
        X_train = selector.fit_transform(X_train, y_train)
        X_test = selector.transform(X_test)

        # SMOTE: oversample the training fold only.
        smote = SMOTE(random_state=42)
        X_train, y_train = smote.fit_resample(X_train, y_train)

        # DNN parameters for the two base models.
        params1 = {"input_dim": X_train.shape[1], "hidden_layers": [128, 64], "learning_rate": 0.001}
        params2 = {"input_dim": X_train.shape[1], "hidden_layers": [256, 128], "learning_rate": 0.001}

        # Train the stacking ensemble.
        stack_model = DNNStacking(params1, params2)
        stack_model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=0)

        y_pred = stack_model.predict(X_test)
        f1_scores.append(f1_score(y_test, y_pred, zero_division=0))

    avg_f1 = np.mean(f1_scores)
    results[file] = avg_f1
    print(f"{file}: F1-score trung bình = {avg_f1:.4f}")

print("\n=== Tổng hợp ===")
for k, v in results.items():
    print(f"{k}: {v:.4f}")


CM1: F1-score trung bình = 0.3801
KC1: F1-score trung bình = 0.4788
KC3: F1-score trung bình = 0.3876
MC1: F1-score trung bình = 0.0830
MC2: F1-score trung bình = 0.6215
MW1: F1-score trung bình = 0.3789
PC1: F1-score trung bình = 0.2889
PC3: F1-score trung bình = 0.4377
PC4: F1-score trung bình = 0.5791
PC5: F1-score trung bình = 0.5162

=== Tổng hợp ===
CM1: 0.3801
KC1: 0.4788
KC3: 0.3876
MC1: 0.0830
MC2: 0.6215
MW1: 0.3789
PC1: 0.2889
PC3: 0.4377
PC4: 0.5791
PC5: 0.5162
