# In [None]:
#%% [code]
import os
import json
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, log_loss, matthews_corrcoef, confusion_matrix
)
import tensorflow as tf
from tensorflow.keras import layers, Sequential, Model
from tensorflow.keras.layers import (
    Input, LSTM, GRU, SimpleRNN, Conv1D, MaxPooling1D,
    GlobalAveragePooling1D, Dense, UpSampling1D, Concatenate, Bidirectional
)
from pandas.plotting import parallel_coordinates

# In [None]:

# Ensure the "plot" output directory exists before any figure is saved into it;
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs("plot", exist_ok=True)

#---------------------------
# Custom EarlyStopping
#---------------------------
class ConsecutiveEarlyStopping(tf.keras.callbacks.Callback):
    """Stop training once a metric exceeds a threshold for several epochs in a row.

    Args:
        monitor: Key looked up in the per-epoch ``logs`` dict.
        threshold: Value the monitored metric must strictly exceed.
        patience: Number of consecutive qualifying epochs required to stop.
        verbose: When truthy, print a message at the epoch training is stopped.
    """

    def __init__(self, monitor='accuracy', threshold=0.99, patience=5, verbose=1):
        super().__init__()
        self.monitor = monitor
        self.threshold = threshold
        self.patience = patience
        self.verbose = verbose
        self.counter = 0

    def on_train_begin(self, logs=None):
        """Reset the streak so a reused instance starts each ``fit()`` from zero."""
        self.counter = 0

    def on_epoch_end(self, epoch, logs=None):
        """Update the streak counter and request a stop when it reaches ``patience``."""
        # ``logs`` defaults to None in the callback signature; treat that as "no metrics".
        val = (logs or {}).get(self.monitor)
        if val is None:
            return
        if val > self.threshold:
            self.counter += 1
            if self.counter >= self.patience:
                if self.verbose:
                    print(f"\nEpoch {epoch+1}: {self.monitor} > {self.threshold}"
                          f" for {self.patience} consecutive epochs. Stopping.")
                self.model.stop_training = True
        else:
            # Any epoch at or below the threshold (or NaN) breaks the streak.
            self.counter = 0

#---------------------------
# Data Loading & Preprocessing
#---------------------------
def load_and_preprocess(feat_path="cdf.csv", label_path="cdl.csv", rows_per=100):
    """Load features and labels, group rows into samples, and balance with SMOTE.

    Every ``rows_per`` consecutive rows of the feature CSV form one sample, which
    is flattened row-major into a single vector. The label of sample ``i`` is
    taken from row ``i`` of the first column of the label CSV.

    Side effects: writes the resampled features (one row per original feature
    row, blocks stacked) to ``final_f.csv`` and the resampled labels to
    ``final_l.csv``.

    Args:
        feat_path: Path to the feature CSV.
        label_path: Path to the label CSV.
        rows_per: Number of feature rows that make up one sample.

    Returns:
        Tuple ``(X_res, y_res, rows_per, n_feat)`` where ``X_res`` has shape
        ``(n_samples, rows_per * n_feat)``.

    Raises:
        ValueError: If ``rows_per`` is not positive, if the feature file holds
            fewer than ``rows_per`` rows, or if there are fewer labels than samples.
    """
    if rows_per < 1:
        raise ValueError(f"rows_per must be a positive integer, got {rows_per}.")

    df_feat = pd.read_csv(feat_path)
    df_lab = pd.read_csv(label_path)
    n_feat = df_feat.shape[1]
    n_samp = df_feat.shape[0] // rows_per

    if n_samp == 0:
        raise ValueError(
            f"{feat_path} has {df_feat.shape[0]} rows; at least {rows_per} are "
            "needed to form one sample."
        )
    if len(df_lab) < n_samp:
        raise ValueError(
            f"{label_path} has {len(df_lab)} labels but {feat_path} contains "
            f"{n_samp} samples of {rows_per} rows."
        )

    # Trailing rows that do not fill a complete block are dropped; a C-order
    # reshape flattens each block exactly as a per-block ``flatten()`` would.
    X_flat = df_feat.values[:n_samp * rows_per].reshape(n_samp, rows_per * n_feat)
    y = df_lab.iloc[:n_samp, 0].to_numpy()

    # SMOTE balancing.
    # NOTE(review): resampling happens before any train/test split performed by
    # callers, so synthetic samples can be derived from rows that later end up
    # in a held-out set. Consider resampling only the training portion.
    sm = SMOTE(random_state=42)
    X_res, y_res = sm.fit_resample(X_flat, y)
    n_new = X_res.shape[0]

    # Save the resampled data with each sample unfolded back into rows_per rows.
    X2 = X_res.reshape(n_new * rows_per, n_feat)
    pd.DataFrame(X2, columns=df_feat.columns).to_csv("final_f.csv", index=False)
    pd.DataFrame(y_res, columns=["label"]).to_csv("final_l.csv", index=False)

    return X_res, y_res, rows_per, n_feat

#---------------------------
# Feature Scaling
#---------------------------
def scale_features(X):
    """Min-max scale ``X`` to [0, 1] and persist the fitted scaler parameters.

    The scaler is fitted on ``X`` itself. Its fitted attributes are written as
    lists to ``scaler1.json`` so the same transform can be rebuilt later.

    Args:
        X: 2-D array-like accepted by ``MinMaxScaler.fit_transform``.

    Returns:
        The scaled array.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(X)

    # Fitted attributes to export, in the order they appear in the JSON file.
    exported = ("min_", "scale_", "data_min_", "data_max_", "data_range_")
    params = {attr: getattr(scaler, attr).tolist() for attr in exported}

    with open("scaler1.json", "w") as handle:
        json.dump(params, handle, indent=4)

    return scaled

#---------------------------
# Model Factory (15 variants)
#---------------------------
def create_model(idx, input_shape):
    """Build and compile one of the 15 binary-classifier variants.

    Variants 0-11 and 14 are ``Sequential`` stacks that receive a final
    single-unit sigmoid ``Dense`` layer here; variants 12 and 13 are functional
    graphs that define their own sigmoid output.

    Args:
        idx: Variant number, 0 through 14.
        input_shape: Shape of one sample, given to the first layer or ``Input``.

    Returns:
        A Keras model compiled with Adam (learning rate 0.005), binary
        cross-entropy loss and the ``accuracy`` metric.

    Raises:
        ValueError: If ``idx`` does not match any variant.
    """
    opt = tf.keras.optimizers.Adam(learning_rate=0.005)

    def conv_front():
        # Conv + pool front-end shared by the plain CNN and the CNN hybrids.
        return [
            Conv1D(32, 3, activation='relu', input_shape=input_shape),
            MaxPooling1D(2),
        ]

    def recurrent_pair(cell):
        # Two recurrent layers: 32 units emitting sequences, then 16 units.
        return [
            cell(32, return_sequences=True, input_shape=input_shape),
            cell(16),
        ]

    def recurrent_stack(cell):
        # Three recurrent layers: 64 and 64 emitting sequences, then 32 units.
        return [
            cell(64, return_sequences=True, input_shape=input_shape),
            cell(64, return_sequences=True),
            cell(32),
        ]

    # Variant number -> (model name, factory for its layers before the head).
    # Factories are lazy so only the requested variant's layers are created.
    sequential_variants = {
        0: ("LSTM_Basic", lambda: recurrent_pair(LSTM)),
        1: ("GRU_Basic", lambda: recurrent_pair(GRU)),
        2: ("RNN_Basic", lambda: recurrent_pair(SimpleRNN)),
        3: ("CNN_1D", lambda: conv_front() + [
            Conv1D(64, 3, activation='relu'),
            GlobalAveragePooling1D(),
        ]),
        4: ("CNN_LSTM", lambda: conv_front() + [LSTM(32)]),
        5: ("CNN_GRU", lambda: conv_front() + [GRU(32)]),
        6: ("CNN_RNN", lambda: conv_front() + [SimpleRNN(32)]),
        7: ("Deeper_CNN", lambda: [
            Conv1D(32, 3, activation='relu', input_shape=input_shape),
            Conv1D(32, 3, activation='relu'),
            MaxPooling1D(2),
            Conv1D(64, 3, activation='relu'),
            MaxPooling1D(2),
            GlobalAveragePooling1D(),
        ]),
        8: ("Stacked_LSTM", lambda: recurrent_stack(LSTM)),
        9: ("Stacked_GRU", lambda: recurrent_stack(GRU)),
        10: ("CNN_BiLSTM", lambda: conv_front() + [Bidirectional(LSTM(32))]),
        11: ("CNN_BiGRU", lambda: conv_front() + [Bidirectional(GRU(32))]),
        14: ("FCN", lambda: [
            Conv1D(128, 8, activation='relu', padding='same', input_shape=input_shape),
            Conv1D(256, 5, activation='relu', padding='same'),
            Conv1D(128, 3, activation='relu', padding='same'),
            GlobalAveragePooling1D(),
        ]),
    }

    def unet_1d():
        # Encoder-decoder with two skip connections, pooled to one sigmoid unit.
        inp = Input(shape=input_shape)
        enc1 = Conv1D(16, 3, activation='relu', padding='same')(inp)
        down1 = MaxPooling1D(2)(enc1)
        enc2 = Conv1D(32, 3, activation='relu', padding='same')(down1)
        down2 = MaxPooling1D(2)(enc2)
        bottleneck = Conv1D(64, 3, activation='relu', padding='same')(down2)
        up2 = UpSampling1D(2)(bottleneck)
        merged2 = Concatenate()([up2, enc2])
        dec2 = Conv1D(32, 3, activation='relu', padding='same')(merged2)
        up1 = UpSampling1D(2)(dec2)
        merged1 = Concatenate()([up1, enc1])
        dec1 = Conv1D(16, 3, activation='relu', padding='same')(merged1)
        pooled = GlobalAveragePooling1D()(dec1)
        out = Dense(1, activation='sigmoid')(pooled)
        return Model(inputs=inp, outputs=out, name="UNet_1D")

    def resnet_1d():
        # Single residual block (conv -> conv + shortcut), pooled to one sigmoid unit.
        inp = Input(shape=input_shape)
        shortcut = Conv1D(32, 3, activation='relu', padding='same')(inp)
        residual = Conv1D(32, 3, activation='relu', padding='same')(shortcut)
        summed = layers.Add()([residual, shortcut])
        pooled = MaxPooling1D(2)(summed)
        pooled = GlobalAveragePooling1D()(pooled)
        out = Dense(1, activation='sigmoid')(pooled)
        return Model(inputs=inp, outputs=out, name="ResNet_1D")

    if idx in sequential_variants:
        name, make_layers = sequential_variants[idx]
        m = Sequential(name=name)
        for layer in make_layers():
            m.add(layer)
        # Sequential stacks get their sigmoid head here.
        m.add(Dense(1, activation='sigmoid'))
    elif idx == 12:
        m = unet_1d()
    elif idx == 13:
        m = resnet_1d()
    else:
        raise ValueError("Model index must be between 0 and 14.")

    m.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return m

#---------------------------
# Training & Metrics Collection
#---------------------------
def _restore_untrained(model, weights):
    """Put ``model`` back to its starting weights with a fresh optimizer."""
    model.set_weights(weights)
    # Re-create the optimizer from its config so accumulated state (e.g. Adam
    # moments and step count) from an earlier fit does not carry over.
    fresh_opt = type(model.optimizer).from_config(model.optimizer.get_config())
    # 'accuracy' is the metric ConsecutiveEarlyStopping monitors by default.
    model.compile(optimizer=fresh_opt, loss=model.loss, metrics=['accuracy'])


def train_and_collect(models, X, y, rows, n_feat, n_iterations=20):
    """Train every model on repeated random splits and collect test metrics.

    For each iteration the data is split 80/10/10 into train/validation/test
    with ``random_state=iteration``. Each model is reset to the weights it had
    on entry (and given a fresh optimizer) before fitting, so no run starts
    from parameters that already saw another split's test samples.

    Args:
        models: Iterable of compiled, built Keras binary classifiers with a
            single sigmoid output. The models are trained in place.
        X: 2-D array of flattened samples, reshaped to ``(-1, rows, n_feat)``.
        y: Binary labels (0/1), one per sample.
        rows: Number of time steps per sample.
        n_feat: Number of features per time step.
        n_iterations: Number of random splits to evaluate.

    Returns:
        DataFrame with one row per (iteration, model) holding ``model_index``
        and the test-set metrics.
    """
    models = list(models)
    # Snapshot the starting weights once so each run can begin from them.
    initial_weights = [model.get_weights() for model in models]

    # The reshape does not depend on the iteration, so do it once.
    X_seq = X.reshape(-1, rows, n_feat)

    records = []
    for iteration in range(n_iterations):
        X_tr, X_temp, y_tr, y_temp = train_test_split(
            X_seq, y, test_size=0.2, random_state=iteration)
        X_val, X_te, y_val, y_te = train_test_split(
            X_temp, y_temp, test_size=0.5, random_state=iteration)

        for idx, model in enumerate(models):
            _restore_untrained(model, initial_weights[idx])
            cb = ConsecutiveEarlyStopping()
            model.fit(
                X_tr, y_tr,
                epochs=200,
                batch_size=16,
                validation_data=(X_val, y_val),
                callbacks=[cb],
                verbose=0
            )

            prob = model.predict(X_te, verbose=0).ravel()
            pred = (prob > 0.5).astype(int)

            # Fixing the label set keeps the matrix 2x2 and log_loss defined
            # even when the test split happens to contain a single class.
            tn, fp, _fn, _tp = confusion_matrix(y_te, pred, labels=[0, 1]).ravel()
            both_classes = len(np.unique(y_te)) > 1

            records.append({
                'model_index': idx,
                'accuracy': accuracy_score(y_te, pred),
                'precision': precision_score(y_te, pred, zero_division=0),
                'recall': recall_score(y_te, pred, zero_division=0),
                'f1_score': f1_score(y_te, pred, zero_division=0),
                'roc_auc': roc_auc_score(y_te, prob) if both_classes else np.nan,
                'mcc': matthews_corrcoef(y_te, pred),
                'specificity': tn/(tn+fp) if (tn+fp)>0 else np.nan,
                'log_loss': log_loss(y_te, prob, labels=[0, 1])
            })
    return pd.DataFrame(records)

#---------------------------
# Aggregation & Plotting
#---------------------------
def aggregate_and_save(df):
    """Average each metric per model, write the table to ``eval.csv``, return it.

    Args:
        df: DataFrame with a ``model_index`` column and one column per metric.

    Returns:
        DataFrame with one row per ``model_index`` holding the metric means.
    """
    metric_cols = [
        'accuracy', 'precision', 'recall', 'f1_score',
        'roc_auc', 'mcc', 'specificity', 'log_loss',
    ]
    grouped = df.groupby('model_index')
    per_model_mean = grouped[metric_cols].mean()
    # Turn the group key back into an ordinary column before saving.
    agg = per_model_mean.reset_index()
    agg.to_csv('eval.csv', index=False)
    return agg

def plot_aggregated(agg):
    """Save heatmap, radar, parallel-coordinates and grouped-bar plots of ``agg``.

    ``log_loss`` is plotted as ``1 / (1 + log_loss)`` so that, like the other
    metrics shown, larger values are better. Figures are written to the
    ``plot/`` directory.

    Args:
        agg: DataFrame with a ``model_index`` column and the averaged metric
            columns, including ``log_loss``. It is not modified.
    """
    # Make the function usable on its own, independent of module-level setup.
    os.makedirs('plot', exist_ok=True)

    # Work on a copy so the caller's frame does not gain an extra column.
    agg = agg.copy()
    agg['inv_log_loss'] = 1/(1+agg['log_loss'])
    labels = ['accuracy','precision','recall','f1_score','roc_auc','mcc','specificity','inv_log_loss']

    # Heatmap
    plt.figure(figsize=(10,6))
    sns.heatmap(agg.set_index('model_index')[labels], annot=True, cmap='YlGnBu', fmt='.3f')
    plt.title('Heatmap of Average Metrics per Model')
    plt.savefig('plot/heatmap_avg.png')
    plt.close()

    # Radar Chart
    angles = np.linspace(0, 2*np.pi, len(labels), endpoint=False).tolist()
    angles += angles[:1]  # repeat the first angle to close each polygon
    fig, ax = plt.subplots(figsize=(8,8), subplot_kw=dict(polar=True))
    for _, row in agg.iterrows():
        vals = row[labels].tolist()
        vals += vals[:1]
        ax.plot(angles, vals, label=f"Model {int(row['model_index'])}")
        ax.fill(angles, vals, alpha=0.1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(labels)
    ax.set_title('Radar Chart of Average Metrics per Model')
    ax.legend(bbox_to_anchor=(1.1,1.1))
    # The legend is anchored outside the axes; a tight bounding box keeps it
    # from being cut off in the saved image.
    fig.savefig('plot/radar_avg.png', bbox_inches='tight')
    plt.close(fig)

    # Parallel Coordinates
    pc = agg.copy()
    pc['model'] = pc['model_index'].astype(str)
    plt.figure(figsize=(12,6))
    parallel_coordinates(pc[['model']+labels], 'model')
    plt.title('Parallel Coordinates Plot')
    plt.savefig('plot/parallel_avg.png')
    plt.close()

    # Grouped Bar
    melt = agg.melt(id_vars='model_index', value_vars=labels, var_name='metric', value_name='value')
    plt.figure(figsize=(12,6))
    sns.barplot(data=melt, x='metric', y='value', hue='model_index')
    plt.title('Grouped Bar Chart')
    plt.savefig('plot/grouped_bar_avg.png')
    plt.close()

#---------------------------
# Main Execution
#---------------------------
def main():
    """Run the pipeline end to end: load, scale, build, train, aggregate, plot."""
    X, y, rows, n_feat = load_and_preprocess()
    print('Data loaded and preprocessed.')

    Xs = scale_features(X)
    print('Features scaled.')

    sample_shape = (rows, n_feat)
    models = [create_model(variant, sample_shape) for variant in range(15)]
    print('Models initialized.')

    df_metrics = train_and_collect(models, Xs, y, rows, n_feat)
    print('Training complete.')

    agg = aggregate_and_save(df_metrics)
    print('Aggregated metrics saved.')

    plot_aggregated(agg)
    print('Plots saved in plot/')


if __name__ == '__main__':
    main()

#%% [markdown]
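# Instructions:
# 1. Ensure `cdf.csv` (features; every 100 consecutive rows form one sample) and
#    `cdl.csv` (one label per sample, in the first column) exist in the working directory.
# 2. Run this cell in Jupyter.
# 3. Check the outputs: `final_f.csv`, `final_l.csv`, `scaler1.json` and `eval.csv`
#    in the working directory, and the figures in the `plot/` folder.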
