In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    cohen_kappa_score, roc_auc_score, confusion_matrix, matthews_corrcoef
)

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Dense, Dropout, Flatten, Conv2D, Conv1D, MaxPooling2D, MaxPooling1D,
    AveragePooling1D, BatchNormalization, SpatialDropout1D, LSTM, ReLU,
    Input, Concatenate, Attention
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import regularizers, layers, models


In [3]:

# Early-stopping callback (the original note says it is meant to be used
# inside the retrain_models_for_fold function).
_early_stop_options = dict(
    monitor='val_loss',          # quantity watched after every epoch
    patience=5,                  # stop if no improvement for 5 epochs
    restore_best_weights=True,
    verbose=0,
)
early_stop = EarlyStopping(**_early_stop_options)

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Read the CSV files: for every group, `dataset_1_*` is the first dataset and
# `dataset_2_*` is the second one, used for external validation.

# Group 1
dataset_1_group_1, dataset_2_group_1 = (
    pd.read_csv('Layer 2 Training EBE.csv'),
    pd.read_csv('Layer 2 Test EBE.csv'),
)

# Group 2
# NOTE(review): the training file name has a space before ".csv" -- confirm it
# matches the file on disk.
dataset_1_group_2, dataset_2_group_2 = (
    pd.read_csv('Layer 2 Train PPSC .csv'),
    pd.read_csv('Layer 2 Test PPSC.csv'),
)

# Group 3
dataset_1_group_3, dataset_2_group_3 = (
    pd.read_csv('Layer 2 Training SBE.csv'),
    pd.read_csv('Layer 2 Test SBE.csv'),
)

def _split_and_scale(train_df, external_df, test_size=0.1, random_state=42):
    """Split one group's first dataset and standardize all of its feature matrices.

    The last column of each frame is taken as the label; every other column
    is taken as a feature.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Dataset that is divided into the train and internal-test parts.
    external_df : pandas.DataFrame
        External-validation dataset. It is only transformed, never used to
        fit the scaler.
    test_size : float, default 0.1
        Fraction of ``train_df`` held out as the internal test part
        (0.1 gives a 90/10 split).
    random_state : int, default 42
        Seed forwarded to ``train_test_split``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, X_external, y_external, scaler)``.
        The ``X_*`` entries are the scaler's outputs, the ``y_*`` entries are
        the label columns, and ``scaler`` is the ``StandardScaler`` that was
        fitted on the training features only.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        train_df.iloc[:, :-1],
        train_df.iloc[:, -1],
        test_size=test_size,
        random_state=random_state,
    )
    X_external, y_external = external_df.iloc[:, :-1], external_df.iloc[:, -1]

    # Fit on the training part only; the test and external parts reuse the
    # same fitted statistics.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    X_external = scaler.transform(X_external)

    return X_train, X_test, y_train, y_test, X_external, y_external, scaler


# Split each group's first dataset into train/test parts (90-10 split, i.e.
# test_size=0.1), prepare the external validation set, and standardize.

# Group 1
(X_train_1, X_test_1, y_train_1, y_test_1,
 X_external_1, y_external_1, scaler_1) = _split_and_scale(dataset_1_group_1, dataset_2_group_1)

# Group 2
(X_train_2, X_test_2, y_train_2, y_test_2,
 X_external_2, y_external_2, scaler_2) = _split_and_scale(dataset_1_group_2, dataset_2_group_2)

# Group 3
(X_train_3, X_test_3, y_train_3, y_test_3,
 X_external_3, y_external_3, scaler_3) = _split_and_scale(dataset_1_group_3, dataset_2_group_3)

# Bundle the six arrays of every group into one tuple, ordered as
# (X_train, X_test, y_train, y_test, X_external, y_external).
_group_1_data = (X_train_1, X_test_1, y_train_1, y_test_1, X_external_1, y_external_1)
_group_2_data = (X_train_2, X_test_2, y_train_2, y_test_2, X_external_2, y_external_2)
_group_3_data = (X_train_3, X_test_3, y_train_3, y_test_3, X_external_3, y_external_3)

# `datasets` holds one tuple per group, in group order 1, 2, 3.
datasets = [_group_1_data, _group_2_data, _group_3_data]


In [6]:
# Print the shape of every split, group by group (the arrays must already
# have been created by the cell that loads and scales the data).
_split_labels = ("X_train", "X_test", "y_train", "y_test", "X_external", "y_external")
_splits_by_group = (
    (X_train_1, X_test_1, y_train_1, y_test_1, X_external_1, y_external_1),
    (X_train_2, X_test_2, y_train_2, y_test_2, X_external_2, y_external_2),
    (X_train_3, X_test_3, y_train_3, y_test_3, X_external_3, y_external_3),
)

for _group_no, _group_arrays in enumerate(_splits_by_group, start=1):
    # Every header except the first is preceded by a blank line.
    _separator = "" if _group_no == 1 else "\n"
    print(f"{_separator}Group {_group_no}:")
    for _label, _array in zip(_split_labels, _group_arrays):
        print(f"{_label}_{_group_no} shape: {_array.shape}")


Group 1:
X_train_1 shape: (630, 150)
X_test_1 shape: (70, 150)
y_train_1 shape: (630,)
y_test_1 shape: (70,)
X_external_1 shape: (177, 150)
y_external_1 shape: (177,)

Group 2:
X_train_2 shape: (630, 84)
X_test_2 shape: (70, 84)
y_train_2 shape: (630,)
y_test_2 shape: (70,)
X_external_2 shape: (177, 84)
y_external_2 shape: (177,)

Group 3:
X_train_3 shape: (630, 420)
X_test_3 shape: (70, 420)
y_train_3 shape: (630,)
y_test_3 shape: (70,)
X_external_3 shape: (177, 420)
y_external_3 shape: (177,)


In [7]:
# Helper that turns a feature count into a two-element input shape.
def create_input_shape(num_features):
    """Return the tuple ``(num_features, 1)``.

    The original note describes ``num_features`` as the number of features
    (timesteps); the trailing ``1`` is a fixed second dimension.
    """
    input_shape = (num_features, 1)
    return input_shape

# Number of features (second axis of each training matrix) for datasets 1-3.
num_features_1, num_features_2, num_features_3 = (
    X_train_1.shape[1],
    X_train_2.shape[1],
    X_train_3.shape[1],
)

# Matching input shapes, built with create_input_shape.
input_shape_1, input_shape_2, input_shape_3 = map(
    create_input_shape,
    (num_features_1, num_features_2, num_features_3),
)

# Print input shapes
for _group_no, _group_shape in enumerate((input_shape_1, input_shape_2, input_shape_3), start=1):
    print(f"Input shape for Group {_group_no}: {_group_shape}")



Input shape for Group 1: (150, 1)
Input shape for Group 2: (84, 1)
Input shape for Group 3: (420, 1)


In [20]:
# Compile this part after compilation of all 6 CNNs and the branch classifiers.
# NOTE(review): the original note mentions 36 branch classifiers; the lists
# below hold 36 models in total (12 per group) -- confirm the intended count.

# Each group list is the first model with its c1-c4 variants followed by the
# second model with its c1-c6 variants.
models_group_1 = (
    [model1, model1c1, model1c2, model1c3, model1c4]
    + [model2, model2c1, model2c2, model2c3, model2c4, model2c5, model2c6]
)

models_group_2 = (
    [model3, model3c1, model3c2, model3c3, model3c4]
    + [model4, model4c1, model4c2, model4c3, model4c4, model4c5, model4c6]
)

models_group_3 = (
    [model5, model5c1, model5c2, model5c3, model5c4]
    + [model6, model6c1, model6c2, model6c3, model6c4, model6c5, model6c6]
)

# One flat list of every model, in group order 1, 2, 3.
models = [*models_group_1, *models_group_2, *models_group_3]



In [21]:
# Print the shapes of the evaluation arrays: test features, external features,
# test labels, then external labels -- each for groups 1, 2 and 3.
_evaluation_arrays = (
    X_test_1, X_test_2, X_test_3,
    X_external_1, X_external_2, X_external_3,
    y_test_1, y_test_2, y_test_3,
    y_external_1, y_external_2, y_external_3,
)
for _array in _evaluation_arrays:
    print(_array.shape)


(70, 150)
(70, 84)
(70, 420)
(177, 150)
(177, 84)
(177, 420)
(70,)
(70,)
(70,)
(177,)
(177,)
(177,)
