This will implement reduce overfitting, not done

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, matthews_corrcoef, accuracy_score, balanced_accuracy_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import RobustScaler
import matplotlib.pyplot as plt
import random

In [2]:
# Set random seeds for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)  # This sets all random seeds in keras
tf.config.experimental.enable_op_determinism()  # For complete reproducibility

In [3]:
def prepare_sequence_data(df):
    """Convert sequences to integer encoding"""
    alphabet = 'ARNDCQEGHILKMFPSTWYV-'
    char_to_int = dict((c, i) for i, c in enumerate(alphabet))
    
    sequences = df['sequence'].values
    encodings = []
    
    for seq in sequences:
        try:
            integer_encoded = [char_to_int[char] for char in seq]
            encodings.append(integer_encoded)
        except Exception as e:
            print(f"Error processing sequence: {e}")
            continue
    
    return np.array(encodings)

In [4]:
def prepare_structure_data(df):
    """Enhanced feature preparation with better normalization"""
    
    # Normalize angles to their circular nature
    def normalize_angles(angle_array):
        angle_rad = np.pi * angle_array / 180.0
        return np.stack([np.sin(angle_rad), np.cos(angle_rad)], axis=-1)
    
    # Process each feature type
    features_list = []
    
    # 1. Process angles (phi, psi, omega, tau)
    angles = ['phi', 'psi', 'omega', 'tau']
    for angle in angles:
        # Convert string to array
        angle_arrays = np.array([np.array(eval(x)) for x in df[angle]])
        # Get sin/cos representations
        angle_features = normalize_angles(angle_arrays)
        features_list.append(angle_features)
    
    # 2. Process SASA
    sasa_arrays = np.array([np.array(eval(x)) for x in df['sasa']])
    scaler = RobustScaler()
    sasa_flat = sasa_arrays.reshape(-1, 1)
    sasa_scaled = scaler.fit_transform(sasa_flat).reshape(sasa_arrays.shape)
    features_list.append(sasa_scaled[..., np.newaxis])
    
    # 3. Process secondary structure
    ss_arrays = np.array([list(seq) for seq in df['ss']])
    ss_encoded = np.zeros((len(ss_arrays), ss_arrays.shape[1], 3))
    ss_map = {'H': 0, 'E': 1, 'L': 2}
    for i in range(len(ss_arrays)):
        for j in range(len(ss_arrays[i])):
            ss_encoded[i, j, ss_map[ss_arrays[i, j]]] = 1
    features_list.append(ss_encoded)
    
    # Combine all features
    features = np.concatenate(features_list, axis=-1)
    
    return features

In [5]:
def create_combined_model(seq_length=33, struct_features=12, struct_window=0):
    """Create simpler model with both sequence and structure tracks"""
    middle_pos = seq_length // 2
    
    # Simplified Sequence track
    seq_input = tf.keras.layers.Input(shape=(seq_length,), name='sequence_input')
    x_seq = tf.keras.layers.Embedding(21, 21, input_length=seq_length)(seq_input)
    x_seq = tf.keras.layers.Conv1D(32, kernel_size=3, activation='relu')(x_seq)
    x_seq = tf.keras.layers.GlobalMaxPooling1D()(x_seq)
    x_seq = tf.keras.layers.Dense(32, activation='relu')(x_seq)
    x_seq = tf.keras.layers.Dropout(0.3)(x_seq)
    
    # Simplified Structure track
    struct_input = tf.keras.layers.Input(shape=(seq_length, struct_features), name='structure_input')
    
    # Extract middle position(s)
    if struct_window == 0:
        x_struct = tf.keras.layers.Lambda(
            lambda x: x[:, middle_pos:middle_pos+1, :]
        )(struct_input)
    else:
        x_struct = tf.keras.layers.Lambda(
            lambda x: x[:, middle_pos-struct_window:middle_pos+struct_window+1, :]
        )(struct_input)
    
    x_struct = tf.keras.layers.Conv1D(32, 3, padding='same', activation='relu')(x_struct)
    x_struct = tf.keras.layers.GlobalMaxPooling1D()(x_struct)
    x_struct = tf.keras.layers.Dense(32, activation='relu')(x_struct)
    x_struct = tf.keras.layers.Dropout(0.3)(x_struct)
    
    # Simple weight learning
    track_weights = tf.keras.layers.Dense(2, activation='softmax', name='track_weights')(
        tf.keras.layers.Concatenate()([x_seq, x_struct])
    )
    
    # Apply weights
    weighted_seq = tf.keras.layers.Multiply()([
        x_seq,
        tf.keras.layers.Lambda(lambda x: x[:, 0:1])(track_weights)
    ])
    
    weighted_struct = tf.keras.layers.Multiply()([
        x_struct,
        tf.keras.layers.Lambda(lambda x: x[:, 1:2])(track_weights)
    ])
    
    # Simple combination
    combined = tf.keras.layers.Concatenate()([weighted_seq, weighted_struct])
    
    # Single dense layer for final prediction
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(combined)
    
    model = tf.keras.Model(inputs=[seq_input, struct_input], outputs=outputs)
    
    def get_track_weights():
        weights = model.get_layer('track_weights').get_weights()
        if len(weights) > 0:
            w = weights[0]
            b = weights[1] if len(weights) > 1 else 0
            exp_weights = np.exp(np.mean(w, axis=0) + b)
            normalized = exp_weights / np.sum(exp_weights)
            return {
                'sequence': float(normalized[0]),
                'structure': float(normalized[1])
            }
        return {'sequence': 0.5, 'structure': 0.5}
    
    model.get_track_weights = get_track_weights
    
    return model

In [10]:
def train_and_evaluate(struct_window=0):
    """Training function with improved regularization and training strategy"""
    # Load and prepare data (keep existing code)
    print("Loading data...")
    train_df = pd.read_csv("../data/processed_data_train_after.csv")
    test_df = pd.read_csv("../data/processed_data_test_after.csv")
    
    # Prepare data (keep existing data preparation code)
    X_train_seq = prepare_sequence_data(train_df)
    X_test_seq = prepare_sequence_data(test_df)
    X_train_struct = prepare_structure_data(train_df)
    X_test_struct = prepare_structure_data(test_df)
    
    y_train = train_df['label'].values
    y_test = test_df['label'].values
    
    # Calculate class weights (keep existing code)
    total_samples = len(y_train)
    pos_samples = np.sum(y_train == 1)
    neg_samples = np.sum(y_train == 0)
    
    # class_weights = {
    #     0: total_samples / (2 * neg_samples),
    #     1: total_samples / (2 * pos_samples)
    # }
    
    # Enhanced callbacks
    callbacks = [
        # Early stopping with more patience but stricter monitoring
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            mode='min'
        ),
        # More gradual learning rate reduction
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.2,  # More gentle reduction
            patience=5,
            min_lr=1e-6,
            mode='min',
            verbose=1
        ),
        # Model checkpoint to save best model
        tf.keras.callbacks.ModelCheckpoint(
            filepath='best_model_weights.keras',  # Changed from .h5 to .keras
            monitor='val_loss',
            save_best_only=True,
            mode='min',
            verbose=1
        )
    ]

    # Also modify class weights calculation for better balance
    class_weights = {
        0: 1,  # Start with equal weights
        1: pos_samples / neg_samples  # Adjust weight for minority class
    }
    
    # Cross validation setup
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    metrics = {'acc': [], 'balanced_acc': [], 'mcc': [], 'sn': [], 'sp': []}
    test_predictions = []
    track_weights_history = []
    
    # Print initial shapes
    print("\nInitial data shapes:")
    print(f"X_train_seq shape: {X_train_seq.shape}")
    print(f"X_train_struct shape: {X_train_struct.shape}")
    print(f"y_train shape: {y_train.shape}")
    
    for fold, (train_idx, val_idx) in enumerate(kfold.split(X_train_seq), 1):
        print(f"\nFold {fold}/5")
        
        # Print fold data shapes
        print("\nFold data shapes:")
        print(f"Train seq shape: {X_train_seq[train_idx].shape}")
        print(f"Train struct shape: {X_train_struct[train_idx].shape}")
        print(f"Train labels shape: {y_train[train_idx].shape}")
        print(f"Val seq shape: {X_train_seq[val_idx].shape}")
        print(f"Val struct shape: {X_train_struct[val_idx].shape}")
        print(f"Val labels shape: {y_train[val_idx].shape}")
        
        # Create and compile model
        model = create_combined_model(
            seq_length=33,
            struct_features=X_train_struct.shape[2],
            struct_window=struct_window
        )
        
        # Verify input shapes match model expectations
        print("\nModel input shapes:")
        print(f"Sequence input shape: {model.get_layer('sequence_input').input_shape}")
        print(f"Structure input shape: {model.get_layer('structure_input').input_shape}")
        
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4),
            loss='binary_crossentropy',
            metrics=['accuracy']
        )
        
        # Ensure data types are correct
        X_train_seq = X_train_seq.astype('float32')
        X_train_struct = X_train_struct.astype('float32')
        y_train = y_train.astype('float32')
        
        # Train with validation
        try:
            history = model.fit(
                [X_train_seq[train_idx], X_train_struct[train_idx]],
                y_train[train_idx],
                validation_data=(
                    [X_train_seq[val_idx], X_train_struct[val_idx]],
                    y_train[val_idx]
                ),
                batch_size=32,
                epochs=50,
                callbacks=callbacks,
                class_weight=class_weights,
                verbose=1
            )
        except Exception as e:
            print(f"\nError during training: {str(e)}")
            print("\nData information:")
            print(f"Training data types: {X_train_seq.dtype}, {X_train_struct.dtype}")
            print(f"Label data type: {y_train.dtype}")
            raise e
        
        # Plot training curves
        plt.figure(figsize=(12, 4))
        
        # Loss subplot
        plt.subplot(1, 2, 1)
        plt.plot(history.history['loss'], label='Training Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title(f'Model Loss - Fold {fold}')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        
        # Accuracy subplot
        plt.subplot(1, 2, 2)
        plt.plot(history.history['accuracy'], label='Training Accuracy')
        plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
        plt.title(f'Model Accuracy - Fold {fold}')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        
        plt.tight_layout()
        plt.show()
        
        # Evaluate on validation set
        y_pred = model.predict([X_train_seq[val_idx], X_train_struct[val_idx]])
        y_pred_binary = (y_pred > 0.5).astype(int)
        
        # Calculate metrics
        cm = confusion_matrix(y_train[val_idx], y_pred_binary)
        metrics['acc'].append(accuracy_score(y_train[val_idx], y_pred_binary))
        metrics['balanced_acc'].append(balanced_accuracy_score(y_train[val_idx], y_pred_binary))
        metrics['mcc'].append(matthews_corrcoef(y_train[val_idx], y_pred_binary))
        metrics['sn'].append(cm[1][1]/(cm[1][1]+cm[1][0]))
        metrics['sp'].append(cm[0][0]/(cm[0][0]+cm[0][1]))
        
        # Store track weights
        final_weights = model.get_track_weights()
        track_weights_history.append(final_weights)
        
        # Predict on test set
        test_pred = model.predict([X_test_seq, X_test_struct])
        test_predictions.append(test_pred)
        
        # Print fold results
        print(f"\nFold {fold} Results:")
        print(f"Accuracy: {metrics['acc'][-1]:.4f}")
        print(f"Balanced Accuracy: {metrics['balanced_acc'][-1]:.4f}")
        print(f"MCC: {metrics['mcc'][-1]:.4f}")
        print(f"Sensitivity: {metrics['sn'][-1]:.4f}")
        print(f"Specificity: {metrics['sp'][-1]:.4f}")
        print(f"Track weights: Sequence={final_weights['sequence']:.4f}, Structure={final_weights['structure']:.4f}")
    
    # Print final results
    print("\nFinal Cross-validation Results:")
    for metric in metrics:
        print(f"{metric.upper()}: {np.mean(metrics[metric]):.4f} ± {np.std(metrics[metric]):.4f}")
    
    # Calculate and print average track weights
    avg_seq_weight = np.mean([w['sequence'] for w in track_weights_history])
    avg_struct_weight = np.mean([w['structure'] for w in track_weights_history])
    print(f"\nAverage track weights:")
    print(f"Sequence: {avg_seq_weight:.4f} ± {np.std([w['sequence'] for w in track_weights_history]):.4f}")
    print(f"Structure: {avg_struct_weight:.4f} ± {np.std([w['structure'] for w in track_weights_history]):.4f}")
    
    return model

In [11]:
if __name__ == "__main__":
    model = train_and_evaluate()

Loading data...

Initial data shapes:
X_train_seq shape: (8850, 33)
X_train_struct shape: (8850, 33, 12)
y_train shape: (8850,)

Fold 1/5

Fold data shapes:
Train seq shape: (7080, 33)
Train struct shape: (7080, 33, 12)
Train labels shape: (7080,)
Val seq shape: (1770, 33)
Val struct shape: (1770, 33, 12)
Val labels shape: (1770,)

Model input shapes:




AttributeError: 'InputLayer' object has no attribute 'input_shape'

In [13]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, matthews_corrcoef, accuracy_score, balanced_accuracy_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import RobustScaler
import matplotlib.pyplot as plt
import random

# Set random seeds for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

def prepare_sequence_data(df):
    """Convert sequences to integer encoding with validation"""
    alphabet = 'ARNDCQEGHILKMFPSTWYV-'
    char_to_int = dict((c, i) for i, c in enumerate(alphabet))
    
    sequences = df['sequence'].values
    encodings = []
    
    for i, seq in enumerate(sequences):
        try:
            if pd.isna(seq):
                print(f"Warning: Found null sequence at index {i}")
                continue
            if not isinstance(seq, str):
                print(f"Warning: Non-string sequence at index {i}: {type(seq)}")
                continue
            integer_encoded = [char_to_int[char] for char in seq]
            encodings.append(integer_encoded)
        except Exception as e:
            print(f"Error processing sequence at index {i}: {str(e)}")
            print(f"Sequence: {seq}")
            continue
    
    encodings = np.array(encodings, dtype=np.float32)
    print(f"Sequence data shape: {encodings.shape}")
    return encodings

def prepare_structure_data(df):
    """Enhanced feature preparation with validation"""
    def normalize_angles(angle_array):
        angle_rad = np.pi * angle_array / 180.0
        return np.stack([np.sin(angle_rad), np.cos(angle_rad)], axis=-1)
    
    features_list = []
    
    # Process angles
    for angle in ['phi', 'psi', 'omega', 'tau']:
        try:
            angle_arrays = np.array([np.array(eval(x)) for x in df[angle]])
            angle_features = normalize_angles(angle_arrays)
            features_list.append(angle_features)
        except Exception as e:
            print(f"Error processing {angle}: {str(e)}")
            raise
    
    # Process SASA
    try:
        sasa_arrays = np.array([np.array(eval(x)) for x in df['sasa']])
        scaler = RobustScaler()
        sasa_flat = sasa_arrays.reshape(-1, 1)
        sasa_scaled = scaler.fit_transform(sasa_flat).reshape(sasa_arrays.shape)
        features_list.append(sasa_scaled[..., np.newaxis])
    except Exception as e:
        print(f"Error processing SASA: {str(e)}")
        raise
    
    # Process secondary structure
    try:
        ss_arrays = np.array([list(seq) for seq in df['ss']])
        ss_encoded = np.zeros((len(ss_arrays), ss_arrays.shape[1], 3))
        ss_map = {'H': 0, 'E': 1, 'L': 2}
        for i in range(len(ss_arrays)):
            for j in range(len(ss_arrays[i])):
                ss_encoded[i, j, ss_map[ss_arrays[i, j]]] = 1
        features_list.append(ss_encoded)
    except Exception as e:
        print(f"Error processing secondary structure: {str(e)}")
        raise
    
    # Combine features
    features = np.concatenate(features_list, axis=-1)
    features = features.astype(np.float32)
    print(f"Structure data shape: {features.shape}")
    return features

def create_combined_model(seq_length=33, struct_features=12, struct_window=0):
    """Create simplified model with both sequence and structure tracks"""
    middle_pos = seq_length // 2
    
    # Sequence track
    seq_input = tf.keras.layers.Input(shape=(seq_length,), name='sequence_input')
    x_seq = tf.keras.layers.Embedding(21, 21, input_length=seq_length)(seq_input)
    x_seq = tf.keras.layers.Conv1D(32, kernel_size=3, activation='relu')(x_seq)
    x_seq = tf.keras.layers.GlobalMaxPooling1D()(x_seq)
    x_seq = tf.keras.layers.Dense(32, activation='relu')(x_seq)
    x_seq = tf.keras.layers.Dropout(0.3)(x_seq)
    
    # Structure track
    struct_input = tf.keras.layers.Input(shape=(seq_length, struct_features), name='structure_input')
    
    if struct_window == 0:
        x_struct = tf.keras.layers.Lambda(
            lambda x: x[:, middle_pos:middle_pos+1, :]
        )(struct_input)
    else:
        x_struct = tf.keras.layers.Lambda(
            lambda x: x[:, middle_pos-struct_window:middle_pos+struct_window+1, :]
        )(struct_input)
    
    x_struct = tf.keras.layers.Conv1D(32, 3, padding='same', activation='relu')(x_struct)
    x_struct = tf.keras.layers.GlobalMaxPooling1D()(x_struct)
    x_struct = tf.keras.layers.Dense(32, activation='relu')(x_struct)
    x_struct = tf.keras.layers.Dropout(0.3)(x_struct)
    
    # Track weights
    track_weights = tf.keras.layers.Dense(
        2, activation='softmax', name='track_weights'
    )(tf.keras.layers.Concatenate()([x_seq, x_struct]))
    
    # Apply weights
    weighted_seq = tf.keras.layers.Multiply()(
        [x_seq, tf.keras.layers.Lambda(lambda x: x[:, 0:1])(track_weights)]
    )
    weighted_struct = tf.keras.layers.Multiply()(
        [x_struct, tf.keras.layers.Lambda(lambda x: x[:, 1:2])(track_weights)]
    )
    
    # Combine and predict
    combined = tf.keras.layers.Concatenate()([weighted_seq, weighted_struct])
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(combined)
    
    model = tf.keras.Model(inputs=[seq_input, struct_input], outputs=outputs)
    
    def get_track_weights():
        weights = model.get_layer('track_weights').get_weights()
        if len(weights) > 0:
            w = weights[0]
            b = weights[1] if len(weights) > 1 else 0
            exp_weights = np.exp(np.mean(w, axis=0) + b)
            normalized = exp_weights / np.sum(exp_weights)
            return {
                'sequence': float(normalized[0]),
                'structure': float(normalized[1])
            }
        return {'sequence': 0.5, 'structure': 0.5}
    
    model.get_track_weights = get_track_weights
    return model

def train_and_evaluate(struct_window=0):
    """Training function with improved monitoring and validation"""
    # Load data
    print("Loading data...")
    train_df = pd.read_csv("../data/processed_data_train_after.csv")
    test_df = pd.read_csv("../data/processed_data_test_after.csv")
    
    # Prepare data
    X_train_seq = prepare_sequence_data(train_df)
    X_test_seq = prepare_sequence_data(test_df)
    X_train_struct = prepare_structure_data(train_df)
    X_test_struct = prepare_structure_data(test_df)
    
    y_train = train_df['label'].values.astype(np.float32)
    y_test = test_df['label'].values.astype(np.float32)
    
    # Calculate class weights
    total = len(y_train)
    pos = np.sum(y_train == 1)
    neg = np.sum(y_train == 0)
    class_weights = {0: 1.0, 1: neg/pos}
    
    # Callbacks
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            mode='min'
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.2,
            patience=5,
            min_lr=1e-6,
            mode='min',
            verbose=1
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath='best_model.keras',
            monitor='val_loss',
            save_best_only=True,
            mode='min',
            verbose=1
        )
    ]
    
    # Cross validation
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    metrics = {'acc': [], 'balanced_acc': [], 'mcc': [], 'sn': [], 'sp': []}
    test_predictions = []
    track_weights_history = []
    
    for fold, (train_idx, val_idx) in enumerate(kfold.split(X_train_seq), 1):
        print(f"\nFold {fold}/5")
        
        model = create_combined_model(
            seq_length=33,
            struct_features=X_train_struct.shape[2],
            struct_window=struct_window
        )
        
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4),
            loss='binary_crossentropy',
            metrics=['accuracy']
        )
        
        history = model.fit(
            [X_train_seq[train_idx], X_train_struct[train_idx]],
            y_train[train_idx],
            validation_data=(
                [X_train_seq[val_idx], X_train_struct[val_idx]],
                y_train[val_idx]
            ),
            batch_size=32,
            epochs=50,
            callbacks=callbacks,
            class_weight=class_weights,
            verbose=1
        )
        
        # Evaluate fold
        y_pred = model.predict([X_train_seq[val_idx], X_train_struct[val_idx]])
        y_pred_binary = (y_pred > 0.5).astype(int)
        
        cm = confusion_matrix(y_train[val_idx], y_pred_binary)
        metrics['acc'].append(accuracy_score(y_train[val_idx], y_pred_binary))
        metrics['balanced_acc'].append(balanced_accuracy_score(y_train[val_idx], y_pred_binary))
        metrics['mcc'].append(matthews_corrcoef(y_train[val_idx], y_pred_binary))
        metrics['sn'].append(cm[1][1]/(cm[1][1]+cm[1][0]))
        metrics['sp'].append(cm[0][0]/(cm[0][0]+cm[0][1]))
        
        # Store weights and predictions
        track_weights_history.append(model.get_track_weights())
        test_pred = model.predict([X_test_seq, X_test_struct])
        test_predictions.append(test_pred)
        
        # Print fold results
        print(f"\nFold {fold} Results:")
        for metric in metrics:
            print(f"{metric.upper()}: {metrics[metric][-1]:.4f}")
        print(f"Track weights: {track_weights_history[-1]}")
        
        # Plot training curves
        plt.figure(figsize=(12, 4))
        plt.subplot(1, 2, 1)
        plt.plot(history.history['loss'], label='Training Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title(f'Loss - Fold {fold}')
        plt.legend()
        
        plt.subplot(1, 2, 2)
        plt.plot(history.history['accuracy'], label='Training Accuracy')
        plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
        plt.title(f'Accuracy - Fold {fold}')
        plt.legend()
        plt.show()
    
    # Print final results
    print("\nFinal Cross-validation Results:")
    for metric in metrics:
        mean_val = np.mean(metrics[metric])
        std_val = np.std(metrics[metric])
        print(f"{metric.upper()}: {mean_val:.4f} ± {std_val:.4f}")
    
    # Calculate ensemble predictions
    test_pred_avg = np.mean(test_predictions, axis=0)
    test_pred_binary = (test_pred_avg > 0.5).astype(int)
    
    # Final metrics
    final_cm = confusion_matrix(y_test, test_pred_binary)
    print("\nFinal Test Set Results:")
    print(f"Accuracy: {accuracy_score(y_test, test_pred_binary):.4f}")
    print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, test_pred_binary):.4f}")
    print(f"MCC: {matthews_corrcoef(y_test, test_pred_binary):.4f}")
    print(f"Sensitivity: {final_cm[1][1]/(final_cm[1][1]+final_cm[1][0]):.4f}")
    print(f"Specificity: {final_cm[0][0]/(final_cm[0][0]+final_cm[0][1]):.4f}")
    
    return model

if __name__ == "__main__":
    model = train_and_evaluate(struct_window=1)

Loading data...
Sequence data shape: (8850, 33)
Sequence data shape: (2737, 33)
Structure data shape: (8850, 33, 12)
Structure data shape: (2737, 33, 12)

Fold 1/5
Epoch 1/50




[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.5315 - loss: 0.6655
Epoch 1: val_loss improved from inf to 0.68917, saving model to best_model.keras


TypeError: Unsupported integer size (0)