# Part I: Prepare the training/test set for protein contact structure prediction

In [None]:
## mount the google drive

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
## specify the directory with the training and test dataset

#%cd '/content/drive/MyDrive/datasets/'
%cd '/content/drive/MyDrive/homework3_train'

/content/drive/MyDrive/homework3_train


In [None]:
## Unzip the training and test dataset
## Only need to run this once

#!unzip homework3_train.zip
#!unzip homework3_test.zip

In [None]:
# %cd homework3_train

In [None]:
## Load the features

import pickle
feature_file = '1at0A_features.pkl'
# Open the file in a context manager so the handle is closed as soon as the
# features are read (a bare open() inside pickle.load() leaves it dangling).
# NOTE: pickle can execute arbitrary code on load; only use trusted files.
with open(feature_file, 'rb') as feature_fh:
    features = pickle.load(feature_fh)
print(features.keys())

dict_keys(['seq', 'ss', 'sa', 'ccmpred', 'freecon', 'entropy', 'potential', 'pssm'])


In [None]:
## Load the labels

import numpy as np
label_file = '1at0A_labels.npy'
(seq_length, seq, distance_map) = np.load(label_file, allow_pickle = True)


In [None]:
# Binary contact map: an off-diagonal entry becomes 1 when its distance is < 8,
# everything else (including the whole diagonal) becomes 0.
mask = (np.eye(seq_length) == 0) & (distance_map < 8)
# np.where yields the 1/0 pattern; cast back so the dtype matches distance_map.
contact_map = np.where(mask, 1, 0).astype(distance_map.dtype)

In [None]:
## Define a data loader function
import os
import pickle
import numpy as np

def data_loader(dataset_dir):
    """
    Loads every protein found in a dataset directory.

    A protein is identified by a '<name>_features.pkl' file; its labels are read
    from the matching '<name>_labels.npy' file in the same directory.

    Parameters:
    - dataset_dir (str): Directory containing the feature and label files.

    Returns:
    - data (list of dicts): One dict per protein with the keys 'features'
      (the unpickled feature object), 'seq_length', 'seq' and 'contact_map'.
      Proteins are returned in sorted file-name order.

    Raises:
    - FileNotFoundError: If a feature file has no matching label file.
    """
    feature_suffix = '_features.pkl'
    data = []  # List to store the data for all proteins

    # os.listdir() returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/validation split) reproducible.
    for file_name in sorted(os.listdir(dataset_dir)):
        if not file_name.endswith(feature_suffix):
            continue

        # Strip exactly the trailing suffix to recover the protein name
        base_name = file_name[:-len(feature_suffix)]

        feature_file = os.path.join(dataset_dir, file_name)
        label_file = os.path.join(dataset_dir, f'{base_name}_labels.npy')

        # NOTE: pickle.load / allow_pickle=True can execute arbitrary code;
        # only point this loader at trusted dataset directories.
        with open(feature_file, 'rb') as f:
            features = pickle.load(f)

        # The label file stores (seq_length, seq, distance_map)
        seq_length, seq, distance_map = np.load(label_file, allow_pickle=True)

        # Contact map: 1 where the distance is < 8 and the pair is off-diagonal,
        # 0 elsewhere. Same dtype as the distance map.
        mask = (distance_map < 8) & (np.eye(seq_length) == 0)
        contact_map = mask.astype(distance_map.dtype)

        data.append({
            'features': features,  # Dictionary of features
            'seq_length': seq_length,  # Length of the protein sequence
            'seq': seq,  # Protein sequence
            'contact_map': contact_map  # Contact map
        })

    return data

In [None]:
## call the data loader function
# training_data = data_loader('/content/drive/MyDrive/datasets/homework3_train')
training_data = data_loader('/content/drive/MyDrive/homework3_train')
training_data[0]

{'features': {'seq': 'HMDINNKARIHWACRRGMRELDISIMPFFEHEYDSLSDDEKRIFIRLLECDDPDLFNWLMNHGKPADAELEMMVRLIQTRNRERGPVAI',
  'ss': array([[0.999, 0.911, 0.836, 0.077, 0.068, 0.031, 0.008, 0.004, 0.004,
          0.004, 0.005, 0.008, 0.021, 0.057, 0.066, 0.056, 0.036, 0.037,
          0.027, 0.018, 0.017, 0.015, 0.027, 0.163, 0.194, 0.025, 0.017,
          0.015, 0.011, 0.027, 0.065, 0.147, 0.14 , 0.265, 0.487, 0.984,
          0.989, 0.012, 0.01 , 0.006, 0.003, 0.003, 0.003, 0.003, 0.003,
          0.005, 0.013, 0.076, 0.412, 0.902, 0.928, 0.966, 0.026, 0.017,
          0.007, 0.003, 0.004, 0.01 , 0.02 , 0.373, 0.949, 0.975, 0.976,
          0.979, 0.99 , 0.99 , 0.258, 0.05 , 0.011, 0.015, 0.009, 0.005,
          0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.006, 0.011, 0.037,
          0.017, 0.118, 0.631, 0.978, 0.971, 0.914, 0.924, 0.999],
         [0.   , 0.086, 0.191, 0.928, 0.94 , 0.972, 0.994, 0.997, 0.997,
          0.997, 0.997, 0.994, 0.984, 0.954, 0.944, 0.953, 0.97 , 0.969,
          

In [None]:
print(training_data[0].keys())
print(training_data[0]['features'].keys())

dict_keys(['features', 'seq_length', 'seq', 'contact_map'])
dict_keys(['seq', 'ss', 'sa', 'ccmpred', 'freecon', 'entropy', 'potential', 'pssm'])


In [None]:
print(features)

{'seq': 'CFTPESTALLESGVRKPLGELSIGDRVLSMTANGQAVYSEVILFMDRNLEQMQNFVQLHTDGGAVLTVTPAHLVSVWQPESQKLTFVFADRIEEKNQVLVRDVETGELRPQRVVKVGSVRSKGVVAPLTREGTIVVNSVAASCYA', 'ss': array([[0.999, 0.823, 0.903, 0.984, 0.982, 0.977, 0.067, 0.016, 0.023,
        0.371, 0.942, 0.985, 0.982, 0.966, 0.051, 0.033, 0.036, 0.028,
        0.091, 0.068, 0.858, 0.842, 0.927, 0.974, 0.959, 0.059, 0.039,
        0.11 , 0.162, 0.353, 0.76 , 0.933, 0.958, 0.937, 0.791, 0.114,
        0.039, 0.046, 0.139, 0.07 , 0.026, 0.02 , 0.022, 0.03 , 0.044,
        0.177, 0.897, 0.966, 0.87 , 0.805, 0.732, 0.409, 0.107, 0.037,
        0.031, 0.017, 0.017, 0.019, 0.033, 0.397, 0.947, 0.988, 0.975,
        0.862, 0.074, 0.056, 0.04 , 0.096, 0.837, 0.901, 0.878, 0.531,
        0.206, 0.185, 0.094, 0.119, 0.231, 0.793, 0.938, 0.845, 0.765,
        0.813, 0.661, 0.291, 0.137, 0.127, 0.102, 0.16 , 0.305, 0.517,
        0.325, 0.494, 0.579, 0.429, 0.721, 0.772, 0.143, 0.027, 0.017,
        0.031, 0.142, 0.714, 0.938, 0.968, 0.959, 0.964,

In [None]:
for key, value in training_data[0]['features'].items():
    print(key, np.array(value).shape)


print('contact_map', np.array(training_data[0]['contact_map']).shape)

seq ()
ss (3, 89)
sa (89,)
ccmpred (89, 89)
freecon (89, 89)
entropy (89,)
potential (89, 89)
pssm (89, 22)
contact_map (89, 89)


In [None]:
print(training_data[0]['features']['ccmpred'])

[[0.     0.2363 0.2362 ... 0.2113 0.219  0.2585]
 [0.2363 0.     0.3447 ... 0.1581 0.1663 0.221 ]
 [0.2362 0.3447 0.     ... 0.1293 0.1469 0.2115]
 ...
 [0.2113 0.1581 0.1293 ... 0.     0.2754 0.2167]
 [0.219  0.1663 0.1469 ... 0.2754 0.     0.2281]
 [0.2585 0.221  0.2115 ... 0.2167 0.2281 0.    ]]


In [None]:
## Define data parser for feature generation

import numpy as np

def pad_matrix(matrix, target_size, pad_value=0):
    """
    Embeds a 2-D array in the top-left corner of a square array.

    Parameters:
    - matrix (numpy array): 2-D array to pad.
    - target_size (int): Side length of the square output.
    - pad_value: Value written to every cell not covered by `matrix`.

    Returns:
    - numpy array of shape (target_size, target_size) with the dtype of `matrix`.
    """
    canvas = np.full(shape=(target_size, target_size), fill_value=pad_value, dtype=matrix.dtype)
    n_rows, n_cols = matrix.shape
    canvas[0:n_rows, 0:n_cols] = matrix
    return canvas

def pad_vector(vector, target_size, pad_value=0):
    """
    Extends a 1-D array to a fixed length by appending a fill value.

    Parameters:
    - vector (numpy array): Array whose leading axis is copied into the output.
    - target_size (int): Length of the output.
    - pad_value: Value written to the positions after the end of `vector`.

    Returns:
    - numpy array of shape (target_size,) with the dtype of `vector`.
    """
    result = np.full(shape=(target_size,), fill_value=pad_value, dtype=vector.dtype)
    length = vector.shape[0]
    result[0:length] = vector
    return result

def prepare_batch_input_output(dataset, max_seq_length=None,
                               pairwise_keys=('ccmpred', 'freecon', 'potential')):
    """
    Processes multiple protein samples into padded input features (X) and labels (Y).

    Every protein is zero-padded to a common side length. The channels of X are,
    in order: one channel per entry of `pairwise_keys`, then for each of the
    per-residue features pssm, ss, sa and entropy a "row" copy (value of residue i
    at position (i, j)) followed by a "column" copy (value of residue j).

    Parameters:
    - dataset (iterable of dicts): Each dict contains a protein's 'features',
      'seq_length' and 'contact_map'.
    - max_seq_length (int, optional): Side length to pad to. Defaults to the
      longest sequence in `dataset`. Pass the training-set value when preparing
      another split so that all tensors share one shape.
    - pairwise_keys (sequence of str): Names of the (L, L) feature matrices to
      use as leading channels.

    Returns:
    - X (numpy array): float64 input features with shape (N, max_W, max_H, D),
      where D = len(pairwise_keys) + 2 * (pssm width + ss width + 2).
    - Y (numpy array): Contact map labels with shape (N, max_W, max_H, 1).

    Raises:
    - ValueError: If `dataset` is empty, if `max_seq_length` is shorter than the
      longest sequence, or if the samples do not share one channel count.
    """
    dataset = list(dataset)
    if not dataset:
        raise ValueError("dataset is empty; cannot build a batch")

    longest = max(int(sample['seq_length']) for sample in dataset)
    if max_seq_length is None:
        size = longest
    else:
        size = int(max_seq_length)
        if size < longest:
            raise ValueError(
                f"max_seq_length={size} is shorter than the longest sequence ({longest})"
            )

    X_batch = None  # Allocated once the channel count is known
    all_Y = []

    for index, sample in enumerate(dataset):
        features = sample['features']

        # Per-residue features, each arranged as (L, width)
        tracks = [
            np.asarray(features['pssm']),                       # (L, 22) in this notebook's data
            np.asarray(features['ss']).T,                       # stored as (3, L) -> (L, 3)
            np.asarray(features['sa'])[:, np.newaxis],          # (L, 1)
            np.asarray(features['entropy'])[:, np.newaxis],     # (L, 1)
        ]
        depth = len(pairwise_keys) + 2 * sum(track.shape[1] for track in tracks)

        if X_batch is None:
            # One zero-initialised array for the whole batch: padding is already
            # in place and no per-sample copies have to be stacked at the end.
            X_batch = np.zeros((len(dataset), size, size, depth), dtype=np.float64)
        elif depth != X_batch.shape[-1]:
            raise ValueError(
                f"sample {index} yields {depth} channels, expected {X_batch.shape[-1]}"
            )

        X = X_batch[index]  # View: writes go straight into the batch array

        # Pairwise matrices occupy the leading channels
        for channel, key in enumerate(pairwise_keys):
            matrix = np.asarray(features[key])
            X[:matrix.shape[0], :matrix.shape[1], channel] = matrix

        # Each per-residue feature is broadcast along rows, then along columns.
        # The broadcast spans the full padded axis, so a valid residue's value is
        # also present opposite padded positions.
        channel = len(pairwise_keys)
        for track in tracks:
            length, width = track.shape
            X[:length, :, channel:channel + width] = track[:, np.newaxis, :]
            channel += width
            X[:, :length, channel:channel + width] = track[np.newaxis, :, :]
            channel += width

        # Zero-padded contact map with a trailing channel axis
        contact_map = np.asarray(sample['contact_map'])
        Y = np.zeros((size, size, 1), dtype=contact_map.dtype)
        Y[:contact_map.shape[0], :contact_map.shape[1], 0] = contact_map
        all_Y.append(Y)

    Y_batch = np.array(all_Y)  # Shape (N, max_W, max_H, 1)

    return X_batch, Y_batch

In [None]:
X_train, Y_train = prepare_batch_input_output(training_data)

print('X_train.shape: ', X_train.shape)
print('Y_train.shape: ', Y_train.shape)

X_train.shape:  (500, 149, 149, 57)
Y_train.shape:  (500, 149, 149, 1)


In [None]:
from sklearn.model_selection import train_test_split

# Split the data into training and validation sets (20% held out for validation,
# fixed random_state so the same input always yields the same split).
# NOTE: this rebinds X_train / Y_train to the training subset, so the cell is not
# idempotent. Re-running it without first re-running the cell that builds
# X_train / Y_train splits the already-split arrays again.
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)

In [None]:
print('X_train.shape: ', X_train.shape)
print('Y_train.shape: ', Y_train.shape)
print('X_val.shape: ', X_val.shape)
print('Y_val.shape: ', Y_val.shape)

X_train.shape:  (400, 149, 149, 57)
Y_train.shape:  (400, 149, 149, 1)
X_val.shape:  (100, 149, 149, 57)
Y_val.shape:  (100, 149, 149, 1)


In [None]:
# test_data = data_loader('/content/drive/MyDrive/datasets/homework3_test')
test_data = data_loader('/content/drive/MyDrive/homework3_test')
test_data[0]

{'features': {'seq': 'PAVVHLQGQGSAIQVKNDLSGGVLNDWSRITMNPKVFKLHPRSGELEVLVDGTYFIYSQVYYINFTDFASYEVVVDEKPFLQCTRSIETGKTNYNTCYTAGVCLLKARQKIAVKMVHADISINMSKHTTFFGAIRLGEAP',
  'ss': array([[0.998, 0.509, 0.136, 0.04 , 0.045, 0.041, 0.061, 0.173, 0.638,
          0.897, 0.741, 0.446, 0.262, 0.36 , 0.604, 0.796, 0.879, 0.854,
          0.798, 0.844, 0.883, 0.934, 0.39 , 0.193, 0.451, 0.693, 0.745,
          0.268, 0.227, 0.5  , 0.68 , 0.853, 0.921, 0.9  , 0.794, 0.58 ,
          0.251, 0.168, 0.049, 0.097, 0.675, 0.814, 0.965, 0.971, 0.061,
          0.013, 0.015, 0.057, 0.888, 0.904, 0.947, 0.094, 0.021, 0.017,
          0.024, 0.026, 0.028, 0.029, 0.028, 0.026, 0.023, 0.022, 0.041,
          0.471, 0.919, 0.964, 0.878, 0.681, 0.566, 0.186, 0.129, 0.035,
          0.016, 0.02 , 0.067, 0.966, 0.991, 0.066, 0.058, 0.348, 0.228,
          0.118, 0.311, 0.33 , 0.287, 0.417, 0.651, 0.763, 0.889, 0.941,
          0.968, 0.927, 0.906, 0.828, 0.503, 0.337, 0.178, 0.146, 0.068,
          0.065, 0.049, 0.

In [None]:
X_test, Y_test = prepare_batch_input_output(test_data)

print('X_test.shape: ', X_test.shape)
print('Y_test.shape: ', Y_test.shape)

X_test.shape:  (100, 149, 149, 57)
Y_test.shape:  (100, 149, 149, 1)


# Part II: Apply Convolutional Neural Network on protein 2D contact prediction



## Task 1: Create the models' architectures

### Design a CNN Model

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, Dropout

def build_cnn_model(input_shape):
    """
    Creates and compiles a CNN model for protein contact prediction.

    The network is fully convolutional with 'same' padding throughout, so the
    output keeps the spatial size of the input and carries one sigmoid value per
    position.

    Parameters:
    - input_shape (tuple): Shape of the input tensor (W, H, D_total).

    Returns:
    - model (tf.keras.Model): Compiled CNN model (binary cross-entropy loss,
      Adam optimizer, accuracy metric).
    """
    # Create Sequential Model
    CNN_model = Sequential()

    # Declare the input with an explicit Input object. Passing `input_shape` to
    # the first Conv2D is what makes Keras emit the "Do not pass an
    # `input_shape`/`input_dim` argument to a layer" warning.
    CNN_model.add(tf.keras.Input(shape=input_shape))

    # First convolutional block
    CNN_model.add(Conv2D(32, (3,3), padding='same', activation='relu'))
    CNN_model.add(BatchNormalization())
    CNN_model.add(Dropout(0.2))

    # Second convolutional block
    CNN_model.add(Conv2D(64, (3,3), padding='same', activation='relu'))
    CNN_model.add(BatchNormalization())
    CNN_model.add(Dropout(0.2))

    # Third convolutional block (Dilated Convolution to Expand Receptive Field)
    CNN_model.add(Conv2D(128, (3,3), dilation_rate=2, padding='same', activation='relu'))
    CNN_model.add(BatchNormalization())
    CNN_model.add(Dropout(0.2))

    # Fourth convolutional block (Another Dilated Conv)
    CNN_model.add(Conv2D(64, (3,3), dilation_rate=2, padding='same', activation='relu'))
    CNN_model.add(BatchNormalization())
    CNN_model.add(Dropout(0.3))

    # Output Layer (1x1 Conv for Binary Classification)
    CNN_model.add(Conv2D(1, (1,1), activation='sigmoid', padding='same'))

    # Compile the Model
    CNN_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])

    return CNN_model

### Design a ResNet Model

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Add, Activation, Input, Conv2D, BatchNormalization, MaxPooling2D, Dropout, UpSampling2D

def residual_block(x, filters, kernel_size=(3, 3), strides=(1, 1), use_dropout=False, dropout_rate=0.2):
    """
    Creates a residual block with two convolutional layers and a skip connection.

    Parameters:
    - x: Input Keras tensor (channels last).
    - filters (int): Number of filters of both convolutions, i.e. output channels.
    - kernel_size (tuple): Kernel size of both convolutions.
    - strides (int or tuple): Strides of the first convolution.
    - use_dropout (bool): Whether to apply dropout to the block output.
    - dropout_rate (float): Dropout rate used when `use_dropout` is True.

    Returns:
    - Output Keras tensor of the block.
    """
    shortcut = x
    x = Conv2D(filters, kernel_size, strides=strides, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(filters, kernel_size, padding='same')(x)
    x = BatchNormalization()(x)

    # The shortcut must be projected whenever the main path changes the tensor
    # shape: a different channel count OR a non-unit stride. Checking channels
    # only would make Add() fail for a strided block that keeps its channels.
    stride_values = (strides,) if isinstance(strides, int) else tuple(strides)
    changes_resolution = any(stride != 1 for stride in stride_values)
    if shortcut.shape[-1] != filters or changes_resolution:
        shortcut = Conv2D(filters, (1, 1), strides=strides, padding='same')(shortcut)
        shortcut = BatchNormalization()(shortcut)

    x = Add()([shortcut, x])
    x = Activation('relu')(x)

    if use_dropout:
        x = Dropout(dropout_rate)(x)
    return x

def build_resnet_model(input_shape):
    """
    Builds and compiles a ResNet-style network for protein contact prediction.

    Parameters:
    - input_shape (tuple): Shape of one input sample (W, H, D_total).

    Returns:
    - Compiled Keras Model whose output has one sigmoid channel per position.
    """
    inputs = Input(shape=input_shape)

    # Stem: 7x7 convolution -> batch norm -> ReLU. No pooling follows, so the
    # spatial size of the input is kept.
    stem = Conv2D(64, (7, 7), padding='same')(inputs)
    stem = BatchNormalization()(stem)
    x = Activation('relu')(stem)

    # Six residual blocks, two per width. All of them use the default (1, 1)
    # strides and dropout.
    for block_filters in (64, 64, 128, 128, 256, 256):
        x = residual_block(x, filters=block_filters, use_dropout=True)

    # 1x1 convolution maps the features to a single probability per position
    outputs = Conv2D(1, (1, 1), activation='sigmoid', padding='same')(x)

    model = Model(inputs, outputs)
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])

    return model

## Task 2: Run the models on a simulated dataset

In [None]:
# Generate a simulated dataset: X has shape (num_samples, 149, 149, 57)
# and Y has shape (num_samples, 149, 149, 1)

import numpy as np

# Define dataset dimensions
num_samples = 2   # Number of protein samples
seq_length = 149  # Sequence length (W, H)
num_features = 57  # Number of feature channels
output_channels = 1  # Binary contact map output

# Seeded generator so the simulated data is identical on every run
sim_rng = np.random.default_rng(42)

# Generate random input data (X) with values in [0, 1)
X_simulated = sim_rng.random((num_samples, seq_length, seq_length, num_features), dtype=np.float32)

# Generate random binary output data (Y) with values 0 or 1
Y_simulated = sim_rng.integers(0, 2, size=(num_samples, seq_length, seq_length, output_channels)).astype(np.float32)

# Display the generated dataset shapes
print("X_simulated shape:", X_simulated.shape)
print("Y_simulated shape:", Y_simulated.shape)

X_simulated shape: (2, 149, 149, 57)
Y_simulated shape: (2, 149, 149, 1)


### Train the CNN model with simulated dataset

In [None]:
# Take the input shape from the simulated data instead of hard-coding it, so the
# model always matches the array it is trained on.
input_shape = tuple(X_simulated.shape[1:])
CNN_model = build_cnn_model(input_shape)
CNN_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train the CNN model using the simulated dataset
CNN_model.fit(X_simulated, Y_simulated, epochs=5, batch_size=32)

Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10s/step - accuracy: 0.4972 - loss: 0.9960
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.5088 - loss: 0.9280
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.5109 - loss: 0.8985
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.5097 - loss: 0.8757
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.5133 - loss: 0.8594


<keras.src.callbacks.history.History at 0x7dfa783d2290>

### Train the ResNet Model with simulated dataset

In [None]:
# Take the input shape from the simulated data instead of hard-coding it, so the
# model always matches the array it is trained on.
input_shape = tuple(X_simulated.shape[1:])
resnet_model = build_resnet_model(input_shape)
resnet_model.summary()

In [None]:
# Train the ResNet model using the simulated dataset
resnet_model.fit(X_simulated, Y_simulated, epochs=5, batch_size=32)

Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20s/step - accuracy: 0.5042 - loss: 1.1629
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.5019 - loss: 1.0219
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.5038 - loss: 0.9145
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.5028 - loss: 0.8896
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.5075 - loss: 0.8784


<keras.src.callbacks.history.History at 0x7dfc04ee6990>

## Task 3: Train the models using the original dataset

### Train the CNN model using the original dataset

In [None]:
# Take the input shape from the training tensor instead of hard-coding it: the
# padded size depends on the longest sequence in the dataset.
input_shape = tuple(X_train.shape[1:])
CNN_model_1 = build_cnn_model(input_shape)
CNN_model_1.summary()

In [None]:
CNN_model_1.fit(X_train, Y_train, validation_data = (X_val, Y_val), epochs=5, batch_size=32)

Epoch 1/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 974ms/step - accuracy: 0.5746 - loss: 0.8457 - val_accuracy: 0.9508 - val_loss: 0.6094
Epoch 2/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 86ms/step - accuracy: 0.7975 - loss: 0.6027 - val_accuracy: 0.9663 - val_loss: 0.5109
Epoch 3/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/step - accuracy: 0.9296 - loss: 0.4893 - val_accuracy: 0.9651 - val_loss: 0.4139
Epoch 4/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/step - accuracy: 0.9549 - loss: 0.4090 - val_accuracy: 0.9647 - val_loss: 0.3359
Epoch 5/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 86ms/step - accuracy: 0.9638 - loss: 0.3543 - val_accuracy: 0.9637 - val_loss: 0.2669


<keras.src.callbacks.history.History at 0x7dfc09e4abd0>

### Train the ResNet model using the original dataset

In [None]:
# Take the input shape from the training tensor instead of hard-coding it: the
# padded size depends on the longest sequence in the dataset.
input_shape = tuple(X_train.shape[1:])
resnet_model_1 = build_resnet_model(input_shape)
resnet_model_1.summary()

In [None]:
resnet_model_1.fit(X_train, Y_train, validation_data = (X_val, Y_val), epochs=5, batch_size=32)

Epoch 1/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 2s/step - accuracy: 0.7443 - loss: 0.7865 - val_accuracy: 0.8337 - val_loss: 3.6920
Epoch 2/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 270ms/step - accuracy: 0.9600 - loss: 0.1181 - val_accuracy: 0.5433 - val_loss: 1.5421
Epoch 3/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 270ms/step - accuracy: 0.9626 - loss: 0.1090 - val_accuracy: 0.7204 - val_loss: 1.4966
Epoch 4/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 270ms/step - accuracy: 0.9642 - loss: 0.1034 - val_accuracy: 0.8871 - val_loss: 0.2661
Epoch 5/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 270ms/step - accuracy: 0.9656 - loss: 0.1001 - val_accuracy: 0.9421 - val_loss: 0.1619


<keras.src.callbacks.history.History at 0x7dfc09d27b50>

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_model_in_batches(model, X, Y, dataset_name="Dataset", batch_size=2):
    """
    Evaluates the model on a given dataset using batch-wise predictions
    to avoid memory overload.

    Metrics are computed over every position of every map after thresholding the
    predicted probabilities at 0.5. No position is masked out, so any padded
    cells present in Y are scored like real ones.

    Parameters:
    - model: Trained Keras model.
    - X: Input features (N, W, H, D_total).
    - Y: Ground truth contact maps (N, W, H, 1).
    - dataset_name: Name of the dataset (e.g., "Training", "Validation", "Testing").
      Currently unused; kept so existing calls keep working.
    - batch_size: Number of samples processed per batch.

    Returns:
    - accuracy, precision, recall, f1-score. Because zero_division=1 is used,
      an undefined precision/recall/F1 (e.g. no positive predictions) is
      reported as 1.0.

    Raises:
    - ValueError: If X and Y contain a different number of samples.
    """
    num_samples = X.shape[0]  # Total number of samples
    if Y.shape[0] != num_samples:
        raise ValueError(
            f"X has {num_samples} samples but Y has {Y.shape[0]}"
        )
    Y_pred_probs = np.zeros(Y.shape, dtype=np.float32)  # Placeholder for predictions

    # Process data in smaller chunks (batch-wise)
    for i in range(0, num_samples, batch_size):
        X_batch = X[i:i + batch_size]
        # verbose=0: otherwise every call prints its own progress bar, i.e.
        # one output line per batch.
        Y_pred_probs[i:i + batch_size] = model.predict(X_batch, verbose=0)

    # Convert probabilities to binary predictions
    Y_pred_binary = (Y_pred_probs > 0.5).astype(int)

    # Flatten arrays for metric computation
    Y_true_flat = Y.flatten()
    Y_pred_flat = Y_pred_binary.flatten()

    # Compute evaluation metrics
    accuracy = accuracy_score(Y_true_flat, Y_pred_flat)
    precision = precision_score(Y_true_flat, Y_pred_flat, zero_division=1)
    recall = recall_score(Y_true_flat, Y_pred_flat, zero_division=1)
    f1 = f1_score(Y_true_flat, Y_pred_flat, zero_division=1)

    return accuracy, precision, recall, f1

In [None]:
acc_train, prec_train, rec_train, f1_train = evaluate_model_in_batches(CNN_model_1, X_train, Y_train, "Training Dataset", batch_size=2)
acc_val, prec_val, rec_val, f1_val = evaluate_model_in_batches(CNN_model_1, X_val, Y_val, "Validation Dataset", batch_size=2)
acc_test, prec_test, rec_test, f1_test = evaluate_model_in_batches(CNN_model_1, X_test, Y_test, "Testing Dataset", batch_size=2)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 601ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [None]:
print("Metrics on Training Dataset:")
print(f"Accuracy: {acc_train:.4f}")
print(f"Precision: {prec_train:.4f}")
print(f"Recall: {rec_train:.4f}")
print(f"F1-Score: {f1_train:.4f}\n")

print("Metrics on Validation Dataset:")
print(f"Accuracy: {acc_val:.4f}")
print(f"Precision: {prec_val:.4f}")
print(f"Recall: {rec_val:.4f}")
print(f"F1-Score: {f1_val:.4f}\n")

print("Metrics on Testing Dataset:")
print(f"Accuracy: {acc_test:.4f}")
print(f"Precision: {prec_test:.4f}")
print(f"Recall: {rec_test:.4f}")
print(f"F1-Score: {f1_test:.4f}")

Metrics on Training Dataset:
Accuracy: 0.9625
Precision: 0.9835
Recall: 0.1039
F1-Score: 0.1879

Metrics on Validation Dataset:
Accuracy: 0.9637
Precision: 0.9847
Recall: 0.1113
F1-Score: 0.2000

Metrics on Testing Dataset:
Accuracy: 0.9649
Precision: 0.9843
Recall: 0.1150
F1-Score: 0.2059


In [None]:
acc_train, prec_train, rec_train, f1_train = evaluate_model_in_batches(resnet_model_1, X_train, Y_train, "Training Dataset", batch_size=2)
acc_val, prec_val, rec_val, f1_val = evaluate_model_in_batches(resnet_model_1, X_val, Y_val, "Validation Dataset", batch_size=2)
acc_test, prec_test, rec_test, f1_test = evaluate_model_in_batches(resnet_model_1, X_test, Y_test, "Testing Dataset", batch_size=2)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms

In [None]:
print("Metrics on Training Dataset:")
print(f"Accuracy: {acc_train:.4f}")
print(f"Precision: {prec_train:.4f}")
print(f"Recall: {rec_train:.4f}")
print(f"F1-Score: {f1_train:.4f}\n")

print("Metrics on Validation Dataset:")
print(f"Accuracy: {acc_val:.4f}")
print(f"Precision: {prec_val:.4f}")
print(f"Recall: {rec_val:.4f}")
print(f"F1-Score: {f1_val:.4f}\n")

print("Metrics on Testing Dataset:")
print(f"Accuracy: {acc_test:.4f}")
print(f"Precision: {prec_test:.4f}")
print(f"Recall: {rec_test:.4f}")
print(f"F1-Score: {f1_test:.4f}")

Metrics on Training Dataset:
Accuracy: 0.9392
Precision: 0.3497
Recall: 0.5302
F1-Score: 0.4214

Metrics on Validation Dataset:
Accuracy: 0.9421
Precision: 0.3552
Recall: 0.5132
F1-Score: 0.4198

Metrics on Testing Dataset:
Accuracy: 0.9425
Precision: 0.3472
Recall: 0.5128
F1-Score: 0.4141


## Task 4: Ablation study

### CNN Model

In [None]:
## Define data parser for the ablation study (ccmpred and freecon are left out of the input features)

import numpy as np

def prepare_batch_input_output_ablation(dataset, max_seq_length=None, pairwise_keys=('potential',)):
    """
    Processes multiple protein samples into padded input features (X) and labels (Y)
    for the ablation study, i.e. with a reduced set of pairwise feature matrices.

    Every protein is zero-padded to a common side length. The channels of X are,
    in order: one channel per entry of `pairwise_keys`, then for each of the
    per-residue features pssm, ss, sa and entropy a "row" copy (value of residue i
    at position (i, j)) followed by a "column" copy (value of residue j).

    Parameters:
    - dataset (iterable of dicts): Each dict contains a protein's 'features',
      'seq_length' and 'contact_map'.
    - max_seq_length (int, optional): Side length to pad to. Defaults to the
      longest sequence in `dataset`. Pass the training-set value when preparing
      another split so that all tensors share one shape.
    - pairwise_keys (sequence of str): Names of the (L, L) feature matrices to
      keep as leading channels. The default keeps only 'potential'.

    Returns:
    - X (numpy array): float64 input features with shape (N, max_W, max_H, D),
      where D = len(pairwise_keys) + 2 * (pssm width + ss width + 2).
    - Y (numpy array): Contact map labels with shape (N, max_W, max_H, 1).

    Raises:
    - ValueError: If `dataset` is empty, if `max_seq_length` is shorter than the
      longest sequence, or if the samples do not share one channel count.
    """
    dataset = list(dataset)
    if not dataset:
        raise ValueError("dataset is empty; cannot build a batch")

    longest = max(int(sample['seq_length']) for sample in dataset)
    if max_seq_length is None:
        size = longest
    else:
        size = int(max_seq_length)
        if size < longest:
            raise ValueError(
                f"max_seq_length={size} is shorter than the longest sequence ({longest})"
            )

    X_batch = None  # Allocated once the channel count is known
    all_Y = []

    for index, sample in enumerate(dataset):
        features = sample['features']

        # Per-residue features, each arranged as (L, width)
        tracks = [
            np.asarray(features['pssm']),                       # (L, 22) in this notebook's data
            np.asarray(features['ss']).T,                       # stored as (3, L) -> (L, 3)
            np.asarray(features['sa'])[:, np.newaxis],          # (L, 1)
            np.asarray(features['entropy'])[:, np.newaxis],     # (L, 1)
        ]
        depth = len(pairwise_keys) + 2 * sum(track.shape[1] for track in tracks)

        if X_batch is None:
            # One zero-initialised array for the whole batch: padding is already
            # in place and no per-sample copies have to be stacked at the end.
            X_batch = np.zeros((len(dataset), size, size, depth), dtype=np.float64)
        elif depth != X_batch.shape[-1]:
            raise ValueError(
                f"sample {index} yields {depth} channels, expected {X_batch.shape[-1]}"
            )

        X = X_batch[index]  # View: writes go straight into the batch array

        # Retained pairwise matrices occupy the leading channels
        for channel, key in enumerate(pairwise_keys):
            matrix = np.asarray(features[key])
            X[:matrix.shape[0], :matrix.shape[1], channel] = matrix

        # Each per-residue feature is broadcast along rows, then along columns.
        # The broadcast spans the full padded axis, so a valid residue's value is
        # also present opposite padded positions.
        channel = len(pairwise_keys)
        for track in tracks:
            length, width = track.shape
            X[:length, :, channel:channel + width] = track[:, np.newaxis, :]
            channel += width
            X[:, :length, channel:channel + width] = track[np.newaxis, :, :]
            channel += width

        # Zero-padded contact map with a trailing channel axis
        contact_map = np.asarray(sample['contact_map'])
        Y = np.zeros((size, size, 1), dtype=contact_map.dtype)
        Y[:contact_map.shape[0], :contact_map.shape[1], 0] = contact_map
        all_Y.append(Y)

    Y_batch = np.array(all_Y)  # Shape (N, max_W, max_H, 1)

    return X_batch, Y_batch

In [None]:
X_train_ablation, Y_train_ablation = prepare_batch_input_output_ablation(training_data)
X_test_ablation, Y_test_ablation = prepare_batch_input_output_ablation(test_data)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the prepared training samples as a validation set; the fixed
# random_state makes the shuffle reproducible.
# NOTE(review): the result is written back into X_train_ablation /
# Y_train_ablation, so this cell is not idempotent -- re-running it without
# first re-running the preparation cell splits the already-reduced training
# set again.
X_train_ablation, X_val_ablation, Y_train_ablation, Y_val_ablation = train_test_split(X_train_ablation, Y_train_ablation, test_size=0.2, random_state=42)

In [None]:
# Report the shape of every ablation split (inputs X and labels Y) so the
# train / validation / test sizes can be checked at a glance.
print("\n".join(
    f"{caption} {array.shape}"
    for caption, array in (
        ('X_train_ablation.shape: ', X_train_ablation),
        ('Y_train_ablation.shape: ', Y_train_ablation),
        ('X_val_ablation.shape: ', X_val_ablation),
        ('Y_val_ablation.shape: ', Y_val_ablation),
        ('X_test_ablation.shape: ', X_test_ablation),
        ('Y_test_ablation.shape: ', Y_test_ablation),
    )
))

X_train_ablation.shape:  (400, 149, 149, 55)
Y_train_ablation.shape:  (400, 149, 149, 1)
X_val_ablation.shape:  (100, 149, 149, 55)
Y_val_ablation.shape:  (100, 149, 149, 1)
X_test_ablation.shape:  (100, 149, 149, 55)
Y_test_ablation.shape:  (100, 149, 149, 1)


In [None]:
# Derive the per-sample input shape from the prepared data instead of
# hard-coding (149, 149, 55): dropping the leading batch axis keeps the model
# in sync with whatever padding length and channel count the preparation step
# actually produced.
input_shape = X_train_ablation.shape[1:]

# Every split is fed to the same model, so they must agree on that shape.
assert X_val_ablation.shape[1:] == input_shape, "validation inputs do not match training input shape"
assert X_test_ablation.shape[1:] == input_shape, "test inputs do not match training input shape"

# Build the CNN for the ablation feature set and show its layer summary.
CNN_model_2 = build_cnn_model(input_shape)
CNN_model_2.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train the CNN on the ablation inputs for 5 epochs in mini-batches of 32,
# scoring the held-out validation split after every epoch.
# The call is deliberately left as the cell's last expression, so the returned
# History object is echoed as the cell output.
CNN_model_2.fit(
    X_train_ablation,
    Y_train_ablation,
    batch_size=32,
    epochs=5,
    validation_data=(X_val_ablation, Y_val_ablation),
)

Epoch 1/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 706ms/step - accuracy: 0.4960 - loss: 0.8746 - val_accuracy: 0.8189 - val_loss: 0.6633
Epoch 2/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 84ms/step - accuracy: 0.5721 - loss: 0.6968 - val_accuracy: 0.9575 - val_loss: 0.5959
Epoch 3/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 84ms/step - accuracy: 0.6903 - loss: 0.6283 - val_accuracy: 0.9592 - val_loss: 0.5391
Epoch 4/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 84ms/step - accuracy: 0.8093 - loss: 0.5720 - val_accuracy: 0.9592 - val_loss: 0.4931
Epoch 5/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 84ms/step - accuracy: 0.8845 - loss: 0.5150 - val_accuracy: 0.9592 - val_loss: 0.4375


<keras.src.callbacks.history.History at 0x7dfc09b21ed0>

In [None]:
# Score the trained CNN on each split, two samples per prediction batch.
# The four returned values are unpacked in the order suggested by the target
# names: accuracy, precision, recall, F1.
# NOTE: the misspelt `acc_train_abalation` is kept on purpose -- the reporting
# cell that follows reads exactly that name.
(
    acc_train_abalation,
    prec_train_ablation,
    rec_train_ablation,
    f1_train_ablation,
) = evaluate_model_in_batches(
    CNN_model_2,
    X_train_ablation,
    Y_train_ablation,
    "Training Dataset",
    batch_size=2,
)

(
    acc_val_ablation,
    prec_val_ablation,
    rec_val_ablation,
    f1_val_ablation,
) = evaluate_model_in_batches(
    CNN_model_2,
    X_val_ablation,
    Y_val_ablation,
    "Validation Dataset",
    batch_size=2,
)

(
    acc_test_ablation,
    prec_test_ablation,
    rec_test_ablation,
    f1_test_ablation,
) = evaluate_model_in_batches(
    CNN_model_2,
    X_test_ablation,
    Y_test_ablation,
    "Testing Dataset",
    batch_size=2,
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 588ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4

In [None]:
# Print accuracy / precision / recall / F1 (4 decimal places) for each split:
# one paragraph per split, paragraphs separated by a blank line.
print("\n\n".join(
    f"{heading}\n"
    f"Accuracy: {accuracy:.4f}\n"
    f"Precision: {precision:.4f}\n"
    f"Recall: {recall:.4f}\n"
    f"F1-Score: {f1_score:.4f}"
    for heading, accuracy, precision, recall, f1_score in (
        ("Metrics on Training Dataset:", acc_train_abalation, prec_train_ablation, rec_train_ablation, f1_train_ablation),
        ("Metrics on Validation Dataset:", acc_val_ablation, prec_val_ablation, rec_val_ablation, f1_val_ablation),
        ("Metrics on Testing Dataset:", acc_test_ablation, prec_test_ablation, rec_test_ablation, f1_test_ablation),
    )
))

Metrics on Training Dataset:
Accuracy: 0.9582
Precision: 1.0000
Recall: 0.0000
F1-Score: 0.0000

Metrics on Validation Dataset:
Accuracy: 0.9592
Precision: 1.0000
Recall: 0.0000
F1-Score: 0.0000

Metrics on Testing Dataset:
Accuracy: 0.9604
Precision: 1.0000
Recall: 0.0000
F1-Score: 0.0000


### ResNet Model

In [None]:
# Derive the per-sample input shape from the prepared data instead of
# hard-coding (149, 149, 55): dropping the leading batch axis keeps the model
# in sync with whatever padding length and channel count the preparation step
# actually produced.
input_shape = X_train_ablation.shape[1:]

# Every split is fed to the same model, so they must agree on that shape.
assert X_val_ablation.shape[1:] == input_shape, "validation inputs do not match training input shape"
assert X_test_ablation.shape[1:] == input_shape, "test inputs do not match training input shape"

# Build the ResNet for the ablation feature set and show its layer summary.
resnet_model_2 = build_resnet_model(input_shape)
resnet_model_2.summary()

In [None]:
# Train the ResNet on the ablation inputs for 5 epochs in mini-batches of 32,
# scoring the held-out validation split after every epoch.
# The call is deliberately left as the cell's last expression, so the returned
# History object is echoed as the cell output.
resnet_model_2.fit(
    X_train_ablation,
    Y_train_ablation,
    batch_size=32,
    epochs=5,
    validation_data=(X_val_ablation, Y_val_ablation),
)

Epoch 1/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 1s/step - accuracy: 0.7741 - loss: 0.6062 - val_accuracy: 0.0420 - val_loss: 467.4917
Epoch 2/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 268ms/step - accuracy: 0.9588 - loss: 0.1512 - val_accuracy: 0.0621 - val_loss: 204.6503
Epoch 3/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 268ms/step - accuracy: 0.9582 - loss: 0.1446 - val_accuracy: 0.3000 - val_loss: 52.7935
Epoch 4/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 268ms/step - accuracy: 0.9583 - loss: 0.1330 - val_accuracy: 0.6188 - val_loss: 6.8752
Epoch 5/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 268ms/step - accuracy: 0.9579 - loss: 0.1282 - val_accuracy: 0.8983 - val_loss: 0.2736


<keras.src.callbacks.history.History at 0x7dfbfc226e50>

In [None]:
# Score the trained ResNet on each split, two samples per prediction batch.
# The four returned values are unpacked in the order suggested by the target
# names: accuracy, precision, recall, F1.
# NOTE: these are the same variable names the CNN evaluation cell assigns, so
# running this cell overwrites the CNN metrics. The misspelt
# `acc_train_abalation` is kept because the reporting cell reads that name.
(
    acc_train_abalation,
    prec_train_ablation,
    rec_train_ablation,
    f1_train_ablation,
) = evaluate_model_in_batches(
    resnet_model_2,
    X_train_ablation,
    Y_train_ablation,
    "Training Dataset",
    batch_size=2,
)

(
    acc_val_ablation,
    prec_val_ablation,
    rec_val_ablation,
    f1_val_ablation,
) = evaluate_model_in_batches(
    resnet_model_2,
    X_val_ablation,
    Y_val_ablation,
    "Validation Dataset",
    batch_size=2,
)

(
    acc_test_ablation,
    prec_test_ablation,
    rec_test_ablation,
    f1_test_ablation,
) = evaluate_model_in_batches(
    resnet_model_2,
    X_test_ablation,
    Y_test_ablation,
    "Testing Dataset",
    batch_size=2,
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms

In [None]:
# Print accuracy / precision / recall / F1 (4 decimal places) for each split:
# one paragraph per split, paragraphs separated by a blank line.
# The values shown are whatever the most recently run evaluation cell stored
# in these variables.
print("\n\n".join(
    f"{heading}\n"
    f"Accuracy: {accuracy:.4f}\n"
    f"Precision: {precision:.4f}\n"
    f"Recall: {recall:.4f}\n"
    f"F1-Score: {f1_score:.4f}"
    for heading, accuracy, precision, recall, f1_score in (
        ("Metrics on Training Dataset:", acc_train_abalation, prec_train_ablation, rec_train_ablation, f1_train_ablation),
        ("Metrics on Validation Dataset:", acc_val_ablation, prec_val_ablation, rec_val_ablation, f1_val_ablation),
        ("Metrics on Testing Dataset:", acc_test_ablation, prec_test_ablation, rec_test_ablation, f1_test_ablation),
    )
))

Metrics on Training Dataset:
Accuracy: 0.9002
Precision: 0.1066
Recall: 0.1883
F1-Score: 0.1362

Metrics on Validation Dataset:
Accuracy: 0.8983
Precision: 0.0968
Recall: 0.1790
F1-Score: 0.1256

Metrics on Testing Dataset:
Accuracy: 0.9048
Precision: 0.1008
Recall: 0.1771
F1-Score: 0.1285
