# In [None]:
# ISHMAM RAHMAN FINAL EXAM

# --- Import Libraries ---
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten, Input, Bidirectional
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# Banner marking the start of the data loading / preprocessing output.
print("--- Data Loading and Preprocessing ---")

# --- One-hot encode DNA sequences ---
# Lookup table built once at import time instead of on every call.
# Channel order is A, T, G, C; 'N'/'n' map to an all-zero row.
_BASE_TO_ONE_HOT = {
    'A': (1, 0, 0, 0), 'a': (1, 0, 0, 0),
    'T': (0, 1, 0, 0), 't': (0, 1, 0, 0),
    'G': (0, 0, 1, 0), 'g': (0, 0, 1, 0),
    'C': (0, 0, 0, 1), 'c': (0, 0, 0, 1),
    'N': (0, 0, 0, 0), 'n': (0, 0, 0, 0),
}
_UNKNOWN_BASE = (0, 0, 0, 0)


def one_hot_encode_sequence(sequence, max_len):
    """Return a ``(max_len, 4)`` float32 one-hot encoding of *sequence*.

    Each character becomes one row with channels ordered A, T, G, C
    (case-insensitive). ``N``/``n`` and any unrecognised character become an
    all-zero row. Sequences shorter than *max_len* are zero-padded at the
    end; longer sequences are truncated to the first *max_len* positions.

    Args:
        sequence: Iterable of single-character strings (typically a ``str``).
        max_len: Number of rows in the returned array.

    Returns:
        ``numpy.ndarray`` of dtype float32 with shape ``(max_len, 4)``.
    """
    rows = [_BASE_TO_ONE_HOT.get(char, _UNKNOWN_BASE) for char in sequence]
    # reshape(-1, 4) keeps the array 2-D even for an empty sequence; without
    # it an empty input yields shape (0,) and the vstack below raises.
    encoded = np.asarray(rows, dtype=np.float32).reshape(-1, 4)

    missing = max_len - len(encoded)
    if missing > 0:
        padding = np.zeros((missing, 4), dtype=np.float32)
        encoded = np.vstack((encoded, padding))
    return encoded[:max_len]

# --- Load training and test data ---
# Both CSVs are read without a header row. Column 0 is fed to the one-hot
# encoder below and column 1 is used as the target array.
train_df = pd.read_csv('dm3.kc167.tads.train.csv', header=None)
X_train_raw = train_df[0].values
y_train = train_df[1].values

test_df = pd.read_csv('dm3.kc167.tads.test.csv', header=None)
X_test_raw = test_df[0].values
y_test = test_df[1].values

# --- Encode sequences and get input shape ---
# The first training sequence fixes the length that every sequence (train and
# test) is padded or truncated to.
sequence_length = len(X_train_raw[0])
input_shape = (sequence_length, 4)

X_train = np.array([one_hot_encode_sequence(dna, sequence_length) for dna in X_train_raw])
X_test = np.array([one_hot_encode_sequence(dna, sequence_length) for dna in X_test_raw])

print("Train shape:", X_train.shape, "Test shape:", X_test.shape)
print("Input shape: ", input_shape)

# ===========================
# Model 1 - 1 CNN + Dense
# ===========================

# --- Build and compile Model 1 ---
# One Conv1D/MaxPooling1D stage, then two dense layers and a sigmoid output
# for binary classification. The input shape is declared with an explicit
# Input layer instead of the `input_shape=` layer kwarg.
model_1 = Sequential([
    Input(shape=input_shape),
    Conv1D(64, 5, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])
model_1.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])
# NOTE: despite its name, `checkpoint` holds the EarlyStopping callback
# (stop after 10 epochs without val_loss improvement, restore best weights).
checkpoint = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# --- Train Model 1 ---
# NOTE(review): Keras takes the validation split from the end of the arrays
# before shuffling, so this assumes the CSV rows are not ordered by label
# -- TODO confirm.
print("\n--- Training Model 1 ---")
history_1 = model_1.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[
        # Saves the best model seen so far to disk.
        ModelCheckpoint('best_model1.keras', save_best_only=True),
        checkpoint,
    ],
    verbose=2,
)

# --- Evaluate Model 1 ---
print("\n--- Evaluating Model 1 ---")
test_loss_1, test_acc_1 = model_1.evaluate(X_test, y_test, verbose=0)
print(f" Model 1 Test Accuracy: {test_acc_1:.4f}")

# ===========================
# Model 2 - 2 CNN + Dense
# ===========================

# --- Build and compile Model 2 ---
# Two Conv1D/MaxPooling1D stages (64 then 128 filters), then two dense layers
# and a sigmoid output. The input shape is declared with an explicit Input
# layer instead of the `input_shape=` layer kwarg.
model_2 = Sequential([
    Input(shape=input_shape),
    Conv1D(64, 5, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),
    Conv1D(128, 5, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])
model_2.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])
# NOTE: despite its name, `checkpoint` holds the EarlyStopping callback
# (stop after 8 epochs without val_loss improvement, restore best weights).
checkpoint = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

# --- Train Model 2 ---
print("\n--- Training Model 2 ---")
history_2 = model_2.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[
        # Saves the best model seen so far to disk.
        ModelCheckpoint('best_model2.keras', save_best_only=True),
        checkpoint,
    ],
    verbose=2,
)

# --- Evaluate Model 2 ---
print("\n--- Evaluating Model 2 ---")
test_loss_2, test_acc_2 = model_2.evaluate(X_test, y_test, verbose=0)
print(f" Model 2 Test Accuracy: {test_acc_2:.4f}")

# ===========================
# Model 3 - 3 CNN + Dense
# ===========================

# --- Build and compile Model 3 ---
# Three Conv1D/MaxPooling1D stages (32, 64, 128 filters), then two dense
# layers and a sigmoid output. The input shape is declared with an explicit
# Input layer instead of the `input_shape=` layer kwarg.
model_3 = Sequential([
    Input(shape=input_shape),
    Conv1D(32, 5, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),
    Conv1D(64, 5, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),
    Conv1D(128, 5, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])
model_3.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])
# NOTE: despite its name, `checkpoint` holds the EarlyStopping callback
# (stop after 10 epochs without val_loss improvement, restore best weights).
checkpoint = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# --- Train Model 3 ---
print("\n--- Training Model 3 ---")
history_3 = model_3.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[
        # Saves the best model seen so far to disk.
        ModelCheckpoint('best_model3.keras', save_best_only=True),
        checkpoint,
    ],
    verbose=2,
)

# --- Evaluate Model 3 ---
print("\n--- Evaluating Model 3 ---")
test_loss_3, test_acc_3 = model_3.evaluate(X_test, y_test, verbose=0)
print(f" Model 3 Test Accuracy: {test_acc_3:.4f}")

# ===========================
# Model 4 - 1 CNN only
# ===========================

# --- Build and compile Model 4 ---
# A single 'same'-padded Conv1D layer (kernel size 11) whose pooled, flattened
# output feeds the sigmoid unit directly, with no hidden dense layers. The
# input shape is declared with an explicit Input layer instead of the
# `input_shape=` layer kwarg.
model_4 = Sequential([
    Input(shape=input_shape),
    Conv1D(64, 11, activation='relu', padding='same'),
    Dropout(0.4),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Flatten(),
    Dense(1, activation='sigmoid'),
])
model_4.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])
# NOTE: despite its name, `checkpoint` holds the EarlyStopping callback
# (stop after 10 epochs without val_loss improvement, restore best weights).
checkpoint = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# --- Train Model 4 ---
print("\n--- Training Model 4 ---")
history_4 = model_4.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[
        # Saves the best model seen so far to disk.
        ModelCheckpoint('best_model4.keras', save_best_only=True),
        checkpoint,
    ],
    verbose=2,
)

# --- Evaluate Model 4 ---
print("\n--- Evaluating Model 4 ---")
test_loss_4, test_acc_4 = model_4.evaluate(X_test, y_test, verbose=0)
print(f"Model 4 Test Accuracy: {test_acc_4:.4f}")

# ===========================
# Model 5 - 2 CNN only
# ===========================

# --- Build and compile Model 5 ---
# Two stacked 'same'-padded Conv1D layers (kernel sizes 11 and 7) with a
# single pooling step; the flattened output feeds the sigmoid unit directly.
# The input shape is declared with an explicit Input layer instead of the
# `input_shape=` layer kwarg.
model_5 = Sequential([
    Input(shape=input_shape),
    Conv1D(64, 11, activation='relu', padding='same'),
    Dropout(0.3),
    Conv1D(32, 7, activation='relu', padding='same'),
    Dropout(0.3),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Flatten(),
    Dense(1, activation='sigmoid'),
])
model_5.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])
# NOTE: despite its name, `checkpoint` holds the EarlyStopping callback
# (stop after 8 epochs without val_loss improvement, restore best weights).
checkpoint = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

# --- Train Model 5 ---
print("\n--- Training Model 5 ---")
history_5 = model_5.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[
        # Saves the best model seen so far to disk.
        ModelCheckpoint('best_model5.keras', save_best_only=True),
        checkpoint,
    ],
    verbose=2,
)

# --- Evaluate Model 5 ---
print("\n--- Evaluating Model 5 ---")
test_loss_5, test_acc_5 = model_5.evaluate(X_test, y_test, verbose=0)
print(f" Model 5 Test Accuracy: {test_acc_5:.4f}")

# ===========================
# Model 6 - 3 CNN only
# ===========================

# --- Build and compile Model 6 ---
# Three stacked Conv1D layers (kernel sizes 11, 7, 5; no `padding` argument,
# unlike Models 4 and 5) with a single pooling step; the flattened output
# feeds the sigmoid unit directly. The input shape is declared with an
# explicit Input layer instead of the `input_shape=` layer kwarg.
model_6 = Sequential([
    Input(shape=input_shape),
    Conv1D(32, 11, activation='relu'),
    Dropout(0.3),
    Conv1D(64, 7, activation='relu'),
    Dropout(0.3),
    Conv1D(32, 5, activation='relu'),
    Dropout(0.3),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Flatten(),
    Dense(1, activation='sigmoid'),
])
model_6.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])
# NOTE: despite its name, `checkpoint` holds the EarlyStopping callback
# (stop after 8 epochs without val_loss improvement, restore best weights).
checkpoint = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

# --- Train Model 6 ---
print("\n--- Training Model 6 ---")
history_6 = model_6.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[
        # Saves the best model seen so far to disk.
        ModelCheckpoint('best_model6.keras', save_best_only=True),
        checkpoint,
    ],
    verbose=2,
)

# --- Evaluate Model 6 ---
print("\n--- Evaluating Model 6 ---")
test_loss_6, test_acc_6 = model_6.evaluate(X_test, y_test, verbose=0)
print(f"Model 6 Test Accuracy: {test_acc_6:.4f}")

# ===========================
# Model 7 - 1 CNN + Bi-LSTM
# ===========================

# --- Build and compile Model 7 ---
# One Conv1D/MaxPooling1D stage feeding a bidirectional LSTM that returns
# only its final output (return_sequences=False), followed by the sigmoid
# unit. The input shape is declared with an explicit Input layer instead of
# the `input_shape=` layer kwarg.
model_7 = Sequential([
    Input(shape=input_shape),
    Conv1D(32, 7, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Bidirectional(LSTM(16, return_sequences=False)),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])
model_7.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])
# NOTE: despite its name, `checkpoint` holds the EarlyStopping callback
# (stop after 4 epochs without val_loss improvement, restore best weights).
checkpoint = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)

# --- Train Model 7 ---
print("\n--- Training Model 7 ---")
history_7 = model_7.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[
        # Saves the best model seen so far to disk.
        ModelCheckpoint('best_model7.keras', save_best_only=True),
        checkpoint,
    ],
    verbose=2,
)

# --- Evaluate Model 7 ---
print("\n--- Evaluating Model 7 ---")
test_loss_7, test_acc_7 = model_7.evaluate(X_test, y_test, verbose=0)
print(f"Model 7 Test Accuracy : {test_acc_7:.4f}")
