# ISHMAM RAHMAN FINAL EXAM

In [12]:
# Data Loading and Preprocessing
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten, Input, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import BatchNormalization

print("--- Data Loading and Preprocessing ---")

# --- One-hot encode DNA sequences ---
def one_hot_encode_sequence(sequence, max_len):
    mapping = {
        'A': [1, 0, 0, 0], 'a': [1, 0, 0, 0],
        'T': [0, 1, 0, 0], 't': [0, 1, 0, 0],
        'G': [0, 0, 1, 0], 'g': [0, 0, 1, 0],
        'C': [0, 0, 0, 1], 'c': [0, 0, 0, 1],
        'N': [0, 0, 0, 0], 'n': [0, 0, 0, 0]
    }
    encoded = [mapping.get(char, [0, 0, 0, 0]) for char in sequence]
    encoded = np.array(encoded, dtype=np.float32)
    if len(encoded) < max_len:
        pad = np.zeros((max_len - len(encoded), 4), dtype=np.float32)
        encoded = np.vstack((encoded, pad))
    return encoded[:max_len]

# --- Load training and test data ---
train_df = pd.read_csv('dm3.kc167.tads.train.csv', header=None)
X_train_raw, y_train = train_df[0].values, train_df[1].values
test_df = pd.read_csv('dm3.kc167.tads.test.csv', header=None)
X_test_raw, y_test = test_df[0].values, test_df[1].values

# --- Encode sequences and get input shape ---
sequence_length = len(X_train_raw[0])
X_train = np.array([one_hot_encode_sequence(seq, sequence_length) for seq in X_train_raw])
X_test = np.array([one_hot_encode_sequence(seq, sequence_length) for seq in X_test_raw])
print("Train shape:", X_train.shape, "Test shape:", X_test.shape)
input_shape = (sequence_length, 4)
print("Input shape: ",input_shape)


--- Data Loading and Preprocessing ---
Train shape: (28140, 1000, 4) Test shape: (2000, 1000, 4)
Input shape:  (1000, 4)


## Model 1 - 1 layer CNN with dense layers

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# --- Model 1 Parameters ---
input_shape = (1000, 4)
learning_rate = 0.001
loss_fn = 'binary_crossentropy'
metrics = ['accuracy']

# --- Build Model 1 ---
model_1 = Sequential([
    Conv1D(64, 5, activation='relu', input_shape=input_shape),
    MaxPooling1D(2),
    Dropout(0.3),

    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

# --- Compile ---
model_1.compile(
    optimizer=Adam(learning_rate),
    loss=loss_fn,
    metrics=metrics
)

# --- Callbacks ---
checkpoint = ModelCheckpoint('best_model1.keras', monitor='val_loss', save_best_only=True, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


model_1.summary()

# --- Train Model 1 ---
print("\n--- Training Model 1 ---")
history_1 = model_1.fit(
    X_train, y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint, early_stop],
    verbose=2
)

# --- Evaluate Model 1 ---
print("\n--- Evaluating Model 1 ---")
test_loss_1, test_acc_1 = model_1.evaluate(X_test, y_test, verbose=0)
print(f" Model 1 Test Accuracy: {test_acc_1:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



--- Training Model 1 ---
Epoch 1/100

Epoch 1: val_loss improved from inf to 0.92937, saving model to best_model1.keras
352/352 - 7s - 20ms/step - accuracy: 0.6059 - loss: 0.6825 - val_accuracy: 0.0000e+00 - val_loss: 0.9294
Epoch 2/100

Epoch 2: val_loss did not improve from 0.92937
352/352 - 6s - 18ms/step - accuracy: 0.6260 - loss: 0.6406 - val_accuracy: 0.0000e+00 - val_loss: 1.3578
Epoch 3/100

Epoch 3: val_loss improved from 0.92937 to 0.85074, saving model to best_model1.keras
352/352 - 6s - 18ms/step - accuracy: 0.6717 - loss: 0.5894 - val_accuracy: 0.4179 - val_loss: 0.8507
Epoch 4/100

Epoch 4: val_loss improved from 0.85074 to 0.75484, saving model to best_model1.keras
352/352 - 6s - 18ms/step - accuracy: 0.7281 - loss: 0.5310 - val_accuracy: 0.6269 - val_loss: 0.7548
Epoch 5/100

Epoch 5: val_loss did not improve from 0.75484
352/352 - 6s - 18ms/step - accuracy: 0.7564 - loss: 0.4856 - val_accuracy: 0.4993 - val_loss: 0.9909
Epoch 6/100

Epoch 6: val_loss did not improve f

## model 2 - 2 layer CNN dense

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# --- Model 2 Parameters ---
input_shape = (1000, 4)
learning_rate = 0.001
loss_fn = 'binary_crossentropy'
metrics = ['accuracy']

# --- Build Model 2 ---
model_2 = Sequential([
    Conv1D(64, 5, activation='relu', input_shape=input_shape),
    MaxPooling1D(2),
    Dropout(0.3),

    Conv1D(128, 5, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),

    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

# --- Compile ---
model_2.compile(
    optimizer=Adam(learning_rate),
    loss=loss_fn,
    metrics=metrics
)

# --- Callbacks ---
checkpoint = ModelCheckpoint('best_model2.keras', monitor='val_loss', save_best_only=True, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

model_2.summary()
# --- Train Model 2 ---
print("\n--- Training Model 2 ---")
history_2 = model_2.fit(
    X_train, y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint, early_stop],
    verbose=2
)

# --- Evaluate Model 2 ---
print("\n--- Evaluating Model 2 ---")
test_loss_2, test_acc_2 = model_2.evaluate(X_test, y_test, verbose=0)
print(f" Model 2 Test Accuracy: {test_acc_2:.4f}")


--- Training Model 2 ---
Epoch 1/100

Epoch 1: val_loss improved from inf to 0.87846, saving model to best_model2.keras
352/352 - 19s - 54ms/step - accuracy: 0.6225 - loss: 0.6670 - val_accuracy: 0.0000e+00 - val_loss: 0.8785
Epoch 2/100

Epoch 2: val_loss did not improve from 0.87846
352/352 - 19s - 53ms/step - accuracy: 0.6250 - loss: 0.6509 - val_accuracy: 0.0000e+00 - val_loss: 0.9290
Epoch 3/100

Epoch 3: val_loss did not improve from 0.87846
352/352 - 19s - 53ms/step - accuracy: 0.6250 - loss: 0.6398 - val_accuracy: 0.0000e+00 - val_loss: 0.9281
Epoch 4/100

Epoch 4: val_loss improved from 0.87846 to 0.78400, saving model to best_model2.keras
352/352 - 19s - 53ms/step - accuracy: 0.6418 - loss: 0.6199 - val_accuracy: 0.4975 - val_loss: 0.7840
Epoch 5/100

Epoch 5: val_loss improved from 0.78400 to 0.67482, saving model to best_model2.keras
352/352 - 19s - 54ms/step - accuracy: 0.6857 - loss: 0.5854 - val_accuracy: 0.6963 - val_loss: 0.6748
Epoch 6/100

Epoch 6: val_loss improved

## Model 3 - 3CNN  Dense

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# --- Model 3 Parameters ---
input_shape = (1000, 4)
learning_rate = 0.001
loss_fn = 'binary_crossentropy'
metrics = ['accuracy']

# --- Build Model 3 ---
model_3 = Sequential([
    Conv1D(32, 5, activation='relu', input_shape=input_shape),
    MaxPooling1D(2),
    Dropout(0.3),

    Conv1D(64, 5, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),

    Conv1D(128, 5, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),

    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

# --- Compile ---
model_3.compile(
    optimizer=Adam(learning_rate),
    loss=loss_fn,
    metrics=metrics
)

# --- Callbacks ---
checkpoint = ModelCheckpoint('best_model3.keras', monitor='val_loss', save_best_only=True, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

model_3.summary()
# --- Train Model 3 ---
print("\n--- Training Model 3 ---")
history_3 = model_3.fit(
    X_train, y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint, early_stop],
    verbose=2
)

# --- Evaluate Model 3 ---
print("\n--- Evaluating Model 3 ---")
test_loss_3, test_acc_3 = model_3.evaluate(X_test, y_test, verbose=0)
print(f" Model 3 Test Accuracy: {test_acc_3:.4f}")


--- Training Model 3 ---
Epoch 1/100

Epoch 1: val_loss improved from inf to 0.83860, saving model to best_model3.keras
352/352 - 16s - 45ms/step - accuracy: 0.6222 - loss: 0.6654 - val_accuracy: 0.0000e+00 - val_loss: 0.8386
Epoch 2/100

Epoch 2: val_loss did not improve from 0.83860
352/352 - 15s - 42ms/step - accuracy: 0.6249 - loss: 0.6528 - val_accuracy: 0.0000e+00 - val_loss: 0.9052
Epoch 3/100

Epoch 3: val_loss improved from 0.83860 to 0.79645, saving model to best_model3.keras
352/352 - 14s - 41ms/step - accuracy: 0.6286 - loss: 0.6393 - val_accuracy: 0.1990 - val_loss: 0.7965
Epoch 4/100

Epoch 4: val_loss did not improve from 0.79645
352/352 - 14s - 41ms/step - accuracy: 0.6386 - loss: 0.6303 - val_accuracy: 0.1324 - val_loss: 0.8800
Epoch 5/100

Epoch 5: val_loss did not improve from 0.79645
352/352 - 15s - 41ms/step - accuracy: 0.6445 - loss: 0.6218 - val_accuracy: 0.3026 - val_loss: 0.8967
Epoch 6/100

Epoch 6: val_loss improved from 0.79645 to 0.77186, saving model to b

## Model 4 - 1 CNN

In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dropout, MaxPooling1D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# --- Model 4 Parameters ---
input_shape = (1000, 4)
learning_rate = 0.001
loss_fn = 'binary_crossentropy'
metrics = ['accuracy']

# --- Build Model 4 
model_4 = Sequential([
    Conv1D(64, 11, activation='relu', padding='same', input_shape=input_shape),
    Dropout(0.4),

    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Flatten(),
    Dense(1, activation='sigmoid')
])

# --- Compile ---
model_4.compile(
    optimizer=Adam(learning_rate),
    loss=loss_fn,
    metrics=metrics
)

# --- Callbacks ---
checkpoint = ModelCheckpoint('best_model4.keras', monitor='val_loss', save_best_only=True, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

model_4.summary()

# --- Train Model 4 ---
print("\n--- Training Model 4 ---")
history_4 = model_4.fit(
    X_train, y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint, early_stop],
    verbose=2
)

# --- Evaluate Model 4 ---
print("\n--- Evaluating Model 4 ---")
test_loss_4, test_acc_4 = model_4.evaluate(X_test, y_test, verbose=0)
print(f"Model 4 Test Accuracy: {test_acc_4:.4f}")


--- Training Model 4 ---
Epoch 1/100

Epoch 1: val_loss improved from inf to 0.99358, saving model to best_model4.keras
352/352 - 7s - 20ms/step - accuracy: 0.6194 - loss: 0.6598 - val_accuracy: 0.0012 - val_loss: 0.9936
Epoch 2/100

Epoch 2: val_loss improved from 0.99358 to 0.72015, saving model to best_model4.keras
352/352 - 7s - 19ms/step - accuracy: 0.6568 - loss: 0.6167 - val_accuracy: 0.5002 - val_loss: 0.7202
Epoch 3/100

Epoch 3: val_loss did not improve from 0.72015
352/352 - 7s - 19ms/step - accuracy: 0.7135 - loss: 0.5605 - val_accuracy: 0.4080 - val_loss: 0.8158
Epoch 4/100

Epoch 4: val_loss did not improve from 0.72015
352/352 - 7s - 19ms/step - accuracy: 0.7454 - loss: 0.5148 - val_accuracy: 0.3074 - val_loss: 0.9907
Epoch 5/100

Epoch 5: val_loss did not improve from 0.72015
352/352 - 7s - 19ms/step - accuracy: 0.7679 - loss: 0.4811 - val_accuracy: 0.3442 - val_loss: 1.0047
Epoch 6/100

Epoch 6: val_loss did not improve from 0.72015
352/352 - 7s - 19ms/step - accuracy

## Model 5 - 2 CNN

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dropout, MaxPooling1D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# --- Model 5 Parameters ---
input_shape = (1000, 4)
learning_rate = 0.001
loss_fn = 'binary_crossentropy'
metrics = ['accuracy']

# --- Build Model 5 
model_5 = Sequential([
    Conv1D(64, 11, activation='relu', padding='same', input_shape=input_shape),
    Dropout(0.3),

    Conv1D(32, 7, activation='relu', padding='same'),
    Dropout(0.3),

    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Flatten(),
    Dense(1, activation='sigmoid')
])

# --- Compile ---
model_5.compile(
    optimizer=Adam(learning_rate),
    loss=loss_fn,
    metrics=metrics
)

# --- Callbacks ---
checkpoint = ModelCheckpoint('best_model5.keras', monitor='val_loss', save_best_only=True, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

model_5.summary()

# --- Train Model 5 ---
print("\n--- Training Model 5 ---")
history_5 = model_5.fit(
    X_train, y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint, early_stop],
    verbose=2
)

# --- Evaluate Model 5 ---
print("\n--- Evaluating Model 5 ---")
test_loss_5, test_acc_5 = model_5.evaluate(X_test, y_test, verbose=0)
print(f" Model 5 Test Accuracy: {test_acc_5:.4f}")


--- Training Model 5 ---
Epoch 1/100

Epoch 1: val_loss improved from inf to 0.81259, saving model to best_model5.keras
352/352 - 18s - 52ms/step - accuracy: 0.6236 - loss: 0.6610 - val_accuracy: 0.0046 - val_loss: 0.8126
Epoch 2/100

Epoch 2: val_loss did not improve from 0.81259
352/352 - 18s - 50ms/step - accuracy: 0.6242 - loss: 0.6472 - val_accuracy: 0.0021 - val_loss: 0.9487
Epoch 3/100

Epoch 3: val_loss improved from 0.81259 to 0.76536, saving model to best_model5.keras
352/352 - 18s - 51ms/step - accuracy: 0.6498 - loss: 0.6246 - val_accuracy: 0.4359 - val_loss: 0.7654
Epoch 4/100

Epoch 4: val_loss did not improve from 0.76536
352/352 - 18s - 50ms/step - accuracy: 0.6869 - loss: 0.5864 - val_accuracy: 0.3390 - val_loss: 0.8641
Epoch 5/100

Epoch 5: val_loss did not improve from 0.76536
352/352 - 18s - 50ms/step - accuracy: 0.7142 - loss: 0.5588 - val_accuracy: 0.2521 - val_loss: 1.0023
Epoch 6/100

Epoch 6: val_loss did not improve from 0.76536
352/352 - 18s - 50ms/step - ac

## Model 6 - 3 CNN

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dropout, MaxPooling1D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# --- Model 6 Parameters ---
input_shape = (1000, 4)
learning_rate = 0.001
loss_fn = 'binary_crossentropy'
metrics = ['accuracy']

# --- Build Model 6 
model_6 = Sequential([
    Conv1D(32, 11, activation='relu', input_shape=input_shape),  
    Dropout(0.3),

    Conv1D(64, 7, activation='relu'),  
    Dropout(0.3),

    Conv1D(32, 5, activation='relu'),  
    Dropout(0.3),

    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Flatten(),
    Dense(1, activation='sigmoid')
])

# --- Compile ---
model_6.compile(
    optimizer=Adam(learning_rate),
    loss=loss_fn,
    metrics=metrics
)

# --- Callbacks ---
checkpoint = ModelCheckpoint('best_model6.keras', monitor='val_loss', save_best_only=True, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

model_6.summary()
# --- Train Model 6 ---
print("\n--- Training Model 6 ---")
history_6 = model_6.fit(
    X_train, y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint, early_stop],
    verbose=2
)

# --- Evaluate Model 6 ---
print("\n--- Evaluating Model 6 ---")
test_loss_6, test_acc_6 = model_6.evaluate(X_test, y_test, verbose=0)
print(f"Model 6 Test Accuracy: {test_acc_6:.4f}")


--- Training Model 6 ---
Epoch 1/100

Epoch 1: val_loss improved from inf to 0.78335, saving model to best_model6.keras
352/352 - 26s - 73ms/step - accuracy: 0.6236 - loss: 0.6617 - val_accuracy: 5.3305e-04 - val_loss: 0.7833
Epoch 2/100

Epoch 2: val_loss did not improve from 0.78335
352/352 - 24s - 69ms/step - accuracy: 0.6242 - loss: 0.6503 - val_accuracy: 0.0014 - val_loss: 0.8888
Epoch 3/100

Epoch 3: val_loss did not improve from 0.78335
352/352 - 24s - 69ms/step - accuracy: 0.6335 - loss: 0.6369 - val_accuracy: 0.0290 - val_loss: 0.8821
Epoch 4/100

Epoch 4: val_loss did not improve from 0.78335
352/352 - 24s - 68ms/step - accuracy: 0.6571 - loss: 0.6198 - val_accuracy: 0.2683 - val_loss: 0.9220
Epoch 5/100

Epoch 5: val_loss improved from 0.78335 to 0.71870, saving model to best_model6.keras
352/352 - 24s - 68ms/step - accuracy: 0.6837 - loss: 0.5910 - val_accuracy: 0.5400 - val_loss: 0.7187
Epoch 6/100

Epoch 6: val_loss improved from 0.71870 to 0.66202, saving model to best_

## MOdel 7 - 1 CNN LSTM

In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout, Bidirectional, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# --- Model 7 (Fast) ---
input_shape = (1000, 4)
learning_rate = 0.001
loss_fn = 'binary_crossentropy'
metrics = ['accuracy']

model_7 = Sequential([
    Conv1D(32, 7, activation='relu', input_shape=input_shape),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Bidirectional(LSTM(16, return_sequences=False)),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

model_7.compile(
    optimizer=Adam(learning_rate),
    loss=loss_fn,
    metrics=metrics
)

checkpoint = ModelCheckpoint('best_model7.keras', monitor='val_loss', save_best_only=True, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
model_7.summary()
print("\n--- Training Model 7 ---")
history_7 = model_7.fit(
    X_train, y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint, early_stop],
    verbose=2
)

print("\n--- Evaluating Model 7 ---")
test_loss_7, test_acc_7 = model_7.evaluate(X_test, y_test, verbose=0)
print(f"Model 7 Test Accuracy : {test_acc_7:.4f}")


--- Training Model 7 ---
Epoch 1/100

Epoch 1: val_loss improved from inf to 0.97886, saving model to best_model7.keras
352/352 - 32s - 90ms/step - accuracy: 0.6233 - loss: 0.6638 - val_accuracy: 0.0000e+00 - val_loss: 0.9789
Epoch 2/100

Epoch 2: val_loss improved from 0.97886 to 0.97770, saving model to best_model7.keras
352/352 - 34s - 96ms/step - accuracy: 0.6251 - loss: 0.6611 - val_accuracy: 0.0000e+00 - val_loss: 0.9777
Epoch 3/100

Epoch 3: val_loss improved from 0.97770 to 0.95118, saving model to best_model7.keras
352/352 - 33s - 94ms/step - accuracy: 0.6249 - loss: 0.6600 - val_accuracy: 0.0000e+00 - val_loss: 0.9512
Epoch 4/100

Epoch 4: val_loss improved from 0.95118 to 0.93085, saving model to best_model7.keras
352/352 - 33s - 93ms/step - accuracy: 0.6254 - loss: 0.6583 - val_accuracy: 0.0073 - val_loss: 0.9309
Epoch 5/100

Epoch 5: val_loss did not improve from 0.93085
352/352 - 33s - 93ms/step - accuracy: 0.6284 - loss: 0.6540 - val_accuracy: 0.0000e+00 - val_loss: 1.0