In [2]:
# Data Loading and Preprocessing
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

print("--- Data Loading and Preprocessing ---")

def one_hot_encode_sequence(sequence, max_len):
    mapping = {
        'A': [1, 0, 0, 0], 'a': [1, 0, 0, 0],
        'T': [0, 1, 0, 0], 't': [0, 1, 0, 0],
        'G': [0, 0, 1, 0], 'g': [0, 0, 1, 0],
        'C': [0, 0, 0, 1], 'c': [0, 0, 0, 1],
        'N': [0, 0, 0, 0], 'n': [0, 0, 0, 0]
    }
    encoded = [mapping.get(char, [0, 0, 0, 0]) for char in sequence]
    encoded = np.array(encoded, dtype=np.float32)
    if len(encoded) < max_len:
        pad = np.zeros((max_len - len(encoded), 4), dtype=np.float32)
        encoded = np.vstack((encoded, pad))
    return encoded[:max_len]

train_df = pd.read_csv('dm3.kc167.tads.train.csv', header=None)
X_train_raw, y_train = train_df[0].values, train_df[1].values
test_df = pd.read_csv('dm3.kc167.tads.test.csv', header=None)
X_test_raw, y_test = test_df[0].values, test_df[1].values

sequence_length = len(X_train_raw[0])
X_train = np.array([one_hot_encode_sequence(seq, sequence_length) for seq in X_train_raw])
X_test = np.array([one_hot_encode_sequence(seq, sequence_length) for seq in X_test_raw])
print("Train shape:", X_train.shape, "Test shape:", X_test.shape)

input_shape = (sequence_length, 4)
NUM_KERNELS_CNN = 64
KERNEL_LENGTH_CNN = 9
LSTM_NEURONS = 30
DROPOUT_RATE = 0.3
LEARNING_RATE = 0.001
EPOCHS = 50
BATCH_SIZE = 32

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Model 1
model1 = Sequential([
    Input(shape=input_shape),
    Conv1D(NUM_KERNELS_CNN, KERNEL_LENGTH_CNN, activation='relu'),
    MaxPooling1D(2), Dropout(DROPOUT_RATE), Flatten(),
    Dense(64, activation='relu'), Dropout(DROPOUT_RATE),
    Dense(1, activation='sigmoid')
])
model1.compile(optimizer=Adam(LEARNING_RATE), loss='binary_crossentropy', metrics=['accuracy'])
model1.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[early_stopping], verbose=1)
loss, accuracy = model1.evaluate(X_test, y_test, verbose=0)
print("Model 1 Test Accuracy:", accuracy)

# Model 2
model2 = Sequential([
    Input(shape=input_shape),
    Conv1D(NUM_KERNELS_CNN, KERNEL_LENGTH_CNN, activation='relu'),
    MaxPooling1D(2), Dropout(DROPOUT_RATE),
    Conv1D(NUM_KERNELS_CNN * 2, KERNEL_LENGTH_CNN, activation='relu'),
    MaxPooling1D(2), Dropout(DROPOUT_RATE), Flatten(),
    Dense(128, activation='relu'), Dropout(DROPOUT_RATE),
    Dense(1, activation='sigmoid')
])
model2.compile(optimizer=Adam(LEARNING_RATE), loss='binary_crossentropy', metrics=['accuracy'])
model2.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[early_stopping], verbose=1)
loss, accuracy = model2.evaluate(X_test, y_test, verbose=0)
print("Model 2 Test Accuracy:", accuracy)

# Model 3
model3 = Sequential([
    Input(shape=input_shape),
    LSTM(LSTM_NEURONS), Dropout(DROPOUT_RATE),
    Dense(64, activation='relu'), Dropout(DROPOUT_RATE),
    Dense(1, activation='sigmoid')
])
model3.compile(optimizer=Adam(LEARNING_RATE), loss='binary_crossentropy', metrics=['accuracy'])
model3.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[early_stopping], verbose=1)
loss, accuracy = model3.evaluate(X_test, y_test, verbose=0)
print("Model 3 Test Accuracy:", accuracy)

# Model 4
model4 = Sequential([
    Input(shape=input_shape),
    LSTM(LSTM_NEURONS, return_sequences=True), Dropout(DROPOUT_RATE),
    LSTM(LSTM_NEURONS), Dropout(DROPOUT_RATE),
    Dense(128, activation='relu'), Dropout(DROPOUT_RATE),
    Dense(1, activation='sigmoid')
])
model4.compile(optimizer=Adam(LEARNING_RATE), loss='binary_crossentropy', metrics=['accuracy'])
model4.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[early_stopping], verbose=1)
loss, accuracy = model4.evaluate(X_test, y_test, verbose=0)
print("Model 4 Test Accuracy:", accuracy)

# Model 5
model5 = Sequential([
    Input(shape=input_shape),
    Conv1D(NUM_KERNELS_CNN, KERNEL_LENGTH_CNN, activation='relu'),
    MaxPooling1D(2), Dropout(DROPOUT_RATE),
    LSTM(LSTM_NEURONS), Dropout(DROPOUT_RATE),
    Dense(64, activation='relu'), Dropout(DROPOUT_RATE),
    Dense(1, activation='sigmoid')
])
model5.compile(optimizer=Adam(LEARNING_RATE), loss='binary_crossentropy', metrics=['accuracy'])
model5.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[early_stopping], verbose=1)
loss, accuracy = model5.evaluate(X_test, y_test, verbose=0)
print("Model 5 Test Accuracy:", accuracy)

# Model 6
model6 = Sequential([
    Input(shape=input_shape),
    Conv1D(NUM_KERNELS_CNN, KERNEL_LENGTH_CNN, activation='relu'),
    MaxPooling1D(2), Dropout(DROPOUT_RATE), Flatten(),
    Dense(128, activation='relu'), Dropout(DROPOUT_RATE),
    Dense(64, activation='relu'), Dropout(DROPOUT_RATE),
    Dense(1, activation='sigmoid')
])
model6.compile(optimizer=Adam(LEARNING_RATE), loss='binary_crossentropy', metrics=['accuracy'])
model6.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[early_stopping], verbose=1)
loss, accuracy = model6.evaluate(X_test, y_test, verbose=0)
print("Model 6 Test Accuracy:", accuracy)

# Model 7
model7 = Sequential([
    Input(shape=input_shape),
    LSTM(LSTM_NEURONS), Dropout(DROPOUT_RATE),
    Dense(128, activation='relu'), Dropout(DROPOUT_RATE),
    Dense(64, activation='relu'), Dropout(DROPOUT_RATE),
    Dense(1, activation='sigmoid')
])
model7.compile(optimizer=Adam(LEARNING_RATE), loss='binary_crossentropy', metrics=['accuracy'])
model7.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[early_stopping], verbose=1)
loss, accuracy = model7.evaluate(X_test, y_test, verbose=0)
print("Model 7 Test Accuracy:", accuracy)

--- Data Loading and Preprocessing ---
Train shape: (28140, 1000, 4) Test shape: (2000, 1000, 4)
Epoch 1/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.6042 - loss: 0.7260 - val_accuracy: 0.0000e+00 - val_loss: 0.8707
Epoch 2/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.6296 - loss: 0.6364 - val_accuracy: 0.0000e+00 - val_loss: 1.1545
Epoch 3/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.6423 - loss: 0.5801 - val_accuracy: 0.5210 - val_loss: 0.8068
Epoch 4/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.7476 - loss: 0.5160 - val_accuracy: 0.5601 - val_loss: 0.8458
Epoch 5/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.7900 - loss: 0.4395 - val_accuracy: 0.4805 - val_loss: 1.0483
Epoch 6/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[

KeyboardInterrupt: 