<a href="https://colab.research.google.com/github/emraankazmi5-coder/deeplearning/blob/main/Homework_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**1. Setup: imports and device info**

In [2]:
import time
import numpy as np
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# Environment report: the TensorFlow build, then the GPU devices TensorFlow can see.
visible_gpus = tf.config.list_physical_devices('GPU')
print("TensorFlow:", tf.__version__)
print("GPUs:", visible_gpus)


TensorFlow: 2.19.0
GPUs: []


**2. Download SpliceFinder data**

In [3]:
# Labels
!wget --no-check-certificate \
  'https://drive.google.com/uc?export=download&id=1QbOSExVJEbPMhjzaua5n2eIXeF3qELQ7' \
  -O "label.txt"

# Encoded sequences
!pip install -q gdown
!gdown --id '1Sh2ce0jo5FVGNsSa9fqLjqcAOWQBFhzz' -O "encoded_seq.txt"


--2026-02-10 01:22:39--  https://drive.google.com/uc?export=download&id=1QbOSExVJEbPMhjzaua5n2eIXeF3qELQ7
Resolving drive.google.com (drive.google.com)... 74.125.196.113, 74.125.196.138, 74.125.196.100, ...
Connecting to drive.google.com (drive.google.com)|74.125.196.113|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://drive.usercontent.google.com/download?id=1QbOSExVJEbPMhjzaua5n2eIXeF3qELQ7&export=download [following]
--2026-02-10 01:22:40--  https://drive.usercontent.google.com/download?id=1QbOSExVJEbPMhjzaua5n2eIXeF3qELQ7&export=download
Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 108.177.11.132, 2607:f8b0:400c:c01::84
Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|108.177.11.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 60000 (59K) [application/octet-stream]
Saving to: ‘label.txt’


2026-02-10 01:22:41 (144 MB/s) - ‘label.txt’ saved [60000/60000]

Downl

**3. Load and format the data**

In [4]:
from sklearn.model_selection import train_test_split

Length = 400  # window length in sequence positions (1..400); each position spans 4 encoded values

def load_data():
    """Load the labels and encoded sequences, crop a centred window, and split them.

    Reads ``label.txt`` and ``encoded_seq.txt`` from the working directory with
    ``np.loadtxt``. Each encoded row is treated as a 1600-value vector
    (400 positions x 4 values per position). The module-level ``Length``
    selects how many central positions to keep, so the cropped rows have
    ``4 * Length`` values.

    The data is split 80/20 with a fixed ``random_state`` and stratified on
    the labels, so class proportions are preserved in both parts.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)`` of NumPy arrays. Note the
        order differs from ``train_test_split``'s own return order.

    Raises:
        ValueError: if ``Length`` is not in the range 1..400.
    """
    full_length = 400  # positions per sequence in the stored encoding
    if not 0 < Length <= full_length:
        # An out-of-range value would otherwise yield an empty or wrongly
        # aligned slice without any error.
        raise ValueError(
            f"Length must be between 1 and {full_length}, got {Length}"
        )

    labels = np.loadtxt('label.txt')
    encoded_seq = np.loadtxt('encoded_seq.txt')

    # Keep the central Length positions (4*Length values) of the 1600-value
    # vector by trimming 2*(400 - Length) values from each end.
    trim = (full_length - Length) * 2
    start = trim
    end = 1600 - trim
    encoded_seq_choose = encoded_seq[:, start:end]

    print("Chosen encoded shape:", encoded_seq_choose.shape)

    x_train, x_test, y_train, y_test = train_test_split(
        encoded_seq_choose, labels, test_size=0.2, random_state=42, stratify=labels
    )

    # train_test_split already returns ndarrays for ndarray inputs, so no
    # extra np.array() copies are needed.
    return x_train, y_train, x_test, y_test

X_train, y_train, X_test, y_test = load_data()


Chosen encoded shape: (30000, 1600)


In [5]:
# Reshape to (samples, seq_len, channels=4)
n_channels = 4
n_samples = X_train.shape[0]

# Count features over every non-sample axis so this cell can be re-run safely
# after X_train / X_test have already been reshaped to 3-D.
total_features = int(np.prod(X_train.shape[1:]))
if total_features % n_channels != 0:
    raise ValueError(
        f"Feature count {total_features} is not divisible by n_channels={n_channels}"
    )
seq_len = total_features // n_channels

X_train = X_train.reshape(-1, seq_len, n_channels)
X_test  = X_test.reshape(-1, seq_len, n_channels)

# Number of distinct label values present in the training split.
n_classes = len(np.unique(y_train))
print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)
print("Classes:", n_classes)


Train shape: (24000, 400, 4)
Test shape: (6000, 400, 4)
Classes: 3


**4. Build a SpliceFinder‑style 1D CNN**

In [6]:
def build_splice_model(input_shape=None, num_classes=None, dropout_rate=0.3):
    """Build and compile the 1D CNN classifier used in this notebook.

    Architecture: two Conv1D(50, kernel 7, ReLU, 'same' padding) layers, each
    followed by MaxPooling1D(2), then Flatten, Dense(100, ReLU), Dropout and
    a softmax output layer. The model is compiled with the Adam optimizer,
    sparse categorical cross-entropy loss and an accuracy metric.

    Args:
        input_shape: ``(sequence_length, channels)`` of one sample. Defaults
            to the notebook globals ``(seq_len, n_channels)``.
        num_classes: number of output classes. Defaults to the notebook
            global ``n_classes``.
        dropout_rate: dropout fraction applied before the output layer.

    Returns:
        A compiled ``keras.Sequential`` model.
    """
    # Fall back to the notebook-level globals so existing zero-argument calls
    # behave exactly as before.
    if input_shape is None:
        input_shape = (seq_len, n_channels)
    if num_classes is None:
        num_classes = n_classes

    model = keras.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv1D(50, kernel_size=7, activation='relu', padding='same'),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(50, kernel_size=7, activation='relu', padding='same'),
        layers.MaxPooling1D(pool_size=2),
        layers.Flatten(),
        layers.Dense(100, activation='relu'),
        layers.Dropout(dropout_rate),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(
        optimizer='adam',
        # "sparse" loss: labels are integer class indices, not one-hot vectors.
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


**5. Early stopping callback (patience = 3)**

In [7]:
# Early stopping: watch the validation loss, tolerate 3 epochs without
# improvement, and restore the weights from the best epoch.
early_stop = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=3,
    monitor='val_loss',
)


**6. Train on CPU**

In [9]:
print("\n" + "="*60)
print("TRAINING ON CPU")
print("="*60)

# Start from a clean Keras state, then fix the Python / NumPy / TensorFlow
# seeds so weight initialisation and batch shuffling are repeatable.
tf.keras.backend.clear_session()
tf.keras.utils.set_random_seed(42)

with tf.device('/CPU:0'):
    model_cpu = build_splice_model()
    # perf_counter is monotonic, so the measured interval is not affected by
    # system clock adjustments. Model construction is excluded from the timing.
    t0 = time.perf_counter()
    history_cpu = model_cpu.fit(
        X_train, y_train,
        validation_split=0.2,
        epochs=50,          # ceiling; early stopping will stop earlier
        batch_size=32,
        callbacks=[early_stop],
        verbose=1
    )
    cpu_time = time.perf_counter() - t0

# Number of epochs actually run before early stopping ended training.
cpu_epochs = len(history_cpu.history['loss'])



TRAINING ON CPU
Epoch 1/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 47ms/step - accuracy: 0.7495 - loss: 0.5495 - val_accuracy: 0.9648 - val_loss: 0.1222
Epoch 2/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 45ms/step - accuracy: 0.9749 - loss: 0.0915 - val_accuracy: 0.9665 - val_loss: 0.1141
Epoch 3/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 46ms/step - accuracy: 0.9825 - loss: 0.0591 - val_accuracy: 0.9692 - val_loss: 0.1285
Epoch 4/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 45ms/step - accuracy: 0.9871 - loss: 0.0428 - val_accuracy: 0.9677 - val_loss: 0.1222
Epoch 5/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 46ms/step - accuracy: 0.9919 - loss: 0.0288 - val_accuracy: 0.9688 - val_loss: 0.1460


**7. Train on GPU**

In [7]:
print("\n" + "="*60)
print("TRAINING ON GPU (if available)")
print("="*60)

# Both stay None when no GPU is present, so later cells can test for that.
gpu_time = None
gpu_epochs = None

if tf.config.list_physical_devices('GPU'):
    # Start from a clean Keras state, then fix the Python / NumPy / TensorFlow
    # seeds so weight initialisation and batch shuffling are repeatable.
    tf.keras.backend.clear_session()
    tf.keras.utils.set_random_seed(42)
    with tf.device('/GPU:0'):
        model_gpu = build_splice_model()
        # perf_counter is monotonic, so the measured interval is not affected
        # by system clock adjustments. Model construction is excluded.
        t0 = time.perf_counter()
        history_gpu = model_gpu.fit(
            X_train, y_train,
            validation_split=0.2,
            epochs=50,          # ceiling; early stopping will stop earlier
            batch_size=32,
            callbacks=[early_stop],
            verbose=1
        )
        gpu_time = time.perf_counter() - t0
    # Number of epochs actually run before early stopping ended training.
    gpu_epochs = len(history_gpu.history['loss'])
else:
    print("No GPU detected – skipping GPU training.")



TRAINING ON GPU (if available)
Epoch 1/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.7383 - loss: 0.5589 - val_accuracy: 0.9679 - val_loss: 0.1151
Epoch 2/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9759 - loss: 0.0843 - val_accuracy: 0.9717 - val_loss: 0.1082
Epoch 3/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9845 - loss: 0.0565 - val_accuracy: 0.9717 - val_loss: 0.1145
Epoch 4/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9874 - loss: 0.0388 - val_accuracy: 0.9700 - val_loss: 0.1220
Epoch 5/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9893 - loss: 0.0325 - val_accuracy: 0.9702 - val_loss: 0.1372
