In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import os

In [2]:
X_train = np.load("../data/splits/X_train.npy")
y_train = np.load("../data/splits/y_train.npy")

X_val = np.load("../data/splits/X_val.npy")
y_val = np.load("../data/splits/y_val.npy")

# Convert to tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32)

X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.float32)

X_train_t.shape, y_train_t.shape, X_val_t.shape, y_val_t.shape

(torch.Size([700, 20, 4]),
 torch.Size([700]),
 torch.Size([150, 20, 4]),
 torch.Size([150]))

In [3]:
class CrisprDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = CrisprDataset(X_train_t, y_train_t)
val_dataset = CrisprDataset(X_val_t, y_val_t)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [6]:
# CNN Model
class CrisprCNN(nn.Module):
    def __init__(self):
        super(CrisprCNN, self).__init__()
        self.conv1 = nn.Conv1d(4, 32, 3)
        self.conv2 = nn.Conv1d(32, 64, 3)
        self.dropout = nn.Dropout(0.3)
        self.fc1 = nn.Linear(64 * 16, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        x = x.permute(0, 2, 1)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = CrisprCNN()
model

CrisprCNN(
  (conv1): Conv1d(4, 32, kernel_size=(3,), stride=(1,))
  (conv2): Conv1d(32, 64, kernel_size=(3,), stride=(1,))
  (dropout): Dropout(p=0.3, inplace=False)
  (fc1): Linear(in_features=1024, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)

In [8]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

In [9]:
num_epochs = 20

In [10]:
train_losses = []
val_losses = []

In [14]:
for epoch in range(num_epochs):
    # TRAINING
    model.train()
    running_train_loss = 0.0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch).squeeze()
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        running_train_loss += loss.item()

    avg_train_loss = running_train_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # VALIDATION
    model.eval()
    running_val_loss = 0.0

    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch).squeeze()
            loss = criterion(outputs, y_batch)
            running_val_loss += loss.item()

    avg_val_loss = running_val_loss / len(val_loader)
    val_losses.append(avg_val_loss)

    print(f"Epoch [{epoch+1}/{num_epochs}] | Train: {avg_train_loss:.5f} | Val: {avg_val_loss:.5f}")

Epoch [1/20] | Train: 0.06697 | Val: 0.06741
Epoch [2/20] | Train: 0.06284 | Val: 0.06898
Epoch [3/20] | Train: 0.06171 | Val: 0.06972
Epoch [4/20] | Train: 0.05933 | Val: 0.07242
Epoch [5/20] | Train: 0.05742 | Val: 0.07684
Epoch [6/20] | Train: 0.05431 | Val: 0.07901
Epoch [7/20] | Train: 0.05084 | Val: 0.07478
Epoch [8/20] | Train: 0.04880 | Val: 0.07810
Epoch [9/20] | Train: 0.04398 | Val: 0.07814
Epoch [10/20] | Train: 0.03959 | Val: 0.08322
Epoch [11/20] | Train: 0.03630 | Val: 0.08495
Epoch [12/20] | Train: 0.03409 | Val: 0.08569
Epoch [13/20] | Train: 0.03065 | Val: 0.09300
Epoch [14/20] | Train: 0.02734 | Val: 0.10142
Epoch [15/20] | Train: 0.02730 | Val: 0.10280
Epoch [16/20] | Train: 0.02522 | Val: 0.09388
Epoch [17/20] | Train: 0.02202 | Val: 0.09663
Epoch [18/20] | Train: 0.01792 | Val: 0.09877
Epoch [19/20] | Train: 0.01605 | Val: 0.11171
Epoch [20/20] | Train: 0.01559 | Val: 0.10132


In [15]:
os.makedirs("../models", exist_ok=True)
torch.save(model.state_dict(), "../models/crispr_cnn_trained_day6.pth")

print("Trained model saved.")

Trained model saved.


In [17]:
len(train_losses), len(val_losses)

(21, 20)

In [18]:
min_len = min(len(train_losses), len(val_losses))

train_losses = train_losses[:min_len]
val_losses   = val_losses[:min_len]

In [19]:
os.makedirs("../outputs", exist_ok=True)

loss_log = np.column_stack([train_losses, val_losses])

np.savetxt(
    "../outputs/loss_log_day6.csv",
    loss_log,
    delimiter=",",
    header="train_loss,val_loss",
    comments=""
)

print("Loss log saved")

Loss log saved
