# HIC similarity with CNN

In [1]:
import torch

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from data_proc import HICPairsBioReplicatesDataset

# Sample pairs passed as the first list; the build log reports these as the
# positive image pairs.
positive_pairs = [("GSM1551552_HIC003", "GSM1551554_HIC005")]
# Sample pairs passed as the second list; the build log reports these as the
# negative image pairs.
negative_pairs = [("GSM1551552_HIC003", "GSM1551569_HIC020")]

# NOTE(review): the meaning of the trailing None argument is not visible
# here — check data_proc.HICPairsBioReplicatesDataset.
dataset = HICPairsBioReplicatesDataset(
    "../data/hic_dataset",
    positive_pairs,
    negative_pairs,
    None,
)

Building positive image pairs
Building image pairs for GSM1551552_HIC003 and GSM1551554_HIC005


100%|██████████| 25/25 [00:00<00:00, 98.04it/s] 


Building negative image pairs
Building image pairs for GSM1551552_HIC003 and GSM1551569_HIC020


100%|██████████| 25/25 [00:00<00:00, 92.33it/s] 


In [3]:
from modeling import SiameseNetwork, SiameseNetworkWithoutCNN

# Alternative architecture, currently disabled:
# model = SiameseNetwork().to(device)
# NOTE(review): 40*40 is presumably the flattened size of a 40x40 input —
# confirm against modeling.SiameseNetworkWithoutCNN.
model = SiameseNetworkWithoutCNN(40*40).to(device)

In [4]:
# Training params
import torch

batch_size = 400
num_epochs = 10
# Number of samples held out of training and used only for evaluation.
test_size = 5000
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters())
checkpoint_dir = "../checkpoints"

# A hold-out at least as large as the dataset would leave no training data
# (or a negative split length), so fail early with a clear message.
if len(dataset) <= test_size:
    raise ValueError(
        f"dataset has {len(dataset)} samples; need more than {test_size} to hold out a test split"
    )

# Seed torch's global RNG before splitting so the train/test partition is
# repeatable across runs.
torch.random.manual_seed(0)
train_data, test_data = torch.utils.data.random_split(
    dataset, [len(dataset) - test_size, test_size]
)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
# Evaluation order is fixed: eval_once averages per-batch values and the last
# batch is smaller than the others, so reshuffling would change which samples
# land in it and make the reported metrics vary between calls.
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [5]:
def train_once(model, train_loader, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Each batch is a mapping with the keys ``"input1"``, ``"input2"`` and
    ``"label"``; all three are moved to the notebook-level ``device`` before
    use. A running average of the loss is printed after every batch.

    Returns:
        The sum of the per-batch loss values divided by the number of batches.
    """
    print("Training model...")
    model.train()

    loss_sum = 0.0
    for step, batch in enumerate(train_loader, start=1):
        left = batch["input1"].to(device)
        right = batch["input2"].to(device)
        target = batch["label"].to(device)

        # Clear gradients from the previous step before the new backward pass.
        optimizer.zero_grad()
        batch_loss = criterion(model(left, right), target)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        # "\r" keeps the progress report on a single, overwritten line.
        print(f"Batch: {step}/{len(train_loader)}, Loss: {loss_sum / step}", end="\r")

    return loss_sum / len(train_loader)

def eval_once(model, test_data, criteria):
    """Evaluate ``model`` on every batch of ``test_data`` with gradients disabled.

    Args:
        model: Callable taking the two inputs of a pair; put into eval mode.
        test_data: Sized iterable of batches, each a mapping with the keys
            ``"input1"``, ``"input2"`` and ``"label"``.
        criteria: Re-iterable collection of ``(name, fn)`` pairs. ``fn`` is
            called once per batch as ``fn(output, label)`` with both tensors
            squeezed along dim 1 and moved to the CPU, and must return a number.

    Returns:
        dict mapping each metric name to the sum of its per-batch values
        divided by the number of batches. Batches are not weighted by size,
        so a smaller final batch counts as much as a full one.
    """
    print("Evaluating model...")
    model.eval()

    # Use the loader that was passed in, not a notebook-level global, so the
    # progress total is right for any loader and works on a fresh kernel.
    num_batches = len(test_data)
    metrics = {name: 0.0 for name, _ in criteria}
    with torch.no_grad():
        for i, batch in enumerate(test_data):
            input1, input2, label = batch["input1"], batch["input2"], batch["label"]
            output = model(input1.to(device), input2.to(device))

            for name, criterion in criteria:
                metric_value = criterion(output.squeeze(1).cpu(), label.squeeze(1).cpu())
                metrics[name] += metric_value
            print(f"Batch: {i + 1}/{num_batches}", end="\r")

    return {name: metric_value / num_batches for name, metric_value in metrics.items()}
    

In [6]:
from sklearn.metrics import accuracy_score
from datetime import datetime
from pathlib import Path
import json

# Training loop
def _dump_json(payload, path):
    """Write ``payload`` to ``path`` as indented JSON, closing the file afterwards."""
    with open(path, "w") as json_file:
        json.dump(payload, json_file, indent=4)


history = {
    "train_loss": [],
    "test_loss": [],
    "test_accuracy": [],
}
# (name, fn) pairs consumed by eval_once; each fn is called once per batch
# with the model output and the label.
eval_metrics = [
    ("loss", lambda x, y: criterion(x, y).item()),
    # Outputs above 0.5 are counted as the positive class.
    ("accuracy", lambda x, y: accuracy_score((x > 0.5).int(), y)),
]
# One timestamped directory per run, rooted at the configured checkpoint_dir.
save_dir = Path(checkpoint_dir) / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
save_dir.mkdir(parents=True)

_dump_json(
    {
        "batch_size": batch_size,
        "num_epochs": num_epochs,
        "criterion": str(criterion),
        "optimizer": str(optimizer),
        "model": str(type(model)),
    },
    save_dir / "params.json",
)

for epoch in range(num_epochs):
    print(f"Epoch: {epoch + 1}")

    history["train_loss"].append(train_once(model, train_loader, criterion, optimizer))
    epoch_val_metrics = eval_once(model, test_loader, eval_metrics)
    for name, value in epoch_val_metrics.items():
        history[f"test_{name}"].append(value)

    checkpoint_path = save_dir / f"epoch_{epoch + 1}.pt"
    torch.save(model.state_dict(), checkpoint_path)
    # Persist the history after every epoch so an interrupted run keeps the
    # metrics gathered so far.
    _dump_json(history, save_dir / "history.json")

    print(f"Train loss: {history['train_loss'][-1]:.4f}, Test loss: {epoch_val_metrics['loss']:.4f} Test accuracy: {epoch_val_metrics['accuracy']:.4f}")
torch.save(model.state_dict(), save_dir / "final.pt")
# Final write also covers the num_epochs == 0 case, where the loop never runs.
_dump_json(history, save_dir / "history.json")

Epoch: 1
Training model...
Evaluating model... 0.22944537575046223
Train loss: 0.2294, Test loss: 0.0721 Test accuracy: 0.9744
Epoch: 2
Training model...
Evaluating model... 0.06387675669975579
Train loss: 0.0639, Test loss: 0.0581 Test accuracy: 0.9785
Epoch: 3
Training model...
Evaluating model... 0.048279070667922495
Train loss: 0.0483, Test loss: 0.0550 Test accuracy: 0.9775
Epoch: 4
Training model...
Evaluating model... 0.037957294882896044
Train loss: 0.0380, Test loss: 0.0491 Test accuracy: 0.9800
Epoch: 5
Training model...
Evaluating model... 0.033697292829553284
Train loss: 0.0337, Test loss: 0.0555 Test accuracy: 0.9754
Epoch: 6
Training model...
Evaluating model... 0.028678430772076055
Train loss: 0.0287, Test loss: 0.0536 Test accuracy: 0.9771
Epoch: 7
Training model...
Evaluating model... 0.026637696944332373
Train loss: 0.0266, Test loss: 0.0599 Test accuracy: 0.9754
Epoch: 8
Training model...
Evaluating model... 0.020683017931878565
Train loss: 0.0207, Test loss: 0.0593 