In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import re
from scipy import stats
import warnings
warnings.filterwarnings("ignore")
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from captum.attr import IntegratedGradients
from sklearn.model_selection import KFold

In [3]:
input1 = torch.load('./test1_ae_cnn/input1.pt')
input2 = torch.load('./test1_ae_cnn/padded_latent_features.pt')
input2 = torch.stack(input2)
input2 = input2.unsqueeze(1)
labels = torch.load('./test1_ae_cnn/input_labels.pt')

In [4]:
class InputsDataset(Dataset):
    def __init__(self, input1, input2, labels):
        self.input1 = input1
        self.input2 = input2
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {
            'input1': self.input1[idx],
            'input2': self.input2[idx],
            'label': self.labels[idx]
        }
        return sample

In [5]:
class TwoBranchCNNRegressor(nn.Module):
    def __init__(self, input_channels1, input_channels2, output_size):
        super(TwoBranchCNNRegressor, self).__init__()

        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels=input_channels1, out_channels=32, kernel_size=4, padding=(4, 4)),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=4),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, padding=(4, 4)),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=4),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, padding=(4, 4)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4),
            nn.Dropout(0.5)
        )
        
        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channels=input_channels2, out_channels=16, kernel_size=(5, 5), padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(kernel_size=(3, 3), stride=2),  
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(5, 5), padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=(3, 3), stride=2),  
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(5, 5), padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=(3, 3), stride=2), 
            nn.Dropout(0.6)  
        )

        self.fc_input_size = self.calculate_fc_input_size(input_channels1, 100, 4, input_channels2, 133, 32)
        self.fc = nn.Linear(self.fc_input_size, 64)

        self.output_layer = nn.Linear(64, output_size)

    def forward(self, x1, x2):
        x1 = self.branch1(x1)
    
        x2 = self.branch2(x2)
    
        x1 = x1.view(x1.size(0), -1)
        x2 = x2.view(x2.size(0), -1)
    
        x = torch.cat((x1, x2), dim=1)
        x = self.fc(x)
        output = self.output_layer(x)
    
        return output

    def calculate_fc_input_size(self, in_channels1, height1, width1, in_channels2, height2, width2):
        input_data1 = torch.randn(1, in_channels1, height1, width1)
        input_data2 = torch.randn(1, in_channels2, height2, width2)
    
        output1 = self.branch1(input_data1)
        output2 = self.branch2(input_data2)
    
        output1_flat = output1.numel() / output1.shape[0]  
        output2_flat = output2.numel() / output2.shape[0]  
    
        return int(output1_flat + output2_flat) 

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = torch.load('./test2_ae_cnn/p04_CNN_model_0.pt')
model.to(device)
model.eval()

TwoBranchCNNRegressor(
  (branch1): Sequential(
    (0): Conv2d(4, 32, kernel_size=(4, 4), stride=(1, 1), padding=(4, 4))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(4, 4), stride=(1, 1), padding=(4, 4))
    (5): ReLU()
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(4, 4), stride=(1, 1), padding=(4, 4))
    (9): ReLU()
    (10): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (11): Dropout(p=0.5, inplace=False)
  )
  (branch2): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


In [8]:
input_dataset = InputsDataset(input1, input2, labels)

In [21]:
model.eval()

input_loader = DataLoader(input_dataset, batch_size=64, shuffle=True, drop_last=True)

N = 0
for batch in input_loader:
    X1 = batch['input1'].to(device)
    X2 = batch['input2'].to(device)
    y_edit = batch['label'].to(device)
    print(f"Shape of X1: {X1.shape}")
    print(f"Shape of X2: {X2.shape}")

    ig = IntegratedGradients(model)
    
    baseline = (torch.zeros_like(X1), torch.zeros_like(X2))
    
    attributions, delta = ig.attribute((X1, X2), baseline, target=0, return_convergence_delta=True)
    
    attributions_np1 = attributions[0].cpu().detach().numpy()
    attributions_np2 = attributions[1].sum(dim=1).cpu().detach().numpy()
    
    data1 = np.mean(attributions_np1, axis=0)
    data2 = np.mean(attributions_np2, axis=0)
    
    np.save('plots/p22_IG_input1_'+str(N)+'.npy', data1)
    np.save('plots/p22_IG_input2_'+str(N)+'.npy', data2)
    
    N = N + 1


Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shape of X1: torch.Size([64, 4, 100, 4])
Shape of X2: torch.Size([64, 1, 133, 32])
Shap

KeyboardInterrupt: 

In [9]:
import torch
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import r2_score
batch_size = 64
test_loader = DataLoader(input_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

criterion = torch.nn.MSELoss()  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model.to(device)

all_labels = []
all_predictions = []

test_loss = 0
num_batches = 0

with torch.no_grad():
    for data in test_loader:
        input1 = data['input1'].to(device)
        input2 = data['input2'].to(device)
        labels = data['label'].to(device)

        outputs = model(input1, input2)
        loss = criterion(outputs, labels)

        test_loss += loss.item()
        num_batches += 1

        # Store predictions and labels
        all_labels.append(labels.cpu().numpy())
        all_predictions.append(outputs.cpu().numpy())

average_test_loss = test_loss / num_batches
print(f'Average Test Loss: {average_test_loss:.4f}')

all_labels = np.concatenate(all_labels)
all_predictions = np.concatenate(all_predictions)

r_squared = r2_score(all_labels, all_predictions)
print(f"R-squared: {r_squared:.4f}")

pearson_corr, _ = pearsonr(all_labels, all_predictions)
print(f"Pearson's Correlation: {pearson_corr:.4f}")

spearman_corr, _ = spearmanr(all_labels, all_predictions)
print(f"Spearman's Correlation: {spearman_corr:.4f}")

Average Test Loss: 0.0033
R-squared: 0.5423


ValueError: shapes (724288,1) and (724288,1) not aligned: 1 (dim 1) != 724288 (dim 0)