In [1]:
from data.dataset import get_ae_loader

import pickle
import numpy as np
import torch
import torch.nn as nn   
import torchvision.transforms as T
from torchsummary import summary

from tqdm import tqdm

from sklearn.metrics import ndcg_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Number of patients; used as the side length of the pairwise matrices
# and as the length of the per-patient score vector.
number_patient = 200

class Autoencoder(nn.Module):
    """Convolutional autoencoder for 5-channel images.

    The encoder stacks five stride-2 ``Conv2d`` layers (each followed by
    ``LeakyReLU`` and ``BatchNorm2d``), flattens the result and projects it to a
    ``latent_dim``-sized vector. The decoder mirrors this: a linear layer back to
    a ``256 x 3 x 4`` feature map, five stride-2 ``ConvTranspose2d`` layers, and
    a final ``Tanh``.

    Because the linear layers are sized for a ``256 x 3 x 4`` bottleneck and each
    transposed convolution doubles the spatial size, the decoder always emits
    tensors of shape ``(N, 5, 96, 128)``; reconstruction training therefore
    needs inputs of that same shape.

    Args:
        latent_dim: Size of the latent vector returned by :meth:`encode`.
    """

    def __init__(self, latent_dim=256):
        super(Autoencoder, self).__init__()
        self.latent_dim = latent_dim

        # NOTE(review): in PyTorch, BatchNorm ``momentum`` is the weight given to
        # the *new* batch statistic, so 0.9 makes the running stats track the
        # latest batch closely. Kept as-is; confirm this is intended.
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels=5, out_channels=32, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.BatchNorm2d(32, momentum=0.9),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.BatchNorm2d(64, momentum=0.9),
            nn.Conv2d(64, 128, 3, stride=2, padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.BatchNorm2d(128, momentum=0.9),
            nn.Conv2d(128, 256, 3, stride=2, padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.BatchNorm2d(256, momentum=0.9),
            nn.Conv2d(256, 256, 3, stride=2, padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.BatchNorm2d(256, momentum=0.9),
            nn.Flatten(),
            nn.Linear(256 * 3 * 4, self.latent_dim)
        )

        self.decoder = nn.Sequential(
            nn.Linear(self.latent_dim, 256 * 3 * 4),
            nn.Unflatten(1, (256, 3, 4)),
            # The input here is the 256-channel map produced by Unflatten, so
            # in_channels must be 256 regardless of latent_dim (it used to be
            # latent_dim, which only worked for the default value of 256).
            nn.ConvTranspose2d(256, 256, 3, stride=2, padding=1, output_padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.BatchNorm2d(256, momentum=0.9),
            nn.ConvTranspose2d(256, 128, 3, stride=2, padding=1, output_padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.BatchNorm2d(128, momentum=0.9),
            nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.BatchNorm2d(64, momentum=0.9),
            nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.BatchNorm2d(32, momentum=0.9),
            nn.ConvTranspose2d(32, 5, 3, stride=2, padding=1, output_padding=1),
            # Outputs are squashed to [-1, 1]; presumably the inputs are
            # normalised to the same range -- confirm against the data loader.
            nn.Tanh()
        )

    def encode(self, x):
        """Return the latent vectors ``(N, latent_dim)`` for a batch ``x``."""
        return self.encoder(x)

    def forward(self, x):
        """Encode ``x`` and decode it again, returning the reconstruction."""
        return self.decoder(self.encoder(x))

    def train_reconstruction(self, loader, epochs=10, lr=0.001):
        """Train the autoencoder to reconstruct its input with an MSE loss.

        The model is moved to the module-level ``device`` and put in training
        mode; it is left in that state when the method returns.

        Args:
            loader: Iterable yielding input batches (tensors) and supporting
                ``len()`` (used only for progress messages).
            epochs: Number of passes over ``loader``.
            lr: Learning rate for the Adam optimizer.
        """
        self.to(device)
        self.train()
        optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        criterion = nn.MSELoss()
        for epoch in range(epochs):
            for i, x in enumerate(loader):
                x = x.to(device)
                optimizer.zero_grad()
                x_reconstructed = self.forward(x)
                loss = criterion(x_reconstructed, x)
                loss.backward()
                optimizer.step()
                # Report the loss every 100 batches.
                if i % 100 == 0:
                    print(f"Epoch {epoch}, batch {i}/{len(loader)}, loss {loss.item()}")

## Loading encoder

In [3]:
# Restore the saved model onto the CPU and switch it to inference mode.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. The file appears to hold a whole module (``.eval()`` is called
# on it directly), so presumably the Autoencoder class must already be defined
# in this session -- confirm.
ae = torch.load("./models/model.pth", map_location=torch.device('cpu')).eval()

# Build the data loader that feeds the encoder, and display it.
ae_dataset = get_ae_loader()
ae_dataset

100%|██████████| 200/200 [00:02<00:00, 74.71it/s]


<torch.utils.data.dataloader.DataLoader at 0x1c1f544ebf0>

## Getting similarity matrix

In [4]:
def get_features_matrix_for ():
    """
    Encode every batch of ``ae_dataset`` with ``ae`` and stack the results.

    Reads the module-level ``ae`` (the loaded model) and ``ae_dataset`` (the
    data loader).

    Returns:
        torch.Tensor: the encoder outputs of all batches concatenated along
        dim 0, in loader order. Presumably one row per patient -- confirm
        against the loader.
    """

    # Inference only: without no_grad() each encoded batch would keep its
    # autograd graph alive and the result would require grad.
    with torch.no_grad():
        encoded_batches = [ae.encode(batch) for batch in ae_dataset]

    return torch.cat(encoded_batches, dim=0)

def get_similarity_matrix_for () :
    """
    Build the pairwise matrix of L2 norms between patient feature rows.

    Entry ``[i, j]`` is ``torch.linalg.norm(features[i] - features[j], ord=2)``
    for the first ``number_patient`` rows of the features matrix, so smaller
    values mean closer patients and the diagonal is zero.

    Returns:
        np.ndarray: array of shape ``(number_patient, number_patient)``.
    """

    features = get_features_matrix_for()
    pairwise = np.zeros((number_patient, number_patient))

    for row in range(number_patient):
        anchor = features[row]
        for col in range(number_patient):
            pairwise[row, col] = torch.linalg.norm(anchor - features[col], ord=2)

    return pairwise

def get_most_similar_patient (similarity_matrix) :
    """
    Rank, for every row, the column indices from smallest to largest value.

    The size is taken from ``similarity_matrix`` itself rather than from the
    global ``number_patient``, so the function works for any 2-D input.

    Args:
        similarity_matrix: 2-D array-like; row ``i`` holds the values of
            patient ``i`` against every other patient.

    Returns:
        np.ndarray: float array with the same shape as the input, where row
        ``i`` is ``np.argsort`` of input row ``i`` (ascending order).
    """

    values = np.asarray(similarity_matrix)

    # Indices are returned as floats to keep the dtype callers already receive.
    return np.argsort(values, axis=1).astype(float)

def get_sort_similar (similarity_matrix) :
    """
    Sort the values of every row in ascending order.

    The size is taken from ``similarity_matrix`` itself rather than from the
    global ``number_patient``, so the function works for any 2-D input. The
    input is not modified.

    Args:
        similarity_matrix: 2-D array-like of numeric values.

    Returns:
        np.ndarray: float array with the same shape as the input, where row
        ``i`` is input row ``i`` sorted from smallest to largest.
    """

    values = np.asarray(similarity_matrix, dtype=float)

    # np.sort returns a sorted copy, leaving the caller's matrix untouched.
    return np.sort(values, axis=1)

In [5]:
# Pairwise matrix between patients (L2 norms, so lower means closer).
similarity = get_similarity_matrix_for()

# Per patient: the other patients' indices ordered by ascending value ...
most_relevent_candidate = get_most_similar_patient(similarity)

# ... and the corresponding values in the same ascending order.
most_relevent_candidate_value = get_sort_similar(similarity)

## NDCG - Model AE

In [6]:
# One NDCG value per patient, stored as a single row.
ndcg_model_scores = np.zeros((1, number_patient))

# Column 0 of each sorted row is skipped; presumably it is the patient itself
# (zero distance to itself) -- confirm.
# NOTE(review): the sorted patient indices are passed as ndcg_score's first
# argument (y_true) and the ascending sorted values as its second (y_score);
# verify that this is the intended relevance/score pairing.
for patient in tqdm(range(number_patient)):
    ndcg_model_scores[0, patient] = ndcg_score(
        [most_relevent_candidate[patient, 1:]],
        [most_relevent_candidate_value[patient, 1:]],
    )


100%|██████████| 200/200 [00:00<00:00, 1526.62it/s]


In [7]:
# Display the full (1, number_patient) array of per-patient NDCG scores.
ndcg_model_scores

array([[0.83961056, 0.88383553, 0.87054429, 0.87591383, 0.86895926,
        0.87228161, 0.87405983, 0.8715807 , 0.87778205, 0.86146356,
        0.87618349, 0.86175342, 0.87179015, 0.87087355, 0.87416316,
        0.87109873, 0.88181062, 0.87906211, 0.87703926, 0.87660961,
        0.87539403, 0.88628887, 0.8778913 , 0.87270752, 0.881306  ,
        0.8753331 , 0.87232484, 0.87466896, 0.86848021, 0.86587195,
        0.87024061, 0.86935318, 0.87367539, 0.86381865, 0.85543065,
        0.87309472, 0.85493976, 0.87354282, 0.87598589, 0.86634701,
        0.87361909, 0.85600795, 0.867853  , 0.8747394 , 0.86094504,
        0.87329141, 0.87908234, 0.87693997, 0.88187834, 0.86732479,
        0.86497676, 0.88419229, 0.86844839, 0.87458724, 0.87405688,
        0.8715949 , 0.87986657, 0.87968832, 0.86956496, 0.87150605,
        0.87430771, 0.8712772 , 0.87034001, 0.88549137, 0.87215601,
        0.86589508, 0.8721716 , 0.87357334, 0.88613337, 0.8626332 ,
        0.87064182, 0.85760631, 0.87522236, 0.86

In [8]:
# Persist the per-patient NDCG scores as a pickle for later use.
# NOTE(review): the file name says "early_fusion" while this section is titled
# "NDCG - Model AE" -- confirm the destination path is the intended one.
with open("./data/early_fusion_ndcg.pickle", "wb") as out_file:
    pickle.dump(ndcg_model_scores, out_file)