In [1]:
from pathlib import Path

import pandas as pd
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm

In [2]:
# Read the label and metadata tables from the shared data directory.
train_labels = pd.read_csv('../data/train_labels.csv')
train_metadata = pd.read_csv('../data/train_metadata.csv')

# Inner join on 'filename': only rows present in both tables are kept.
# (The dangling line-continuation backslash was removed; it only parsed
# because the following line happened to be blank.)
train = train_metadata.merge(train_labels, on='filename', how='inner')

print(train.shape)

(1342, 14)


In [3]:
class VisioMel_Dataset(Dataset):
    """Dataset that yields one transformed image per row of ``data``.

    Every row's ``filename`` value names a ``<filename>.npy`` array stored in
    ``image_dir``. Items are the transformed image only; the ``relapse``
    column is kept on ``self.y`` (it determines the dataset length) but is
    not returned by ``__getitem__``.
    """

    def __init__(self, data, image_dir='D:/VictorCallejas/images/'):
        """Store the file names / labels and build the evaluation transform.

        Args:
            data: DataFrame providing the ``'filename'`` and ``'relapse'``
                columns.
            image_dir: Directory containing the ``.npy`` image arrays. The
                default is the location that used to be hard-coded, so
                existing ``VisioMel_Dataset(data)`` calls behave as before.
        """
        # Renumber rows 0..n-1 so the integer indices used in __getitem__
        # match the Series labels regardless of the caller's index.
        data = data.reset_index(drop=True)

        self.filenames, self.y = data['filename'], data['relapse']
        self.image_dir = Path(image_dir)

        # Convert the loaded array to a tensor, then resize every image to a
        # common 4096x4096 spatial size.
        self.eval_t = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((4096, 4096)),
        ])

    def __len__(self):
        """Return the number of samples (one per label row)."""
        return len(self.y)

    def __getitem__(self, index):
        """Load ``<image_dir>/<filename>.npy`` for ``index`` and transform it."""
        page = np.load(self.image_dir / f'{self.filenames[index]}.npy')
        return self.eval_t(page)

        

In [4]:
# Wrap the merged training table so every image can be streamed exactly once.
image_dataset = VisioMel_Dataset(train)

# One image per batch, original row order, loading done in the main process.
image_data_loader = DataLoader(image_dataset, batch_size=1, shuffle=False, num_workers=0)

In [5]:
def batch_mean_and_sd(image_data_loader):
    """Compute the per-channel mean and standard deviation over all pixels.

    Streams the loader once, accumulating per-channel sums of ``x`` and
    ``x ** 2``; the result is the population statistic
    ``std = sqrt(E[x^2] - E[x]^2)`` taken over every pixel of every image.

    Args:
        image_data_loader: Iterable yielding 4-D batches laid out as
            ``(batch, channels, height, width)``.

    Returns:
        Tuple ``(mean, std)`` of 1-D tensors with one entry per channel.

    Raises:
        ValueError: If the loader yields no pixels at all.
    """
    cnt = 0
    channel_sum = None
    channel_sq_sum = None
    out_dtype = None

    # tqdm picks up the total from len() itself when the iterable has one.
    for images in tqdm(image_data_loader):
        b, c, h, w = images.shape

        if not images.is_floating_point():
            # Squaring integer pixels (e.g. uint8) would silently wrap around.
            images = images.to(torch.float64)

        if channel_sum is None:
            # Zero-initialised accumulators sized from the data. The previous
            # torch.empty(3) held uninitialised memory, and 0 * NaN/inf on the
            # first step could poison every result.
            channel_sum = torch.zeros(c, dtype=torch.float64)
            channel_sq_sum = torch.zeros(c, dtype=torch.float64)
            out_dtype = torch.promote_types(images.dtype, torch.get_default_dtype())

        # Per-batch reductions are tiny (one value per channel); keeping the
        # running totals in float64 limits cancellation error in the variance.
        channel_sum += torch.sum(images, dim=[0, 2, 3]).to('cpu', torch.float64)
        channel_sq_sum += torch.sum(images ** 2, dim=[0, 2, 3]).to('cpu', torch.float64)
        cnt += b * h * w

    if cnt == 0:
        raise ValueError("image_data_loader yielded no pixels; cannot compute mean/std")

    mean = channel_sum / cnt
    # Rounding can push E[x^2] - E[x]^2 slightly below zero for near-constant
    # channels; clamp so sqrt never returns NaN.
    variance = torch.clamp(channel_sq_sum / cnt - mean ** 2, min=0.0)
    std = torch.sqrt(variance)
    return mean.to(out_dtype), std.to(out_dtype)
  
# Single full pass over every image (about two hours in the recorded run),
# yielding the per-channel statistics printed below.
mean, std = batch_mean_and_sd(image_data_loader)
print("mean and std: \n", mean, std)

100%|██████████| 1342/1342 [2:09:19<00:00,  5.78s/it]  

mean and std: 
 tensor([0.8933, 0.8569, 0.8683]) tensor([0.1107, 0.1622, 0.1459])



