In [52]:
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from utils_cells import get_images_list, transform_image, transform_target, resize_with_padding
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import numpy as np
import torchvision.transforms.functional as F
import torch
from torchvision import transforms
from torchvision.transforms import functional as F
import cv2
from sklearn.model_selection import train_test_split


class ImageDataset(Dataset):
    """Dataset of cell images described by per-class ``.txt`` index files.

    Every ``*.txt`` file found in ``data_path`` is handed to
    ``get_images_list`` to obtain image paths. The class label is parsed from
    the index file name: the second ``_``-separated piece with its last three
    characters removed (so ``..._normal.txt`` yields ``'normal.'`` -- the
    trailing dot is kept).

    Args:
        data_path: Directory containing the ``.txt`` index files.
        transform: Optional callable invoked as ``transform(image=img)`` and
            expected to return a mapping with an ``'image'`` entry.
        target_transform: Stored on the instance; not applied by this class.
        reduce: Accepted for backward compatibility; currently unused.
        random_state: Optional seed forwarded to ``sklearn.utils.shuffle`` so
            the row order can be reproduced.
    """

    # One-hot encodings keyed by the label parsed from the index file name.
    _ONE_HOT = {
        'normal.': (1, 0, 0, 0),
        'inflamatory.': (0, 1, 0, 0),
        'tumor.': (0, 0, 1, 0),
        'other.': (0, 0, 0, 1),
    }

    def __init__(self, data_path, transform=None, target_transform=None, reduce=False,
                 random_state=None):
        self.transform = transform
        self.target_transform = target_transform
        # shuffle() permutes the rows but keeps their original index labels;
        # rebuilding a 0..n-1 index makes position `idx` follow the shuffled
        # order instead of silently mapping back to the unshuffled row.
        shuffled = shuffle(self.load_dataset(data_path), random_state=random_state)
        self.dataset = shuffled.reset_index(drop=True)

    def load_dataset(self, path):
        """Return a DataFrame with ``filename`` and ``class`` columns.

        One row is produced per image path listed by each ``.txt`` index file
        in ``path``. An empty frame with both columns is returned when the
        directory holds no index files.
        """
        frames = []
        # Sorted so the pre-shuffle row order does not depend on the
        # arbitrary ordering of os.listdir().
        for filename in sorted(os.listdir(path)):
            if not filename.endswith('.txt'):
                continue
            frame = pd.DataFrame()
            frame['filename'] = get_images_list(os.path.join(path, filename))
            # Drops only "txt", so labels keep their trailing dot ('normal.').
            frame['class'] = filename.split('_')[1][:-3]
            frames.append(frame)

        if not frames:
            return pd.DataFrame({'filename': [], 'class': []})
        # Single concat instead of growing a frame inside the loop.
        return pd.concat(frames, ignore_index=True)

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``.

        ``image`` is a float tensor produced by ``to_tensor`` from the RGB
        image resized to 32x32 (after the optional ``transform``); ``target``
        is a float32 one-hot tensor of length 4.

        To inspect a transformed sample::

            image, target = trainset[15]
            image = image.numpy()
            image = np.swapaxes(image, 0, 1)
            image = np.swapaxes(image, 1, 2)
            plt.imshow(image)

        Raises:
            ValueError: If the row's class label is not one of the known labels.
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        row = self.dataset.iloc[idx]

        # Resolve the label first: it is cheap, and an unknown label should
        # fail loudly instead of printing and crashing later on an unbound name.
        label = row['class']
        try:
            one_hot = self._ONE_HOT[label]
        except KeyError:
            raise ValueError(
                f'Unknown class label {label!r}; expected one of {sorted(self._ONE_HOT)}'
            ) from None

        path = row['filename']
        image = cv2.imread(f'{path}')
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f'Could not read image: {path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (32, 32), interpolation=cv2.INTER_CUBIC)
        image = image.astype(np.float32)
        if self.transform is not None:
            image = self.transform(image=image)['image']

        image = F.to_tensor(image)

        return image.float(), torch.tensor(one_hot, dtype=torch.float32)





In [53]:
# Build the dataset from the 'train_data' directory with no transform attached.
trainset = ImageDataset(data_path='train_data')

In [17]:
# Walk over every sample and remember the largest pixel value encountered;
# the running maximum starts at 0 and is only replaced by strictly larger values.
max_val = 0
for sample_idx in range(len(trainset)):
    pixel_peak = trainset[sample_idx][0].max().item()
    max_val = max(max_val, pixel_peak)
max_val

1.0

In [27]:
# Per-channel statistics, accumulated one image at a time.
# NOTE: `std` ends up as the average of the per-image standard deviations,
# which is not the same quantity as the standard deviation over all pixels.
n_samples = len(trainset)
mean = 0
std = 0
for sample_idx in range(n_samples):
    sample = trainset[sample_idx][0]
    # Reduce over the two trailing (spatial) dims, leaving one value per channel.
    mean = mean + sample.mean(dim=(1, 2))
    std = std + sample.std(dim=(1, 2))

mean = mean / n_samples
std = std / n_samples

print(mean)
print(std)

tensor([127.6431,  89.9149, 140.1093])
tensor([38.0793, 34.1893, 28.6506])


In [46]:
import torch
def get_mean_std(loader, per_channel=False):
    """Compute the mean and standard deviation of all pixels served by ``loader``.

    Statistics are accumulated as running sums and sums of squares, so the
    result is exact over the whole dataset regardless of how it is batched.
    The standard deviation is the population value (divides by N).

    Args:
        loader: Iterable yielding ``(images, target)`` pairs where ``images``
            is a 4-D tensor indexed as (batch, channel, height, width).
        per_channel: When False (default) return scalar statistics over every
            pixel of every channel; when True return one value per channel.

    Returns:
        ``(mean, std)`` as float32 tensors: 0-dim by default, or of shape
        ``(channels,)`` when ``per_channel`` is True.

    Raises:
        ValueError: If the loader yields no pixels.
    """
    num_pixels = 0  # pixels per channel seen so far (batch * height * width)
    channel_sum = None
    channel_sq_sum = None
    for images, _ in loader:
        batch_size, _num_channels, height, width = images.shape
        num_pixels += batch_size * height * width

        # Accumulate in float64 so large sums of squares keep their precision.
        pixels = images.double()
        batch_sum = pixels.sum(dim=(0, 2, 3))
        batch_sq_sum = (pixels ** 2).sum(dim=(0, 2, 3))
        channel_sum = batch_sum if channel_sum is None else channel_sum + batch_sum
        channel_sq_sum = (
            batch_sq_sum if channel_sq_sum is None else channel_sq_sum + batch_sq_sum
        )

    if num_pixels == 0:
        raise ValueError('loader yielded no pixels; cannot compute statistics')

    if per_channel:
        count = num_pixels
        total, sq_total = channel_sum, channel_sq_sum
    else:
        count = num_pixels * channel_sum.numel()
        total, sq_total = channel_sum.sum(), channel_sq_sum.sum()

    mean = total / count
    # Var = E[x^2] - E[x]^2; clamp guards against tiny negative round-off.
    variance = (sq_total / count - mean ** 2).clamp(min=0)
    std = variance.sqrt()

    return mean.float(), std.float()

# torchvision pipeline: resize to 224x224, then convert to a tensor.
# It is only defined in this cell; it is not passed to `trainset` or the loader.
resize_then_tensor = [transforms.Resize((224, 224)), transforms.ToTensor()]
data_transforms = transforms.Compose(resize_then_tensor)

# Shuffled mini-batches over the current `trainset`.
batch_size = 32
loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)


In [47]:
# Per-channel statistics accumulated batch by batch.
# NOTE: `std` is the average of per-image standard deviations, not the
# standard deviation over all pixels of the dataset.
mean = 0.0
std = 0.0
n_images = 0
for images, _ in loader:
    # Collapse the spatial dims: (batch, channels, H, W) -> (batch, channels, H*W)
    images = images.view(images.size(0), images.size(1), -1)

    # Count the images actually served by the loader
    n_images += images.size(0)

    # Per-image channel mean/std, summed over the batch
    mean += images.mean(2).sum(0)
    std += images.std(2).sum(0)

# Average over the images that were actually iterated; this stays correct even
# if the loader drops or subsamples items, unlike dividing by len(trainset).
mean /= n_images
std /= n_images

print(f'Mean: {mean}')
print(f'Std: {std}')

Mean: tensor([-2.4241e-04,  7.3988e-05, -4.0785e-04])
Std: tensor([1.0002, 0.9998, 0.9996])


In [55]:
from albumentations import (
    Compose,
    Resize,
    OneOf,
    RandomBrightness,
    RandomContrast,
    MotionBlur,
    MedianBlur,
    GaussianBlur,
    VerticalFlip,
    HorizontalFlip,
    ShiftScaleRotate,
    Normalize,
Lambda
)

# Per-channel constants (defined here; not used by `transform_test` below).
std = [0.1493, 0.1341, 0.1124]
mean = [0.5006, 0.3526, 0.5495]

# Sanity check: rebuild the dataset with Normalize(mean=0, std=1) and scan the
# resulting pixel range across every sample.
transform_test = Compose([Normalize(mean=0, std=1)])
trainset = ImageDataset(data_path='train_data', transform=transform_test)

max_val, min_val = 0, 1000
for idx in range(len(trainset)):
    img = trainset[idx][0]
    img_max = torch.max(img)
    img_min = torch.min(img)
    # The extremes are stored as 0-dim tensors, compared via their Python value.
    if img_max.item() > max_val:
        max_val = img_max
    if img_min.item() < min_val:
        min_val = img_min

max_val

tensor(1.)

In [56]:
# Display the smallest pixel value recorded in `min_val`.
min_val

tensor(0.)