In [3]:
from javIA_oop import *
import cv2
from albumentations import Compose, RandomCrop, Normalize, Flip, Resize

AI framework by Javi based in PyTorch: 0.4.1


# Load data
See https://github.com/albu/albumentations/blob/master/notebooks/migrating_from_torchvision_to_albumentations.ipynb for a guide on migrating from torchvision to albumentations.

In [4]:
# Sizes and per-channel normalisation statistics shared by both pipelines,
# named once so the two definitions below cannot drift apart.
RESIZE_HW = (256, 256)
CROP_SIZE = 224
NORM_MEAN = (0.485, 0.456, 0.406)
NORM_STD  = (0.229, 0.224, 0.225)

# torchvision pipeline: resize -> random crop -> random flips -> tensor -> normalise.
# (A RandomRotation(degrees=90) step was left disabled in the original.)
_torch_steps = [
	transforms.Resize(RESIZE_HW),
	transforms.RandomCrop(CROP_SIZE),
	transforms.RandomHorizontalFlip(),
	transforms.RandomVerticalFlip(),
	transforms.ToTensor(),
	transforms.Normalize(mean=list(NORM_MEAN), std=list(NORM_STD)),
]
torch_transf = transforms.Compose(_torch_steps)


# albumentations pipeline: resize -> random crop -> flip -> normalise.
# Unlike torch_transf there is no tensor-conversion step here, and a single
# Flip(p=0.5) is used (separate HorizontalFlip / VerticalFlip steps were
# left disabled in the original).
_albu_steps = [
	Resize(*RESIZE_HW),
	RandomCrop(CROP_SIZE, CROP_SIZE),
	Flip(p=0.5),
	Normalize(mean=list(NORM_MEAN), std=list(NORM_STD)),
]
albu_transf = Compose(_albu_steps)

# PIL + torchvision

In [5]:
# Dataset locations: two candidate root folders, plus the image directory
# and label file, both taken from the SSD root.
data_dir_ssd = pathlib.Path("C:/Users/Javi/Desktop/Datasets/histologyDS2828")
data_dir_hdd = pathlib.Path("D:/Datasets/TFM/histologyDS2828")
imgs_dir = data_dir_ssd.joinpath("imgs")
csv_file = data_dir_ssd.joinpath("imageClasses.txt")


class histologyDataset(torch.utils.data.Dataset):
	"""Image/label dataset that opens images with PIL (torchvision-style transforms).

	Args:
		imgs_dir: Directory that contains the image files.
		csv_file: Headerless, whitespace-delimited text file; the first column
			is the image file name and the second column is the label.
		transform: Optional callable applied to the opened PIL image; its
			return value is what ``__getitem__`` yields as the image.
	"""

	def __init__(self, imgs_dir, csv_file, transform=None):
		# sep=r"\s+" is the documented replacement for the deprecated
		# delim_whitespace=True and parses the file the same way.
		self.df        = pd.read_csv(csv_file, header=None, sep=r"\s+", names=['Image', 'Label'])
		self.imgs_dir  = imgs_dir
		self.transform = transform

	def __len__(self):
		"""Return the number of rows (samples) in the label file."""
		return len(self.df)

	def __getitem__(self, idx):
		"""Return ``(image, label)`` for the row at position ``idx``.

		The label is the stored value minus one (presumably converting
		1-based labels to 0-based class indices -- confirm against the data).
		"""
		# Use the directory given to the constructor, not the notebook-level
		# `imgs_dir` global, so every instance reads from its own folder.
		img_name = pathlib.Path(self.imgs_dir) / self.df.iloc[idx, 0]
		image    = PIL.Image.open(img_name)
		if self.transform is not None:
			image = self.transform(image)
		label    = self.df.iloc[idx, 1] - 1
		return image, label

# PIL-loading dataset combined with the torchvision transform pipeline.
dataset1 = histologyDataset(imgs_dir=imgs_dir, csv_file=csv_file, transform=torch_transf)

# PIL + albumentations

In [6]:
# Dataset locations (root folders, image directory, label file).
data_dir_ssd = pathlib.Path("C:/Users/Javi/Desktop/Datasets/histologyDS2828")
data_dir_hdd = pathlib.Path("D:/Datasets/TFM/histologyDS2828")
imgs_dir = data_dir_ssd.joinpath("imgs")
csv_file = data_dir_ssd.joinpath("imageClasses.txt")


class histologyDataset2(torch.utils.data.Dataset):
	"""Image/label dataset that opens images with PIL and feeds them to an
	albumentations-style transform as numpy arrays.

	Args:
		imgs_dir: Directory that contains the image files.
		csv_file: Headerless, whitespace-delimited text file; the first column
			is the image file name and the second column is the label.
		transform: Optional callable invoked as ``transform(image=array)`` that
			returns a mapping with an ``'image'`` entry.
	"""

	def __init__(self, imgs_dir, csv_file, transform=None):
		# sep=r"\s+" is the documented replacement for the deprecated
		# delim_whitespace=True and parses the file the same way.
		self.df        = pd.read_csv(csv_file, header=None, sep=r"\s+", names=['Image', 'Label'])
		self.imgs_dir  = imgs_dir
		self.transform = transform

	def __len__(self):
		"""Return the number of rows (samples) in the label file."""
		return len(self.df)

	def __getitem__(self, idx):
		"""Return ``(image, label)`` for the row at position ``idx``.

		With a transform, the image is the transform's ``'image'`` output;
		without one, the opened PIL image is returned unchanged. The label is
		the stored value minus one (presumably 1-based -> 0-based; confirm).
		"""
		# Use the directory given to the constructor, not the notebook-level
		# `imgs_dir` global, so every instance reads from its own folder.
		img_name = pathlib.Path(self.imgs_dir) / self.df.iloc[idx, 0]
		image    = PIL.Image.open(img_name)
		if self.transform is not None:
			image_np  = np.array(image)                  # PIL image -> numpy array
			augmented = self.transform(image=image_np)   # apply transformations
			image     = augmented['image']
		label    = self.df.iloc[idx, 1] - 1
		return image, label

# PIL-loading dataset combined with the albumentations transform pipeline.
dataset2 = histologyDataset2(imgs_dir=imgs_dir, csv_file=csv_file, transform=albu_transf)

# OpenCV + albumentations

In [7]:
# Where the dataset lives: root folders, image directory and label file.
data_dir_ssd = pathlib.Path("C:/Users/Javi/Desktop/Datasets/histologyDS2828")
data_dir_hdd = pathlib.Path("D:/Datasets/TFM/histologyDS2828")
imgs_dir = data_dir_ssd.joinpath("imgs")
csv_file = data_dir_ssd.joinpath("imageClasses.txt")


class histologyDataset3(torch.utils.data.Dataset):
	"""Image/label dataset that reads images with OpenCV and feeds them to an
	albumentations-style transform.

	Args:
		imgs_dir: Directory that contains the image files.
		csv_file: Headerless, whitespace-delimited text file; the first column
			is the image file name and the second column is the label.
		transform: Optional callable invoked as ``transform(image=array)`` that
			returns a mapping with an ``'image'`` entry.
	"""

	def __init__(self, imgs_dir, csv_file, transform=None):
		# sep=r"\s+" is the documented replacement for the deprecated
		# delim_whitespace=True and parses the file the same way.
		self.df        = pd.read_csv(csv_file, header=None, sep=r"\s+", names=['Image', 'Label'])
		self.imgs_dir  = imgs_dir
		self.transform = transform

	def __len__(self):
		"""Return the number of rows (samples) in the label file."""
		return len(self.df)

	def __getitem__(self, idx):
		"""Return ``(image, label)`` for the row at position ``idx``.

		The image is read with ``cv2.imread`` and converted from BGR to RGB
		before the optional transform. The label is the stored value minus one
		(presumably 1-based -> 0-based; confirm).

		Raises:
			FileNotFoundError: If OpenCV could not read the image file.
		"""
		# Use the directory given to the constructor, not the notebook-level
		# `imgs_dir` global, so every instance reads from its own folder.
		img_name = pathlib.Path(self.imgs_dir) / self.df.iloc[idx, 0]
		image = cv2.imread(str(img_name))
		if image is None:
			# imread signals a missing/unreadable file by returning None;
			# fail here with the path instead of inside cvtColor.
			raise FileNotFoundError(f"Could not read image: {img_name}")
		image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
		if self.transform is not None:
			augmented = self.transform(image=image)
			image     = augmented['image']
		label    = self.df.iloc[idx, 1] - 1
		return image, label

# OpenCV-loading dataset combined with the albumentations transform pipeline.
dataset3 = histologyDataset3(imgs_dir=imgs_dir, csv_file=csv_file, transform=albu_transf)

In [8]:
batch_size = 64  # alternative value noted by the author: 128

# One DataLoader per dataset variant, all with the same batch size.
loader1, loader2, loader3 = (
	torch.utils.data.DataLoader(ds, batch_size=batch_size)
	for ds in (dataset1, dataset2, dataset3)
)

In [9]:
t = Timer()

# Iterate once over every loader, sampling the timer right after each pass.
# NOTE(review): t() presumably returns the time elapsed since the previous
# call (or since creation) -- confirm against the Timer implementation.
elapsed = []
for loader in (loader1, loader2, loader3):
    for batch in tqdm(loader):
        pass
    elapsed.append(t())
time1, time2, time3 = elapsed

print("PIL + torchvision:    ", time1)
print("PIL + albumentations: ", time2)
print("CV2 + albumentations: ", time3)

100%|█████████████████████████████████████████████████████████████████████████████████| 45/45 [00:18<00:00,  2.43it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 45/45 [00:25<00:00,  2.26it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 45/45 [00:21<00:00,  2.08it/s]


PIL + torchvision:     18.482799291610718
PIL + albumentations:  25.819448709487915
CV2 + albumentations:  21.685729265213013
