In [5]:
import pathlib

import pandas as pd
import PIL
import PIL.Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from   torchvision import transforms

# Histology dataset

In [6]:
class HistologyDataset(torch.utils.data.Dataset):
    """Histology image dataset indexed by an ``imageClasses.txt`` file.

    The index is read as a header-less, whitespace-separated table whose two
    columns are the image file name and a class id. One is subtracted from
    the class id on load so that labels line up with the keys of
    ``labels_map``. Images are looked up in the ``imgs`` sub-directory of
    the dataset root.

    Args:
        transforms: Optional callable applied to every PIL image returned by
            ``__getitem__``. Any falsy value (default ``False``) disables it.
        data_dir: Dataset root directory. ``None`` (default) falls back to
            ``DEFAULT_DATA_DIR``, the location originally hard-coded here.
    """

    DEFAULT_DATA_DIR = pathlib.Path("E:/Datasets/TFM/histologyDS2828")

    def __init__(self, transforms=False, data_dir=None):
        root = self.DEFAULT_DATA_DIR if data_dir is None else pathlib.Path(data_dir)
        csv_file = root / "imageClasses.txt"
        # sep=r"\s+" replaces the deprecated delim_whitespace=True.
        csv_df = pd.read_csv(csv_file, header=None, sep=r"\s+", names=["Image", "Label"])

        self.image_dir = root / "imgs"
        self.images = csv_df["Image"].values
        self.labels = (csv_df["Label"] - 1).values
        # NOTE: spellings are kept verbatim because other cells may key on these strings.
        self.labels_map = {0: "conective tissue", 1: "ephitelial tissue", 2: "muscular tissue", 3: "nervous tissue"}
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows in the index file."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is an RGB ``PIL.Image`` unless ``self.transforms`` is set,
        in which case it is whatever that callable returns.
        """
        img_path = self.image_dir / self.images[idx]
        # Image.open is lazy and keeps the file open until the pixels are
        # read. Converting inside the context manager forces the read and
        # releases the handle, and also guarantees three channels.
        with PIL.Image.open(img_path) as raw:
            image = raw.convert("RGB")
        if self.transforms:
            image = self.transforms(image)
        label = self.labels[idx]
        return image, label

# Per-channel normalisation statistics. These look like the widely used
# ImageNet values; [0.5, 0.5, 0.5] was the alternative noted for both.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training-time pipeline: take a random 420-pixel square crop, shrink it to
# 140 pixels, apply random horizontal/vertical flips, then convert to a
# tensor and normalise with the statistics above.
train_tmfs = transforms.Compose(
    [
        transforms.RandomCrop(420),
        transforms.Resize(140),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
)

# Built without transforms, so indexing it yields PIL images.
micro_ds = HistologyDataset()
print(f"There are {len(micro_ds)} images in the dataset.")

# Not enabled yet: attach the training transforms and wrap in a DataLoader.
#micro_ds.transforms = train_tmfs
#trainloader = torch.utils.data.DataLoader(micro_ds, batch_size=64, shuffle=True)

There are 2828 images in the dataset.


# Skin dataset

In [7]:
class SkinDataset(torch.utils.data.Dataset):
    """One subset of the ISIC-2017 skin-lesion dataset.

    Reads ``ground_truth_<subset>.csv`` from the dataset root and serves
    images from ``data_<subset>/<image_id>.jpg``.

    The ``melanoma`` and ``seborrheic_keratosis`` columns are combined into
    one class index that follows ``labels_map``: 0 for melanoma, 1 for
    seborrheic keratosis, 2 for neither. Returning the raw ``melanoma``
    flag, as before, disagreed with ``labels_map`` (flag 1 meant melanoma
    while key 0 is "melanoma") and never used the second column.

    Args:
        subset: Subset name used to build the CSV and image-folder names
            (the notebook uses "train", "valid" and "test").
        transforms: Optional callable applied to every PIL image returned by
            ``__getitem__``. Any falsy value (default ``False``) disables it.
        data_dir: Dataset root directory. ``None`` (default) falls back to
            ``DEFAULT_DATA_DIR``, the location originally hard-coded here.

    Attributes kept for backward compatibility: ``labels1`` (raw
    ``melanoma`` column) and ``labels2`` (raw ``seborrheic_keratosis``
    column).
    """

    DEFAULT_DATA_DIR = pathlib.Path("E:/Datasets/TFM/ISIC-2017")

    def __init__(self, subset, transforms=False, data_dir=None):
        root = self.DEFAULT_DATA_DIR if data_dir is None else pathlib.Path(data_dir)
        csv_file = root / ("ground_truth_" + subset + ".csv")
        csv_df = pd.read_csv(csv_file)

        self.image_dir = root / ("data_" + subset)
        self.images = (csv_df["image_id"] + ".jpg").values
        self.labels1 = csv_df["melanoma"].values
        self.labels2 = csv_df["seborrheic_keratosis"].values
        self.labels_map = {0: "melanoma", 1: "seborrheic", 2: "healthy"}
        self.transforms = transforms

        # Start every row as class 2 and overwrite flagged rows. Melanoma is
        # written last so it wins if a row has both flags set.
        # Assumes the flag columns hold 0/1 values; verify against the CSV.
        combined = pd.Series(2, index=csv_df.index, dtype="int64")
        combined.loc[csv_df["seborrheic_keratosis"] == 1] = 1
        combined.loc[csv_df["melanoma"] == 1] = 0
        self.labels = combined.values

    def __len__(self):
        """Return the number of rows in the ground-truth CSV."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        ``label`` is the combined class index (a key of ``labels_map``). The
        image is an RGB ``PIL.Image`` unless ``self.transforms`` is set, in
        which case it is whatever that callable returns.
        """
        img_path = self.image_dir / self.images[idx]
        # Image.open is lazy and keeps the file open until the pixels are
        # read. Converting inside the context manager forces the read and
        # releases the handle, and also guarantees three channels.
        with PIL.Image.open(img_path) as raw:
            image = raw.convert("RGB")
        if self.transforms:
            image = self.transforms(image)
        label = self.labels[idx]
        return image, label

# One SkinDataset per subset, keyed by subset name.
skin_ds = {subset: SkinDataset(subset) for subset in ["train", "valid", "test"]}

# Report subset sizes with a plain loop; a set comprehension run only for its
# print() side effects builds and discards a throwaway {None}.
for subset, subset_ds in skin_ds.items():
    print("There are", len(subset_ds), "images in the " + subset + " dataset.")

There are 2000 images in the train dataset.
There are 150 images in the valid dataset.
There are 600 images in the test dataset.
