In [None]:
import tarfile as tf
import os
import torch
from PIL import Image
from random import shuffle
import numpy as np
from torch.utils.data import Dataset
import torchvision.transforms as T
from matplotlib.image import imread

In [None]:
def extData(path, dest):
    """Extract the tar archive at ``path`` into ``dest`` unless ``dest`` exists.

    The existence of the ``dest`` directory is the only "already extracted"
    check, so a directory left behind by an interrupted extraction is treated
    as complete.

    Args:
        path: Path of the tar archive; compression is auto-detected by
            ``tarfile.open``.
        dest: Directory the archive members are extracted into.

    Raises:
        OSError, tarfile.TarError: Propagated when the archive cannot be
            opened or extracted.
    """
    if os.path.isdir(dest):
        print("Data already present.")
        return

    print("Extracting Data...")
    # The context manager closes the archive even when extraction fails,
    # which the previous explicit close() call did not guarantee.
    with tf.open(path) as f:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only use this on archives from a trusted source.
        f.extractall(dest)

In [None]:
# Unpack the patch archive from Drive; this is a no-op when the data folder already exists.
extData(
    path='/content/drive/MyDrive/Patches.tar.gz',
    dest='/content/drive/MyDrive/data',
)

Data already present.


In [None]:
class CustomDataSet(Dataset):
    """Dataset of bags, where every image folder becomes one bag of patches.

    The directory layout read by this class is::

        main_dir/<label>/<image>/<patch file>

    ``<label>`` must be a directory name accepted by ``int`` and the
    sub-folders ``0`` and ``1`` must exist. All patches are read and
    transformed eagerly at construction time and the bags are shuffled once.

    Args:
        main_dir: Root directory containing the label folders.
        transform: Callable applied to the array returned by
            ``matplotlib.image.imread`` for every patch.
    """

    def __init__(self, main_dir, transform):
        self.main_dir = main_dir
        self.transform = transform
        # NOTE(review): train_num / test_num are not read anywhere in this
        # class; presumably intended split sizes - confirm against callers.
        self.train_num = 89
        self.test_num = 10
        # Number of entries found in the two class folders.
        self.num0 = len(os.listdir(os.path.join(main_dir, '0')))
        self.num1 = len(os.listdir(os.path.join(main_dir, '1')))
        self.bags, self.labels = self._create_bags()

    def _create_bags(self):
        """Read every patch from disk and return shuffled ``(bags, labels)``.

        Returns:
            Two equally long tuples: the bags (each a list of transformed
            patches) and the labels (one-element float tensors built from the
            label folder name). Both are empty tuples when nothing is found.
        """
        bag_list = []
        label_list = []
        # Use the directory given to this instance rather than a notebook
        # global, and sort listings so the pre-shuffle order is deterministic.
        for folder_label in sorted(os.listdir(self.main_dir)):
            label_dir = os.path.join(self.main_dir, folder_label)
            if not os.path.isdir(label_dir):
                continue  # stray files next to the label folders
            for img in sorted(os.listdir(label_dir)):
                img_dir = os.path.join(label_dir, img)
                if not os.path.isdir(img_dir):
                    continue  # stray files next to the image folders
                print("current image:", img)
                bag = [
                    self.transform(imread(os.path.join(img_dir, patch)))
                    for patch in sorted(os.listdir(img_dir))
                ]
                bag_list.append(bag)
                label_list.append(torch.Tensor([int(folder_label)]))

        if not bag_list:
            # zip(*[]) yields nothing to unpack, so handle "no bags" explicitly.
            return (), ()

        # Shuffle bags and labels together so each pair stays aligned.
        paired = list(zip(bag_list, label_list))
        shuffle(paired)
        shuffled_bags, shuffled_labels = zip(*paired)
        return shuffled_bags, shuffled_labels

    def __len__(self):
        """Return the number of bags."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(bag, label)`` pair stored at position ``idx``."""
        return self.bags[idx], self.labels[idx]

In [None]:
# Transform applied to every patch array read by CustomDataSet.
tran =T.ToTensor()
# Constructing the dataset reads and transforms every patch under the
# Patches folder, so this cell can take a while on a large folder.
a = CustomDataSet('/content/drive/MyDrive/data/Patches', tran)

In [None]:
# Show the first (bag, label) pair of the shuffled dataset.
a[0]

In [None]:
"""Pytorch dataset object that loads MNIST dataset as bags."""

import numpy as np
import torch
import torch.utils.data as data_utils
from torchvision import datasets, transforms


class MnistBags(data_utils.Dataset):
    """MNIST regrouped into randomly sized bags of images.

    Each bag stacks images drawn (with replacement) from one MNIST split. A
    bag is positive when at least one of its images shows ``target_number``.

    Args:
        target_number: Digit that makes an instance, and hence its bag, positive.
        mean_bag_length: Mean of the normal distribution bag sizes are drawn from.
        var_bag_length: Passed to ``RandomState.normal`` as the scale
            (standard deviation) of that distribution, despite its name.
        num_bag: Number of bags to generate.
        seed: Seed of the private ``numpy.random.RandomState`` used for sampling.
        train: Use the MNIST training split when truthy, otherwise the test split.
    """

    def __init__(self, target_number=9, mean_bag_length=10, var_bag_length=2, num_bag=250, seed=1, train=True):
        self.target_number = target_number
        self.mean_bag_length = mean_bag_length
        self.var_bag_length = var_bag_length
        self.num_bag = num_bag
        self.train = train

        self.r = np.random.RandomState(seed)

        # Sizes of the two MNIST splits; used as loader batch size and as the
        # exclusive upper bound when sampling instance indices.
        self.num_in_train = 60000
        self.num_in_test = 10000

        if self.train:
            self.train_bags_list, self.train_labels_list = self._create_bags()
        else:
            self.test_bags_list, self.test_labels_list = self._create_bags()

    def _split_size(self):
        """Return the number of images in the selected split."""
        return self.num_in_train if self.train else self.num_in_test

    def _load_split(self):
        """Load the whole selected MNIST split as ``(images, labels)`` tensors."""
        dataset = datasets.MNIST('../datasets',
                                 train=bool(self.train),
                                 download=True,
                                 transform=transforms.Compose([
                                     transforms.ToTensor(),
                                     # presumably the MNIST mean/std - TODO confirm
                                     transforms.Normalize((0.1307,), (0.3081,))]))
        # batch_size equals the split size, so the first batch is the full split.
        loader = data_utils.DataLoader(dataset,
                                       batch_size=self._split_size(),
                                       shuffle=False)
        all_imgs, all_labels = next(iter(loader))
        return all_imgs, all_labels

    def _create_bags(self):
        """Sample ``num_bag`` bags from the selected split.

        Returns:
            ``(bags_list, labels_list)``: per bag, the stacked image tensor and
            a boolean tensor marking which instances equal ``target_number``.
        """
        all_imgs, all_labels = self._load_split()
        split_size = self._split_size()

        bags_list = []
        labels_list = []

        for _ in range(self.num_bag):
            # np.int was removed in NumPy 1.24; take the single drawn sample
            # explicitly and truncate it with the built-in int().
            drawn = self.r.normal(self.mean_bag_length, self.var_bag_length, 1)
            bag_length = max(int(drawn[0]), 1)  # every bag holds at least one instance

            indices = torch.LongTensor(self.r.randint(0, split_size, bag_length))

            bags_list.append(all_imgs[indices])
            labels_list.append(all_labels[indices] == self.target_number)

        return bags_list, labels_list

    def __len__(self):
        """Return the number of bags in the active split."""
        if self.train:
            return len(self.train_labels_list)
        return len(self.test_labels_list)

    def __getitem__(self, index):
        """Return ``(bag, [bag_label, instance_labels])`` for bag ``index``."""
        if self.train:
            bag = self.train_bags_list[index]
            instance_labels = self.train_labels_list[index]
        else:
            bag = self.test_bags_list[index]
            instance_labels = self.test_labels_list[index]

        # The bag is positive as soon as one instance is positive.
        label = [instance_labels.any(), instance_labels]
        return bag, label


if __name__ == "__main__":
    # Smoke test: build 100 train and 100 test bags, then report how many
    # bags are positive and the bag-size statistics of each split.

    train_loader = data_utils.DataLoader(
        MnistBags(target_number=9, mean_bag_length=10, var_bag_length=2,
                  num_bag=100, seed=1, train=True),
        batch_size=1,
        shuffle=True,
    )

    test_loader = data_utils.DataLoader(
        MnistBags(target_number=9, mean_bag_length=10, var_bag_length=2,
                  num_bag=100, seed=1, train=False),
        batch_size=1,
        shuffle=False,
    )

    # --- training split ---
    len_bag_list_train = []
    mnist_bags_train = 0
    print("train loaded data", train_loader)
    for batch_idx, (bag, label) in enumerate(train_loader):
        print('bag ', batch_idx, ': ', bag.shape, sep='')
        # batch_size is 1, so dropping axis 0 leaves the instance axis first.
        len_bag_list_train.append(int(bag.squeeze(0).shape[0]))
        # label[0] is the collated bag-level label.
        mnist_bags_train += label[0].numpy()[0]
    print(
        'Number positive train bags: {}/{}\n'
        'Number of instances per bag, mean: {}, max: {}, min {}\n'.format(
            mnist_bags_train,
            len(train_loader),
            np.mean(len_bag_list_train),
            np.max(len_bag_list_train),
            np.min(len_bag_list_train),
        )
    )

    # --- test split (same statistics, without the per-bag shape print) ---
    len_bag_list_test = []
    mnist_bags_test = 0
    for batch_idx, (bag, label) in enumerate(test_loader):
        len_bag_list_test.append(int(bag.squeeze(0).shape[0]))
        mnist_bags_test += label[0].numpy()[0]
    print(
        'Number positive test bags: {}/{}\n'
        'Number of instances per bag, mean: {}, max: {}, min {}\n'.format(
            mnist_bags_test,
            len(test_loader),
            np.mean(len_bag_list_test),
            np.max(len_bag_list_test),
            np.min(len_bag_list_test),
        )
    )

train loaded data <torch.utils.data.dataloader.DataLoader object at 0x7fc05203ee90>
bag 0: torch.Size([1, 9, 1, 28, 28])
bag 1: torch.Size([1, 10, 1, 28, 28])
bag 2: torch.Size([1, 7, 1, 28, 28])
bag 3: torch.Size([1, 9, 1, 28, 28])
bag 4: torch.Size([1, 9, 1, 28, 28])
bag 5: torch.Size([1, 9, 1, 28, 28])
bag 6: torch.Size([1, 8, 1, 28, 28])
bag 7: torch.Size([1, 9, 1, 28, 28])
bag 8: torch.Size([1, 12, 1, 28, 28])
bag 9: torch.Size([1, 10, 1, 28, 28])
bag 10: torch.Size([1, 4, 1, 28, 28])
bag 11: torch.Size([1, 12, 1, 28, 28])
bag 12: torch.Size([1, 9, 1, 28, 28])
bag 13: torch.Size([1, 10, 1, 28, 28])
bag 14: torch.Size([1, 8, 1, 28, 28])
bag 15: torch.Size([1, 11, 1, 28, 28])
bag 16: torch.Size([1, 11, 1, 28, 28])
bag 17: torch.Size([1, 6, 1, 28, 28])
bag 18: torch.Size([1, 10, 1, 28, 28])
bag 19: torch.Size([1, 9, 1, 28, 28])
bag 20: torch.Size([1, 5, 1, 28, 28])
bag 21: torch.Size([1, 12, 1, 28, 28])
bag 22: torch.Size([1, 9, 1, 28, 28])
bag 23: torch.Size([1, 10, 1, 28, 28])
bag 

In [None]:
# Build the 100-bag test split and print the shape of the bag at index 1.
data = MnistBags(
    target_number=9,
    mean_bag_length=10,
    var_bag_length=2,
    num_bag=100,
    seed=1,
    train=False,
)
bg, lb = data[1]
print(bg.shape)

torch.Size([8, 1, 28, 28])


In [None]:
# Collect every patch as an RGB PIL image, one bag (list) per image folder.
# label_list keeps the label folder name (a string) of each bag.
main_dir = '/content/drive/MyDrive/data/Patches'
bag_list = []
label_list = []
for folder_label in os.listdir(main_dir):
    path = os.path.join(main_dir, folder_label)
    for img in os.listdir(path):
        path1 = os.path.join(path, img)
        bag = [
            Image.open(os.path.join(path1, patch)).convert("RGB")
            for patch in os.listdir(path1)
        ]
        bag_list.append(bag)
        label_list.append(folder_label)

          

In [None]:
# Scratch check: a label folder name such as '0' becomes a one-element
# float tensor when passed through int() and torch.Tensor.
test = torch.Tensor([int('0')])
test

tensor([0.])