In [1]:
from FileListDataset import FileListDataset
import glob
import os
from collections import Counter
import torchvision.transforms as transforms

# Getting the files from the folder

In [2]:
# Collect every file that sits exactly one folder below examples/data,
# i.e. paths shaped like examples/data/<class folder>/<file>.
# NOTE: glob returns matches in arbitrary order, so the list is not sorted.
data_files = glob.glob("examples/data/*/*")

In [3]:
data_files

['examples/data/class1/data_0.png',
 'examples/data/class1/data_1.png',
 'examples/data/class1/data_2.png',
 'examples/data/class1/data_3.png',
 'examples/data/class1/data_4.png',
 'examples/data/class1/data_5.png',
 'examples/data/class1/data_6.png',
 'examples/data/class1/data_7.png',
 'examples/data/class1/data_8.png',
 'examples/data/class1/data_9.png',
 'examples/data/class2/data_0.png',
 'examples/data/class2/data_1.png',
 'examples/data/class2/data_2.png',
 'examples/data/class2/data_3.png',
 'examples/data/class2/data_4.png',
 'examples/data/class2/data_5.png',
 'examples/data/class2/data_6.png',
 'examples/data/class2/data_7.png',
 'examples/data/class2/data_8.png',
 'examples/data/class2/data_9.png',
 'examples/data/class0/data_0.png',
 'examples/data/class0/data_1.png',
 'examples/data/class0/data_2.png',
 'examples/data/class0/data_3.png',
 'examples/data/class0/data_4.png',
 'examples/data/class0/data_5.png',
 'examples/data/class0/data_6.png',
 'examples/data/class0/data_

## Listing Available Classes

What defines the class of each of these files is the folder name before the file name, so, for example:

'examples/data/class0/data_1.png' is from class 'class0',

'examples/data/class1/data_5.png' is from class 'class1' and

'examples/data/class2/data_7.png' is from class 'class2'.

In [4]:
file_name = data_files[0] # take the first file in the list as a worked example

In [5]:
file_name

'examples/data/class1/data_0.png'

So, as we can see, this sample is from class 'class1'. We can extract the label from the path by taking the name of the folder that contains the file:

In [6]:
# Split the path on the OS separator; the second-to-last piece is the
# folder that directly contains the file, which is the class name.
file_name.split(os.sep)[-2]

'class1'

In [7]:
# One label per file, in the same order as data_files: the name of the
# folder that directly contains the file.
data_labels = [
    path.split(os.sep)[-2]
    for path in data_files
]

In [8]:
data_labels

['class1',
 'class1',
 'class1',
 'class1',
 'class1',
 'class1',
 'class1',
 'class1',
 'class1',
 'class1',
 'class2',
 'class2',
 'class2',
 'class2',
 'class2',
 'class2',
 'class2',
 'class2',
 'class2',
 'class2',
 'class0',
 'class0',
 'class0',
 'class0',
 'class0',
 'class0',
 'class0',
 'class0',
 'class0',
 'class0']

In [9]:
# Option 1: build the dataset from two parallel lists, the file paths and
# the label extracted for each of them above.
# transforms.ToTensor() is passed as the transform; the sample printed
# further down comes back as a tensor.
dataset = FileListDataset(data_files=data_files,
                          data_labels=data_labels,
                          transform=transforms.ToTensor())

In [10]:
print('Number of samples: ', len(dataset))

Number of samples:  30


In [11]:
# Pull the first item out of the dataset and unpack it as (image, label).
img, label = next(iter(dataset))

In [12]:
print('img:', img)

img: tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]])


In [13]:
print('label:', label)

label: 1


Instead of building the list of labels ourselves, we can wrap the label extraction in a function and pass that function to the dataset:

In [14]:
def getter_image_label(file_name):
    """Return the class label of a file: the name of the folder that contains it.

    The label is taken from the path itself, e.g.
    'examples/data/class1/data_0.png' -> 'class1'.

    os.path is used instead of splitting on os.sep so that repeated
    separators ('data//class1//x.png') do not produce an empty label, and so
    that '/'-style paths are also understood on Windows.

    Args:
        file_name: Path to a data file, as a string.

    Returns:
        The name of the file's parent folder. This is an empty string when
        the path has no parent folder (a bare file name).
    """
    parent_dir = os.path.dirname(file_name)
    return os.path.basename(parent_dir)

And now we can easily count the number of files in each class:

In [15]:
# Tally how many files fall under each class label.
Counter(map(getter_image_label, data_files))

Counter({'class1': 10, 'class2': 10, 'class0': 10})

In [16]:
getter_image_label(data_files[0])

'class1'

In [17]:
# Option 2: build the dataset from the file paths plus the label-extraction
# function (f_get_label), instead of a precomputed list of labels.
dataset = FileListDataset(data_files=data_files,
                          f_get_label=getter_image_label,
                          transform=transforms.ToTensor())

In [18]:
print('Number of samples: ', len(dataset))

Number of samples:  30


In [19]:
# Take the first item of the function-labelled dataset as (image, label).
img, label = next(iter(dataset))

In [20]:
print('img:', img)

img: tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]])


In [21]:
print('label:', label)

label: 1
