In [1]:
import torch 
import torchvision
from torch.utils.data import DataLoader, Dataset
import glob
import os
import numpy as np
from skimage import io

In [3]:
class LFWDataset(Dataset):
    """Dataset over a ``<path>/<class_name>/<file>`` directory layout.

    Every entry matched by ``./<path>/*/*`` is one sample, and the name of its
    parent directory is its class.

    Attributes:
        path: The glob pattern used to discover samples.
        data: Sorted list of sample paths.
        classes: Sorted list of distinct class (directory) names.
        labels: For each entry of ``data``, its index into ``classes``.
        transform: Optional callable applied to each loaded image.

    Args:
        path: Root directory holding one sub-directory per class.
        transform: Optional callable applied to the image returned by
            ``io.imread`` before it is handed back from ``__getitem__``.
    """

    def __init__(self, path="data", transform=None):
        self.path = os.path.join(".", path, "*", "*")
        # glob order is filesystem-dependent; sort so sample indices are
        # stable across runs and machines.
        self.data = sorted(glob.glob(self.path))

        class_names = [os.path.basename(os.path.dirname(p)) for p in self.data]
        # Sort the class names: a bare set of strings iterates in hash order,
        # which changes between interpreter sessions and would silently
        # reshuffle the label <-> class mapping.
        self.classes = sorted(set(class_names))
        # Dict lookup instead of list.index() per file (avoids O(files * classes)).
        class_to_idx = {name: idx for idx, name in enumerate(self.classes)}
        self.labels = [class_to_idx[name] for name in class_names]
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        ``image`` is whatever ``io.imread`` returns, passed through
        ``transform`` when one was given. ``label`` is a 0-dim ``torch.long``
        tensor holding the sample's index into ``self.classes``.
        """
        image = io.imread(self.data[index])

        if self.transform:
            image = self.transform(image)

        # Pin the dtype explicitly so labels are int64 on every platform.
        label = torch.tensor(self.labels[index], dtype=torch.long)
        return image, label

    def __len__(self):
        """Return the number of samples found under ``path``."""
        return len(self.data)

In [11]:
# Build the custom dataset with a ToTensor transform and wrap it in a
# single-process (num_workers=0), unshuffled loader yielding batches of 10.
t = torchvision.transforms.ToTensor()
mydataset = LFWDataset(transform=t)
loader = DataLoader(
    mydataset,
    batch_size=10,
    shuffle=False,
    num_workers=0,
)

In [13]:
# Sanity check: print only the first batch produced by `loader`, then stop.
for data in loader:
    print(data)
    break

[tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0235, 0.0078, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0314, 0.0235, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0314, 0.0353, 0.0039],
          ...,
          [0.0078, 0.0039, 0.0000,  ..., 0.3569, 0.3137, 0.3059],
          [0.0039, 0.0000, 0.0000,  ..., 0.3608, 0.3098, 0.3059],
          [0.0039, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0235, 0.0078, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0314, 0.0235, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0314, 0.0353, 0.0039],
          ...,
          [0.0000, 0.0039, 0.0039,  ..., 0.3882, 0.3451, 0.3333],
          [0.0000, 0.0000, 0.0078,  ..., 0.3882, 0.3333, 0.3294],
          [0.0000, 0.0000, 0.0078,  ..., 0.0000, 0.0039, 0.0039]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0157, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0235, 0.0235, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 

### `BrokenPipeError: [Errno 32] Broken pipe`
On Windows, `DataLoader()` with `num_workers>=1` raises `BrokenPipeError: [Errno 32] Broken pipe`.

Us

In [16]:
# Same ./data directory, loaded through torchvision's built-in ImageFolder
# with a ToTensor transform.
mydataset2 = torchvision.datasets.ImageFolder(
    root="./data",
    transform=torchvision.transforms.ToTensor(),
)

In [38]:
# Shuffled loader over the ImageFolder dataset: batches of 10, loaded by
# 6 worker processes.
mydataloader = DataLoader(
    mydataset2,
    batch_size=10,
    shuffle=True,
    num_workers=6,
)

In [39]:
# Create an iterator over the multi-worker loader.
# NOTE(review): `it` is not consumed by any cell visible here — confirm it is
# still needed before keeping this cell.
it = iter(mydataloader)

In [41]:
# Sanity check: print only the first batch produced by `mydataloader`, then stop.
for data in mydataloader:
    print(data)
    break

[tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0039, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0039, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0039, 0.0000, 0.0000,  ..., 0.0039, 0.0039, 0.0039]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0039,  ..., 0.0078, 0.0039, 0.0039],
          [0.0000, 0.0000, 0.0000,  ..., 0.0039, 0.0039, 0.0039],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 