In [4]:
import torch
from torch.utils.data import DataLoader

In [6]:
t = torch.rand(8,4)
data_loader = DataLoader(t, batch_size = 2, drop_last = False)
for (i, batch) in enumerate(data_loader, 1):
    print(f'Batch {i}', batch)


Batch 1 tensor([[0.2333, 0.7105, 0.8091, 0.1045],
        [0.6069, 0.6230, 0.6879, 0.5116]])
Batch 2 tensor([[0.3139, 0.4432, 0.4330, 0.2257],
        [0.0086, 0.6774, 0.8912, 0.6360]])
Batch 3 tensor([[0.8942, 0.5758, 0.4397, 0.0579],
        [0.3327, 0.6286, 0.8873, 0.1933]])
Batch 4 tensor([[0.9312, 0.0545, 0.2930, 0.2586],
        [0.6638, 0.3677, 0.4177, 0.5897]])


In [21]:
#apparently, DataLoader iterates over the rows of the input tensor, which is the same behavior as when we iterate over the tensor itself
t_r = torch.reshape(t, [4, 8])
data_loader = DataLoader(t_r, batch_size = 2, drop_last = False)
for (i, batch) in enumerate(data_loader, 1):
    print(f'Batch {i}', batch)

print()

for (i, row) in enumerate(t_r, 1):
    print(f'Row: {i}', row)


Batch 1 tensor([[1.0000e+03, 7.1048e-01, 8.0911e-01, 9.9990e+03, 6.0691e-01, 6.2299e-01,
         6.8793e-01, 5.1159e-01],
        [3.1390e-01, 4.4320e-01, 4.3296e-01, 2.2573e-01, 1.0000e+03, 6.7742e-01,
         8.9117e-01, 6.3600e-01]])
Batch 2 tensor([[0.8942, 0.5758, 0.4397, 0.0579, 0.3327, 0.6286, 0.8873, 0.1933],
        [0.9312, 0.0545, 0.2930, 0.2586, 0.6638, 0.3677, 0.4177, 0.5897]])

Row: 1 tensor([1.0000e+03, 7.1048e-01, 8.0911e-01, 9.9990e+03, 6.0691e-01, 6.2299e-01,
        6.8793e-01, 5.1159e-01])
Row: 2 tensor([3.1390e-01, 4.4320e-01, 4.3296e-01, 2.2573e-01, 1.0000e+03, 6.7742e-01,
        8.9117e-01, 6.3600e-01])
Row: 3 tensor([0.8942, 0.5758, 0.4397, 0.0579, 0.3327, 0.6286, 0.8873, 0.1933])
Row: 4 tensor([0.9312, 0.0545, 0.2930, 0.2586, 0.6638, 0.3677, 0.4177, 0.5897])


TypeError: 'DataLoader' object is not subscriptable

In [52]:
from torch.utils.data import TensorDataset

torch.manual_seed(1)
t_x = torch.rand(8, 4)
t_y = torch.arange(8)
joint_dataset = TensorDataset(t_x, t_y)
print(joint_dataset[1]) #returns a tuple of a single row of x and a single element of y


(tensor([0.0293, 0.7999, 0.3971, 0.7544]), tensor(1))


In [53]:
joint_data_loader = DataLoader(joint_dataset, batch_size = 2, shuffle = True, drop_last = False)
for (i, batch) in enumerate(joint_data_loader, 1):
    print()
    print(f'Batch {i} x:', batch[0])
    print(f'Batch {i} y:', batch[1])


Batch 1 x: tensor([[0.3138, 0.1980, 0.4162, 0.2843],
        [0.5725, 0.4980, 0.9371, 0.6556]])
Batch 1 y: tensor([5, 4])

Batch 2 x: tensor([[0.7576, 0.2793, 0.4031, 0.7347],
        [0.0112, 0.8100, 0.6397, 0.9743]])
Batch 2 y: tensor([0, 7])

Batch 3 x: tensor([[0.5695, 0.4388, 0.6387, 0.5247],
        [0.0293, 0.7999, 0.3971, 0.7544]])
Batch 3 y: tensor([2, 1])

Batch 4 x: tensor([[0.3398, 0.5239, 0.7981, 0.7718],
        [0.6826, 0.3051, 0.4635, 0.4550]])
Batch 4 y: tensor([6, 3])


In [54]:
for epoch in range(2):
    print(f'epoch {epoch+1}')
    for i, batch in enumerate(joint_data_loader, 1):
        print(f'Batch {i} x:', batch[0])
        print(f'Batch {i} y:', batch[1])


epoch 1
Batch 1 x: tensor([[0.3138, 0.1980, 0.4162, 0.2843],
        [0.5695, 0.4388, 0.6387, 0.5247]])
Batch 1 y: tensor([5, 2])
Batch 2 x: tensor([[0.0112, 0.8100, 0.6397, 0.9743],
        [0.5725, 0.4980, 0.9371, 0.6556]])
Batch 2 y: tensor([7, 4])
Batch 3 x: tensor([[0.6826, 0.3051, 0.4635, 0.4550],
        [0.0293, 0.7999, 0.3971, 0.7544]])
Batch 3 y: tensor([3, 1])
Batch 4 x: tensor([[0.3398, 0.5239, 0.7981, 0.7718],
        [0.7576, 0.2793, 0.4031, 0.7347]])
Batch 4 y: tensor([6, 0])
epoch 2
Batch 1 x: tensor([[0.6826, 0.3051, 0.4635, 0.4550],
        [0.3398, 0.5239, 0.7981, 0.7718]])
Batch 1 y: tensor([3, 6])
Batch 2 x: tensor([[0.7576, 0.2793, 0.4031, 0.7347],
        [0.3138, 0.1980, 0.4162, 0.2843]])
Batch 2 y: tensor([0, 5])
Batch 3 x: tensor([[0.5695, 0.4388, 0.6387, 0.5247],
        [0.0112, 0.8100, 0.6397, 0.9743]])
Batch 3 y: tensor([2, 7])
Batch 4 x: tensor([[0.0293, 0.7999, 0.3971, 0.7544],
        [0.5725, 0.4980, 0.9371, 0.6556]])
Batch 4 y: tensor([1, 4])


In [63]:
import pathlib

imgdir_path = pathlib.Path('cat_dog_images')

file_list = sorted([str(path) for path in imgdir_path.glob('*.jpg')])
print(file_list)


['cat_dog_images\\cat-01.jpg', 'cat_dog_images\\cat-02.jpg', 'cat_dog_images\\cat-03.jpg', 'cat_dog_images\\dog-01.jpg', 'cat_dog_images\\dog-02.jpg', 'cat_dog_images\\dog-03.jpg']
