In [42]:
import torch
import numpy as np
np.set_printoptions(precision=3, suppress=True)
from torch.utils.data import DataLoader

In [43]:
# Toy data: a 1-D float32 tensor holding the values 0 through 5.
t = torch.arange(0, 6, dtype=torch.float32)
t

tensor([0., 1., 2., 3., 4., 5.])

In [44]:
# Wrap the tensor in a DataLoader using only default settings, then show
# the type of the resulting object.
data_loader = DataLoader(dataset=t)
type(data_loader)

torch.utils.data.dataloader.DataLoader

In [45]:
# Walk through the loader and print whatever it yields on each step.
for sample in data_loader:
    print(sample)

tensor([0.])
tensor([1.])
tensor([2.])
tensor([3.])
tensor([4.])
tensor([5.])


In [46]:
# With a batch size of 3, the six elements are split into two batches of
# shape (3,). drop_last=False is spelled out explicitly.
data_loader = DataLoader(t, batch_size=3, drop_last=False)

for i, batch in enumerate(data_loader, start=0):
    print(f"batch {i}:", batch)
    print(f"Shape of batch {i}:", batch.size())

batch 0: tensor([0., 1., 2.])
Shape of batch 0: torch.Size([3])
batch 1: tensor([3., 4., 5.])
Shape of batch 1: torch.Size([3])


In [47]:
# Fix the RNG seed so the random features are reproducible.
torch.manual_seed(1)

# Four samples with three random float32 features each, labelled 0..3.
t_x = torch.rand(4, 3, dtype=torch.float32)
t_y = torch.arange(0, 4)

print("t_x =", t_x)
print("t_y =", t_y)

t_x = tensor([[0.7576, 0.2793, 0.4031],
        [0.7347, 0.0293, 0.7999],
        [0.3971, 0.7544, 0.5695],
        [0.4388, 0.6387, 0.5247]])
t_y = tensor([0, 1, 2, 3])


In [48]:
from torch.utils.data import Dataset

class JointDataset(Dataset):
    """Map-style dataset that pairs each entry of ``x`` with the entry of ``y``
    at the same position.

    Indexing with ``idx`` returns the tuple ``(x[idx], y[idx])``; the dataset
    length is ``len(x)``.

    Args:
        x: Sized, indexable collection of features.
        y: Sized, indexable collection of labels; must be as long as ``x``.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length.
    """

    def __init__(self, x, y) -> None:
        # Fail fast on mismatched inputs: a shorter ``y`` would otherwise raise
        # IndexError partway through iteration, and the extra entries of a
        # longer ``y`` would never be reached because __len__ follows ``x``.
        if len(x) != len(y):
            raise ValueError(
                f"x and y must have the same length, got {len(x)} and {len(y)}"
            )
        self.x = x
        self.y = y

    def __len__(self) -> int:
        """Return the number of (feature, label) pairs."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(feature, label)`` pair stored at ``idx``."""
        return self.x[idx], self.y[idx]

In [49]:
from torch.utils.data import TensorDataset
# Pair the features with their labels using the custom dataset, then look at
# the first (features, label) pair. Plain indexing dispatches to __getitem__,
# so there is no need to call the dunder method directly.
joint_dataset = JointDataset(t_x, t_y)
joint_dataset[0]

(tensor([0.7576, 0.2793, 0.4031]), tensor(0))

In [50]:
torch.manual_seed(1)
# Build the same feature/label pairing with the built-in TensorDataset and
# print the first three pairs, using plain indexing rather than calling
# __getitem__ directly.
joint_dataset = TensorDataset(t_x, t_y)
print([joint_dataset[i] for i in range(3)])

[(tensor([0.7576, 0.2793, 0.4031]), tensor(0)), (tensor([0.7347, 0.0293, 0.7999]), tensor(1)), (tensor([0.3971, 0.7544, 0.5695]), tensor(2))]


In [51]:
# Reseed before building the shuffling loader so the batch order is reproducible.
torch.manual_seed(1)
data_loader = DataLoader(joint_dataset, batch_size=2, shuffle=True)

# Each batch unpacks into its features and labels; batches are numbered from 1.
for i, (x_batch, y_batch) in enumerate(data_loader, start=1):
    print(f'batch {i}:', 'x:', x_batch, '\n y:', y_batch)

batch 1: x: tensor([[0.3971, 0.7544, 0.5695],
        [0.7576, 0.2793, 0.4031]]) 
 y: tensor([2, 0])
batch 2: x: tensor([[0.7347, 0.0293, 0.7999],
        [0.4388, 0.6387, 0.5247]]) 
 y: tensor([1, 3])


In [57]:
import pathlib
# Gather the paths of every *.jpg file directly inside the image folder,
# as strings in sorted order.
imgdir_path = pathlib.Path('cat_dog_images')
print(imgdir_path)
file_list = sorted(map(str, imgdir_path.glob("*.jpg")))
print(file_list)

cat_dog_images
[]
