In [1]:
import torch
import torch.utils.data as Data

# Seed PyTorch's global RNG so that random operations in this notebook
# (e.g. torch.randn) draw from a fixed, repeatable sequence.
SEED = 6
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f3a3806b660>

## Create Dataset

In [3]:
# Two 1-D float tensors of 10 evenly spaced values:
# x counts up from 1 to 10, y counts down from 10 to 1.
x = torch.linspace(1, 10, steps=10)
y = torch.linspace(10, 1, steps=10)

# Preview the (x, y) pairs side by side as the two columns of a 10x2 tensor.
torch.stack((x, y), dim=1)


    1    10
    2     9
    3     8
    4     7
    5     6
    6     5
    7     4
    8     3
    9     2
   10     1
[torch.FloatTensor of size 10x2]

In [19]:
# Wrap the two tensors as a dataset whose i-th sample is the pair (x[i], y[i]).
# The tensors are passed positionally: the `data_tensor=` / `target_tensor=`
# keywords are not accepted by the `TensorDataset(*tensors)` signature used by
# current PyTorch, whereas the positional form works on old and new releases.
dataset = Data.TensorDataset(x, y)

## DataLoader

In [34]:
# Number of samples per mini-batch.
BATCH_SIZE = 5

# Serve `dataset` in shuffled mini-batches, loaded by one worker subprocess.
loader = Data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1)

In [35]:
# Run three passes (epochs) over the loader and show the contents of every batch.
for epoch in range(3):
    for step, (batch_x, batch_y) in enumerate(loader):
        print(
            'Epoch : ', epoch,
            '| Step', step,
            '| batch x :', batch_x.numpy(),
            "| bactch y : ", batch_y.numpy(),
        )

Epoch :  0 | Step 0 | batch x : [ 3.  6.  8.  9.  5.] | bactch y :  [ 8.  5.  3.  2.  6.]
Epoch :  0 | Step 1 | batch x : [  4.   7.   2.  10.   1.] | bactch y :  [  7.   4.   9.   1.  10.]
Epoch :  1 | Step 0 | batch x : [ 9.  4.  7.  6.  2.] | bactch y :  [ 2.  7.  4.  5.  9.]
Epoch :  1 | Step 1 | batch x : [  5.   3.  10.   8.   1.] | bactch y :  [  6.   8.   1.   3.  10.]
Epoch :  2 | Step 0 | batch x : [ 3.  6.  2.  7.  1.] | bactch y :  [  8.   5.   9.   4.  10.]
Epoch :  2 | Step 1 | batch x : [  8.   9.   5.  10.   4.] | bactch y :  [ 3.  2.  6.  1.  7.]


In [22]:
# Same experiment with a different mini-batch size.
BATCH_SIZE = 8

loader = Data.DataLoader(
    dataset,                 # the TensorDataset of (x, y) pairs
    batch_size=BATCH_SIZE,   # samples per mini-batch
    shuffle=True,            # shuffle the samples for training
    num_workers=1,           # one subprocess loads the data
)

# Go through the whole dataset three times.
for epoch in range(3):
    # One iteration of the inner loop corresponds to one training step.
    for step, (batch_x, batch_y) in enumerate(loader):

        # ... training on (batch_x, batch_y) would go here ...

        print(
            'Epoch: ', epoch, '| Step: ', step,
            '| batch x: ', batch_x.numpy(),
            '| batch y: ', batch_y.numpy(),
        )

Epoch:  0 | Step:  0 | batch x:  [ 10.   7.   8.   2.   9.   5.   3.   1.] | batch y:  [  1.   4.   3.   9.   2.   6.   8.  10.]
Epoch:  0 | Step:  1 | batch x:  [ 6.  4.] | batch y:  [ 5.  7.]
Epoch:  1 | Step:  0 | batch x:  [  8.  10.   1.   3.   9.   5.   6.   4.] | batch y:  [  3.   1.  10.   8.   2.   6.   5.   7.]
Epoch:  1 | Step:  1 | batch x:  [ 7.  2.] | batch y:  [ 4.  9.]
Epoch:  2 | Step:  0 | batch x:  [  2.   3.  10.   4.   6.   9.   7.   1.] | batch y:  [  9.   8.   1.   7.   5.   2.   4.  10.]
Epoch:  2 | Step:  1 | batch x:  [ 8.  5.] | batch y:  [ 3.  6.]


## Create a Dataset from Image Files

In [27]:
import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms

# Root folder of the images; ImageFolder derives the class names from the
# sub-directories found under it.
img_dir = "./images"

# Each image is randomly cropped and resized to 224x224, randomly flipped
# horizontally, and converted to a tensor.
# RandomResizedCrop is the current name of the deprecated RandomSizedCrop
# transform, which is no longer available in recent torchvision releases.
img_data = dsets.ImageFolder(
    img_dir,
    transform=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
)

# Inspect what was discovered: class names, the class -> index mapping,
# and the list of (file path, class index) pairs.
print(img_data.classes)
print(img_data.class_to_idx)
print(img_data.imgs)

['bird', 'mountain']
{'bird': 0, 'mountain': 1}
[('./images/bird/5f708a660eef3c4762c5a4a232f76db0.jpg', 0), ('./images/bird/c012c9fc6a99dcc00cfd89867f22a1c6.jpg', 0), ('./images/mountain/926c079396486b16944a44e6fa395bd6.jpg', 1), ('./images/mountain/b3e291f866f1c8754a5eb7fa51e72a0a.jpg', 1), ('./images/mountain/bd5fdf64757acb56a5c336f1e7bdbbb3.jpg', 1), ('./images/mountain/f7d40a84a44897f83ab8722254bd0551.jpg', 1)]


## DataLoader

In [36]:
# Serve the image dataset in shuffled batches of 3, loaded by one worker subprocess.
loader = Data.DataLoader(
    img_data,
    batch_size=3,
    shuffle=True,
    num_workers=1,
)

# Every batch is an (images, labels) pair: show the image tensor's size
# and the class indices of the batch.
for img, label in loader:
    print(img.size())
    print(label)

torch.Size([3, 3, 224, 224])

 1
 1
 1
[torch.LongTensor of size 3]

torch.Size([3, 3, 224, 224])

 1
 0
 0
[torch.LongTensor of size 3]



## Predefined Dataset (MNIST)

In [38]:
# MNIST training split, stored under ./data (download requested),
# with every image converted to a tensor.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Indexing the dataset yields one (image, label) sample.
image, label = train_dataset[0]
print(image.size())
print(label)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!
torch.Size([1, 28, 28])
5


In [44]:
# Shuffled mini-batches of 100 MNIST samples, loaded by two worker subprocesses.
train_loader = Data.DataLoader(dataset=train_dataset,
                               batch_size=100,
                               shuffle=True,
                               num_workers=2)

# Fetch a single mini-batch by hand.  The built-in next() is used instead of the
# Python-2-style `data_iter.next()` method, which the loader iterators of
# current PyTorch releases do not provide; next() works on old and new versions.
data_iter = iter(train_loader)
images, labels = next(data_iter)

# Typical usage: iterate over the loader; training code would replace `pass`.
for images, labels in train_loader:
    pass

# Size of the last batch, still bound to `images` after the loop.
images.size()

torch.Size([100, 1, 28, 28])

In [42]:
# Build your own dataset by filling in the skeleton below.
class CustomDataset(Data.Dataset):
    """Skeleton of a user-defined dataset; every method is a placeholder."""

    def __init__(self):
        """TODO: initialize the file path or the list of file names."""
        return None

    def __getitem__(self, index):
        """TODO: load and return the sample at position ``index``.

        1. Read one item from file (e.g. using numpy.fromfile, PIL.Image.open).
        2. Preprocess the data (e.g. torchvision.Transform).
        3. Return a data pair (e.g. image and label).

        The placeholder currently returns None.
        """
        return None

    def __len__(self):
        """Return the dataset size; change 0 to the total size of your dataset."""
        return 0

# A custom dataset plugs straight into torch's prebuilt data loader.
# NOTE(review): while __len__ still returns 0, some PyTorch versions may reject
# shuffle=True on an empty dataset — confirm once the skeleton is filled in.
custom_dataset = CustomDataset()
train_loader = Data.DataLoader(
    custom_dataset,
    batch_size=100,
    shuffle=True,
    num_workers=2,
)

In [43]:
#========================== Using pretrained model ==========================#
# Fetch ResNet-18 together with its pretrained weights.
resnet = torchvision.models.resnet18(pretrained=True)

# Freeze every existing parameter so that only the top layer gets finetuned.
for param in resnet.parameters():
    param.requires_grad = False

# Swap the final fully connected layer for a new one with the same input width;
# the 100 outputs are just an example.
resnet.fc = torch.nn.Linear(resnet.fc.in_features, 100)

# Quick test: push a random batch of 10 three-channel 256x256 inputs through the model.
images = torch.autograd.Variable(torch.randn(10, 3, 256, 256))
outputs = resnet(images)
print(outputs.size())   # (10, 100)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /home/kiwoong/.torch/models/resnet18-5c106cde.pth
100.0%


torch.Size([10, 100])
