In [1]:
import torch
import torch.utils.data as Data

# Seed PyTorch's global random number generator so that random operations
# (such as the DataLoader shuffling used below) are repeatable across runs.
torch.manual_seed(6)

<torch._C.Generator at 0x7f59b1f9d510>

## Create Dataset

In [2]:
# Two 10-element ramps: x counts up from 1 to 10 while y counts down from 10 to 1.
x = torch.linspace(1, 10, steps=10)
y = torch.linspace(10, 1, steps=10)

# Show the (x, y) pairs side by side as a 10x2 tensor; the result is only
# displayed as the cell output, it is not stored.
torch.stack((x, y), dim=1)

tensor([[ 1., 10.],
        [ 2.,  9.],
        [ 3.,  8.],
        [ 4.,  7.],
        [ 5.,  6.],
        [ 6.,  5.],
        [ 7.,  4.],
        [ 8.,  3.],
        [ 9.,  2.],
        [10.,  1.]])

In [3]:
# Wrap the two tensors in one dataset so that x and y are indexed and batched together.
dataset = Data.TensorDataset(x, y)

## DataLoader

In [4]:
BATCH_SIZE = 5

# Serve the dataset in shuffled mini-batches of BATCH_SIZE samples,
# using one worker subprocess for loading.
loader = Data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1)

In [5]:
# Go through the whole dataset three times and print every mini-batch.
for epoch in range(3):
    for step, batch in enumerate(loader):
        batch_x, batch_y = batch
        print(
            'Epoch : ', epoch,
            '| Step', step,
            '| batch x :', batch_x.numpy(),
            "| bactch y : ", batch_y.numpy(),
        )

Epoch :  0 | Step 0 | batch x : [ 4.  1.  5.  2. 10.] | bactch y :  [ 7. 10.  6.  9.  1.]
Epoch :  0 | Step 1 | batch x : [7. 6. 8. 3. 9.] | bactch y :  [4. 5. 3. 8. 2.]
Epoch :  1 | Step 0 | batch x : [ 6.  9.  5.  3. 10.] | bactch y :  [5. 2. 6. 8. 1.]
Epoch :  1 | Step 1 | batch x : [2. 1. 8. 4. 7.] | bactch y :  [ 9. 10.  3.  7.  4.]
Epoch :  2 | Step 0 | batch x : [3. 6. 2. 1. 7.] | bactch y :  [ 8.  5.  9. 10.  4.]
Epoch :  2 | Step 1 | batch x : [ 8.  4.  9. 10.  5.] | bactch y :  [3. 7. 2. 1. 6.]


In [6]:
# 8 does not divide the 10 samples evenly, so every epoch ends with a
# smaller final batch holding the 2 remaining samples.
BATCH_SIZE = 8

# Same dataset as before: shuffled mini-batches, one worker subprocess for loading.
loader = Data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1)

# Three full passes over the dataset.
for epoch in range(3):
    # One iteration per mini-batch, i.e. per training step.
    for step, batch in enumerate(loader):
        batch_x, batch_y = batch

        # A real training step would go here.

        print(
            'Epoch: ', epoch,
            '| Step: ', step,
            '| batch x: ', batch_x.numpy(),
            '| batch y: ', batch_y.numpy(),
        )

Epoch:  0 | Step:  0 | batch x:  [ 1. 10.  8.  2.  4.  3.  6.  9.] | batch y:  [10.  1.  3.  9.  7.  8.  5.  2.]
Epoch:  0 | Step:  1 | batch x:  [7. 5.] | batch y:  [4. 6.]
Epoch:  1 | Step:  0 | batch x:  [ 5.  8.  6.  2.  9.  3. 10.  7.] | batch y:  [6. 3. 5. 9. 2. 8. 1. 4.]
Epoch:  1 | Step:  1 | batch x:  [4. 1.] | batch y:  [ 7. 10.]
Epoch:  2 | Step:  0 | batch x:  [5. 3. 2. 6. 7. 1. 9. 4.] | batch y:  [ 6.  8.  9.  5.  4. 10.  2.  7.]
Epoch:  2 | Step:  1 | batch x:  [ 8. 10.] | batch y:  [3. 1.]


## Create a Dataset from Images

In [7]:
import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms

# ImageFolder treats every sub-folder of img_dir (here: bird, mountain) as a
# class and uses the images inside it as the samples of that class.
img_dir = "./images"
img_data = dsets.ImageFolder(img_dir, transforms.Compose([
            # RandomSizedCrop is deprecated; RandomResizedCrop is its replacement.
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            ]))

print(img_data.classes)       # ['bird', 'mountain']
print(img_data.class_to_idx)  # {'bird': 0, 'mountain': 1}
print(img_data.imgs)          # [(image path, class index), (image path, class index), ...]

['bird', 'mountain']
{'bird': 0, 'mountain': 1}
[('./images/bird/5f708a660eef3c4762c5a4a232f76db0.jpg', 0), ('./images/bird/c012c9fc6a99dcc00cfd89867f22a1c6.jpg', 0), ('./images/mountain/926c079396486b16944a44e6fa395bd6.jpg', 1), ('./images/mountain/b3e291f866f1c8754a5eb7fa51e72a0a.jpg', 1), ('./images/mountain/bd5fdf64757acb56a5c336f1e7bdbbb3.jpg', 1), ('./images/mountain/f7d40a84a44897f83ab8722254bd0551.jpg', 1)]


  "please use transforms.RandomResizedCrop instead.")


## DataLoader

In [8]:
# Batch the image dataset three images at a time, in shuffled order.
loader = Data.DataLoader(img_data, batch_size=3, shuffle=True, num_workers=1)

for img, label in loader:
    print(img.size())
    # One class index per image in the batch.
    print(label)

torch.Size([3, 3, 224, 224])
tensor([1, 1, 1])
torch.Size([3, 3, 224, 224])
tensor([0, 0, 1])


## Predefined Dataset

In [9]:
# MNIST Dataset 
train_dataset = dsets.MNIST(root='./data', 
                            train=True, 
                            transform=transforms.ToTensor(),  
                            download=True)

image, label = train_dataset[0]
print (image.size())
print (label)

torch.Size([1, 28, 28])
5


In [10]:
# Batch the MNIST training set: 100 shuffled samples per batch, loaded by two
# worker subprocesses.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                          batch_size=100,
                                          shuffle=True,
                                          num_workers=2)

# Pull a single batch by hand. The built-in next() is used instead of the
# Python 2 style data_iter.next(), which loader iterators are not guaranteed
# to provide.
data_iter = iter(train_loader)

images, labels = next(data_iter)

# The usual way to consume a loader: iterate over every batch of the dataset.
for images, labels in train_loader:
    pass

# After the loop, images still holds the last batch that was produced.
images.size()

torch.Size([100, 1, 28, 28])

# Build your own custom dataset as shown below.

# Convert this cell to a code cell and fill in the TODOs.
class CustomDataset(Data.Dataset):
    """Skeleton for a user-defined dataset; every method is a placeholder to fill in."""

    def __init__(self):
        """Set up the dataset.

        TODO:
            1. Initialize the file path or the list of file names.
        """

    def __getitem__(self, index):
        """Return the sample stored at ``index`` (placeholder: returns None for now).

        TODO:
            1. Read one item from file (e.g. using numpy.fromfile, PIL.Image.open).
            2. Preprocess the item (e.g. torchvision.Transform).
            3. Return a data pair (e.g. image and label).
        """

    def __len__(self):
        """Return the number of samples (placeholder: replace 0 with the real total)."""
        return 0

# Once the dataset class is filled in, PyTorch's stock DataLoader takes care of
# batching and shuffling.
# NOTE(review): while __len__ still returns the placeholder 0, shuffle=True may be
# rejected for an empty dataset on some PyTorch versions - confirm before running.
custom_dataset = CustomDataset()
train_loader = Data.DataLoader(custom_dataset, batch_size=100, shuffle=True, num_workers=2)

In [11]:
#========================== Using pretrained model ==========================#
# Download and load a ResNet-18 with pretrained weights.
resnet = torchvision.models.resnet18(pretrained=True)

# To finetune only the top layer, freeze every pretrained parameter first.
for param in resnet.parameters():
    param.requires_grad = False

# Replace the top layer for finetuning; the new layer is created after the
# freeze above, so its parameters keep their default requires_grad=True.
resnet.fc = torch.nn.Linear(resnet.fc.in_features, 100)  # 100 is for example.

# Quick check of the output shape with a random batch. Plain tensors can be fed
# to the model directly; the deprecated torch.autograd.Variable wrapper is not needed.
images = torch.randn(10, 3, 256, 256)
outputs = resnet(images)
print(outputs.size())   # (10, 100)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /home/cloudera/.cache/torch/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))


torch.Size([10, 100])
