/
fashion_mnist_dataset.py
68 lines (56 loc) · 2.5 KB
/
fashion_mnist_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
#import torch.nn as nn
import torch
import os
#import torch.nn.functional as F
import torchvision.datasets as dset
import torchvision.transforms as transforms
#from torch.autograd import Variable
#from torch.nn import Linear
class FMnistDataset():
"""
Fashion-MNIST [#]_ is a large-scale image dataset of various fashion items (T-shirt/top, Trouser,
Pullover, Dress. Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot. The images are grayscale and 28x28
in size with each image belong to one the above mentioned 10 categories. The training set contains
60000 examples and the test set contains 10000 examples.
References:
.. [#] `Xiao, Han, Kashif Rasul, and Roland Vollgraf. Fashion-mnist: a
novel image dataset for benchmarking machine learning algorithms.
<https://arxiv.org/abs/1708.07747>`_
"""
def __init__(self, batch_size=256, subset_size=50000, test_batch_size=256, dirpath=None):
trans = transforms.Compose([transforms.ToTensor()])
self._dirpath = dirpath
if not self._dirpath:
self._dirpath = os.path.join(os.path.dirname(os.path.abspath(__file__)),
'..', 'data','fmnit_data')
train_set = dset.FashionMNIST(root=self._dirpath, train=True, transform=trans, download=True)
test_set = dset.FashionMNIST(root=self._dirpath, train=False, transform=trans, download=True)
indices = torch.randperm(len(train_set))[:subset_size]
train_set = torch.utils.data.Subset(train_set, indices)
self.train_loader = torch.utils.data.DataLoader(
dataset=train_set,
batch_size=batch_size,
shuffle=True)
self.test_loader = torch.utils.data.DataLoader(
dataset=test_set,
batch_size=test_batch_size,
shuffle=False)
self.name = "fmnist"
self.data_dims = [28, 28, 1]
self.train_size = len(self.train_loader)
self.test_size = len(self.test_loader)
self.range = [0.0, 1.0]
self.batch_size = batch_size
self.num_training_instances = len(train_set)
self.num_test_instances = len(test_set)
self.likelihood_type = 'gaussian'
self.output_activation_type = 'sigmoid'
def next_batch(self):
for x, y in self.train_loader:
x = np.reshape(x, (-1, 28, 28, 1))
yield x, y
def next_test_batch(self):
for x, y in self.test_loader:
x = np.reshape(x, (-1, 28, 28, 1))
yield x, y