-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset.py
87 lines (68 loc) · 3.02 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import pandas as pd
import torch
import os
import h5py
from sklearn.preprocessing import LabelEncoder
import numpy as np
class IndoorSceneDataset(Dataset):
    """Image dataset driven by a text listing of relative image paths.

    Column 0 of ``text_file`` holds one image path per row, relative to
    ``root_dir``. The label returned for a sample is the first
    ``/``-separated component of that relative path.
    """

    def __init__(self, text_file, root_dir, transform=None):
        """Read the listing and remember where the images live.

        Args:
            text_file: File parsed with ``pd.read_csv(..., header=None)``;
                column 0 is used as the relative image path.
            root_dir: Directory that each relative path is joined onto.
            transform: Optional callable applied to the RGB PIL image
                before it is returned.
        """
        # Zero-argument super() delegates to Dataset properly; the previous
        # one-argument form only created an unbound super object.
        super().__init__()
        self.indoor_scenes = pd.read_csv(text_file, header=None)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the listing."""
        return len(self.indoor_scenes)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at ``idx``.

        Args:
            idx: Row position; a tensor index is converted with ``tolist()``.

        Returns:
            A tuple of the RGB image (after ``transform`` if one was given)
            and the label string taken from the leading path component.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        rel_path = self.indoor_scenes.iloc[idx, 0]
        # convert() loads the pixel data and returns a new image, so the
        # underlying file can be closed as soon as the block exits.
        with Image.open(os.path.join(self.root_dir, rel_path)) as img:
            image = img.convert('RGB')
        indoor_scene = rel_path.split('/')[0]

        if self.transform:
            image = self.transform(image)
        return image, indoor_scene
class IndoorSceneFeatureDataset(Dataset):
    """Dataset of precomputed features and labels stored in an HDF5 file.

    The feature file is expected to provide the datasets
    ``train_features`` / ``train_labels``, ``test_features`` /
    ``test_labels`` and ``mapping``; ``train`` selects which split is
    served.
    """

    def __init__(self, text_file, feature_file, train, root_dir=None, transform=None):
        """Open the feature file and select the requested split.

        Args:
            text_file: Listing parsed with ``pd.read_csv(..., header=None)``;
                only its row count is used, by ``__len__``.
            feature_file: Path of the HDF5 file holding features and labels.
            train: Truthy to serve the ``train_*`` datasets, falsy to serve
                the ``test_*`` datasets.
            root_dir: Stored on the instance; not read by this class.
            transform: Stored on the instance; not applied by this class.
        """
        # Previously this called super(IndoorSceneDataset).__init__(), which
        # named the sibling class and used the unbound one-argument form.
        super().__init__()
        self.indoor_scenes = pd.read_csv(text_file, header=None)

        # NOTE(review): the file is never closed here. That looks deliberate,
        # since the split datasets are kept as handles and indexed later in
        # __getitem__ -- confirm before adding a close().
        f = h5py.File(feature_file, 'r')
        if train:
            self.features = f['train_features']
            self.labels = f['train_labels']
        else:
            self.features = f['test_features']
            self.labels = f['test_labels']

        # Turn each mapping entry into a plain string, dropping surrounding
        # brackets and single quotes left over from its string form.
        mappinglist = np.array(f['mapping'])
        self.mapping = [str(el).strip('[]').strip('\'') for el in mappinglist.astype(str)]

        self.root_dir = root_dir
        self.transform = transform
        self.label_encoder = LabelEncoder()

    def __len__(self):
        """Return the number of rows in the text listing.

        NOTE(review): the length comes from the listing, not from
        ``self.features``; this assumes both have the same number of
        entries -- confirm against how the feature file was generated.
        """
        return len(self.indoor_scenes)

    def __getitem__(self, idx):
        """Return ``(feature, label)`` for position ``idx``.

        Args:
            idx: Sample position; a tensor index is converted with
                ``tolist()``.

        Returns:
            The stored feature with its leading axis squeezed away, and
            the stored label.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image = self.features[idx].squeeze(0)
        indoor_scene = self.labels[idx]
        return image, indoor_scene
if __name__ == '__main__':
    # Smoke test: load the precomputed features and print the first few
    # batches produced by a DataLoader.
    indoorscene_dataset = IndoorSceneFeatureDataset(
        text_file='Dataset/TestImages1.txt',
        feature_file='Dataset/test-features-1.h5',
        # `train` is a required argument; omitting it raised TypeError.
        # The file names point at the test split, hence False -- TODO confirm.
        train=False,
        root_dir='Dataset/Images/',
        transform=transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ]))

    trainloader = DataLoader(indoorscene_dataset, batch_size=8, shuffle=True, num_workers=1)

    for i_batch, (images, labels) in enumerate(trainloader):
        print(images.shape)
        print(labels)

        # Stop once the batch with index 4 (the fifth batch) has been shown.
        if i_batch == 4:
            break