forked from dudevil/DomainAdaptation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_loader.py
106 lines (82 loc) · 3.79 KB
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
class DataGenerator(DataLoader):
    """DataLoader that acts as its own iterator and can optionally cycle forever.

    Iterating this object does not start a new pass by itself: ``__iter__``
    returns the object, and ``__next__`` pulls batches from one underlying
    DataLoader iterator kept in ``self.dataset_iterator``.

    Args:
        is_infinite (bool): if True, the underlying iterator is recreated
            whenever it runs out, so ``next()`` keeps producing batches.
            If False, ``StopIteration`` is raised at the end of every pass.
        *args, **kwargs: forwarded unchanged to ``DataLoader.__init__``.
    """

    def __init__(self, is_infinite=False, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.is_infinite = is_infinite
        self.reload_iterator()

    def reload_iterator(self):
        """Start a fresh pass by recreating the underlying DataLoader iterator."""
        self.dataset_iterator = super().__iter__()

    def __iter__(self):
        """Return the generator itself; batches are served by ``__next__``."""
        return self

    def __next__(self):
        """Return the next batch.

        Raises:
            StopIteration: at the end of a pass when ``is_infinite`` is False
                (or when a freshly reloaded iterator is empty).
        """
        try:
            return next(self.dataset_iterator)
        except StopIteration:
            # Re-arm in both modes: __iter__ returns self, so without a fresh
            # inner iterator a finite loader could never be looped over again
            # (e.g. validation on the following epoch).
            self.reload_iterator()
            if not self.is_infinite:
                # Signal the end of this pass explicitly to the caller.
                raise
            return next(self.dataset_iterator)

    def get_classes_to_idx(self):
        """Return ``class_to_idx`` of the dataset wrapped by ``self.dataset``.

        Reads ``self.dataset.dataset``, i.e. expects ``self.dataset`` to be a
        wrapper exposing the original dataset as ``.dataset`` (for example a
        subset produced by ``random_split``).
        """
        return self.dataset.dataset.class_to_idx

    def get_classes(self):
        """Return ``classes`` of the dataset wrapped by ``self.dataset``.

        Same wrapper expectation as ``get_classes_to_idx``.
        """
        return self.dataset.dataset.classes
def create_data_generators(dataset_name, domain, data_path="data", batch_size=16,
                           transformations=None, num_workers=1, split_ratios=(0.8, 0.1, 0.1),
                           image_size=500, infinite_train=False):
    """
    Build train, validation and test data generators for one dataset domain.

    Args:
        dataset_name (string)
        domain (string)
            - valid domain of the dataset dataset_name
        data_path (string)
            - valid path, which contains dataset_name folder
        batch_size (int)
        transformations (callable)
            - optional transform applied on image sample; when None, a
              Resize(image_size) + ToTensor pipeline is used
        num_workers (int)
            - multi-process data loading
        split_ratios (sequence of floats, len(split_ratios) = 3)
            - ratios of train, validation and test parts; only the first two
              are read, the test part receives all remaining samples
        image_size (int)
            - size passed to Resize in the default transformations; ignored
              when transformations is given
        infinite_train (bool)
            - if True, the train generator restarts instead of stopping
    Return:
        3 data generators - for train, validation and test data
    """
    if transformations is None:
        # NOTE(review): with an int argument torchvision's Resize is documented
        # to match the smaller edge and keep the aspect ratio, so non-square
        # images would give tensors of different shapes - confirm the dataset
        # images are square or pass explicit transformations.
        transformations = transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor(),
        ])

    dataset = create_dataset(dataset_name, domain, data_path, transformations)

    len_dataset = len(dataset)
    train_size = int(len_dataset * split_ratios[0])
    val_size = int(len_dataset * split_ratios[1])
    # The test part absorbs the rounding remainder so the sizes sum to len_dataset.
    test_size = len_dataset - train_size - val_size

    # No generator is passed, so the split depends on the global torch RNG state.
    train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

    train_dataloader = DataGenerator(is_infinite=infinite_train, dataset=train_dataset, batch_size=batch_size,
                                     shuffle=True, num_workers=num_workers, drop_last=True)
    val_dataloader = DataGenerator(is_infinite=False, dataset=val_dataset, batch_size=batch_size,
                                   shuffle=False, num_workers=num_workers)
    test_dataloader = DataGenerator(is_infinite=False, dataset=test_dataset, batch_size=batch_size,
                                    shuffle=False, num_workers=num_workers)

    return train_dataloader, val_dataloader, test_dataloader
def create_dataset(dataset_name, domain, data_path, transformations):
    """
    Load the image folder of one domain of a supported dataset.

    Args:
        dataset_name (string)
            - must be "office-31", the only implemented dataset
        domain (string)
            - valid domain of the dataset dataset_name
        data_path (string)
            - valid path, which contains dataset_name folder
        transformations (callable)
            - optional transform to be applied on an image sample
    Return:
        ImageFolder built from <data_path>/<dataset_name>/<domain>/images
    """
    implemented_datasets = ["office-31"]
    assert dataset_name in implemented_datasets, f"Dataset {dataset_name} is not implemented"

    if dataset_name == "office-31":
        dataset_domains = ["amazon", "dslr", "webcam"]

    assert domain in dataset_domains, (
        f"Incorrect domain {domain}: " + f"dataset {dataset_name} domains: {dataset_domains}"
    )

    images_root = f"{data_path}/{dataset_name}/{domain}/images"
    return ImageFolder(images_root, transform=transformations)