# Data acquisition

In [1]:
from multiprocessing import freeze_support
import time

import torch
from torch.utils.data import Dataset, DataLoader
from timed_decorator.simple_timed import timed

In [2]:
class SleepyDataset(Dataset):
    """Tiny four-item dataset whose item access is deliberately slow.

    Each lookup announces which process is serving it (the main process or
    a DataLoader worker) and then sleeps for one second to imitate an
    expensive load such as disk or network I/O.
    """

    def __init__(self):
        self.data = [1, 2, 3, 4]
        # Parity label for every value: ["odd", "even", "odd", "even"].
        self.labels = ["even" if value % 2 == 0 else "odd" for value in self.data]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i: int):
        """Return the ``(value, label)`` pair at index ``i`` after a 1 s delay."""
        # get_worker_info() is None when called outside a DataLoader worker.
        info = torch.utils.data.get_worker_info()
        if info is not None:
            message = f"Worker {info.id}/{info.num_workers} is loading item {i}"
        else:
            message = f"Loading item {i} in main process"
        print(message)

        # Simulate a slow loading process.
        time.sleep(1)
        value = self.data[i]
        label = self.labels[i]
        return value, label

In [3]:

@timed(use_seconds=True, show_args=True, return_time=True)
def load_data(num_workers: int):
    """Run one pass over ``SleepyDataset`` and simulate a training step per batch.

    Args:
        num_workers: Forwarded to ``DataLoader``; ``0`` loads in the main
            process, larger values load in that many worker processes.

    The function body itself returns nothing. Callers in this notebook unpack
    the decorated call as ``(result, elapsed)``, which is the effect of
    ``return_time=True`` on the ``timed`` decorator.
    """
    loader = DataLoader(SleepyDataset(), batch_size=1, num_workers=num_workers)
    for _batch in loader:
        # Simulate training on the batch that was just fetched.
        time.sleep(1)

In [4]:
freeze_support()

# Single source of truth for the benchmarked worker counts. The original cell
# ran load_data(1) but printed the result under "num_workers: 2"; driving both
# the runs and the labels from this tuple keeps them in sync.
WORKER_COUNTS = (0, 1, 4, 8)

# load_data is decorated with return_time=True, so each call yields
# (result, elapsed); only the elapsed time (index 1) is of interest here.
timings = [load_data(count)[1] for count in WORKER_COUNTS]

# Keep the individual names available for any later cells that use them.
t0, t1, t2, t3 = timings

print()
for count, elapsed in zip(WORKER_COUNTS, timings):
    print(f"num_workers: {count}, time: {elapsed} seconds")

# Speedup of each multi-worker run relative to the main-process baseline (t0).
print(f"Speedup: {t0/t1}, {t0/t2}, {t0/t3}")

Loading item 0 in main process
Loading item 1 in main process
Loading item 2 in main process
Loading item 3 in main process
load_data(0) -> total time: 8.018422338s
Worker 0/1 is loading item 0
Worker 0/1 is loading item 1
Worker 0/1 is loading item 2
Worker 0/1 is loading item 3
load_data(1) -> total time: 5.059337280s
Worker 1/4 is loading item 1Worker 0/4 is loading item 0Worker 2/4 is loading item 2


Worker 3/4 is loading item 3
load_data(4) -> total time: 5.068841478s
Worker 1/8 is loading item 1Worker 0/8 is loading item 0Worker 2/8 is loading item 2Worker 3/8 is loading item 3



load_data(8) -> total time: 5.098676861s

num_workers: 0, time: 8.018422338 seconds
num_workers: 2, time: 5.05933728 seconds
num_workers: 4, time: 5.068841478 seconds
num_workers: 8, time: 5.098676861 seconds
Speedup: 1.5848760211535058, 1.5819043410218874, 1.57264768029001
