In [1]:
#  Copyright 2022 Institute of Advanced Research in Artificial Intelligence (IARAI) GmbH.
#  IARAI licenses this file to You under the Apache License, Version 2.0
#  (the "License"); you may not use this file except in compliance with
#  the License. You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

In [2]:
import os
import sys

In [3]:
# Put the directory two levels up (presumably the repo root) first on sys.path so the module imports work.
# Alternatively, set PYTHONPATH=$PWD before launching the notebook server from the repo root folder.
sys.path.insert(0, os.path.abspath("../../"))  # noqa:E402

![t4c20logo](../../t4c20logo.png)

In [4]:
import torch
import torch_geometric
import tqdm
from IPython.core.display import HTML
from IPython.display import display
from pathlib import Path

import t4c22
from t4c22.dataloading.t4c22_dataset import T4c22Dataset
from t4c22.dataloading.t4c22_dataset_geometric import T4c22GeometricDataset
from t4c22.misc.t4c22_logging import t4c_apply_basic_logging_config
from t4c22.t4c22_config import load_basedir


In [5]:
%matplotlib inline
%load_ext autoreload
%load_ext time
%autoreload 2
%autosave 60
display(HTML("<style>.container { width:80% !important; }</style>"))

The time module is not an IPython extension.


Autosaving every 60 seconds


In [6]:
# Apply the t4c22 basic logging configuration at DEBUG level for this notebook session.
t4c_apply_basic_logging_config(loglevel="DEBUG")

In [7]:
# Load BASEDIR (the data root) from the config file "t4c22_config.json"; change the value there to
# point at your own data root. The file is presumably resolved relative to the `t4c22` package
# passed as `pkg` — verify in load_basedir.
BASEDIR = load_basedir(fn="t4c22_config.json", pkg=t4c22)

In [8]:
# City whose data is benchmarked; passed as `city=` to both dataset constructors in this notebook.
city = "london"

# T4c22GeometricDataset

In [9]:
%%time
train_dataset = T4c22GeometricDataset(root=BASEDIR, city=city, split="train", cachedir=Path("/tmp/processed"), limit=1000)

CPU times: user 1.01 s, sys: 93.1 ms, total: 1.1 s
Wall time: 1.05 s


In [10]:
# Sanity check: number of items in the dataset (constructed with limit=1000).
len(train_dataset)

1000

In [11]:
# NOTE(review): scratch arithmetic — the meaning of 10010 and 91 is not documented anywhere in this
# notebook; TODO confirm what they stand for or remove this cell.
10010 / 91

110.0

In [12]:
%%time
# Time a single item fetch. Recorded observation: ca. 2.41 s without the cache versus
# ca. 2.35 ms once the item is served from cachedir.
train_dataset.get(0)

CPU times: user 906 µs, sys: 1.43 ms, total: 2.33 ms
Wall time: 2.05 ms


Data(x=[59110, 4], edge_index=[2, 132414], y=[132414])

## Dataloader Performance

#### 16 workers

In [13]:
%%timeit -n 1
# use this to generate cached files with num_workers >> 0.
# generation: 5-10it/s
# from cachedir: ca. 210-260 it/s
for _ in tqdm.notebook.tqdm(
    enumerate(torch_geometric.loader.dataloader.DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=16)), total=len(train_dataset)
):
    pass

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

The slowest run took 33.13 times longer than the fastest. This could mean that an intermediate result is being cached.
22.2 s ± 44.6 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


#### 4 workers

In [14]:
%%timeit -n 1
# from cachedir: ca. 200-230 it/s
for _ in tqdm.notebook.tqdm(
    enumerate(torch_geometric.loader.dataloader.DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=4)), total=len(train_dataset)
):
    pass

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

3.94 s ± 64.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


#### 1 worker

In [15]:
%%timeit -n 1
# from cachedir: ca. 95-120 it/s
for _ in tqdm.notebook.tqdm(
    enumerate(torch_geometric.loader.dataloader.DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=1)), total=len(train_dataset)
):
    pass

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

8.02 s ± 440 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


#### 0 workers

In [16]:
%%timeit -n 1
# from cachedir: ca. 340-510 it/s (!)
# get items from val_dataset to trigger caching with dataloader. In the test function, we use the dataset directly (thus no workers)
for _ in tqdm.notebook.tqdm(
    enumerate(torch_geometric.loader.dataloader.DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=0)), total=len(train_dataset)
):
    pass

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

2.27 s ± 101 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


#### pure dataset

In [17]:
%%timeit -n 1
# from cachedir: ca. 1100 it/s
for _ in tqdm.notebook.tqdm(enumerate(train_dataset), total=len(train_dataset)):
    pass

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

1.4 s ± 38.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


#### Conclusion

It looks as if the pytorch-geometric collator slows data loading down by roughly a factor of 2 (compare "0 workers" with "pure dataset" above)! See https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/loader/dataloader.html

However, training is at 5 batches/sec whereas dataloader still gets >= 300 batches/sec, so this dataloader should not be the bottleneck.

# T4c22Dataset

In [22]:
%%time
train_dataset = T4c22Dataset(root=BASEDIR, city=city, split="train", cachedir=Path("/tmp/processed"), limit=1000)

CPU times: user 969 ms, sys: 124 ms, total: 1.09 s
Wall time: 1.05 s


In [23]:
# Sanity check: number of items in the T4c22Dataset (constructed with limit=1000).
len(train_dataset)

1000

In [24]:
# Inspect the first sample using subscript syntax rather than calling the dunder method directly;
# the displayed result is a 2-tuple of tensors.
train_dataset[0]

(tensor([[nan, nan, nan, nan],
         [nan, nan, nan, nan],
         [nan, nan, nan, nan],
         ...,
         [nan, nan, nan, nan],
         [nan, nan, nan, nan],
         [nan, nan, nan, nan]]),
 tensor([1., 0., 0.,  ..., nan, nan, 0.]))

In [25]:
%%timeit -n 1
# use this to generate cached files with num_workers >> 0.
# generation: 8 it/s
# from cachedir: ca. 890 it/s
for _ in tqdm.notebook.tqdm(enumerate(torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=16)), total=len(train_dataset)):
    pass

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

1.62 s ± 21.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
