# Create a pytorch dataloader

This notebook shows how to create a PyTorch dataloader from a given dataset.

In [8]:
import sys
# sys.path.insert(0, "..") # For local tests without pkg installation, to make challenge_welding module visible 
import challenge_welding.dataloaders
from challenge_welding.user_interface import ChallengeUI
from torch.utils.data import DataLoader

## Get dataset list

In [12]:
# Build the challenge user interface, using a "local" cache strategy
# with "notebooks_cache" as the cache directory.
my_challenge_UI = ChallengeUI(
    cache_strategy="local",
    cache_dir="notebooks_cache",
)

# Ask the interface for the list of available datasets and show it.
ds_list = my_challenge_UI.list_datasets()
print(ds_list)

# The rest of this notebook works with the small example dataset.
ds_name = "example_mini_dataset"

['example_mini_dataset', 'welding-detection-challenge-dataset']


## Get your dataset metadata

In [13]:
# Load all metadata of the chosen dataset into a dataframe.
# The dataset name is (re)stated here so the target dataset is explicit in this cell.
ds_name = "example_mini_dataset"
# ds_name = "welding-detection-challenge-dataset"  # alternative entry from the dataset list
meta_df = my_challenge_UI.get_ds_metadata_dataframe(ds_name)

https://minio-storage.apps.confianceai-public.irtsysx.fr/challenge-welding/datasets/example_mini_dataset/metadata/ds_meta.parquet


## Initialize your torch dataloader on your dataset

In [15]:
# Build the dataloader from the first 20 metadata rows only,
# reusing the cache settings already configured on the user interface.
dataloader = challenge_welding.dataloaders.create_pytorch_dataloader(
    input_df=meta_df[0:20],
    cache_strategy=my_challenge_UI.cache_strategy,
    cache_dir=my_challenge_UI.cache_dir,
    batch_size=100,
    shuffle=False,
)

Cache storage has been activated in  notebooks_cache
cache_metadata_unique_id 132503
Downloading all raw samples in cache storage, please wait . .


  5%|█████▉                                                                                                                 | 1/20 [00:00<00:03,  6.16it/s]

notebooks_cache\challenge-welding\datasets\example_mini_dataset\c33\expert\sample_0.jpeg
notebooks_cache\challenge-welding\datasets\example_mini_dataset\c102\expert\sample_1.jpeg


 15%|█████████████████▊                                                                                                     | 3/20 [00:00<00:02,  6.36it/s]

notebooks_cache\challenge-welding\datasets\example_mini_dataset\c20\expert\sample_2.jpeg
notebooks_cache\challenge-welding\datasets\example_mini_dataset\c102\operator\sample_3.jpeg


 25%|█████████████████████████████▊                                                                                         | 5/20 [00:00<00:02,  6.71it/s]

notebooks_cache\challenge-welding\datasets\example_mini_dataset\c20\expert\sample_4.jpeg
notebooks_cache\challenge-welding\datasets\example_mini_dataset\c33\operator\sample_5.jpeg


 35%|█████████████████████████████████████████▋                                                                             | 7/20 [00:01<00:01,  6.58it/s]

notebooks_cache\challenge-welding\datasets\example_mini_dataset\c20\expert\sample_6.jpeg
notebooks_cache\challenge-welding\datasets\example_mini_dataset\c20\expert\sample_7.jpeg


 45%|█████████████████████████████████████████████████████▌                                                                 | 9/20 [00:01<00:01,  6.69it/s]

notebooks_cache\challenge-welding\datasets\example_mini_dataset\c102\expert\sample_8.jpeg
notebooks_cache\challenge-welding\datasets\example_mini_dataset\c102\expert\sample_9.jpeg


 55%|████████████████████████████████████████████████████████████████▉                                                     | 11/20 [00:01<00:01,  6.52it/s]

notebooks_cache\challenge-welding\datasets\example_mini_dataset\c102\expert\sample_10.jpeg
notebooks_cache\challenge-welding\datasets\example_mini_dataset\c33\expert\sample_11.jpeg


 65%|████████████████████████████████████████████████████████████████████████████▋                                         | 13/20 [00:01<00:01,  6.64it/s]

notebooks_cache\challenge-welding\datasets\example_mini_dataset\c20\expert\sample_12.jpeg
notebooks_cache\challenge-welding\datasets\example_mini_dataset\c33\operator\sample_13.jpeg


 75%|████████████████████████████████████████████████████████████████████████████████████████▌                             | 15/20 [00:02<00:00,  6.98it/s]

notebooks_cache\challenge-welding\datasets\example_mini_dataset\c20\expert\sample_14.jpeg
notebooks_cache\challenge-welding\datasets\example_mini_dataset\c33\operator\sample_15.jpeg


 85%|████████████████████████████████████████████████████████████████████████████████████████████████████▎                 | 17/20 [00:02<00:00,  6.83it/s]

notebooks_cache\challenge-welding\datasets\example_mini_dataset\c102\expert\sample_16.jpeg
notebooks_cache\challenge-welding\datasets\example_mini_dataset\c102\operator\sample_17.jpeg


 95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████      | 19/20 [00:02<00:00,  6.90it/s]

notebooks_cache\challenge-welding\datasets\example_mini_dataset\c102\expert\sample_18.jpeg
notebooks_cache\challenge-welding\datasets\example_mini_dataset\c33\operator\sample_19.jpeg


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:02<00:00,  6.74it/s]

cache directory_built
Creating dataloader . .





## Test your dataloader

In [17]:
# Test your dataloader: walk through at most the first four batches
# (indices 0..3) and print, for each one, the image tensor shape and the metadata.
# Pairing with range(4) stops after the 4th batch without requesting a 5th one.
for i_batch, sample_batched in zip(range(4), dataloader):
    print("batch number", i_batch)
    print("batch content image", sample_batched['image'].shape)
    print("batch content meta", sample_batched['meta'])

batch number 0
batch content image torch.Size([20, 540, 540, 3])
batch content meta {'class': ['OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK'], 'timestamp': ['22/01/20 12:49', '20/02/20 23:53', '20/01/20 20:34', '18/07/2022 20:18', '03/10/19 21:14', '21/07/2022 22:44', '11/07/20 19:08', '04/11/2020 20:09', '11/03/20 17:59', '28/10/2020 18:47', '20/07/20 15:14', '25/01/20 00:24', '08/09/20 17:47', '18/07/2022 23:24', '18/07/20 07:34', '21/07/2022 23:04', '22/10/2020 15:28', '28/07/2022 01:21', '18/06/20 06:14', '19/07/2022 04:56'], 'welding-seams': ['c33', 'c102', 'c20', 'c102', 'c20', 'c33', 'c20', 'c20', 'c102', 'c102', 'c102', 'c33', 'c20', 'c33', 'c20', 'c33', 'c102', 'c102', 'c102', 'c33'], 'labelling_type': ['expert', 'expert', 'expert', 'operator', 'expert', 'operator', 'expert', 'expert', 'expert', 'expert', 'expert', 'expert', 'expert', 'operator', 'expert', 'operator', 'expert', 'operator', 'expert', 'oper