# Create a PyTorch dataloader

This notebook shows how to create a PyTorch dataloader from one of the challenge datasets.

In [13]:
import sys
# sys.path.insert(0, "..") # For local tests without pkg installation, to make challenge_welding module visible 
from challenge_welding.tools import ChallengeWeldingDataset
from torch.utils.data import DataLoader
from challenge_welding.user_interface import ChallengeUI
from torch.utils.data import DataLoader

## Get dataset list

In [14]:
# Build the challenge user interface, configured with a "local" cache strategy
# and "notebooks_cache" as the cache directory.
my_challenge_UI = ChallengeUI(
    cache_strategy="local",
    cache_dir="notebooks_cache",
)

# Ask the interface which datasets are available and show their names.
ds_list = my_challenge_UI.list_datasets()
print(ds_list)

# The small example dataset is used throughout this notebook.
ds_name = "example_mini_dataset"

['example_mini_dataset', 'welding-detection-challenge-dataset']


## Get your dataset metadata

In [15]:
# Select the dataset to work on. The original line bound `ds_name` twice in a
# single chained assignment; one plain assignment is all that is needed.
ds_name = "example_mini_dataset"
# To use the other dataset listed by `list_datasets()`, use this name instead:
# ds_name = "welding-detection-challenge-dataset"

# Load the metadata of the selected dataset as a dataframe.
meta_df = my_challenge_UI.get_ds_metadata_dataframe(ds_name)

https://minio-storage.apps.confianceai-public.irtsysx.fr/challenge-welding/datasets/example_mini_dataset/metadata/ds_meta.parquet


## Initialize your torch dataloader on your dataset

In [16]:
# Build a dataloader from the first 50 metadata rows, requesting a batch size
# of 10 with shuffling disabled.
dataloader = my_challenge_UI.create_pytorch_dataloader(
    input_df=meta_df.iloc[:50],
    batch_size=10,
    shuffle=False,
)

Cache storage has been activated in  notebooks_cache
Cache directory has already been built, loading local metadata..
local metadata loaded !
0     challenge-welding/datasets/example_mini_datase...
1     challenge-welding/datasets/example_mini_datase...
2     challenge-welding/datasets/example_mini_datase...
3     challenge-welding/datasets/example_mini_datase...
4     challenge-welding/datasets/example_mini_datase...
5     challenge-welding/datasets/example_mini_datase...
6     challenge-welding/datasets/example_mini_datase...
7     challenge-welding/datasets/example_mini_datase...
8     challenge-welding/datasets/example_mini_datase...
9     challenge-welding/datasets/example_mini_datase...
10    challenge-welding/datasets/example_mini_datase...
11    challenge-welding/datasets/example_mini_datase...
12    challenge-welding/datasets/example_mini_datase...
13    challenge-welding/datasets/example_mini_datase...
14    challenge-welding/datasets/example_mini_datase...
15    challenge-we

## Test your dataloader

In [12]:
# Walk through the dataloader and show what each batch holds:
# the shape of the image entry and the accompanying metadata entry.
for i_batch, sample_batched in enumerate(dataloader):
    print("batch number", i_batch)
    print("batch content image", sample_batched["image"].shape)
    print("batch content meta", sample_batched["meta"])

    # Four batches (indices 0 to 3) are enough for a quick look, so stop here.
    if i_batch >= 3:
        break

batch number 0
batch content image torch.Size([10, 540, 540, 3])
batch content meta {'class': ['OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK', 'OK'], 'timestamp': ['22/01/20 12:49', '20/02/20 23:53', '20/01/20 20:34', '18/07/2022 20:18', '03/10/19 21:14', '21/07/2022 22:44', '11/07/20 19:08', '04/11/2020 20:09', '11/03/20 17:59', '28/10/2020 18:47'], 'welding-seams': ['c33', 'c102', 'c20', 'c102', 'c20', 'c33', 'c20', 'c20', 'c102', 'c102'], 'labelling_type': ['expert', 'expert', 'expert', 'operator', 'expert', 'operator', 'expert', 'expert', 'expert', 'expert'], 'resolution': [tensor([540, 540, 540, 540, 540, 540, 540, 540, 540, 540]), tensor([540, 540, 540, 540, 540, 540, 540, 540, 540, 540])], 'path': ['challenge-welding/datasets/example_mini_dataset/c33/expert/sample_0.jpeg', 'challenge-welding/datasets/example_mini_dataset/c102/expert/sample_1.jpeg', 'challenge-welding/datasets/example_mini_dataset/c20/expert/sample_2.jpeg', 'challenge-welding/datasets/example_mini_dataset/c