In [1]:
import sys

sys.path.insert(0, "..")

import torch 
from functools import partial
import matplotlib.pyplot as plt

from dinov2.data import SamplerType, make_data_loader, make_dataset
from dinov2.data import collate_data_and_cast, DataAugmentationDINO, MaskingGenerator



In [3]:
train_ds_path = "PanMDataset:split=ALL:root=/fast/AG_Kainmueller/data/pan_m_lmdb_crops/:extra=mibi*"

img_size = 224
patch_size = 14
n_tokens = (img_size // patch_size) ** 2
inputs_dtype = torch.half
do_on_gpu = False
do_data_aug = False

mask_generator = MaskingGenerator(
    input_size=(img_size // patch_size, img_size // patch_size),
    max_num_patches=0.5 * img_size // patch_size * img_size // patch_size,
)

data_transform_cpu = DataAugmentationDINO(
    (0.32, 1.0),
    (0.05, 0.32),
    8,
    global_crops_size=224,
    local_crops_size=96,
    use_kornia=True,
    do_multi_channel=True,
)

# setup data loader
dataset = make_dataset(
    dataset_str=train_ds_path,
    transform=data_transform_cpu,
    target_transform=lambda _: (),
)

print(f"#samples: {len(dataset)}")


Dataset kwargs {'split': <_Split.ALL: 'all'>, 'root': '/fast/AG_Kainmueller/data/pan_m_lmdb_crops/', 'extra': 'mibi*'}
extra_path /fast/AG_Kainmueller/data/pan_m_lmdb_crops/*
Datasets imgs file list:  ['/fast/AG_Kainmueller/data/pan_m_lmdb_crops/codex_colon/0-TRAIN_images', '/fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images', '/fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_decidua/0-TRAIN_images', '/fast/AG_Kainmueller/data/pan_m_lmdb_crops/vectra_colon/0-TRAIN_images', '/fast/AG_Kainmueller/data/pan_m_lmdb_crops/vectra_pancreas/0-TRAIN_images']
Datasets labels file list:  ['/fast/AG_Kainmueller/data/pan_m_lmdb_crops/codex_colon/0-TRAIN_labels', '/fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_labels', '/fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_decidua/0-TRAIN_labels', '/fast/AG_Kainmueller/data/pan_m_lmdb_crops/vectra_colon/0-TRAIN_labels', '/fast/AG_Kainmueller/data/pan_m_lmdb_crops/vectra_pancreas/0-TRAIN_labels']
Datasets metadata file list

In [4]:
sampler_type = SamplerType.SHARDED_INFINITE

collate_fn_cpu = partial(
    collate_data_and_cast,
    mask_ratio_tuple=(0.1, 0.5),
    mask_probability=0.5,
    n_tokens=n_tokens,
    mask_generator=mask_generator,
    dtype=inputs_dtype,
    do_free_shapes=None,
    use_ch_patch_embed=True,
    use_variable_channels=True,
)

data_loader = make_data_loader(
    dataset=dataset,
    batch_size=4,
    num_workers=8,
    shuffle=True,
    seed=0,
    sampler_type=sampler_type,
    sampler_advance=0,  # TODO(qas): fix this -- start_iter * cfg.train.batch_size_per_gpu,
    drop_last=True,
    collate_fn=collate_fn_cpu,
)

In [5]:
for i, batch in enumerate(data_loader):
    if i > 10:
        break
    print('GC', [e.shape for e in batch['collated_global_crops']])
    print("LC", [e.shape for e in batch["collated_local_crops"]])

    gc_0 = batch['collated_global_crops'][0].to(torch.float32)
    lc_0 = batch["collated_local_crops"][0].to(torch.float32)
    gc0_0 = (gc_0[0] - gc_0[0].min()) / (gc_0[0].max() - gc_0[0].min())
    lc0_0 = (lc_0[0] - lc_0[0].min()) / (lc_0[0].max() - lc_0[0].min())

    f,a = plt.subplots(1, 2)
    a[0].imshow(gc0_0[:3].reshape(3, 224, 224).permute(1, 2, 0))
    a[1].imshow(lc0_0[:3].permute(1, 2, 0))
    plt.show()




eeeeeeeeeeeeeeeeee   mibi_breast_0868_p4_ch39mibi_breast_0760_p5_ch39eeeeeemibi_breast_0437_p5_ch39eeeeee   eeeeee /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images/fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_imagesmibi_breast_0323_p6_ch39mibi_breast_0244_p0_ch39 eeeeee     self.curr_in_chans Noneself.curr_in_chans Nonemibi_breast_0149_p2_ch39/fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images self.curr_in_chans None

 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_imagesmibi_breast_0852_p6_ch39
 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images self.curr_in_chans None  /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_imagesself.curr_in_chans Noneeeeeee self.curr_in_chans None
self.curr_in_chans None 

mibi_breast_0294_p1_ch39
 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images self.curr_in_cha

Exception ignored in sys.unraisablehook

 self.curr_in_chans Noneeeeeee
 vectra_colon_0031_p5_ch7 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/vectra_colon/0-TRAIN_images

: 

 self.curr_in_chans None

<built-in function unraisablehook>





Traceback (most recent call last):


eeeeee 

  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 660, in write


mibi_breast_0625_p2_ch39 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images self.curr_in_chans None


    self.pub_thread.schedule(self._flush)
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 268, in schedule
    f()
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 618, in _flush
    Exception ignored in sys.unraisablehook: self.session.send(<built-in function unraisablehook>
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/jupyter_client/session.py", line 863, in send

Traceback (most recent call last):
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 660, in write
    self.pub_thread.schedule(self._flush)
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 268, in schedule
    f()
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 618, in _flush

eeeeee

        

 

stream.send_multipart(to_send, copy=copy)self.session.send(
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 275, in send_multipart
    

mibi_breast_0057_p7_ch39 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_imageseeeeee




 

self.schedule(lambda: self._really_send(*args, **kwargs))


self.curr_in_chans None 


  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 268, in schedule


vectra_pancreas_0199_p2_ch7

    Exception ignored in sys.unraisablehook      File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/jupyter_client/session.py", line 863, in send
f()

 

: 


/fast/AG_Kainmueller/data/pan_m_lmdb_crops/vectra_pancreas/0-TRAIN_images

<built-in function unraisablehook>

 

stream.send_multipart(to_send, copy=copy)  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 275, in <lambda>


self.curr_in_chans None


    

eeeeee

self.schedule(lambda: self._really_send(*args, **kwargs))

 mibi_breast_0094_p5_ch39




 

  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 291, in _really_send
    

/fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images

ctx, pipe_out = self._setup_pipe_out()

 




self.curr_in_chans None


  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 206, in _setup_pipe_out
    ctx = zmq.Context()
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/zmq/sugar/context.py", line 125, in __init__


Traceback (most recent call last):
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 275, in send_multipart


eeeeee

  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 660, in write
    

 vectra_pancreas_0081_p5_ch7

    

 

self.pub_thread.schedule(self._flush)self.schedule(lambda: self._really_send(*args, **kwargs))

/fast/AG_Kainmueller/data/pan_m_lmdb_crops/vectra_pancreas/0-TRAIN_images




 

    
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 268, in schedule


self.curr_in_chans None

      File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 268, in schedule





f()    
Exception ignored in sys.unraisablehooksuper().__init__(io_threads=io_threads, shadow=shadow_address)f()  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 275, in <lambda>
: 

  File "zmq/backend/cython/context.pyx", line 49, in zmq.backend.cython.context.Context.__init__
<built-in function unraisablehook>  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 618, in _flush
zmq.error
    self.schedule(lambda: self._really_send(*args, **kwargs)).
Traceback (most recent call last):
ZMQError      File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 291, in _really_send
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 660, in write
    

eeeeee

ctx, pipe_out = self._setup_pipe_out()    : self.session.send(self.pub_thread.schedule(self._flush)

 





vectra_colon_0307_p4_ch7




 

  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/jupyter_client/session.py", line 863, in send
Too many open files  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 206, in _setup_pipe_out


/fast/AG_Kainmueller/data/pan_m_lmdb_crops/vectra_colon/0-TRAIN_images

  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 268, in schedule



 

    stream.send_multipart(to_send, copy=copy)

self.curr_in_chans None

    
      File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 275, in send_multipart
ctx = zmq.Context()f()




    

self.schedule(lambda: self._really_send(*args, **kwargs))  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/zmq/sugar/context.py", line 125, in __init__

  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 618, in _flush
      File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 268, in schedule
    super().__init__(io_threads=io_threads, shadow=shadow_address)    
self.session.send(
  File "zmq/backend/cython/context.pyx", line 49, in zmq.backend.cython.context.Context.__init__
zmq.error  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/jupyter_client/session.py", line 863, in send
.    f()ZMQErrorstream.send_multipart(to_send, copy=copy): 

Too many open files  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/iostream.py", line 275, in <lambda>
  File "/fast/

eeeeee vectra_pancreas_0009_p3_ch7




 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/vectra_pancreas/0-TRAIN_images

  File "zmq/backend/cython/context.pyx", line 49, in zmq.backend.cython.context.Context.__init__
zmq.error.

 

ZMQError

self.curr_in_chans None


: Too many open files


eeeeee mibi_breast_0594_p7_ch39 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images eeeeeeself.curr_in_chans None
 mibi_breast_1206_p4_ch39 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images self.curr_in_chans None
eeeeee mibi_breast_1175_p3_ch39 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images self.curr_in_chans None
eeeeee mibi_breast_0410_p8_ch39 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images self.curr_in_chans None
eeeeee mibi_breast_0212_p0_ch39 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images self.curr_in_chans None
eeeeee mibi_breast_0013_p9_ch39 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images self.curr_in_chans None
eeeeee mibi_breast_0728_p0_ch39 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/mibi_breast/0-TRAIN_images self.curr_in_chans None
eeeeee vectra_colon_0358_p3_ch7 /fast/AG_Kainmueller/data/pan_m_lmdb_crops/vectra_colon/0-TRAIN_images self.curr_in_ch

Too many open files (src/epoll.cpp:38)


EOFError: 

Traceback (most recent call last):
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 701, in start
    self.io_loop.start()
  File "/fast/home/j/jluesch/micromamba/envs/dinov2_2/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start
    self.asyncio_loop.run_forever()
  File "/fast/home/j/jluesch/microm

In [31]:
a1={'aa': 00, 'bb': 1}

a2={'cc': 2, 'dd': 3}

print(a1 | a2)

{'aa': 0, 'bb': 1, 'cc': 2, 'dd': 3}
