# Prepare CoNSeP Dataset

In [1]:
# download data from https://warwick.ac.uk/fac/cross_fac/tia/data/

import numpy as np
from PIL import Image
import numpy as np
from pathlib import Path
from scipy.io import loadmat
from matplotlib import pyplot as plt

In [2]:
# set paths to dataset
# Both locations are derived from one root so that relocating the data only
# requires editing a single line.
dataset_root = Path("/workspace/CellViT-plus-plus/dataset")

# raw CoNSeP download; the steps below only read from here
orig_dataset_path = dataset_root / "original" / "CoNSeP"
# CellViT-formatted copy; the steps below write their results here
cellvit_dataset_path = dataset_root / "transformed" / "CoNSeP"

## Step 1: Rescale Images

In [4]:
from tqdm import tqdm


def rescale_images(image_files, output_dir, size=(1024, 1024)):
    """Resize every image in ``image_files`` and save it as PNG in ``output_dir``.

    Args:
        image_files: Sequence of paths to the source images.
        output_dir: Directory for the resized copies; created if missing.
        size: Target ``(width, height)`` in pixels.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    for img_file in tqdm(image_files):
        # Scope the opened image with ``with`` so its file handle is released
        # right away; resize() returns a new, independent image.
        with Image.open(img_file) as loaded_image:
            resized = loaded_image.resize(size, resample=Image.Resampling.LANCZOS)
        # Keep the original file stem so images and labels stay paired by name.
        resized.save(output_dir / f"{img_file.stem}.png")


# 1. Test images
test_input_path = orig_dataset_path / "Test" / "Images"
test_output_path = cellvit_dataset_path / "Test" / "images"
# sorted() only makes the processing order deterministic; outputs are per-file.
test_image_files = sorted(test_input_path.glob("*.png"))
rescale_images(test_image_files, test_output_path)

# 2. Train images
train_input_path = orig_dataset_path / "Train" / "Images"
train_output_path = cellvit_dataset_path / "Train" / "images"
train_image_files = sorted(train_input_path.glob("*.png"))
rescale_images(train_image_files, train_output_path)

100%|██████████| 14/14 [00:04<00:00,  3.03it/s]
100%|██████████| 27/27 [00:08<00:00,  3.10it/s]


## Step 2: Convert labels to numpy and rescale

In [5]:
def _resize_label_map(label_map, size):
    """Resize a 2-D label map with nearest-neighbour sampling and return float64.

    Nearest-neighbour is used so that no new (interpolated) label values are
    introduced by the resize.
    """
    resized = Image.fromarray(label_map).resize(
        size, resample=Image.Resampling.NEAREST
    )
    return np.array(resized).astype(np.float64)


def convert_labels(mask_files, output_dir, output_dir_resized, size=(1024, 1024)):
    """Convert ``.mat`` label files to ``.npy`` at original and resized resolution.

    For every file, the ``inst_map`` and ``type_map`` entries are read and
    written twice as a dict with those two keys: unchanged into ``output_dir``
    and resized to ``size`` into ``output_dir_resized``.

    Args:
        mask_files: Sequence of paths to the ``.mat`` label files.
        output_dir: Directory for the unmodified maps; created if missing.
        output_dir_resized: Directory for the resized maps; created if missing.
        size: Target ``(width, height)`` of the resized maps.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    output_dir_resized.mkdir(parents=True, exist_ok=True)

    for mask_file in tqdm(mask_files):
        loaded_mask = loadmat(mask_file)
        inst_map = loaded_mask["inst_map"]
        type_map = loaded_mask["type_map"]

        output_mask = {
            "inst_map": inst_map,
            "type_map": type_map,
        }
        output_mask_resized = {
            "inst_map": _resize_label_map(inst_map, size),
            "type_map": _resize_label_map(type_map, size),
        }

        # np.save stores each dict as a pickled object array, so the files must
        # be read back with np.load(..., allow_pickle=True).item().
        np.save(output_dir / f"{mask_file.stem}.npy", output_mask)
        np.save(output_dir_resized / f"{mask_file.stem}.npy", output_mask_resized)


# 1. Test images
test_input_path = orig_dataset_path / "Test" / "Labels"
test_mask_files = sorted(test_input_path.glob("*.mat"))

# unresized maps (presumably 1000x1000, going by the folder name) and resized maps
test_output_path = cellvit_dataset_path / "Test" / "labels-1000-1000"
test_output_path_resized = cellvit_dataset_path / "Test" / "labels"
convert_labels(test_mask_files, test_output_path, test_output_path_resized)

# 2. Train images
train_input_path = orig_dataset_path / "Train" / "Labels"
train_mask_files = sorted(train_input_path.glob("*.mat"))

train_output_path = cellvit_dataset_path / "Train" / "labels-1000-1000"
train_output_path_resized = cellvit_dataset_path / "Train" / "labels"
convert_labels(train_mask_files, train_output_path, train_output_path_resized)

100%|██████████| 14/14 [00:00<00:00, 22.67it/s]
100%|██████████| 27/27 [00:01<00:00, 22.65it/s]


In [15]:
# Sanity check: inspect one of the converted (resized) training labels.

# 1. Load the npy file. It lives under the configured output directory, so
#    build the path from cellvit_dataset_path instead of repeating it.
#    allow_pickle=True is required because the file holds a pickled dict; only
#    use it on files produced by this notebook, never on untrusted data.
data = np.load(
    cellvit_dataset_path / "Train" / "labels" / "train_1.npy", allow_pickle=True
)

# 2. Unwrap the 0-d object array into the dictionary it contains
label_dict = data.item()

# 3. Check the keys of the dictionary
print(label_dict.keys())  # dict_keys(['inst_map', 'type_map'])

# 4. Print the shape and the distinct values of each field
print(label_dict['inst_map'].shape, np.unique(label_dict['inst_map']))
print(label_dict['type_map'].shape, np.unique(label_dict['type_map']))


dict_keys(['inst_map', 'type_map'])
(1024, 1024) [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  13.
  14.  15.  16.  17.  18.  19.  20.  21.  22.  23.  24.  25.  26.  27.
  28.  29.  30.  31.  32.  33.  34.  35.  36.  37.  38.  39.  40.  41.
  42.  43.  44.  45.  46.  47.  48.  49.  50.  51.  52.  53.  54.  55.
  56.  57.  58.  59.  60.  61.  62.  63.  64.  65.  66.  67.  68.  69.
  70.  71.  72.  73.  74.  75.  76.  77.  78.  79.  80.  81.  82.  83.
  84.  85.  86.  87.  88.  89.  90.  91.  92.  93.  94.  95.  96.  97.
  98.  99. 100. 101. 102. 103. 104. 105. 106. 107. 108. 109. 110. 111.
 112. 113. 114. 115. 116. 117. 118. 119. 120. 121. 122. 123. 124. 125.
 126. 127. 128. 129. 130. 131. 132. 133. 134. 135. 136. 137. 138. 139.
 140. 141. 142. 143. 144. 145. 146. 147. 148. 149. 150. 151. 152. 153.
 154. 155. 156. 157. 158. 159. 160. 161. 162. 163. 164. 165. 166. 167.
 168. 169. 170. 171. 172. 173. 174. 175. 176. 177. 178. 179. 180. 181.
 182. 183. 184. 185. 186. 18

# Train Classifier

In [None]:
# example configs can be found in the log folder:
# ./logs/Classifiers/CoNSeP

# python3 ./cellvit/train_cell_classifier_head.py --config /workspace/CellViT-plus-plus/config/CoNSeP_config.yaml

# Evaluation

In [None]:
# evaluate with consep-evaluation metrics
# python3 ./cellvit/training/evaluate/inference_cellvit_experiment_consep.py --help



# usage: inference_cellvit_experiment_consep.py [-h] [--logdir LOGDIR] [--dataset_path DATASET_PATH] [--cellvit_path CELLVIT_PATH] [--normalize_stains] [--gpu GPU]

# Perform CellViT-Classifier inference for CoNSeP

# options:
#   -h, --help            show this help message and exit
#   --logdir LOGDIR       Path to the log directory with the trained head. (default: None)
#   --dataset_path DATASET_PATH
#                         Path to the CoNSeP dataset (default: None)
#   --cellvit_path CELLVIT_PATH
#                         Path to the Cellvit model (default: None)
#   --normalize_stains    If stains should be normalized for inference (default: False)
#   --gpu GPU             Number of CUDA GPU to use (default: 0)