# Move label
Usage: Pair each label file with its corresponding DICOM files from `raw_data`, then copy them to `label_data`.
## 0. Prepare
### Library and path

In [1]:
import time

import glob

import logging
from datetime import datetime as ddt

import pandas as pd

from tqdm import tqdm

from data_arrangement import move_labeldata, move_labeldata_PC

In [2]:
# Input/output locations. These normally do not need to be changed.
_raw_root = '/home/d/pancreas/raw_data/'

# Detailed description tables (CSV): one for the PC cases, one shared by PT/NP.
detail_descrip_path_PC = _raw_root + 'PC_info.csv'
detail_descrip_path = _raw_root + 'PTNP_info.csv'

# Brief description spreadsheets (Excel) for the normal and tumor cases.
brief_descrip_normal_path = _raw_root + 'brief_normal.xlsx'
brief_descrip_tumor_path = _raw_root + 'brief_pancreas.xlsx'

# Where the label files and the scans are read from.
source_label_path = _raw_root + 'label/'
source_scan_path = _raw_root

# Output directory. The label_data target is kept below, commented out;
# results are currently written to the test directory instead.
# target_base_path = '/home/d/pancreas/label_data/'
target_base_path = '/home/d/pancreas/test/'

### Black list
Add the IDs of the cases that need to be excluded.

In [3]:
# Case IDs to exclude, each with the reason it is excluded.
black_list = [
    'PT3',   # wrong disease
    'PT5',   # cystic
    'PT43',  # cystic
]

## Run the specific task
Please run the type you need. <br>
### 1. normal pancreas: NP

In [4]:
# Start the wall-clock timer for the NP section.
st_tol = time.time()

# Brief description spreadsheet; missing cells become empty strings.
brief_df = pd.read_excel(brief_descrip_normal_path)
brief_df = brief_df.fillna('')

# Detailed description table; the 'Code' column is parsed as a string.
detail_df = pd.read_csv(detail_descrip_path, converters={'Code': str})
detail_df = detail_df.fillna('')

# Scans for this section are read from the "normal" folder.
source_scan_path = '/home/d/pancreas/raw_data/normal/'

# Process every NP label file; a case is counted when the helper returns a truthy value.
np_label_files = glob.glob(source_label_path + 'NP*.nrrd')
cnt = 0
for label in tqdm(np_label_files):
    check_copy = move_labeldata(label, brief_df, detail_df, source_scan_path, target_base_path)
    if check_copy:
        cnt += 1

elapsed = time.time() - st_tol
print('Done cleaning {} data in {} seconds'.format(cnt, elapsed))

100%|██████████| 10/10 [00:53<00:00,  5.36s/it]

Done cleaning 10 data in 53.67331838607788 seconds





### 2.  pancreas with tumor: PT

In [6]:
# Start the wall-clock timer for the PT section.
st_tol = time.time()

# Brief description spreadsheet for the tumor cases; missing cells become empty strings.
brief_df = pd.read_excel(brief_descrip_tumor_path)
brief_df = brief_df.fillna('')

# Detailed description table; the 'Code' column is parsed as a string.
detail_df = pd.read_csv(detail_descrip_path, converters={'Code': str})
detail_df = detail_df.fillna('')

# Scans for this section are read from the "tumor" folder.
source_scan_path = '/home/d/pancreas/raw_data/tumor/'

# Process every PT label file, passing the black list to the helper;
# a case is counted when the helper returns a truthy value.
pt_label_files = glob.glob(source_label_path + 'PT*.nrrd')
cnt = 0
for label in tqdm(pt_label_files):
    check_copy = move_labeldata(label, brief_df, detail_df, source_scan_path, target_base_path, black_list)
    if check_copy:
        cnt += 1

elapsed = time.time() - st_tol
print('Done cleaning {} data in {} seconds'.format(cnt, elapsed))

  9%|▊         | 4/47 [00:49<08:47, 12.27s/it]

Skip PT43 from black list!


 53%|█████▎    | 25/47 [04:50<04:15, 11.63s/it]

Skip PT3 from black list!


 79%|███████▊  | 37/47 [06:40<01:48, 10.82s/it]

Skip PT5 from black list!


100%|██████████| 47/47 [08:36<00:00, 10.99s/it]

Done cleaning 44 data in 516.7655823230743 seconds





### 3. thick cut pancreas: PC

In [5]:
# Start the wall-clock timer for the PC section and reset the counter.
st_tol = time.time()
cnt = 0

# Detailed description table for the PC cases; the 'Code' column is parsed as a string.
detail_df = pd.read_csv(detail_descrip_path_PC, converters={'Code': str})
detail_df = detail_df.fillna('')

# Scans for this section are read from the "tumor55" folder.
source_scan_path = '/home/d/pancreas/raw_data/tumor55/'

# Process every PC label file; a case is counted when the helper returns a truthy value.
pc_label_files = glob.glob(source_label_path + 'PC*.nrrd')
for label in tqdm(pc_label_files):
    check_copy = move_labeldata_PC(label, detail_df, source_scan_path, target_base_path)
    if check_copy:
        cnt += 1

elapsed = time.time() - st_tol
print('Done cleaning {} data in {} seconds'.format(cnt, elapsed))

  4%|▍         | 2/53 [00:02<00:57,  1.13s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000264/20160603/4


  6%|▌         | 3/53 [00:03<01:04,  1.29s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000263/20160411/4


  8%|▊         | 4/53 [00:04<00:56,  1.16s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000147/20160308/104


  9%|▉         | 5/53 [00:05<00:54,  1.14s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000027/20170621/4


 11%|█▏        | 6/53 [00:07<00:54,  1.17s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000144/20170225/4


 15%|█▌        | 8/53 [00:09<00:54,  1.22s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000146/20160316/4


 28%|██▊       | 15/53 [00:17<00:44,  1.17s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000134/20160625/4


 36%|███▌      | 19/53 [00:21<00:39,  1.15s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000283/20160624/4


 38%|███▊      | 20/53 [00:22<00:37,  1.13s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000134/20160613/4


 42%|████▏     | 22/53 [00:24<00:34,  1.13s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000148/20160213/4


 45%|████▌     | 24/53 [00:26<00:32,  1.11s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000277/20160830/4


 47%|████▋     | 25/53 [00:27<00:31,  1.11s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000273/20160429/4


 51%|█████     | 27/53 [00:30<00:29,  1.12s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000140/20160112/4


 55%|█████▍    | 29/53 [00:33<00:27,  1.14s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000281/20170130/4


 66%|██████▌   | 35/53 [00:40<00:20,  1.15s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000027/20171225/4


 70%|██████▉   | 37/53 [00:43<00:18,  1.18s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000284/20160503/3


 72%|███████▏  | 38/53 [00:44<00:17,  1.18s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000281/20170203/4


 75%|███████▌  | 40/53 [00:47<00:15,  1.19s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000027/20171026/4


 83%|████████▎ | 44/53 [00:51<00:10,  1.16s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000143/20160613/4


 85%|████████▍ | 45/53 [00:52<00:09,  1.17s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000149/20160108/4


 96%|█████████▌| 51/53 [00:59<00:02,  1.17s/it]

Different phases in /home/d/pancreas/raw_data/tumor55/000134/20160727/4


100%|██████████| 53/53 [01:02<00:00,  1.18s/it]

Done cleaning 53 data in 62.3938250541687 seconds





In [5]:
# Small sample list used by the two cells that follow.
test = [2, 3, 3, 2, 2, 3, 3, 3]

In [6]:
# Display the largest value in `test`.
max(test)

3

In [7]:
# Display how many times the largest value occurs in `test`.
test.count(max(test))

5