In [29]:
import shutil
import os
from tqdm import tqdm
import pandas as pd
import numpy as np

In [39]:
# Root folders of the grouped-class image sets, one per split.
train_dir, val_dir, test_dir = (
    'Dataset/images/train_caseonly_grouped',
    'Dataset/images/validation_caseonly_grouped',
    'Dataset/images/test_caseonly_grouped',
)

In [40]:
# Grouped class labels; each one becomes a sub-folder name inside every split folder.
class_names = [
    'Fluid_overload',
    'Infection',
    'Mass_Like_Lesions',
    'Parenchymal_Disease',
    'Atelectasis',
    'Cardiomegaly',
    'Pneumothorax',
    'Pleural_Thickening',
]

In [41]:
# Make sure each split folder (train/val/test) holds one sub-folder per class
# label, reporting for every sub-folder whether it was created or already there.
for path in (train_dir, val_dir, test_dir):
    for label in class_names:
        path_label = '/'.join((path, label))
        if os.path.exists(path_label):
            print('Existed path', path_label)
            continue
        print('Creating ',path_label)
        os.makedirs(path_label)

Existed path Dataset/images/train_caseonly_grouped/Fluid_overload
Existed path Dataset/images/train_caseonly_grouped/Infection
Existed path Dataset/images/train_caseonly_grouped/Mass_Like_Lesions
Existed path Dataset/images/train_caseonly_grouped/Parenchymal_Disease
Existed path Dataset/images/train_caseonly_grouped/Atelectasis
Existed path Dataset/images/train_caseonly_grouped/Cardiomegaly
Existed path Dataset/images/train_caseonly_grouped/Pneumothorax
Existed path Dataset/images/train_caseonly_grouped/Pleural_Thickening
Creating  Dataset/images/validation_caseonly_grouped/Fluid_overload
Creating  Dataset/images/validation_caseonly_grouped/Infection
Creating  Dataset/images/validation_caseonly_grouped/Mass_Like_Lesions
Creating  Dataset/images/validation_caseonly_grouped/Parenchymal_Disease
Creating  Dataset/images/validation_caseonly_grouped/Atelectasis
Creating  Dataset/images/validation_caseonly_grouped/Cardiomegaly
Creating  Dataset/images/validation_caseonly_grouped/Pneumothorax


In [42]:
# Maps each original finding label to the grouped class it is merged into.
# Built from a (group, findings) table so the grouping is visible at a glance;
# the resulting key order is the same as listing the findings one by one.
class_group_map = {
    finding: group
    for group, findings in (
        ('Fluid_overload', ('Edema', 'Effusion')),
        ('Infection', ('Pneumonia', 'Consolidation', 'Infiltration')),
        ('Mass_Like_Lesions', ('Mass', 'Nodule')),
        ('Parenchymal_Disease', ('Fibrosis', 'Emphysema')),
        ('Atelectasis', ('Atelectasis',)),
        ('Cardiomegaly', ('Cardiomegaly',)),
        ('Pneumothorax', ('Pneumothorax',)),
        ('Pleural_Thickening', ('Pleural_Thickening',)),
    )
    for finding in findings
}

In [43]:
# Source split folders and, position by position, the grouped folders they are
# copied into. The train pair is left disabled here:
#   'train_caseonly' -> 'train_caseonly_grouped'
old_folder_list = [
    'validation_caseonly',
    'test_caseonly',
]
new_folder_list = [
    'validation_caseonly_grouped',
    'test_caseonly_grouped',
]

In [44]:
# Show each original finding next to the grouped class it maps to.
for key, item in class_group_map.items():
    print(key, item, sep='  --->  ')

Edema  --->  Fluid_overload
Effusion  --->  Fluid_overload
Pneumonia  --->  Infection
Consolidation  --->  Infection
Infiltration  --->  Infection
Mass  --->  Mass_Like_Lesions
Nodule  --->  Mass_Like_Lesions
Fibrosis  --->  Parenchymal_Disease
Emphysema  --->  Parenchymal_Disease
Atelectasis  --->  Atelectasis
Cardiomegaly  --->  Cardiomegaly
Pneumothorax  --->  Pneumothorax
Pleural_Thickening  --->  Pleural_Thickening


In [45]:
# Number of original finding labels covered by the mapping.
len(class_group_map)

13

In [46]:
# Pair every source split folder with its grouped destination folder.
# Materialized as a list: a bare zip() is a one-shot iterator, so a second run
# of the copy cell would iterate nothing and silently copy no files.
folder_list = list(zip(old_folder_list, new_folder_list))

In [47]:
# Copy the images of every original finding folder into the folder of the
# grouped class it maps to, for each (source split, grouped split) pair.
# Several findings share one grouped folder, and shutil.copy overwrites an
# existing destination, so a file name present under more than one of those
# findings ends up as a single file in the grouped folder.
for old_folder, new_folder in folder_list:
    for old_label, new_label in class_group_map.items():
        old_path = 'Dataset/images/' + old_folder + '/' + old_label
        new_path = 'Dataset/images/' + new_folder + '/' + new_label
        print('Copying ' + old_path + ' to ' + new_path)

        # Guarantee the destination exists so this cell does not rely on an
        # earlier cell having created the folder for this split and label.
        os.makedirs(new_path, exist_ok=True)

        # Copy regular files only: os.listdir also returns sub-directories
        # (for example a Jupyter `.ipynb_checkpoints` folder), and shutil.copy
        # raises when its source is a directory.
        files = [name for name in os.listdir(old_path)
                 if os.path.isfile(os.path.join(old_path, name))]
        for file in tqdm(files):
            source_file = os.path.join(old_path, file)
            destination_file = os.path.join(new_path, file)
            shutil.copy(source_file, destination_file)

Copying Dataset/images/validation_caseonly/Edema to Dataset/images/validation_caseonly_grouped/Fluid_overload


100%|██████████| 153/153 [00:04<00:00, 31.83it/s]


Copying Dataset/images/validation_caseonly/Effusion to Dataset/images/validation_caseonly_grouped/Fluid_overload


100%|██████████| 1184/1184 [00:37<00:00, 31.34it/s]


Copying Dataset/images/validation_caseonly/Pneumonia to Dataset/images/validation_caseonly_grouped/Infection


100%|██████████| 115/115 [00:03<00:00, 33.37it/s]


Copying Dataset/images/validation_caseonly/Consolidation to Dataset/images/validation_caseonly_grouped/Infection


100%|██████████| 367/367 [00:10<00:00, 35.06it/s]


Copying Dataset/images/validation_caseonly/Infiltration to Dataset/images/validation_caseonly_grouped/Infection


100%|██████████| 1824/1824 [00:54<00:00, 33.48it/s]


Copying Dataset/images/validation_caseonly/Mass to Dataset/images/validation_caseonly_grouped/Mass_Like_Lesions


100%|██████████| 563/563 [00:13<00:00, 40.32it/s]


Copying Dataset/images/validation_caseonly/Nodule to Dataset/images/validation_caseonly_grouped/Mass_Like_Lesions


100%|██████████| 641/641 [00:16<00:00, 37.84it/s]


Copying Dataset/images/validation_caseonly/Fibrosis to Dataset/images/validation_caseonly_grouped/Parenchymal_Disease


100%|██████████| 173/173 [00:05<00:00, 31.93it/s]


Copying Dataset/images/validation_caseonly/Emphysema to Dataset/images/validation_caseonly_grouped/Parenchymal_Disease


100%|██████████| 187/187 [00:07<00:00, 24.72it/s]


Copying Dataset/images/validation_caseonly/Atelectasis to Dataset/images/validation_caseonly_grouped/Atelectasis


100%|██████████| 1030/1030 [00:30<00:00, 34.03it/s]


Copying Dataset/images/validation_caseonly/Cardiomegaly to Dataset/images/validation_caseonly_grouped/Cardiomegaly


100%|██████████| 202/202 [00:05<00:00, 39.58it/s]


Copying Dataset/images/validation_caseonly/Pneumothorax to Dataset/images/validation_caseonly_grouped/Pneumothorax


100%|██████████| 317/317 [00:08<00:00, 35.92it/s]


Copying Dataset/images/validation_caseonly/Pleural_Thickening to Dataset/images/validation_caseonly_grouped/Pleural_Thickening


100%|██████████| 288/288 [00:08<00:00, 32.30it/s]


Copying Dataset/images/test_caseonly/Edema to Dataset/images/test_caseonly_grouped/Fluid_overload


100%|██████████| 925/925 [00:25<00:00, 36.56it/s]


Copying Dataset/images/test_caseonly/Effusion to Dataset/images/test_caseonly_grouped/Fluid_overload


100%|██████████| 4658/4658 [02:07<00:00, 36.45it/s]


Copying Dataset/images/test_caseonly/Pneumonia to Dataset/images/test_caseonly_grouped/Infection


100%|██████████| 555/555 [00:15<00:00, 36.45it/s]


Copying Dataset/images/test_caseonly/Consolidation to Dataset/images/test_caseonly_grouped/Infection


100%|██████████| 1815/1815 [00:47<00:00, 38.02it/s]


Copying Dataset/images/test_caseonly/Infiltration to Dataset/images/test_caseonly_grouped/Infection


100%|██████████| 6112/6112 [02:46<00:00, 36.66it/s]


Copying Dataset/images/test_caseonly/Mass to Dataset/images/test_caseonly_grouped/Mass_Like_Lesions


100%|██████████| 1748/1748 [00:48<00:00, 36.12it/s]


Copying Dataset/images/test_caseonly/Nodule to Dataset/images/test_caseonly_grouped/Mass_Like_Lesions


100%|██████████| 1623/1623 [00:49<00:00, 32.88it/s]


Copying Dataset/images/test_caseonly/Fibrosis to Dataset/images/test_caseonly_grouped/Parenchymal_Disease


100%|██████████| 435/435 [00:13<00:00, 32.30it/s]


Copying Dataset/images/test_caseonly/Emphysema to Dataset/images/test_caseonly_grouped/Parenchymal_Disease


100%|██████████| 1093/1093 [00:30<00:00, 36.25it/s]


Copying Dataset/images/test_caseonly/Atelectasis to Dataset/images/test_caseonly_grouped/Atelectasis


100%|██████████| 3279/3279 [01:35<00:00, 34.38it/s]


Copying Dataset/images/test_caseonly/Cardiomegaly to Dataset/images/test_caseonly_grouped/Cardiomegaly


100%|██████████| 1069/1069 [00:32<00:00, 32.97it/s]


Copying Dataset/images/test_caseonly/Pneumothorax to Dataset/images/test_caseonly_grouped/Pneumothorax


100%|██████████| 2665/2665 [01:12<00:00, 36.94it/s]


Copying Dataset/images/test_caseonly/Pleural_Thickening to Dataset/images/test_caseonly_grouped/Pleural_Thickening


100%|██████████| 1143/1143 [00:32<00:00, 34.65it/s]
