![DME Layers](https://opg.optica.org/getImage.cfm?img=M3cuZnVsbCxib2UtNi00LTExNzItZzAwMw&article=boe-6-4-1172-g003)

-----

![RTA Layers Image](https://journals.plos.org/plosone/article/figure/image?size=large&id=10.1371/journal.pone.0133908.g001) | ![RTA Layers Explanation](https://journals.plos.org/plosone/article/figure/image?size=large&id=10.1371/journal.pone.0133908.t001)
-- | --

![AMD Layers](https://www.ncbi.nlm.nih.gov/pmc/articles/instance/3901571/bin/nihms-508161-f0001.jpg)

----

![AROI Layers](https://ipg.fer.hr/images/50037599/nasa%20baza.png)

In [2]:
%matplotlib inline 

import os
import re
import copy
from glob import glob
import pathlib
from pathlib import Path
import random
import shutil
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt
import scipy.io
import imageio
import cv2
from PIL import Image
import idp_utils.data_handling.constants as C
from idp_utils.data_handling.common import (extract_data,
                                            extract_data_aroi,
                                            extract_data_op,
                                            get_dme_valid_idx,
                                            get_amd_valid_idx,
                                            prepare_files)


%cd $C.ROOT_PATH

seed = 6
random.seed(seed)

/home/extra/micheal/IDP


# 1 Extract Labeled Layeres & Bscans from Data

## 1.1 Original Data

### 1.1.1 RTA

In [2]:
data, dtype = "RTA", "original"
extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data), "*.mat"), 
                   bscan_key='volumedata', 
                   layermap_key='Observer2',
                   bscan_format='hws',
                   layermap_format='wsl',
                   layer_labels=C.RTA_LABELS,
                   bscan_folder=C.BSCAN_PATTERN.format(data=data),
                   layer_folder=C.LAYER_PATTERN.format(data=data, dtype=dtype),
                   overwrite=True)

10 files matches pattern: data/raw/RTA/*.mat


100%|██████████| 10/10 [00:01<00:00,  9.43it/s]


### 1.1.2 DME

In [3]:
data, dtype = "DME", "original"
extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data), "*.mat"),
                   bscan_key='images',
                   layermap_key='manualLayers1',
                   fluid_key='manualFluid1',
                   bscan_format='hws',
                   layermap_format='lws',
                   layer_labels=C.DME_LABELS,
                   bscan_folder=C.BSCAN_PATTERN.format(data=data),
                   layer_folder=C.LAYER_PATTERN.format(data=data, dtype=dtype),
                   fluid_folder=C.FLUID_PATTERN.format(data=data),
                   valid_slice_indices_fn=get_dme_valid_idx,
                   overwrite=True)

10 files matches pattern: data/raw/DME/*.mat


100%|██████████| 10/10 [00:10<00:00,  1.06s/it]


### 1.1.3 AMD

In [4]:
# Control data
data, dtype = "AMD", "original"
extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data), 'Control 2/Control 2/*.mat'), 
                   bscan_key='images',
                   layermap_key='layerMaps',
                   bscan_format='hws',
                   layermap_format='swl',
                   layer_labels=C.AMD_LABELS,
                   bscan_folder=C.BSCAN_PATTERN.format(data=data),
                   layer_folder=C.LAYER_PATTERN.format(data=data, dtype=dtype),
                   valid_slice_indices_fn=get_amd_valid_idx,
                   overwrite=False)

115 files matches pattern: data/raw/AMD/Control 2/Control 2/*.mat


100%|██████████| 115/115 [00:29<00:00,  3.89it/s]


In [5]:
# AMD data
data, dtype = "AMD", "original"
extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data),'AMD 2/AMD 2/*.mat'), 
                   bscan_key='images',
                   layermap_key='layerMaps',
                   bscan_format='hws',
                   layermap_format='swl',
                   layer_labels=C.AMD_LABELS,
                   bscan_folder=C.BSCAN_PATTERN.format(data=data),
                   layer_folder=C.LAYER_PATTERN.format(data=data, dtype=dtype),
                   valid_slice_indices_fn=get_amd_valid_idx,
                   overwrite=False)

269 files matches pattern: data/raw/AMD/AMD 2/AMD 2/*.mat


100%|██████████| 269/269 [01:08<00:00,  3.92it/s]


### 1.1.4 AROI

In [6]:
data, dtype = "AROI", "original"
extract_data_aroi(raw_data_folder=C.RAW_DATA_PATTERN.format(data=data),
                        bscan_folder=C.BSCAN_PATTERN.format(data=data),
                        layer_folder=C.LAYER_PATTERN.format(data=data, dtype=dtype),
                        fluid_folder=C.FLUID_PATTERN.format(data=data),
                        dtype=dtype,
                        fluid_labels=C.FLUID_LABELS,
                        layer_labels=C.AROI_LABELS)

  3%|▎         | 29/1105 [00:02<01:30, 11.95it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient14/mask/number/patient14_raw0075.png


 35%|███▍      | 386/1105 [00:26<00:37, 19.16it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0067.png
Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0064.png
Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0065.png


 36%|███▌      | 395/1105 [00:27<00:35, 19.84it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0066.png


 83%|████████▎ | 916/1105 [01:03<00:09, 19.46it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient10/mask/number/patient10_raw0070.png


 89%|████████▊ | 978/1105 [01:07<00:08, 15.04it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient10/mask/number/patient10_raw0068.png


 89%|████████▉ | 985/1105 [01:07<00:06, 17.29it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient10/mask/number/patient10_raw0069.png


 96%|█████████▌| 1058/1105 [01:12<00:02, 17.57it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient11/mask/number/patient11_raw0069.png


100%|██████████| 1105/1105 [01:15<00:00, 14.65it/s]

Sum of skipped files:  9





#### 1.1.4.1 AROI Hetero

We create another AROI dataset that use a totally different set of notations than OP. This is for a later experiment.

In [8]:
data, dtype = "AROI", "hetero"

hetero_aroi_layer_labels = [22, 57, 174, 190] # was [19, 57, 171, 190]

extract_data_aroi(raw_data_folder=C.RAW_DATA_PATTERN.format(data=data),
                        bscan_folder=C.BSCAN_PATTERN.format(data=data),
                        layer_folder=C.LAYER_PATTERN.format(data=data, dtype=dtype),
                        fluid_folder=C.FLUID_PATTERN.format(data=data),
                        dtype=dtype,
                        fluid_labels=C.FLUID_LABELS,
                        layer_labels=hetero_aroi_layer_labels)

Created folder data/extract/layers/AROI/hetero


  3%|▎         | 29/1105 [00:03<01:32, 11.64it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient14/mask/number/patient14_raw0075.png


 35%|███▌      | 387/1105 [00:29<00:42, 17.01it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0067.png
Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0064.png
Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0065.png


 36%|███▌      | 397/1105 [00:29<00:39, 17.95it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0066.png


 83%|████████▎ | 918/1105 [01:07<00:10, 17.27it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient10/mask/number/patient10_raw0070.png


 89%|████████▊ | 979/1105 [01:11<00:07, 17.06it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient10/mask/number/patient10_raw0068.png


 89%|████████▉ | 983/1105 [01:12<00:07, 16.54it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient10/mask/number/patient10_raw0069.png


 96%|█████████▌| 1058/1105 [01:17<00:03, 15.57it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient11/mask/number/patient11_raw0069.png


100%|██████████| 1105/1105 [01:20<00:00, 13.78it/s]

Sum of skipped files:  9





### 1.1.5 OP

In [7]:
data, dtype = "OP", "original"
name = "original"

extract_data_op(raw_data_folder=C.RAW_DATA_PATTERN.format(data=data),
                bscan_folder=C.BSCAN_PATTERN.format(data=data),
                layer_folder=C.LAYER_PATTERN.format(data=data, dtype=dtype),
                layer_labels=C.OP_LABELS,
                instrument_labels=C.INSTRUMENT_LABELS,
                save_extension='png')

Part1 Started


100%|██████████| 47/47 [03:45<00:00,  4.80s/it]


Part2 Started


100%|██████████| 47/47 [03:51<00:00,  4.92s/it]


## 1.2 Reduced-Layer Data

### 1.2.1 RTA

In [8]:
# data = "RTA"
# extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data),'Subject[78]*.mat'), 
#                    bscan_key='volumedata', 
#                    layermap_key='Observer2',
#                    bscan_format='hws',
#                    layermap_format='wsl',
#                    layer_labels=C.RTA_LABELS,
#                    bscan_folder=C.BSCAN_PATTERN.format(data=data),
#                    layer_folder=C.LAYER_PATTERN.format(data=data, dtype='reduce1'),
#                    remove_from=range(2,8), # 6 out of 8
#                    n_remove=1)
# extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data),'Subject[56]*.mat'), 
#                    bscan_key='volumedata', 
#                    layermap_key='Observer2',
#                    bscan_format='hws',
#                    layermap_format='wsl',
#                    layer_labels=C.RTA_LABELS,
#                    bscan_folder=C.BSCAN_PATTERN.format(data=data),
#                    layer_folder=C.LAYER_PATTERN.format(data=data, dtype='reduce2'),
#                    remove_from=range(2,8), # 6 out of 8
#                    n_remove=2)
# extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data),'Subject[34]*.mat'), 
#                    bscan_key='volumedata', 
#                    layermap_key='Observer2',
#                    bscan_format='hws',
#                    layermap_format='wsl',
#                    layer_labels=C.RTA_LABELS,
#                    bscan_folder=C.BSCAN_PATTERN.format(data=data),
#                    layer_folder=C.LAYER_PATTERN.format(data=data, dtype='reduce3'),
#                    remove_from=range(2,8), # 6 out of 8
#                    n_remove=3)
# extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data),'Subject[12]*.mat'), 
#                    bscan_key='volumedata', 
#                    layermap_key='Observer2',
#                    bscan_format='hws',
#                    layermap_format='wsl',
#                    layer_labels=C.RTA_LABELS,
#                    bscan_folder=C.BSCAN_PATTERN.format(data=data),
#                    layer_folder=C.LAYER_PATTERN.format(data=data, dtype='reduce4'),
#                    remove_from=range(2,8), # 6 out of 8
#                    n_remove=4)
# extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data),'Subject[12]*.mat'), 
#                    bscan_key='volumedata', 
#                    layermap_key='Observer2',
#                    bscan_format='hws',
#                    layermap_format='wsl',
#                    layer_labels=C.RTA_LABELS,
#                    bscan_folder=C.BSCAN_PATTERN.format(data=data),
#                    layer_folder=C.LAYER_PATTERN.format(data=data, dtype='reduce5'),
#                    remove_from=range(2,8), # 6 out of 8
#                    n_remove=5)

### 1.2.2 DME

In [9]:
# data = "DME"
# extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data),'Subject_0[89].mat'),
#                    bscan_key='images',
#                    layermap_key='manualLayers1',
#                    fluid_key='manualFluid1',
#                    bscan_format='hws',
#                    layermap_format='lws',
#                    layer_labels=C.DME_LABELS,
#                    bscan_folder=C.BSCAN_PATTERN.format(data=data),
#                    layer_folder=C.LAYER_PATTERN.format(data=data, dtype='reduce1'),
#                    fluid_folder=C.FLUID_PATTERN.format(data=data),
#                    valid_slice_indices_fn=get_dme_valid_idx,
#                    remove_from=range(2,8), # DME also has 8 layers, we choose 6 out of 8
#                    n_remove=1)
# extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data),'Subject_0[67].mat'),
#                    bscan_key='images',
#                    layermap_key='manualLayers1',
#                    fluid_key='manualFluid1',
#                    bscan_format='hws',
#                    layermap_format='lws',
#                    layer_labels=C.DME_LABELS,
#                    bscan_folder=C.BSCAN_PATTERN.format(data=data),
#                    layer_folder=C.LAYER_PATTERN.format(data=data, dtype='reduce2'),
#                    fluid_folder=C.FLUID_PATTERN.format(data=data),
#                    valid_slice_indices_fn=get_dme_valid_idx,
#                    remove_from=range(2,8), # DME also has 8 layers, we choose 6 out of 8
#                    n_remove=2)
# extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data),'Subject_0[45].mat'),
#                    bscan_key='images',
#                    layermap_key='manualLayers1',
#                    fluid_key='manualFluid1',
#                    bscan_format='hws',
#                    layermap_format='lws',
#                    layer_labels=C.DME_LABELS,
#                    bscan_folder=C.BSCAN_PATTERN.format(data=data),
#                    layer_folder=C.LAYER_PATTERN.format(data=data, dtype='reduce3'),
#                    fluid_folder=C.FLUID_PATTERN.format(data=data),
#                    valid_slice_indices_fn=get_dme_valid_idx,
#                    remove_from=range(2,8), # DME also has 8 layers, we choose 6 out of 8
#                    n_remove=3)
# extract_data(file_pattern=os.path.join(C.RAW_DATA_PATTERN.format(data=data),'Subject_0[23].mat'),
#                    bscan_key='images',
#                    layermap_key='manualLayers1',
#                    fluid_key='manualFluid1',
#                    bscan_format='hws',
#                    layermap_format='lws',
#                    layer_labels=C.DME_LABELS,
#                    bscan_folder=C.BSCAN_PATTERN.format(data=data),
#                    layer_folder=C.LAYER_PATTERN.format(data=data, dtype='reduce4'),
#                    fluid_folder=C.FLUID_PATTERN.format(data=data),
#                    valid_slice_indices_fn=get_dme_valid_idx,
#                    remove_from=range(2,8), # DME also has 8 layers, we choose 6 out of 8
#                    n_remove=4)

### 1.2.3 AMD

There are only 3 layers within it. Let it go.

### 1.2.4 AROI

In [10]:
data = 'AROI'
dtype = 'reduce1'
extract_data_aroi(raw_data_folder=C.RAW_DATA_PATTERN.format(data=data),
                 bscan_folder=C.BSCAN_PATTERN.format(data=data),
                 layer_folder=C.LAYER_PATTERN.format(data=data, dtype=dtype),
                 fluid_folder=C.FLUID_PATTERN.format(data=data),
                 dtype=dtype,
                 fluid_labels=C.FLUID_LABELS,
                 layer_labels=C.AROI_LABELS,
                 remove_from=range(1,3),
                 n_remove=1)

  3%|▎         | 29/1105 [00:03<01:34, 11.44it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient14/mask/number/patient14_raw0075.png


 35%|███▍      | 386/1105 [00:27<00:38, 18.65it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0067.png
Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0064.png
Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0065.png


 36%|███▌      | 396/1105 [00:27<00:35, 19.85it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient5/mask/number/patient5_raw0066.png


 83%|████████▎ | 918/1105 [01:03<00:10, 18.14it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient10/mask/number/patient10_raw0070.png


 89%|████████▊ | 978/1105 [01:07<00:07, 15.98it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient10/mask/number/patient10_raw0068.png


 89%|████████▉ | 985/1105 [01:07<00:07, 16.86it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient10/mask/number/patient10_raw0069.png


 96%|█████████▌| 1058/1105 [01:12<00:02, 18.00it/s]

Error occurred when detecting edges data/raw/AROI/24 patient/patient11/mask/number/patient11_raw0069.png


100%|██████████| 1105/1105 [01:15<00:00, 14.71it/s]

Sum of skipped files:  9





# 2 Split Data into train, val and test

## 2.1 Original Data

### 2.1.1 RTA

In [11]:
data, dtype = "RTA", "original"
name = "original"

prepare_files(data=data, 
              dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
              train_ratio=0.8, 
              test_ratio=0.1,
              with_fluids=False,
              dtype=dtype  )

[INFO] 92 files matches pattern data/extract/bscans/RTA/*.jpg
Preparing train files:
created dst label folder data/splits/RTA/original/labels/train
created dst bscan folder data/splits/RTA/original/bscans/train


100%|██████████| 73/73 [00:00<00:00, 2021.46it/s]


Preparing test files:
created dst label folder data/splits/RTA/original/labels/test
created dst bscan folder data/splits/RTA/original/bscans/test


100%|██████████| 9/9 [00:00<00:00, 1570.77it/s]


Preparing val files:
created dst label folder data/splits/RTA/original/labels/val
created dst bscan folder data/splits/RTA/original/bscans/val


100%|██████████| 10/10 [00:00<00:00, 1461.58it/s]


### 2.1.2 DME

Just a backup for original code
```python
bscan_files = glob(C.DME_BSCAN_ORIGINAL_FOLDER + '*.jpg')
assert len(bscan_files) != 0

train_files, test_files, val_files = split_files(bscan_files, 0.8, 0.1)
assert len(train_files) != 0, f"train_files is empty, data may have already been moved"

splited_files = {
    'train': train_files,
    'test': test_files,
    'val': val_files
}
move_files(splited_files, C.DME_LAYER_BSCAN_ORIGINAL_ROOT, C.DME_SPLIT_ORIGINAL_FOLDER)
```

In [12]:
data, dtype = "DME", "original"
name = "original"

prepare_files(data=data, 
              dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
              train_ratio=0.8, 
              test_ratio=0.1,
              with_fluids=True,
              dtype=dtype)

[INFO] 110 files matches pattern data/extract/bscans/DME/*.jpg
Preparing train files:
created dst label folder data/splits/DME/original/labels/train
created dst bscan folder data/splits/DME/original/bscans/train


100%|██████████| 88/88 [00:00<00:00, 184.62it/s]


Preparing test files:
created dst label folder data/splits/DME/original/labels/test
created dst bscan folder data/splits/DME/original/bscans/test


100%|██████████| 11/11 [00:00<00:00, 151.58it/s]


Preparing val files:
created dst label folder data/splits/DME/original/labels/val
created dst bscan folder data/splits/DME/original/bscans/val


100%|██████████| 11/11 [00:00<00:00, 134.92it/s]


### 2.1.3 AMD

In [13]:
data, dtype = "AMD", "original"
name = "original"

prepare_files(data=data, 
              dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
              train_ratio=0.8, 
              test_ratio=0.1,
              with_fluids=False,
              dtype=dtype)

[INFO] 29354 files matches pattern data/extract/bscans/AMD/*.jpg
Preparing train files:
created dst label folder data/splits/AMD/original/labels/train
created dst bscan folder data/splits/AMD/original/bscans/train


100%|██████████| 23483/23483 [01:35<00:00, 244.91it/s]


Preparing test files:
created dst label folder data/splits/AMD/original/labels/test
created dst bscan folder data/splits/AMD/original/bscans/test


100%|██████████| 2935/2935 [00:11<00:00, 266.51it/s]


Preparing val files:
created dst label folder data/splits/AMD/original/labels/val
created dst bscan folder data/splits/AMD/original/bscans/val


100%|██████████| 2936/2936 [00:11<00:00, 251.53it/s]


### 2.1.4 AROI

In [14]:
data, dtype = "AROI", "original"
name = "original"

prepare_files(data=data, 
              dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
              train_ratio=0.8, 
              test_ratio=0.1,
              with_fluids=True,
              dtype=dtype,
              extension='png')

[INFO] 1096 files matches pattern data/extract/bscans/AROI/*.png
Preparing train files:
created dst label folder data/splits/AROI/original/labels/train
created dst bscan folder data/splits/AROI/original/bscans/train


100%|██████████| 876/876 [00:06<00:00, 141.16it/s]


Preparing test files:
created dst label folder data/splits/AROI/original/labels/test
created dst bscan folder data/splits/AROI/original/bscans/test


100%|██████████| 109/109 [00:00<00:00, 141.95it/s]


Preparing val files:
created dst label folder data/splits/AROI/original/labels/val
created dst bscan folder data/splits/AROI/original/bscans/val


100%|██████████| 111/111 [00:00<00:00, 136.23it/s]


#### 2.1.4.1 AROI Hetero

In [2]:
data, dtype = "AROI", "hetero"
name = "hetero"

prepare_files(data=data, 
              dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
              train_ratio=0.8, 
              test_ratio=0.1,
              with_fluids=True,
              dtype=dtype,
              extension='png')

[INFO] 1096 files matches pattern data/extract/layers/AROI/hetero/*.png
Preparing train files:
created dst label folder data/splits/AROI/hetero/labels/train
created dst bscan folder data/splits/AROI/hetero/bscans/train


100%|██████████| 876/876 [00:06<00:00, 138.00it/s]


Preparing test files:
created dst label folder data/splits/AROI/hetero/labels/test
created dst bscan folder data/splits/AROI/hetero/bscans/test


100%|██████████| 109/109 [00:00<00:00, 137.83it/s]


Preparing val files:
created dst label folder data/splits/AROI/hetero/labels/val
created dst bscan folder data/splits/AROI/hetero/bscans/val


100%|██████████| 111/111 [00:00<00:00, 144.80it/s]


### 2.1.5 OP

In [15]:
data, dtype = "OP", "original"
name = "original"

prepare_files(data=data, 
              dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
              train_ratio=0.8, 
              test_ratio=0.1,
              with_fluids=False,
              dtype=dtype,
              extension='png')

[INFO] 12025 files matches pattern data/extract/bscans/OP/*.png
Preparing train files:
created dst label folder data/splits/OP/original/labels/train
created dst bscan folder data/splits/OP/original/bscans/train


100%|██████████| 9620/9620 [00:22<00:00, 424.72it/s]


Preparing test files:
created dst label folder data/splits/OP/original/labels/test
created dst bscan folder data/splits/OP/original/bscans/test


100%|██████████| 1202/1202 [00:00<00:00, 1327.87it/s]


Preparing val files:
created dst label folder data/splits/OP/original/labels/val
created dst bscan folder data/splits/OP/original/bscans/val


100%|██████████| 1203/1203 [00:00<00:00, 2500.10it/s]


## 2.2 Reduced-Layer Data

### 2.2.1 RTA

just another backup:
```python
bscan_files = glob(C.RTA_BSCAN_REDUCED_FOLDER+'*.jpg')
print(len(bscan_files))

train_files, test_files, val_files = split_files(bscan_files, 0.8, 0.1)
assert len(train_files) != 0, f"train_files is empty, data may have already been moved"

splited_files = {
    'train': train_files,
    'test': test_files,
    'val': val_files
}
move_files(splited_files, C.RTA_LAYER_BSCAN_REDUCED_ROOT, C.RTA_SPLIT_REDUCED_FOLDER)
```

In [16]:
# data = "RTA"
# name = "reduce_merge"

# prepare_files(data=data, 
#               dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
#               train_ratio=0.8, 
#               test_ratio=0.1,
#               with_fluids=False,
#               dtype="reduce1", 
#               merge_original=True)
# prepare_files(data=data, 
#               dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
#               train_ratio=0.8, 
#               test_ratio=0.1,
#               with_fluids=False,
#               dtype="reduce2", 
#               merge_original=True)
# prepare_files(data=data, 
#               dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
#               train_ratio=0.8, 
#               test_ratio=0.1,
#               with_fluids=False,
#               dtype="reduce3", 
#               merge_original=True)
# prepare_files(data=data, 
#               dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
#               train_ratio=0.8, 
#               test_ratio=0.1,
#               with_fluids=False,
#               dtype="reduce4", 
#               merge_original=True)
# prepare_files(data=data, 
#               dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
#               train_ratio=0.8, 
#               test_ratio=0.1,
#               with_fluids=False,
#               dtype="reduce5", 
#               merge_original=True)

### 2.2.2 DME

In [17]:
# data = "DME"
# name = "reduce_merge"

# prepare_files(data=data, 
#               dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
#               train_ratio=0.8, 
#               test_ratio=0.1,
#               with_fluids=True,
#               dtype="reduce1", 
#               merge_original=True)
# prepare_files(data=data, 
#               dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
#               train_ratio=0.8, 
#               test_ratio=0.1,
#               with_fluids=True,
#               dtype="reduce2", 
#               merge_original=True)
# prepare_files(data=data, 
#               dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
#               train_ratio=0.8, 
#               test_ratio=0.1,
#               with_fluids=True,
#               dtype="reduce3", 
#               merge_original=True)
# prepare_files(data=data, 
#               dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
#               train_ratio=0.8, 
#               test_ratio=0.1,
#               with_fluids=True,
#               dtype="reduce4", 
#               merge_original=True)

### 2.2.3 AROI

In [18]:
data, dtype = "AROI", "reduce1"
name = "reduce1_merge"
merge_original = True

prepare_files(data=data, 
              dst_folder=C.SPLIT_PATTERN.format(data=data, name=name), 
              train_ratio=0.8, 
              test_ratio=0.1,
              with_fluids=True,
              dtype=dtype, 
              merge_original=merge_original,
              extension='png')

[INFO] 0 files matches pattern data/extract/layers/AROI/reduce1/*.png
Preparing train files:
created dst label folder data/splits/AROI/reduce1_merge/labels/train
created dst bscan folder data/splits/AROI/reduce1_merge/bscans/train


0it [00:00, ?it/s]


Preparing test files:
created dst label folder data/splits/AROI/reduce1_merge/labels/test
created dst bscan folder data/splits/AROI/reduce1_merge/bscans/test


0it [00:00, ?it/s]


Preparing val files:
created dst label folder data/splits/AROI/reduce1_merge/labels/val
created dst bscan folder data/splits/AROI/reduce1_merge/bscans/val


0it [00:00, ?it/s]


# 3 Create pix2pix Compatible Datasets

## 3.1 Original Data

### 3.1.1 RTA

In [19]:
label_folder = C.SPLIT_PATTERN.format(data='RTA', name='original') + '/labels'
bscan_folder = C.SPLIT_PATTERN.format(data='RTA', name='original') + '/bscans'
dataset_folder = C.DATASET_PATTERN.format(data='RTA', name='original')
!python pytorch-CycleGAN-and-pix2pix/datasets/combine_A_and_B.py --fold_A $label_folder --fold_B $bscan_folder --fold_AB $dataset_folder

[fold_A] =  data/splits/RTA/original/labels
[fold_B] =  data/splits/RTA/original/bscans
[fold_AB] =  data/datasets/RTA/original
[num_imgs] =  1000000
[use_AB] =  False
[no_multiprocessing] =  False
split = train, use 100/100 images
split = train, number of images = 100
split = test, use 72/72 images
split = test, number of images = 72
split = val, use 79/79 images
split = val, number of images = 79


### 3.1.2 DME

In [20]:
label_folder = C.SPLIT_PATTERN.format(data='DME', name='original') + '/labels'
bscan_folder = C.SPLIT_PATTERN.format(data='DME', name='original') + '/bscans'
dataset_folder = C.DATASET_PATTERN.format(data='DME', name='original')
!python pytorch-CycleGAN-and-pix2pix/datasets/combine_A_and_B.py \
    --fold_A $label_folder \
    --fold_B $bscan_folder \
    --fold_AB $dataset_folder

[fold_A] =  data/splits/DME/original/labels
[fold_B] =  data/splits/DME/original/bscans
[fold_AB] =  data/datasets/DME/original
[num_imgs] =  1000000
[use_AB] =  False
[no_multiprocessing] =  False
split = train, use 110/110 images
split = train, number of images = 110
split = test, use 61/61 images
split = test, number of images = 61
split = val, use 61/61 images
split = val, number of images = 61


### 3.1.3 AMD

In [21]:
label_folder = C.SPLIT_PATTERN.format(data='AMD', name='original') + '/labels'
bscan_folder = C.SPLIT_PATTERN.format(data='AMD', name='original') + '/bscans'
dataset_folder = C.DATASET_PATTERN.format(data='AMD', name='original')
!python pytorch-CycleGAN-and-pix2pix/datasets/combine_A_and_B.py --fold_A $label_folder --fold_B $bscan_folder --fold_AB $dataset_folder

[fold_A] =  data/splits/AMD/original/labels
[fold_B] =  data/splits/AMD/original/bscans
[fold_AB] =  data/datasets/AMD/original
[num_imgs] =  1000000
[use_AB] =  False
[no_multiprocessing] =  False
split = train, use 26899/26899 images
split = train, number of images = 26899
split = test, use 6933/6933 images
split = test, number of images = 6933
split = val, use 6465/6465 images
split = val, number of images = 6465


In [22]:
# delete generated AMD datasets
!rm $dataset_folder/test/Farsiu_*
!rm $dataset_folder/train/Farsiu_*
!rm $dataset_folder/val/Farsiu_*

/bin/bash: /usr/bin/rm: Argument list too long


### 3.1.4 AROI

In [23]:
label_folder = C.SPLIT_PATTERN.format(data='AROI', name='original') + '/labels'
bscan_folder = C.SPLIT_PATTERN.format(data='AROI', name='original') + '/bscans'
dataset_folder = C.DATASET_PATTERN.format(data='AROI', name='original')
!python pytorch-CycleGAN-and-pix2pix/datasets/combine_A_and_B.py \
    --fold_A $label_folder \
    --fold_B $bscan_folder \
    --fold_AB $dataset_folder

[fold_A] =  data/splits/AROI/original/labels
[fold_B] =  data/splits/AROI/original/bscans
[fold_AB] =  data/datasets/AROI/original
[num_imgs] =  1000000
[use_AB] =  False
[no_multiprocessing] =  False
split = train, use 1094/1094 images
split = train, number of images = 1094
split = test, use 377/377 images
split = test, number of images = 377
split = val, use 376/376 images
split = val, number of images = 376


#### 3.1.4.1 AROI Hetero

In [3]:
label_folder = C.SPLIT_PATTERN.format(data='AROI', name='hetero') + '/labels'
bscan_folder = C.SPLIT_PATTERN.format(data='AROI', name='hetero') + '/bscans'
dataset_folder = C.DATASET_PATTERN.format(data='AROI', name='hetero')
!python pytorch-CycleGAN-and-pix2pix/datasets/combine_A_and_B.py \
    --fold_A $label_folder \
    --fold_B $bscan_folder \
    --fold_AB $dataset_folder

[fold_A] =  data/splits/AROI/hetero/labels
[fold_B] =  data/splits/AROI/hetero/bscans
[fold_AB] =  data/datasets/AROI/hetero
[num_imgs] =  1000000
[use_AB] =  False
[no_multiprocessing] =  False
split = train, use 876/876 images
split = train, number of images = 876
split = test, use 109/109 images
split = test, number of images = 109
split = val, use 111/111 images
split = val, number of images = 111


### 3.1.5 OP

In [24]:
label_folder = C.SPLIT_PATTERN.format(data='OP', name='original') + '/labels'
bscan_folder = C.SPLIT_PATTERN.format(data='OP', name='original') + '/bscans'
dataset_folder = C.DATASET_PATTERN.format(data='OP', name='original')
!python pytorch-CycleGAN-and-pix2pix/datasets/combine_A_and_B.py \
    --fold_A $label_folder \
    --fold_B $bscan_folder \
    --fold_AB $dataset_folder

[fold_A] =  data/splits/OP/original/labels
[fold_B] =  data/splits/OP/original/bscans
[fold_AB] =  data/datasets/OP/original
[num_imgs] =  1000000
[use_AB] =  False
[no_multiprocessing] =  False
split = train, use 11945/11945 images
split = train, number of images = 11945
split = test, use 4153/4153 images
split = test, number of images = 4153
split = val, use 4102/4102 images
split = val, number of images = 4102


## 3.2 Reduced-Layer Data

### 3.2.1 RTA

In [25]:
# data, name = "RTA", "reduce_merge"
# bscan_folder = os.path.join(C.SPLIT_PATTERN.format(data=data, name=name), "bscans")
# label_folder = os.path.join(C.SPLIT_PATTERN.format(data=data, name=name), "labels")
# dataset_folder = C.DATASET_PATTERN.format(data=data, name=name)

# !python pytorch-CycleGAN-and-pix2pix/datasets/combine_A_and_B.py \
#     --fold_A $label_folder \
#     --fold_B $bscan_folder \
#     --fold_AB $dataset_folder

### 3.2.2 DME

In [26]:
# data, name = "DME", "reduce_merge"
# bscan_folder = os.path.join(C.SPLIT_PATTERN.format(data=data, name=name), "bscans")
# label_folder = os.path.join(C.SPLIT_PATTERN.format(data=data, name=name), "labels")
# dataset_folder = C.DATASET_PATTERN.format(data=data, name=name)

# !python pytorch-CycleGAN-and-pix2pix/datasets/combine_A_and_B.py \
#     --fold_A $label_folder \
#     --fold_B $bscan_folder \
#     --fold_AB $dataset_folder

### 3.2.3 AROI

In [27]:
data, name = "AROI", "reduce1_merge"

bscan_folder = os.path.join(C.SPLIT_PATTERN.format(data=data, name=name), "bscans")
label_folder = os.path.join(C.SPLIT_PATTERN.format(data=data, name=name), "labels")
dataset_folder = C.DATASET_PATTERN.format(data=data, name=name)

In [28]:
!python pytorch-CycleGAN-and-pix2pix/datasets/combine_A_and_B.py \
    --fold_A "$label_folder" \
    --fold_B "$bscan_folder" \
    --fold_AB "$dataset_folder" 

[fold_A] =  data/splits/AROI/reduce1_merge/labels
[fold_B] =  data/splits/AROI/reduce1_merge/bscans
[fold_AB] =  data/datasets/AROI/reduce1_merge
[num_imgs] =  1000000
[use_AB] =  False
[no_multiprocessing] =  False
split = train, use 1752/1752 images
split = train, number of images = 1752
split = test, use 218/218 images
split = test, number of images = 218
split = val, use 222/222 images
split = val, number of images = 222
