In [1]:
import pandas as pd
from tqdm import tqdm
import re
from numpy import nan as Nan
import pydicom as dicom
import cv2
import shutil, os, glob


# Splitting procedure

We have 50 images per branch of the LAD for each patient, which means the angle of rotation per stride in the MPR of the LAD is about 7° (360° / 50). We took every third image, i.e. roughly 20° of rotation per step, which gives about 15 images per patient.   
<br>
While splitting, we paid attention to the following:
1. The number of images for each stenosis score level should be proportional across the splits.
2. The number of images for each branch of the coronary artery should be proportional across the splits.
3. We used separate patients for the test and val parts, so no patient appears in more than one split.

# Val part
<b>0</b>    254 <br>

<b>1</b>    153
<br>
<b>NORMAL: </b> <br>
    106 CTCA1961 (NORMAL	NORMAL	NORMAL	NORMAL) <br>
    1007 CTCA1959 (NORMAL	NORMAL	NORMAL	NORMAL	NORMAL) <br>
    1009 CTCA1955 (NORMAL	NORMAL	NORMAL	NORMAL) <br>
<br>

<b>25%: </b> <br>
    1027 CTCA1965 (<25%	<25%	<25%	<25%	NORMAL) <br>
    CTCAAGK05031979 (NORMAL	<25%	NORMAL	<25%) <br>
    CTCAANM18021961 (<25%	<25%	NORMAL	NORMAL) <br>
<br>    

<b>50%</b> <br>
    1001 CTCA1947 (25-50%	25-50%	25-50%	NORMAL	25-50%) <br>
    1002 CTCA1955 (NORMAL	50%	NORMAL	NORMAL) <br>
    1060 CTCA1959 (50%	NORMAL	NORMAL	25-50%) <br>
<br>
<b>70%</b><br>
    1038 CTCA1979 (NORMAL	50-70%	NORMAL	NORMAL) <br>
    1045 CTCA1950 (50-70%	25-50%	NORMAL	25-50%) <br>
    


# Test part

<b>0</b>    254 <br>

<b>1</b>    287 <br>

<b>NORMAL: </b> <br>
    CTCAZHX30011957 (NORMAL	NORMAL	NORMAL	NORMAL	NORMAL), <br>
    CTCAYOG08091955 (NORMAL	NORMAL	NORMAL	NORMAL	NORMAL), <br>
    CTCAYOA13121966 (NORMAL	NORMAL	NORMAL	NORMAL	NORMAL) <br>

<br>
<b>25%: </b> <br>
    CTCAWUK05041963 (25% 25%	25%	NORMAL), <br>
    CTCATHJ17011957 (25%,25%,NORMAL,25%) , <br>
    CTCAPHD16081938 (25%,25%,25%,25%),<br>
    CTCASIP30041975 (25-50%, 25%,25%,25%,25%)<br>
<br>    
<br>
<b>50%</b> <br>
    CTCASTR17021954 (25%	25-50%	25-50%	25%),<br>
    CTCASTS01111969 (50%	50%	NORMAL	NORMAL)<br>
    CTCASTW15121946 (25-50%	NORMAL	NORMAL)<br>
    CTCATKR01031953 (25-50%	NORMAL	NORMAL	50%	NORMAL)<br>
<br>
<b>70%</b><br>
    CTCATUQ02091955 (50-70%	>70%	NORMAL	25-50%)<br>
    CTCAVAH09071948 (	>70%	>70%	NORMAL	50-70%)<br>
    CTCAZDV13081958 (25-50%	50-70%	NORMAL	25%	50%)<br>


In [7]:
# Patient folders held out for the test split. The groups follow the stenosis
# buckets listed in the "Test part" section of this notebook.
test_part = [
    # NORMAL
    'CTCAZHX30011957',
    'CTCAYOG08091955',
    'CTCAYOA13121966',
    # 25%
    'CTCAWUK05041963',
    'CTCATHJ17011957',
    'CTCAPHD16081938',
    'CTCASIP30041975',
    # 50%
    'CTCASTR17021954',
    'CTCASTS01111969',
    'CTCASTW15121946',
    'CTCATKR01031953',
    # 70%
    'CTCATUQ02091955',
    'CTCAVAH09071948',
    'CTCAZDV13081958',
]

# Patient folders held out for the validation split. The groups follow the
# stenosis buckets listed in the "Val part" section of this notebook.
val_part = [
    # NORMAL
    '106 CTCA1961',
    '1007 CTCA1959',
    '1009 CTCA1955',
    # 25%
    '1027 CTCA1965',
    'CTCAAGK05031979',
    'CTCAANM18021961',
    # 50%
    '1001 CTCA1947',
    '1002 CTCA1955',
    '1060 CTCA1959',
    # 70%
    '1038 CTCA1979',
    '1045 CTCA1950',
]

# Guard the invariants the split relies on: a patient folder can only be moved
# once, so it must not be listed twice or appear in both splits.
assert len(set(test_part)) == len(test_part), "duplicate patient in test_part"
assert len(set(val_part)) == len(val_part), "duplicate patient in val_part"
assert not set(test_part) & set(val_part), "patient listed in both val and test"


# Moving the data

In [3]:
def move_dataset_part(dataset_part_name, patients_list, path_to_data, path_to_move):
    """
    Move the given patient folders into a new sub-dataset directory.

    Each entry of ``patients_list`` is moved from
    ``<path_to_data>/<patient>`` to
    ``<path_to_move>/<dataset_part_name>/<patient>``.

    All patients are checked before anything is moved, so a bad entry cannot
    leave the dataset half-moved. Patients that are already present at the
    destination (and no longer at the source) are skipped, which makes it safe
    to re-run the cell.

    Args:
        - dataset_part_name(str): name of the new subpart of the dataset
        - patients_list: list of the patients which should be moved
        - path_to_data: dataset from which we take the patient folders
        - path_to_move: path where the new dataset is created
    Returns:
        - None
    Raises:
        - FileNotFoundError: a patient folder exists neither at the source
          nor at the destination
        - FileExistsError: a patient folder exists at both the source and the
          destination
    """
    destination_root = os.path.join(path_to_move, dataset_part_name)

    # First pass: validate every patient and collect the moves still to do.
    pending_moves = []
    for patient in patients_list:
        source = os.path.join(path_to_data, patient)
        target = os.path.join(destination_root, patient)

        if os.path.exists(target):
            if os.path.exists(source):
                # shutil.move would put the source *inside* the existing
                # target directory (target/patient); refuse instead.
                raise FileExistsError(
                    "Patient folder exists in both source and destination: "
                    "{} and {}".format(source, target)
                )
            # Already moved by an earlier run; nothing left to do.
            continue

        if not os.path.exists(source):
            raise FileNotFoundError("Patient folder not found: {}".format(source))

        pending_moves.append((source, target))

    # makedirs also creates missing parents and tolerates an existing directory.
    os.makedirs(destination_root, exist_ok=True)

    # Second pass: perform the moves.
    for source, target in pending_moves:
        shutil.move(source, target)

In [9]:
# Directory in which the split sub-datasets are created.
PATH_TO_MOVE = '/home/petryshak/CoronaryArteryPlaqueIdentification/data/'
# Source dataset holding the patient folders; it sits inside PATH_TO_MOVE.
PATH_TO_DATA = os.path.join(PATH_TO_MOVE, 'ONLY_LAD')

# Names of the sub-dataset directories.
val_name, test_name = 'val', 'test'


### Val moving

In [8]:
# Move the validation patients out of the source dataset into the val folder.
move_dataset_part(
    dataset_part_name=val_name,
    patients_list=val_part,
    path_to_data=PATH_TO_DATA,
    path_to_move=PATH_TO_MOVE,
)

### Test moving 

In [10]:
# Move the test patients out of the source dataset into the test folder.
move_dataset_part(
    dataset_part_name=test_name,
    patients_list=test_part,
    path_to_data=PATH_TO_DATA,
    path_to_move=PATH_TO_MOVE,
)