# Separate the images by territory and classify them

In [1]:
import matplotlib.pyplot as plt
import pydicom
import os
import ntpath
import numpy as np
import cv2

In [3]:
# Root directory of the exported study; it must contain the GEMS_IMG folder
# that is traversed further down (per the original note the DICOMDIR file
# also lives here -- TODO confirm).
directori='/media/HDD3TB/data_carotid/NEFRONA_20180613/'

# Root directory under which the classified images are stored
destinacio='/media/HDD3TB/data_carotid/NEFRONA_parts/'

In [4]:
def find(imatge,detect):
    """Tell whether the template stored at ``detect`` shows up in ``imatge``.

    imatge: image array indexed as [row, column, channel]
    detect: path of the template image, loaded with ``plt.imread``

    Only the first channel of both images is compared, and only the
    top-left 150x150 pixel region of ``imatge`` is searched.
    Returns True when the best match score is above 0.85, False otherwise.
    """
    # first channel of the template read from disk
    pattern = plt.imread(detect)[:, :, 0]

    # the target is only looked for in the upper left corner
    corner = imatge[:150, :150, 0]

    # cv2.TM_CCORR_NORMED is the matching mode whose numeric value is 3
    scores = cv2.matchTemplate(corner, pattern, cv2.TM_CCORR_NORMED)

    # a best score above 0.85 counts as a hit
    if scores.max() > 0.85:
        return True
    return False

In [5]:
def _save_image(folder, name, imatge):
    """Write ``imatge`` to ``folder + name``, creating ``folder`` if it is missing."""
    # exist_ok=True replaces the separate os.path.exists() test and so avoids
    # the check-then-create race between the test and the creation
    os.makedirs(folder, exist_ok=True)
    plt.imsave(folder + name, imatge)


def classify_im(imatge,file,name,treshold1=14000,treshold2=422):
    """Classify an image and save it in the sub-folder of its category.

    The image is stored under exactly one of these sub-folders of ``file``:
    'doopler/', 'PL/', 'IMT/', 'modified/' or 'originals/'.

    imatge: image array indexed as [row, column, channel] with 3 channels
    file: destination folder, given as a string ending in '/'
    name: file name used for the saved image
    treshold1: per-pixel variance across channels above which the image is
        considered coloured (presence of red, green or blue) and therefore
        stored as doppler
    treshold2: image height (number of rows) above which a non-doppler image
        is considered modified instead of original
    """
    # A constant fourth channel (255) is appended so that the variance across
    # channels better separates strongly red, green or blue pixels.
    expand = 255*np.ones([imatge.shape[0], imatge.shape[1], 4], dtype=int)
    expand[:, :, 0:3] = imatge

    if np.max(np.var(expand, axis=2)) > treshold1:
        # at least one pixel is strongly coloured
        category = 'doopler/'
    elif np.shape(imatge)[0] <= treshold2:
        # not taller than the threshold: untouched original
        category = 'originals/'
    elif find(imatge, 'cm2'):
        # modified image where the 'cm2' template (plaque measurement) was found
        category = 'PL/'
    elif find(imatge, 'IMT'):
        # modified image where the 'IMT' template was found
        category = 'IMT/'
    else:
        # modified image carrying neither template
        category = 'modified/'

    _save_image(file + category, name, imatge)

In [6]:
# Marker tables used to decide the destination sub-folder of an image.
# The markers are looked for in the raw bytes of the DICOM file, and the
# tables are consulted in this order; the first matching rule wins.
# Each entry is (alternative markers, sub-folder); D and E are the two sides.
_CAROTID_RULES = (
    ((b"ACC D",), 'ACC/D/'),
    ((b"ACC E",), 'ACC/E/'),
    # side folders follow the marker (they used to be swapped for ACI)
    ((b"ACI D",), 'ACI/D/'),
    ((b"ACI E",), 'ACI/E/'),
    ((b"BULB D", b"BD "), 'BULB/D/'),
    ((b"BULB E", b"BE "), 'BULB/E/'),
)
# Only consulted when the b"FEM " marker is present.
_FEMORAL_RULES = (
    ((b"COM D",), 'FEM/COM_D/'),
    ((b"COM E",), 'FEM/COM_E/'),
    ((b"SUP D",), 'FEM/SUP_D/'),
    ((b"SUP E",), 'FEM/SUP_E/'),
)
# Consulted last, only for files without the b"FEM " marker.
_OTHER_RULES = (
    # the trailing '/' keeps the patient id in its own folder
    ((b"ACE D",), 'ACE/D/'),
    ((b"ACE E",), 'ACE/E/'),
    ((b"BRAQ ",), 'BRAQ/'),
)
_UNCLASSIFIED = 'no_class/'


def _first_match(raw, rules):
    """Return the folder of the first rule with a marker present in raw, else None."""
    for markers, folder in rules:
        if any(marker in raw for marker in markers):
            return folder
    return None


def _territory_folder(raw):
    """Return the destination sub-folder (ending in '/') for the file content raw."""
    folder = _first_match(raw, _CAROTID_RULES)
    if folder is not None:
        return folder
    if b"FEM " in raw:
        # femoral images without a known sub-territory are kept as
        # unclassified instead of being silently dropped
        return _first_match(raw, _FEMORAL_RULES) or _UNCLASSIFIED
    return _first_match(raw, _OTHER_RULES) or _UNCLASSIFIED


def _subdirs(parent):
    """Yield the path of every directory located directly inside parent."""
    for entry in os.listdir(parent):
        full = os.path.join(parent, entry)
        if os.path.isdir(full):
            yield full


# paths of the images that could not be processed, one per line
errors=''
dir_data=directori+'GEMS_IMG/'

# the data is laid out as GEMS_IMG/<month>/<day>/<patient>/<image>
for dir_mes in _subdirs(dir_data):
    for dir_dia in _subdirs(dir_mes):
        for dir_pacient in _subdirs(dir_dia):
            for im_dicom in os.listdir(dir_pacient):
                path=os.path.join(dir_pacient,im_dicom)
                if not os.path.isfile(path):
                    # nothing to read from a nested folder
                    continue

                # raw content of the file, where the territory markers are
                # searched; the first byte is skipped as before
                with open(path,"rb") as handle:
                    handle.seek(1)
                    raw=handle.read()

                dcm=pydicom.dcmread(path)

                # transform the dicom image to a numpy array
                test_array=dcm.pixel_array

                # only [row, column, 3] arrays can be classified; anything else
                # (grayscale, unexpected number of channels) is logged
                if test_array.ndim!=3 or test_array.shape[2]!=3:
                    errors+=path+'\n'
                    # rewritten on every error so the log is current even if
                    # a later file aborts the run
                    with open('debug.txt','w') as bugg:
                        bugg.write(errors)
                    continue

                # extract the patient id
                subjecte=str(dcm.PatientID)

                # destination: <destinacio>/<territory>/<patient id>/
                new_path=destinacio+_territory_folder(raw)+subjecte+'/'

                # classify the type of image, keeping the original file name
                classify_im(test_array,new_path,im_dicom)

