# Liver image preprocessing

In this notebook, the DICOM images of each patient are processed and saved in a format that is easier to use with numpy.
The images can optionally be contrast enhanced.

# Imports

In [1]:
import os
import pandas as pd
import numpy as np
import re
import pydicom as dicom #reading dicom images 
import zipfile
import shutil
import cv2
from tqdm import tqdm
from skimage import exposure

# Utility functions

In [3]:
def get_seriesId_idx(df):
    """
    Based on the content of the columns, defines which one is the
    'SeriesId'

    Only the columns holding more than one distinct value are considered.
    The values found in the row at position 1 (the second row) of those
    columns are inspected in column order; the first value that contains
    neither an ASCII letter nor a '-' is taken to be a SeriesId.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to inspect. It must have at least two rows.

    Returns the index of the columns containing the SeriesId

    Raises
    ------
    NameError
        If none of the candidate columns looks like a SeriesId.
    """

    # finds columns where non unique values are found
    unique_cols = [idx for idx, col in enumerate(df.columns)
                   if len(df[col].unique()) > 1]

    row = df.iloc[1, unique_cols]  # sample row: position 1, i.e. the second row
    for row_idx, x in enumerate(row):
        # str() lets numeric cells be tested too instead of raising TypeError
        if not re.search(r'[a-zA-Z]|\-', str(x)):
            return unique_cols[row_idx]

    # Previously the loop fell through and the last candidate column was
    # returned even though it did not match; report the failure instead.
    raise NameError('SeriesId not found')

def get_filename_idx(df):
    """
    Based on the content of the columns, defines which one is the
    'filename'

    Only the columns holding more than one distinct value are considered.
    The values found in the row at position 1 (the second row) of those
    columns are inspected in column order; the first string ending with
    '.dcm' identifies the filename column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to inspect. It must have at least two rows.

    Returns the index of the columns containing the Filename

    Raises
    ------
    NameError
        If none of the candidate columns holds a '.dcm' file name.
    """

    # finds columns where non unique values are found
    unique_cols = [idx for idx, col in enumerate(df.columns)
                   if len(df[col].unique()) > 1]

    row = df.iloc[1, unique_cols]  # sample row: position 1, i.e. the second row
    for row_idx, x in enumerate(row):
        # non-string cells (numbers, NaN) cannot be file names; skip them
        # rather than failing on the missing .endswith attribute
        if isinstance(x, str) and x.endswith('.dcm'):
            return unique_cols[row_idx]

    # Previously the loop fell through and the last candidate column was
    # returned even though it did not match; report the failure instead.
    raise NameError('Filename not found')

def get_series_description(df, start_string):
    """
    Based on the content of the columns, defines which one holds the
    series description.

    Only the columns holding more than one distinct value are considered.
    The values found in the row at position 1 (the second row) of those
    columns are inspected in column order; a column matches when its value
    is a string starting with start_string. When several columns match,
    the last one is used.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to inspect. It must have at least two rows.
    start_string : str
        Prefix expected at the beginning of a series description.

    Returns the index of the column containing the description

    Raises
    ------
    NameError
        If no candidate column starts with start_string.
    """

    # finds columns where non unique values are found
    unique_cols = [idx for idx, col in enumerate(df.columns)
                   if len(df[col].unique()) > 1]

    row = df.iloc[1, unique_cols]  # sample row: position 1, i.e. the second row

    # None (not False) marks "no match": position 0 is a valid match and
    # `0 == False` is true in Python, which used to reject a first-column hit.
    row_desc = None
    for row_idx, x in enumerate(row):
        if isinstance(x, str) and x.startswith(start_string):
            row_desc = row_idx  # no break: the last matching column wins

    if row_desc is None:
        raise NameError('Descrition not found')

    return unique_cols[row_desc]
       
def extract_mapped_img_filename(path):
    """
    Gets the name of a mapped_mri file

    The manifest found in `path` is loaded ('manifest.cvs' is tried first,
    then 'manifest.csv'). The rows whose series description ends with
    'T1MAP' are selected and the file name of the first one is returned.

    Returns None when the description or filename column cannot be
    identified (NameError from the helpers) or when more than two rows
    match. An IndexError is raised when no row matches.
    """
    # the manifest extension is spelled either .cvs or .csv
    try:
        table = pd.read_csv(path + '/manifest.cvs', index_col=False)
    except FileNotFoundError:
        table = pd.read_csv(path + '/manifest.csv', index_col=False)

    # labels of the columns of interest
    try:
        desc_label = table.columns[get_series_description(table, 'ShMO')]
        file_label = table.columns[get_filename_idx(table)]
    except NameError:
        return None

    # rows whose description carries the 'T1MAP' suffix (e.g. 'LIVER_T1MAP')
    is_t1map = table[desc_label].str.endswith('T1MAP') == True
    candidates = table[is_t1map][file_label].tolist()

    if len(candidates) > 2:
        return None
    return candidates[0]

# File management functions

In [4]:
def unzip(zip_path, zip_, contrast = False):
    """
    Extract the .csv file, extract only correct mapped-MRI image.

    The manifest of the archive is extracted into a temporary folder named
    after the archive. The file name of the mapped image is looked up in the
    manifest, that single DICOM file is extracted, and its pixel data is
    rescaled to 8 bits and written as '<prefix>.jpg' in zip_path, where
    <prefix> is the part of zip_ before the first '_'. The temporary folder
    is removed before returning, whatever the outcome.

    Parameters
    ----------
    zip_path : str
        Folder containing the archive. It is concatenated as-is with the
        file names, so it must end with a path separator.
    zip_ : str
        File name of the zip archive.
    contrast : bool
        When true, exposure.equalize_adapthist (clip_limit=0.03) is applied
        to the image before it is saved.

    Returns
    -------
    bool
        True when the JPEG was written. False when the manifest or the
        mapped image could not be found (IndexError, NameError, KeyError)
        or when the image could not be written.
    """
    archive = zip_path + zip_
    dst_folder = zip_path + zip_.replace('.zip', '/')
    # if the folder already exists, replace it
    try:
        os.mkdir(dst_folder)
    except FileExistsError:
        shutil.rmtree(dst_folder)
        os.mkdir(dst_folder)

    try:
        # One open archive serves both extractions.
        with zipfile.ZipFile(archive, 'r') as zipObj:
            manifest = [f for f in zipObj.namelist() if f.startswith('manifest')][0]
            zipObj.extract(manifest, dst_folder)

            filename = extract_mapped_img_filename(dst_folder)
            if filename is None:
                # same outcome as before (the extraction of None failed),
                # but with an explicit reason
                raise NameError('mapped image not identified in the manifest')
            zipObj.extract(filename, dst_folder)

        ds = dicom.dcmread(os.path.join(dst_folder, filename))
        x = ds.pixel_array
        name = zip_.split('_')[0]

        if contrast:
            x = exposure.equalize_adapthist(x, clip_limit=0.03)

        # normalization to [0, 255] and 8-bit conversion; an all-zero image
        # is kept black instead of dividing by zero
        peak = np.max(x)
        if peak > 0:
            image = np.clip(x * (255.0 / peak), 0, 255).astype(np.uint8)
        else:
            image = np.zeros(np.shape(x), dtype=np.uint8)

        # cv2.imwrite signals a failed write through its return value
        if not cv2.imwrite(zip_path + name + '.jpg', image):
            print('Error while processing {}: image not written'.format(zip_))
            return False
        return True

    except (IndexError, NameError, KeyError) as err:
        print('Error while processing {}: {!r}'.format(zip_, err))
        return False

    finally:
        # the temporary folder is removed even when an unexpected error
        # propagates to the caller
        shutil.rmtree(dst_folder)
            
def unzip_jpg(path, dst_folder, raw_folder= None, contrast= False):
    """
    From the zip files downloaded from the Ukbb in the path folder,
    extract the mri-mapped image, rename it (as the patientID) and move it to the dst_folder.
    Move the zip files to raw_folder.

    Only the files whose name ends with '_20204_3_0.zip' are processed.

    Parameters
    ----------
    path : str
        Folder holding the downloaded zip files (with trailing separator,
        the names are concatenated as-is).
    dst_folder : str
        Folder receiving the images (with trailing separator).
    raw_folder : str or None
        When given (with trailing separator), each processed zip file is
        archived in its 'Treated/' or 'Untreated/' sub-folder depending on
        whether the extraction succeeded. When None, the zip file is
        deleted after a successful extraction and left in dst_folder after
        a failed one.
    contrast : bool
        Forwarded to unzip: apply a contrast enhancement before saving.
    """
    zip_files = [z for z in os.listdir(path) if z.endswith('_20204_3_0.zip')]
    nb_zip = len(zip_files)
    print('\n{} zip files.\n\n'.format(nb_zip))

    if raw_folder:
        os.makedirs(raw_folder+'Untreated/', exist_ok = True)
        os.makedirs(raw_folder+'Treated/', exist_ok = True)

    for z in tqdm(zip_files):
        # moving the downloaded zip file; shutil.move also works when the
        # two folders are on different filesystems, unlike os.rename
        shutil.move(path+z, dst_folder+z)

        succeeded = unzip(dst_folder, z, contrast)

        if raw_folder:
            # archive the zip file according to the outcome; it must not be
            # deleted afterwards since it no longer sits in dst_folder
            outcome = 'Treated/' if succeeded else 'Untreated/'
            shutil.move(dst_folder+z, raw_folder+outcome+z)
        elif succeeded:
            # nothing to archive: remove the zip file once it is processed
            os.remove(dst_folder+z)

def contrast(input_directory, output_directory):
    """
    Applies a contrast enhancement on the images in the input_folder and stores the
    modified images in the output_folder

    Every entry of input_directory is read with cv2.imread, enhanced with
    exposure.equalize_adapthist (clip_limit=0.03), rescaled to 8 bits and
    written under the same name in output_directory. The name of each
    entry that cannot be processed is printed and the entry is skipped.

    Parameters
    ----------
    input_directory : str
        Folder holding the images (with trailing separator, the names are
        concatenated as-is).
    output_directory : str
        Folder receiving the enhanced images (with trailing separator).
    """

    images = os.listdir(input_directory)

    for image in tqdm(images):
        x = cv2.imread(input_directory + image)
        if x is None:
            # cv2.imread returns None instead of raising when the entry is
            # not a readable image (sub-folder, other file type, ...)
            print(image)
            continue

        try:
            x = exposure.equalize_adapthist(x, clip_limit=0.03)

            # normalization to [0, 255] and 8-bit conversion; an all-zero
            # image is kept black instead of dividing by zero
            peak = np.max(x)
            if peak > 0:
                x = np.clip(x * (255.0 / peak), 0, 255).astype(np.uint8)
            else:
                x = np.zeros(np.shape(x), dtype=np.uint8)

            cv2.imwrite(output_directory + image, x)
        except AttributeError:
            print(image)

# Run scripts

In [None]:
# Folder scanned for the downloaded zip files, and folder receiving the images.
source = '/n/groups/patel/uk_biobank/project_52887_41230/Liver/'
destination = '/n/groups/patel/Alan/Aging/Medical_Images/images/Liver/Instance3/Liver_20204_3_raw/'

# Extraction without contrast enhancement and without archiving the zip files.
unzip_jpg(path=source, dst_folder=destination, raw_folder=None, contrast=False)

In [None]:
# The images written to `destination` are the input of the contrast step.
input_directory = destination
output_directory = '/n/groups/patel/Alan/Aging/Medical_Images/images/Liver/Instance3/Liver_20204_3_contrast/'

# from jpg files to their contrasted version
contrast(input_directory=input_directory, output_directory=output_directory)