# Notebook to calculate sarcopenia muscle areas from MRI segmentation masks

## Import Libraries and Set directories

In [1]:
#Import Libraries
import os
import SimpleITK as sitk
import pandas as pd
import numpy as np
import re

In [None]:
# Install if needed
#pip install xlrd

In [38]:
#pip install xlwt

Collecting xlwt
[?25l  Downloading https://files.pythonhosted.org/packages/44/48/def306413b25c3d01753603b1a222a011b8621aed27cd7f89cbc27e6b0f4/xlwt-1.3.0-py2.py3-none-any.whl (99kB)
[K     |████████████████████████████████| 102kB 1.1MB/s ta 0:00:01
[?25hInstalling collected packages: xlwt
Successfully installed xlwt-1.3.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
import xlwt, xlrd

In [3]:
# Folder that is read from and written to by the cells below
dir_nifti = '/home/jovyan/data'
# Directory the kernel was started in
cwd = os.getcwd()
print(dir_nifti)

/home/jovyan/data


## Define helper functions for locating NIfTI masks and computing muscle areas

In [4]:
# load nifti list
# The results workbook written by the last cell of this notebook is stored in
# this same folder and also has 'xls' in its name; exclude it so that a re-run
# never picks the previous output as the input spreadsheet.
results_xls = 'sarcopeniaMR_AUTOareacalculation.xls'
patslist = os.listdir(dir_nifti)

# Patient data are searched as directory trees further down, so only keep
# sub-folders here; stray files in the data folder are ignored.
pats = [pat for pat in patslist
        if 'xls' not in pat and os.path.isdir(os.path.join(dir_nifti, pat))]

# Sort the candidates so the choice does not depend on directory listing order.
xls_candidates = sorted(
    pat for pat in patslist
    if 'xls' in pat
    and pat != results_xls
    and os.path.isfile(os.path.join(dir_nifti, pat))
)
if not xls_candidates:
    raise FileNotFoundError('No input Excel file found in ' + dir_nifti)
in_file = xls_candidates[0]

print('No of patient folders to process: ', len(pats))
print('Input Excel file: ', in_file)

No of patient folders to process:  201
Input Excel file:  sarcopenia data_ES_043021_area_calculation_byLisa.xls


In [5]:
def get_mask_paths(patpath):
    """Return the path of the first file under ``patpath`` whose name contains 'mask'.

    The whole directory tree below ``patpath`` is searched, so a mask stored
    in a sub-folder of the patient directory is found as well.

    Parameters
    ----------
    patpath : str
        Patient directory to search.

    Returns
    -------
    str or None
        Full path of the first matching file in ``os.walk`` order, or ``None``
        (after printing a notice) when no file name contains 'mask'.
    """
    for root, _directories, files in os.walk(patpath):
        for filename in files:
            if 'mask' in filename:
                # join the two strings in order to form the full filepath.
                return os.path.join(root, filename)

    # Only reached once every directory has been visited without a match.
    print('Nifti mask not found for this patient:', patpath)
    return None

In [6]:
def calc_maskarea(path,pat):
    """Measure the labelled areas on the single mask slice of one patient.

    The mask file is located with ``get_mask_paths`` inside
    ``os.path.join(path, pat)`` and read with SimpleITK.

    Parameters
    ----------
    path : str
        Directory that contains the patient folders.
    pat : str
        Name of the patient folder inside ``path``.

    Returns
    -------
    tuple
        ``(slice_no, psoas_area, skmuscle_area, total_area)``.
        ``slice_no`` is the 1-based index, along the first array axis, of the
        only slice containing non-zero pixels. The areas are the pixel counts
        of label 1 (psoas), label 2 (skeletal muscle) and all non-zero labels,
        each multiplied by the first two spacing values of the image, i.e.
        they are in squared spacing units.
        ``(0, 0, 0, 0)`` is returned, after printing a notice, when no mask
        file is found, the mask has no non-zero pixel, more than one slice
        has non-zero pixels, or labels other than 1 and 2 are present.
    """
    failed = (0, 0, 0, 0)
    patpath = os.path.join(path,pat)

    maskfile = get_mask_paths(patpath)
    if maskfile is None:
        # Nothing to read; report it like the other QC failures instead of
        # letting the image reader fail on a missing path.
        print('No mask file to read for pat: ', pat)
        return failed

    img = sitk.ReadImage(maskfile)
    img_arr = sitk.GetArrayFromImage(img)

    # QC1: check there is exactly 1 2D mask slice in 3d array
    mask_slices = np.unique(np.nonzero(img_arr)[0])
    if len(mask_slices) == 0:
        # An all-zero mask would otherwise raise IndexError on mask_slices[0].
        print('Mask has no non zero pixels for pat: ', pat)
        return failed
    if len(mask_slices) > 1:
        print('More than 1 axial slice has non zero pixels for pat: ', pat)
        return failed

    mask_2d = img_arr[mask_slices[0]]
    psoas_pixels = np.count_nonzero(mask_2d == 1)
    skmuscle_pixels = np.count_nonzero(mask_2d == 2)
    total_pixels = np.count_nonzero(mask_2d != 0)

    # QC2: every non-zero pixel must carry label 1 or label 2
    if total_pixels != psoas_pixels + skmuscle_pixels:
        print('potential unknown mask labels found for pat: ',pat)
        return failed

    # In-plane pixel size; multiplied in the same order as before so the
    # floating point results are unchanged.
    spacing = img.GetSpacing()
    psoas_area = psoas_pixels * spacing[0]*spacing[1]
    skmuscle_area = skmuscle_pixels * spacing[0]*spacing[1]
    total_area = total_pixels * spacing[0]*spacing[1]

    # +1 converts the 0-based array index into a 1-based slice number
    return mask_slices[0]+1,psoas_area,skmuscle_area,total_area

In [7]:
# Load the input spreadsheet found in the data folder
xls_path = os.path.join(dir_nifti, in_file)
df = pd.read_excel(xls_path)

In [8]:
# Show the first ten rows of the loaded table
preview_rows = df.head(10)
display(preview_rows)

Unnamed: 0,patient ID,replacements,race,ethnicity,height (cm),weight,psoas area (mm2),corrected psoas area,abd wall area (mm2),corrected abd wall area,L3 slice no,L3 slice 3D no
0,1,,White,Non-Hispanic,166.1,79.4,5244.73,2622.365,28551.5,14275.75,21,51
1,2,,White,Non-Hispanic,152.4,51.8,4750.97,2375.485,28078.3,14039.15,14,13
2,3,,White,Non-Hispanic,158.5,75.3,5362.14,2681.07,31648.7,15824.35,30,50
3,4,,White,Non-Hispanic,175.3,61.1,4798.54,2399.27,29239.4,14619.7,21,59
4,5,,White,Non-Hispanic,162.6,95.8,6839.7,3419.85,38763.6,19381.8,18,48
5,6,,White,Non-Hispanic,167.4,64.9,6427.97,3213.985,28974.6,14487.3,27,43
6,7,,Unknown,Unknown,169.0,60.4,4989.17,2494.585,30864.1,15432.05,20,10
7,8,,White,Non-Hispanic,,49.7,6913.98,3456.99,29394.2,14697.1,25,39
8,9,b,White,Non-Hispanic,,85.3,5282.13,2641.065,37375.8,18687.9,34,36
9,10,,White,Non-Hispanic,152.9,45.4,4398.55,2199.275,26192.1,13096.05,24,46


In [9]:
# Result columns filled by this cell; NaN means no value was written for the row.
auto_cols = ['AUTO_sliceloc', 'AUTO_psoasarea_mm2',
             'AUTO_skmusclearea_mm2', 'AUTO_totalarea_mm2']
for col in auto_cols:
    df[col] = np.nan

# Keep the original (reversed) processing order: if two folders resolve to the
# same patient ID, the one processed last overwrites the earlier values.
for pat in reversed(pats):
    # Patient ID = the digits of the last space-separated token of the folder name
    id_digits = re.sub(r'\D', '', pat.split(' ')[-1])
    if not id_digits:
        # int('') would raise; report the folder and carry on with the rest
        print('Skipping folder without a numeric patient ID: ', pat)
        continue
    patname = int(id_digits)

    row_mask = df['patient ID'] == patname
    if not row_mask.any():
        print('No spreadsheet row found for patient ID: ', patname)
        continue

    # calc_maskarea returns (slice no, psoas, skeletal muscle, total) in the
    # same order as auto_cols
    for col, value in zip(auto_cols, calc_maskarea(dir_nifti, pat)):
        df.loc[row_mask, col] = value

print('all patient processed')

all patient processed


In [10]:
# Save the table, including the AUTO_* columns, next to the input data
out_path = os.path.join(dir_nifti, 'sarcopeniaMR_AUTOareacalculation.xls')
with pd.ExcelWriter(out_path) as xls_writer:
    df.to_excel(xls_writer, sheet_name='Sheet_name_1')