In [1]:
##############################################################
import numpy as np
import pandas as pd
import os, re
import pickle
import matplotlib.pyplot as plt

##############################################################
def get_fname(patient, is_mask = False):
    fpath = os.path.join(FP_LGG, patient)
    if is_mask:
        pattern = "_([^_]*)_mask.tif$"
        fname = np.array([x for x in os.listdir(fpath) if "mask" in x])
    else:
        pattern = "_([^_]*).tif$"
        fname = np.array([x for x in os.listdir(fpath) if "mask" not in x])
    index = np.array([int(re.findall(pattern, x)[0]) for x in fname])
    idx = np.argsort(index)
    return fname[idx]

def reaxes_img(img):
    img = img.copy()
    img = np.swapaxes(img,0,1)
    img = np.swapaxes(img,1,2)
    return img
##############################################################
FP_OUT = "/data/Duke_BIOS824/res_proj01/"
FP_OUT_COORD = "/data/Duke_BIOS824/res_proj01/coord/"
FP_LGG = "/data/Duke_BIOS824/LGG-segmentation/"
patients = [x for x in os.listdir(FP_LGG) if "TCGA" in x]    

In [2]:
with open(FP_OUT + 'img_slice.pickle', 'rb') as fp:
    img_slice = pickle.load(fp)
fp.close()

with open(FP_OUT + 'img_mask.pickle', 'rb') as fp:
    img_mask  = pickle.load(fp)
fp.close()

check the import files

In [3]:
len(img_slice)

110

In [4]:
patient = patients[0]
img_slice[patient].shape

(256, 256, 23, 3)

In [5]:
patient = patients[0]
img_mask[patient].shape

(256, 256, 23)

# generate coordinates from mask

In [6]:
def get_coord(img, tol = 1e-5):
    """..."""
    ###
    xc = np.arange(img.shape[0])
    yc = np.arange(img.shape[1])
    zc = np.arange(img.shape[2])
    
    ###
    xc, yc, zc = np.meshgrid(xc, yc, zc)
    xc = xc.ravel()
    yc = yc.ravel()   
    zc = zc.ravel()
    
    ###
    cord = np.array([
        (x, y, z, img[x, y, z]) 
        for x, y, z in zip(xc, yc, zc)
        if img[x, y, z] > tol
    ])
    cord = pd.DataFrame(cord, columns=["x", "y", "z", "val"])
    return cord

test run

In [7]:
###
pat = patients[0]
print(pat)

###
cord1 = get_coord(img_mask[pat])

###
slices = img_slice[pat]
slices = slices.mean(axis = 3)
cord2 = get_coord(slices, tol=100)

###
print(cord1.shape)
print(cord2.shape)

TCGA_CS_4941_19960909
(14379, 4)
(84208, 4)


In [8]:
print(FP_OUT_COORD + "coord/" + patient + "_slice.csv")
print(FP_OUT_COORD + "coord/" + patient + "_mask.csv")

/data/Duke_BIOS824/res_proj01/coord/coord/TCGA_CS_4941_19960909_slice.csv
/data/Duke_BIOS824/res_proj01/coord/coord/TCGA_CS_4941_19960909_mask.csv


get coord for all patients

In [9]:
for patient in patients:
    print(patient)
    
    ###
    img  = img_mask[patient]
    cord = get_coord(img)
    fp   = FP_OUT_COORD + patient + "_mask.csv"
    cord.to_csv(fp, index=False)
    
    ###
    img  = img_slice[patient].mean(axis = 3)
    cord = get_coord(img, tol=100)
    fp   = FP_OUT_COORD + patient + "_slice.csv"
    cord.to_csv(fp, index=False)

TCGA_CS_4941_19960909
TCGA_CS_4942_19970222
TCGA_CS_4943_20000902
TCGA_CS_4944_20010208
TCGA_CS_5393_19990606
TCGA_CS_5395_19981004
TCGA_CS_5396_20010302
TCGA_CS_5397_20010315
TCGA_CS_6186_20000601
TCGA_CS_6188_20010812
TCGA_CS_6290_20000917
TCGA_CS_6665_20010817
TCGA_CS_6666_20011109
TCGA_CS_6668_20011025
TCGA_CS_6669_20020102
TCGA_DU_5849_19950405
TCGA_DU_5851_19950428
TCGA_DU_5852_19950709
TCGA_DU_5853_19950823
TCGA_DU_5854_19951104
TCGA_DU_5855_19951217
TCGA_DU_5871_19941206
TCGA_DU_5872_19950223
TCGA_DU_5874_19950510
TCGA_DU_6399_19830416
TCGA_DU_6400_19830518
TCGA_DU_6401_19831001
TCGA_DU_6404_19850629
TCGA_DU_6407_19860514
TCGA_DU_6408_19860521
TCGA_DU_7008_19830723
TCGA_DU_7010_19860307
TCGA_DU_7013_19860523
TCGA_DU_7014_19860618
TCGA_DU_7018_19911220
TCGA_DU_7019_19940908
TCGA_DU_7294_19890104
TCGA_DU_7298_19910324
TCGA_DU_7299_19910417
TCGA_DU_7300_19910814
TCGA_DU_7301_19911112
TCGA_DU_7302_19911203
TCGA_DU_7304_19930325
TCGA_DU_7309_19960831
TCGA_DU_8162_19961029
TCGA_DU_81

In [11]:
!ls /data/Duke_BIOS824/res_proj01/coord

TCGA_CS_4941_19960909_mask.csv	 TCGA_DU_A5TR_19970726_mask.csv
TCGA_CS_4941_19960909_slice.csv  TCGA_DU_A5TR_19970726_slice.csv
TCGA_CS_4942_19970222_mask.csv	 TCGA_DU_A5TS_19970726_mask.csv
TCGA_CS_4942_19970222_slice.csv  TCGA_DU_A5TS_19970726_slice.csv
TCGA_CS_4943_20000902_mask.csv	 TCGA_DU_A5TT_19980318_mask.csv
TCGA_CS_4943_20000902_slice.csv  TCGA_DU_A5TT_19980318_slice.csv
TCGA_CS_4944_20010208_mask.csv	 TCGA_DU_A5TU_19980312_mask.csv
TCGA_CS_4944_20010208_slice.csv  TCGA_DU_A5TU_19980312_slice.csv
TCGA_CS_5393_19990606_mask.csv	 TCGA_DU_A5TW_19980228_mask.csv
TCGA_CS_5393_19990606_slice.csv  TCGA_DU_A5TW_19980228_slice.csv
TCGA_CS_5395_19981004_mask.csv	 TCGA_DU_A5TY_19970709_mask.csv
TCGA_CS_5395_19981004_slice.csv  TCGA_DU_A5TY_19970709_slice.csv
TCGA_CS_5396_20010302_mask.csv	 TCGA_EZ_7264_20010816_mask.csv
TCGA_CS_5396_20010302_slice.csv  TCGA_EZ_7264_20010816_slice.csv
TCGA_CS_5397_20010315_mask.csv	 TCGA_FG_5962_20000626_mask.csv
TCGA_CS_5397_20010315_slic