In [None]:
import csv
import glob
import os
import struct
import sys
from os import listdir
from pathlib import Path
from shutil import copyfile

import nibabel as nib
import numpy as np
import xlrd

%run viewer.ipynb
#%run viewer_3D.ipynb

class FileIO_MEDICAL(object):
    '''
    Static helpers for reading and writing the files used in this project:
    NIfTI volumes, the binary .dat arrays exchanged with the persistence
    computation program, its .bnd/.red/.pers.txt results, and label tables
    stored as Excel or CSV.
    '''

    @staticmethod
    def load_nii(pathIn):
        '''
        @pathIn: path to the NIfTI file
        Returns the image data as given by nibabel's get_fdata().
        '''
        image = nib.load(pathIn)
        return image.get_fdata()

    @staticmethod
    def save_nii(data, pathOut):
        '''
        @data: array to be stored
        @pathOut: path of the NIfTI file to write
        The image is written with an identity (4x4) affine.
        '''
        image = nib.Nifti1Image(data, np.eye(4))
        nib.save(image, pathOut)

    @staticmethod
    def read_dat(path):
        '''
        path: path to the .dat binary file
        Note: the dat file has file identifier (pengxiang's version)

        Layout (native byte order): uint32 file identifier, uint32 number of
        dimensions, one uint32 per dimension, then the values as doubles in
        C order.
        Returns a writable float64 array with the stored shape.
        Raises struct.error when the file is shorter than its header claims.
        '''
        with open(path, "rb") as file_in:
            # The identifier is read (and so validated for presence) but not used.
            struct.unpack('I', file_in.read(4))
            dims = struct.unpack('I', file_in.read(4))[0]
            shape = struct.unpack(str(dims) + 'I', file_in.read(4 * dims))
            # int64 accumulator so large volumes cannot overflow a 32-bit default int.
            count = int(np.prod(shape, dtype=np.int64))
            payload = file_in.read(8 * count)
        if len(payload) != 8 * count:
            raise struct.error("read_dat: expected %d bytes of data, got %d"
                               % (8 * count, len(payload)))
        # frombuffer gives a read-only view on the bytes; copy to hand back a
        # normal, writable array.
        return np.frombuffer(payload, dtype='d').reshape(shape).copy()

    @staticmethod
    def write_dat(data, path):
        '''
        @data: data to be written out
        @path: path to write the file
        the output is for cpp persistence computation program

        Writes a uint32 file identifier (0), the number of dimensions, the
        shape, and then the values as doubles in C order.
        '''
        data = data.astype('double')
        shape = data.shape
        dims = len(shape)
        with open(path, "wb") as file_out:
            file_out.write(struct.pack('I', 0))
            file_out.write(struct.pack('I', dims))
            file_out.write(struct.pack(str(dims) + 'I', *shape))
            # tobytes() emits the same C-ordered native doubles as packing
            # every element individually, without building a huge tuple.
            file_out.write(data.tobytes())

    @staticmethod
    def copy_to_folder(pathIn, pathOut):
        '''
        @pathIn: 'E:/Data2/BreastMass'
        @pathOut: 'E:/Data2/BreastMass_refine/volumes'
        This function extracts every .nii into a single folder

        For each direct sub-folder <f> of pathIn, copies
        <f>/tumor-initial-label.nii to pathOut/<f>_tumor_mask.nii.
        '''
        # First os.walk() entry describes pathIn itself; [1] is its sub-folders.
        folders = next(os.walk(pathIn))[1]
        for folder in folders:
            fileIn = pathIn + "/" + folder + "/tumor-initial-label.nii"
            fileOut = pathOut + "/" + folder + "_tumor_mask.nii"
            copyfile(fileIn, fileOut)

    @staticmethod
    def read_excel_labels(pathIn):
        '''
        @pathIn: path to the Excel workbook
        Reads the second column of the first sheet, skipping the first row,
        and returns the values as an int32 array.
        '''
        excel_table = xlrd.open_workbook(pathIn)
        sheet = excel_table.sheet_by_index(0)
        labels = np.zeros(sheet.nrows - 1, dtype=np.int32)
        for row in range(1, sheet.nrows):
            labels[row - 1] = int(sheet.cell_value(row, 1))
        return labels

    @staticmethod
    def read_csv_1c1r(pathIn):
        '''
        1c1r: first column and first row are not data

        Returns a float64 array of shape (rows - 1, columns - 1); the column
        count is taken from the first row.
        '''
        with open(pathIn, "r") as csvfile:
            # Each row is read as a single tab-delimited field and then split
            # on commas, exactly as the stored files are expected to look.
            rows = list(csv.reader(csvfile, delimiter="\t"))
        nrow = len(rows)
        ncol = len(rows[0][0].split(','))
        data = np.zeros((nrow - 1, ncol - 1), dtype=np.float64)
        for i in range(1, nrow):
            line = rows[i][0].split(',')
            for j in range(1, ncol):
                data[i - 1, j - 1] = line[j]
        return data

    @staticmethod
    def read_bnd_red_unifieddim(path):
        '''
        Read .bnd and .red from dimension unified files.
        bnd/red file should not contain file_type <pengxiang>

        Layout (native uint32): the number of dimensions `dim`, then `dim`
        structure counts; each structure is a `dim`-long info record (vertex
        count followed by zeros) and then vertex_count * dim coordinates.
        Returns a list with one entry per dimension, each a list of arrays of
        shape (dim, vertex_count).
        '''
        with open(path, "rb") as fin:
            dim = struct.unpack("I", fin.read(4))[0]
            num = struct.unpack(str(dim) + "I", fin.read(4 * dim))

            grand_list = [None] * dim
            for i in range(dim):
                dim_list = [None] * num[i]
                for j in range(num[i]):
                    dataInfo = struct.unpack(str(dim) + "I", fin.read(4 * dim))
                    # Format sanity checks: a structure has at least one
                    # vertex and the remaining info fields are all zero.
                    assert(dataInfo[0] > 0)
                    assert(not any(dataInfo[1:]))

                    dat = struct.unpack(str(dim * dataInfo[0]) + "I",
                                        fin.read(4 * dim * dataInfo[0]))
                    dat = np.reshape(dat, [dataInfo[0], dim])
                    dim_list[j] = np.transpose(dat)
                grand_list[i] = dim_list
        return grand_list

    @staticmethod
    def read_homo_unifieddim(data_folder, homo_folder, dimensions):
        '''
        Read in .bnd and .red files generated by persistence computation program in the unified dimension version
        -- Input:
        @data_folder: directory to folder containing all .dat files
        @homo_folder: directory to folder containing persistence results, i.e. .bnd, .red (.pers files)
        @dimensions: persistence homology dimensions, 2 for 2D files, 3 for 3D files
        -- Output:
            bnd_grand_list: [[structures] * dimensions] * number_of_files

        Only the <name>.dat.bnd file of every <name>.dat is read here. The
        process exits (sys.exit) when no .dat file is found, when a .bnd file
        is missing, or when a dimension has no structure.
        '''
        file_list = [f for f in listdir(data_folder) if f.endswith('.dat')]
        file_number = len(file_list)
        if (file_number <= 0):
            print("read_homo: invalid file_list argument input ...")
            sys.exit()

        bnd_grand_list = [None] * file_number
        for name_idx, file_name in enumerate(file_list):
            bnd_name = homo_folder + "/" + file_name + ".bnd"
            if (not os.path.isfile(bnd_name)):
                print("read_homo: file opens failed ...")
                sys.exit()

            bnd_ = FileIO_MEDICAL.read_bnd_red_unifieddim(bnd_name)
            assert(len(bnd_) == dimensions)
            for i in range(dimensions):
                if len(bnd_[i]) == 0:
                    print(file_name + " doesn't have dim" + str(i) + " bnd or red ...")
                    sys.exit()

            bnd_grand_list[name_idx] = bnd_
        return bnd_grand_list

    @staticmethod
    def read_pers_txt(pathIn):
        '''
        @pathIn: path to the pers.txt file

        The file is a sequence of sections. A section starts with a header
        line "<text>=<count>" followed by <count> lines holding two
        tab-separated values.
        Returns a list with one float64 array of shape (count, 2) per section,
        in file order.
        '''
        with open(pathIn) as f:
            content = [x.strip() for x in f]

        # Grown per section instead of being fixed to three entries, so 2D
        # results give a list of length 2 rather than one padded with None.
        persistence = []
        i = 0
        while i < len(content):
            if not content[i]:
                # Blank line where a header is expected (e.g. at end of file).
                i = i + 1
                continue
            num = int(content[i].split('=')[1])
            i = i + 1
            pers = np.zeros((num, 2), dtype=np.float64)
            for cnt in range(num):
                fields = content[i + cnt].split('\t')
                pers[cnt, 0] = fields[0]
                pers[cnt, 1] = fields[1]
            i = i + num
            persistence.append(pers)
        return persistence

    @staticmethod
    def read_pers_txt_mass(data_folder, homo_folder, dimensions):
        '''
        Read in the .pers.txt files generated by persistence computation program, one per .dat file
        -- Input:
        @data_folder: directory to folder containing all .dat files
        @homo_folder: directory to folder containing persistence results (<name>.dat.pers.txt files)
        @dimensions: persistence homology dimensions, 2 for 2D files, 3 for 3D files
        -- Output:
            pers_grand_list: [[(count, 2) array] * dimensions] * number_of_files

        The process exits (sys.exit) when no .dat file is found, when a
        .pers.txt file is missing, or when a dimension has no entry.
        '''
        file_list = [f for f in listdir(data_folder) if f.endswith('.dat')]
        file_number = len(file_list)
        if (file_number <= 0):
            print("read_pers_txt_mass: invalid file_list argument input ...")
            sys.exit()

        pers_grand_list = [None] * file_number
        for name_idx, file_name in enumerate(file_list):
            pers_name = homo_folder + "/" + file_name + ".pers.txt"
            if (not os.path.isfile(pers_name)):
                print("read_pers_txt_mass: file opens failed ...")
                sys.exit()

            pers_ = FileIO_MEDICAL.read_pers_txt(pers_name)
            assert(len(pers_) == dimensions)
            for i in range(dimensions):
                if pers_[i].shape[0] == 0:
                    print(file_name + " doesn't have dim" + str(i) + " pers ...")
                    sys.exit()

            pers_grand_list[name_idx] = pers_
        return pers_grand_list