## feat_matrix 

1. reduce the size of each feature matrix for each .vtp fiber bundle (basis of a percentage 10)
2. attach label to each feat_matrix.h5 file using label mapping
3. combine the 54 tracts mentioned in the supplementary of the paper

In [1]:
import h5py 
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

In [3]:
label_mapping_72 = pd.read_csv("/home/ang/Documents/GitHub/DeepWMA/labels_reference.csv", names=["label_names", "label_array"])
label_mapping_72['label_names'] = label_mapping_72['label_names'].astype('|S')


# label_mapping_72 = pd.read_csv("/home/ang/Documents/GitHub/DeepWMA/labels_reference_66.csv", names=["label_names", "label_array"])
# label_mapping_72['label_names'] = label_mapping_72['label_names'].astype('|S')


In [4]:
# hdf data types
label_hdf_dtype = np.dtype([('label_names', 'a30'), ('label_values', int), ('label_array', int)])
feat_hdf_dtype = np.dtype([('feat', np.ndarray)])

In [5]:
subject_ID = "779370"
patient_data_path = "/media/ang/Data/unnerve_data/779370/"
output_folder="/media/ang/Data/unnerve_data/".format(subject_ID)
reducing_fraction = 0.3
streamlines = 0 

### read featMatrix and create label

1. loop over featMatrix of the patient
2. 

In [6]:

for filename in tqdm(os.listdir(patient_data_path)):
    if filename.endswith((".h5")):
        if ".vtk" not in filename:
            if "atlas" not in filename:
                tract_name = filename.replace(subject_ID + "_T_", "").replace("_featMatrix.h5", "")
                tract_label = label_mapping_72.loc[label_mapping_72.label_names == bytes(tract_name, 'utf-8'),'label_array'].item()
                print(tract_name, tract_label, type(tract_label))

                # read featarray and pass tract_label
                with h5py.File(patient_data_path + filename, "r") as f:
                    feat_array = f.get('feat')[()]
                feat_label=np.empty(feat_array.shape[0], dtype=label_hdf_dtype)
                feat_label.fill((tract_name, tract_label, tract_label))

                # limit the size of the numpy array to reducing fraction of the array so that the error of big size can be resolved 
                n_tracts = int(reducing_fraction*feat_array.shape[0])

                feat_array = feat_array[0:n_tracts]
                feat_label = feat_label[0:n_tracts]

                # append this to a numpy array 

                if streamlines==0:
                    featMatrix = feat_array
                    featLabel = feat_label

                featMatrix = np.append(featMatrix, feat_array, axis=0)
                featLabel = np.append(featLabel, feat_label, axis=0)

                print("(featMatrix, featLabel)", featMatrix.shape, featLabel.shape)
                streamlines = feat_array.shape[0] + streamlines
                print("No. of Streamlines {}".format(streamlines))
    else: 
        continue

  3%|▎         | 2/75 [00:00<00:07,  9.33it/s]

AF_left 0 <class 'int'>
(featMatrix, featLabel) (26792, 30, 30, 3) (26792,)
No. of Streamlines 13396
AF_right 1 <class 'int'>


  5%|▌         | 4/75 [00:00<00:12,  5.64it/s]

(featMatrix, featLabel) (33009, 30, 30, 3) (33009,)
No. of Streamlines 19613
ATR_left 2 <class 'int'>
(featMatrix, featLabel) (38432, 30, 30, 3) (38432,)
No. of Streamlines 25036
ATR_right 3 <class 'int'>


  8%|▊         | 6/75 [00:01<00:16,  4.17it/s]

(featMatrix, featLabel) (46431, 30, 30, 3) (46431,)
No. of Streamlines 33035
CA 4 <class 'int'>
(featMatrix, featLabel) (47322, 30, 30, 3) (47322,)
No. of Streamlines 33926
CC_1 5 <class 'int'>


  9%|▉         | 7/75 [00:01<00:15,  4.27it/s]

(featMatrix, featLabel) (51518, 30, 30, 3) (51518,)
No. of Streamlines 38122
CC_2 6 <class 'int'>


 11%|█         | 8/75 [00:02<00:23,  2.90it/s]

(featMatrix, featLabel) (67512, 30, 30, 3) (67512,)
No. of Streamlines 54116
CC_3 7 <class 'int'>


 12%|█▏        | 9/75 [00:02<00:30,  2.15it/s]

(featMatrix, featLabel) (71957, 30, 30, 3) (71957,)
No. of Streamlines 58561
CC_4 8 <class 'int'>


 13%|█▎        | 10/75 [00:08<02:11,  2.02s/it]

(featMatrix, featLabel) (91439, 30, 30, 3) (91439,)
No. of Streamlines 78043
CC_5 9 <class 'int'>


 15%|█▍        | 11/75 [00:10<02:11,  2.06s/it]

(featMatrix, featLabel) (99260, 30, 30, 3) (99260,)
No. of Streamlines 85864
CC_6 10 <class 'int'>


 16%|█▌        | 12/75 [00:10<01:38,  1.56s/it]

(featMatrix, featLabel) (107459, 30, 30, 3) (107459,)
No. of Streamlines 94063
CC_7 11 <class 'int'>


 17%|█▋        | 13/75 [00:11<01:14,  1.21s/it]

(featMatrix, featLabel) (112953, 30, 30, 3) (112953,)
No. of Streamlines 99557
CC 12 <class 'int'>


 19%|█▊        | 14/75 [00:11<01:02,  1.02s/it]

(featMatrix, featLabel) (131134, 30, 30, 3) (131134,)
No. of Streamlines 117738
CG_right 14 <class 'int'>


 20%|██        | 15/75 [00:12<00:52,  1.14it/s]

(featMatrix, featLabel) (142184, 30, 30, 3) (142184,)
No. of Streamlines 128788
CST_left 15 <class 'int'>


 21%|██▏       | 16/75 [00:12<00:44,  1.32it/s]

(featMatrix, featLabel) (145846, 30, 30, 3) (145846,)
No. of Streamlines 132450
CST_right 16 <class 'int'>


 23%|██▎       | 17/75 [00:13<00:39,  1.49it/s]

(featMatrix, featLabel) (148244, 30, 30, 3) (148244,)
No. of Streamlines 134848
FPT_left 17 <class 'int'>


 24%|██▍       | 18/75 [00:13<00:35,  1.60it/s]

(featMatrix, featLabel) (152665, 30, 30, 3) (152665,)
No. of Streamlines 139269
FPT_right 18 <class 'int'>


 25%|██▌       | 19/75 [00:14<00:32,  1.70it/s]

(featMatrix, featLabel) (155144, 30, 30, 3) (155144,)
No. of Streamlines 141748
FX_left 19 <class 'int'>


 27%|██▋       | 20/75 [00:14<00:30,  1.80it/s]

(featMatrix, featLabel) (155211, 30, 30, 3) (155211,)
No. of Streamlines 141815
FX_right 20 <class 'int'>


 28%|██▊       | 21/75 [00:15<00:28,  1.88it/s]

(featMatrix, featLabel) (155281, 30, 30, 3) (155281,)
No. of Streamlines 141885
ICP_left 21 <class 'int'>


 29%|██▉       | 22/75 [00:15<00:27,  1.92it/s]

(featMatrix, featLabel) (156675, 30, 30, 3) (156675,)
No. of Streamlines 143279
ICP_right 22 <class 'int'>


 31%|███       | 23/75 [00:16<00:26,  1.95it/s]

(featMatrix, featLabel) (158007, 30, 30, 3) (158007,)
No. of Streamlines 144611
IFO_left 23 <class 'int'>


 32%|███▏      | 24/75 [00:16<00:26,  1.93it/s]

(featMatrix, featLabel) (161318, 30, 30, 3) (161318,)
No. of Streamlines 147922
IFO_right 24 <class 'int'>


 33%|███▎      | 25/75 [00:17<00:26,  1.90it/s]

(featMatrix, featLabel) (165790, 30, 30, 3) (165790,)
No. of Streamlines 152394
ILF_left 25 <class 'int'>


 35%|███▍      | 26/75 [00:17<00:25,  1.90it/s]

(featMatrix, featLabel) (167131, 30, 30, 3) (167131,)
No. of Streamlines 153735
ILF_right 26 <class 'int'>


 36%|███▌      | 27/75 [00:18<00:25,  1.91it/s]

(featMatrix, featLabel) (167840, 30, 30, 3) (167840,)
No. of Streamlines 154444
MLF_left 28 <class 'int'>


 37%|███▋      | 28/75 [00:19<00:27,  1.72it/s]

(featMatrix, featLabel) (182512, 30, 30, 3) (182512,)
No. of Streamlines 169116
MLF_right 29 <class 'int'>


 39%|███▊      | 29/75 [00:19<00:28,  1.61it/s]

(featMatrix, featLabel) (193080, 30, 30, 3) (193080,)
No. of Streamlines 179684
OR_left 30 <class 'int'>


 40%|████      | 30/75 [00:20<00:28,  1.60it/s]

(featMatrix, featLabel) (194779, 30, 30, 3) (194779,)
No. of Streamlines 181383
OR_right 31 <class 'int'>


 41%|████▏     | 31/75 [00:21<00:27,  1.59it/s]

(featMatrix, featLabel) (196138, 30, 30, 3) (196138,)
No. of Streamlines 182742
POPT_left 32 <class 'int'>


 43%|████▎     | 32/75 [00:21<00:27,  1.57it/s]

(featMatrix, featLabel) (200049, 30, 30, 3) (200049,)
No. of Streamlines 186653
POPT_right 33 <class 'int'>


 44%|████▍     | 33/75 [00:22<00:27,  1.54it/s]

(featMatrix, featLabel) (205788, 30, 30, 3) (205788,)
No. of Streamlines 192392
SCP_left 34 <class 'int'>


 45%|████▌     | 34/75 [00:23<00:26,  1.53it/s]

(featMatrix, featLabel) (206536, 30, 30, 3) (206536,)
No. of Streamlines 193140
SCP_right 35 <class 'int'>


 47%|████▋     | 35/75 [00:23<00:25,  1.54it/s]

(featMatrix, featLabel) (207424, 30, 30, 3) (207424,)
No. of Streamlines 194028
SLF_III_left 36 <class 'int'>


 48%|████▊     | 36/75 [00:24<00:25,  1.54it/s]

(featMatrix, featLabel) (208861, 30, 30, 3) (208861,)
No. of Streamlines 195465
SLF_III_right 37 <class 'int'>


 49%|████▉     | 37/75 [00:25<00:24,  1.53it/s]

(featMatrix, featLabel) (210850, 30, 30, 3) (210850,)
No. of Streamlines 197454
SLF_II_left 38 <class 'int'>


 51%|█████     | 38/75 [00:25<00:24,  1.52it/s]

(featMatrix, featLabel) (212744, 30, 30, 3) (212744,)
No. of Streamlines 199348
SLF_II_right 39 <class 'int'>


 52%|█████▏    | 39/75 [00:26<00:24,  1.50it/s]

(featMatrix, featLabel) (216406, 30, 30, 3) (216406,)
No. of Streamlines 203010
SLF_I_left 40 <class 'int'>


 53%|█████▎    | 40/75 [00:27<00:24,  1.45it/s]

(featMatrix, featLabel) (221628, 30, 30, 3) (221628,)
No. of Streamlines 208232
STR_left 56 <class 'int'>


 55%|█████▍    | 41/75 [00:28<00:26,  1.26it/s]

(featMatrix, featLabel) (222863, 30, 30, 3) (222863,)
No. of Streamlines 209467
STR_right 57 <class 'int'>


 56%|█████▌    | 42/75 [00:29<00:25,  1.28it/s]

(featMatrix, featLabel) (223106, 30, 30, 3) (223106,)
No. of Streamlines 209710
ST_FO_left 42 <class 'int'>


 57%|█████▋    | 43/75 [00:29<00:24,  1.29it/s]

(featMatrix, featLabel) (227279, 30, 30, 3) (227279,)
No. of Streamlines 213883
ST_FO_right 43 <class 'int'>


 59%|█████▊    | 44/75 [00:30<00:23,  1.30it/s]

(featMatrix, featLabel) (231101, 30, 30, 3) (231101,)
No. of Streamlines 217705
ST_OCC_left 44 <class 'int'>


 60%|██████    | 45/75 [00:31<00:23,  1.28it/s]

(featMatrix, featLabel) (236200, 30, 30, 3) (236200,)
No. of Streamlines 222804
ST_OCC_right 45 <class 'int'>


 61%|██████▏   | 46/75 [00:32<00:23,  1.26it/s]

(featMatrix, featLabel) (241999, 30, 30, 3) (241999,)
No. of Streamlines 228603
ST_PAR_left 46 <class 'int'>


 63%|██████▎   | 47/75 [00:33<00:28,  1.01s/it]

(featMatrix, featLabel) (264635, 30, 30, 3) (264635,)
No. of Streamlines 251239
ST_PAR_right 47 <class 'int'>


 64%|██████▍   | 48/75 [00:34<00:29,  1.08s/it]

(featMatrix, featLabel) (284413, 30, 30, 3) (284413,)
No. of Streamlines 271017
ST_POSTC_left 48 <class 'int'>


 65%|██████▌   | 49/75 [00:35<00:27,  1.07s/it]

(featMatrix, featLabel) (293256, 30, 30, 3) (293256,)
No. of Streamlines 279860
ST_POSTC_right 49 <class 'int'>


 67%|██████▋   | 50/75 [00:37<00:26,  1.06s/it]

(featMatrix, featLabel) (300872, 30, 30, 3) (300872,)
No. of Streamlines 287476
ST_PREC_left 50 <class 'int'>


 68%|██████▊   | 51/75 [00:38<00:29,  1.25s/it]

(featMatrix, featLabel) (315095, 30, 30, 3) (315095,)
No. of Streamlines 301699
ST_PREC_right 51 <class 'int'>


 69%|██████▉   | 52/75 [00:39<00:28,  1.22s/it]

(featMatrix, featLabel) (325041, 30, 30, 3) (325041,)
No. of Streamlines 311645
ST_PREF_left 52 <class 'int'>


 71%|███████   | 53/75 [00:43<00:41,  1.90s/it]

(featMatrix, featLabel) (352948, 30, 30, 3) (352948,)
No. of Streamlines 339552
CG_left 13 <class 'int'>


 72%|███████▏  | 54/75 [00:44<00:37,  1.77s/it]

(featMatrix, featLabel) (361197, 30, 30, 3) (361197,)
No. of Streamlines 347801
MCP 27 <class 'int'>


 73%|███████▎  | 55/75 [00:46<00:34,  1.72s/it]

(featMatrix, featLabel) (374483, 30, 30, 3) (374483,)
No. of Streamlines 361087
SLF_I_right 41 <class 'int'>


 75%|███████▍  | 56/75 [00:47<00:30,  1.58s/it]

(featMatrix, featLabel) (378730, 30, 30, 3) (378730,)
No. of Streamlines 365334
ST_PREF_right 53 <class 'int'>


 76%|███████▌  | 57/75 [00:50<00:37,  2.09s/it]

(featMatrix, featLabel) (405081, 30, 30, 3) (405081,)
No. of Streamlines 391685
ST_PREM_left 54 <class 'int'>


 77%|███████▋  | 58/75 [00:52<00:32,  1.89s/it]

(featMatrix, featLabel) (406557, 30, 30, 3) (406557,)
No. of Streamlines 393161
ST_PREM_right 55 <class 'int'>


 79%|███████▊  | 59/75 [00:53<00:28,  1.78s/it]

(featMatrix, featLabel) (407915, 30, 30, 3) (407915,)
No. of Streamlines 394519
T_OCC_left 58 <class 'int'>


 80%|████████  | 60/75 [00:55<00:26,  1.79s/it]

(featMatrix, featLabel) (410727, 30, 30, 3) (410727,)
No. of Streamlines 397331
T_OCC_right 59 <class 'int'>


 81%|████████▏ | 61/75 [00:57<00:24,  1.74s/it]

(featMatrix, featLabel) (413163, 30, 30, 3) (413163,)
No. of Streamlines 399767
T_PAR_left 60 <class 'int'>


 83%|████████▎ | 62/75 [01:00<00:28,  2.23s/it]

(featMatrix, featLabel) (431617, 30, 30, 3) (431617,)
No. of Streamlines 418221
T_PAR_right 61 <class 'int'>


 84%|████████▍ | 63/75 [01:04<00:33,  2.81s/it]

(featMatrix, featLabel) (447631, 30, 30, 3) (447631,)
No. of Streamlines 434235
T_POSTC_left 62 <class 'int'>


 85%|████████▌ | 64/75 [01:07<00:28,  2.63s/it]

(featMatrix, featLabel) (452710, 30, 30, 3) (452710,)
No. of Streamlines 439314
T_POSTC_right 63 <class 'int'>


 87%|████████▋ | 65/75 [01:09<00:24,  2.47s/it]

(featMatrix, featLabel) (458379, 30, 30, 3) (458379,)
No. of Streamlines 444983
T_PREC_left 64 <class 'int'>


 88%|████████▊ | 66/75 [01:11<00:21,  2.40s/it]

(featMatrix, featLabel) (466746, 30, 30, 3) (466746,)
No. of Streamlines 453350
T_PREC_right 65 <class 'int'>


 89%|████████▉ | 67/75 [01:15<00:24,  3.03s/it]

(featMatrix, featLabel) (475771, 30, 30, 3) (475771,)
No. of Streamlines 462375
T_PREF_left 66 <class 'int'>


 91%|█████████ | 68/75 [01:25<00:35,  5.02s/it]

(featMatrix, featLabel) (492577, 30, 30, 3) (492577,)
No. of Streamlines 479181
T_PREF_right 67 <class 'int'>


 92%|█████████▏| 69/75 [01:34<00:37,  6.21s/it]

(featMatrix, featLabel) (509579, 30, 30, 3) (509579,)
No. of Streamlines 496183
T_PREM_left 68 <class 'int'>


 93%|█████████▎| 70/75 [01:36<00:25,  5.02s/it]

(featMatrix, featLabel) (510700, 30, 30, 3) (510700,)
No. of Streamlines 497304
T_PREM_right 69 <class 'int'>


 95%|█████████▍| 71/75 [01:39<00:17,  4.31s/it]

(featMatrix, featLabel) (512557, 30, 30, 3) (512557,)
No. of Streamlines 499161
UF_left 70 <class 'int'>


 96%|█████████▌| 72/75 [01:41<00:10,  3.61s/it]

(featMatrix, featLabel) (513268, 30, 30, 3) (513268,)
No. of Streamlines 499872
UF_right 71 <class 'int'>


100%|██████████| 75/75 [01:44<00:00,  1.39s/it]

(featMatrix, featLabel) (515422, 30, 30, 3) (515422,)
No. of Streamlines 502026





In [None]:
# feed the numpy array to a combined hdf5 file
h5_feat_array = h5py.File("/media/ang/Data/data/779370/{}_featMatrix.h5".format(subject_ID), 'w')
h5_feat_array.create_dataset('feat', data=featMatrix)
h5_feat_array.close()

h5_feat_label =  h5py.File("/media/ang/Data/data/779370/{}_label.h5".format(subject_ID), 'w')
h5_feat_label.create_dataset('label_array', data=featLabel['label_array'])# type: ignore
h5_feat_label.create_dataset('label_values', data=featLabel['label_values'])  # type: ignore
h5_feat_label.create_dataset('label_names', data=featLabel['label_names'])  # type: ignore

h5_feat_label.close()