### Imports & constants

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import glob

import os

import pandas as pd

import utils

In [2]:
RAW_PATH = os.path.join(os.curdir, 'datasets', 'RAW')
OUTPUT_PATH = os.path.join(os.curdir, 'output', 'RAW.npz')

### Data processing

In [3]:
imgs_s = [] # np.array with sagittal MIPs
imgs_f = [] # np.array with frontal MIPs
v_levels = [] # np.array with labels in mm
ids = [] # np.array with CTs identifiers
thicks = [] # np.array with CTs slices' thicknesses

In [4]:
# reading CTs labels
df = pd.read_csv(os.path.join(RAW_PATH, 'labels.csv'), sep=',', index_col='id')
df = df.fillna(np.nan)

In [5]:
# reading CTs metadata (slice thickness & spacings)
pixels_info = {}

with open(os.path.join(RAW_PATH, 'spacings.asc'), 'r') as file:
    lines = file.readlines()
    
    for line in lines:
        line_splt = line.split()
        
        exam_id = int(line_splt[0].split('_')[1])
        thickness = float(line_splt[-2])
        pixel_spacing = float(line_splt[-1])
        
        pixels_info[exam_id] = (pixel_spacing, thickness)

In [None]:
names = glob.glob(os.path.join(RAW_PATH, '*.raw'))

for name in names:
    print(name)
    
    # where name.split('_') is for example ['/home/ditto/thesis/RAWS/Std3D', '46', '512', '178', '.raw']
    size = int(name.split('_')[-3])
    h = int(name.split('_')[-2])
    exam_id = int(name.split('_')[-4])
    
    if exam_id in ids:
        print(f'Done for: {exam_id}')
        continue
    
    try:
        img = utils.read_bin(name, size, h, 2, signed='Y', byte_order='LE')
    except IndexError:
        print(f'Corrupted file: {exam_id}...')
        continue
    if img is None:
        print(f'Wrong number of bytes per voxel for {name}: returned None.')
        continue
    
    # subtracting mask
    min1 = np.amin(img)
    min2 = np.amin(img[img != min1])
    img[img == min1] = min2
    
    # converting to HU scale
    img = utils.to_HU(img)
    
    # mass center calculation + 0ing everything outside body
    img, center_h, center_w = utils.get_mass_center(img)
    
    # reading labels
    t12_level = -1 if np.isnan(df.loc[exam_id]['Th12']) else int(df.loc[exam_id]['Th12'])
    l3_level = -1 if np.isnan(df.loc[exam_id]['L3']) else int(df.loc[exam_id]['L3'])
    
    # frontal MIP 
    img_cropped = utils.crop_ct(img, center_h, center_w)
    img_mip_f = np.amax(img_cropped, axis=0)
    img_mip_f = np.swapaxes(img_mip_f, 0, 1)
    
    # rescaling to 1mm x 1mm
    img_mip_f = utils.rescale_mip(img_mip_f, pixels_info[exam_id][0], pixels_info[exam_id][1])
    
    # sagittal MIP
    img_cropped = utils.crop_ct(img, center_h, center_w, for_frontal=False)
    img_mip_s = np.amax(img_cropped, axis=1) 
    img_mip_s = np.swapaxes(img_mip_s, 0, 1)
    
    # rescaling to 1mm x 1mm
    img_mip_s = utils.rescale_mip(img_mip_s, pixels_info[exam_id][0], pixels_info[exam_id][1])
    
    # saving
    imgs_s.append(img_mip_s)
    imgs_f.append(img_mip_f)
    
    t12_level = -1 if np.isnan(t12_level) else t12_level
    l3_level = -1 if np.isnan(l3_level) else l3_level
    
    v_levels.append([t12_level, l3_level])
    ids.append(exam_id)
    thicks.append(pixels_info[exam_id][1])

In [7]:
# exporting to .npz
imgs_s = np.asarray(imgs_s)
imgs_f = np.asarray(imgs_f)
v_levels = np.asarray(v_levels)
ids = np.asarray(ids)
thicks = np.asarray(thicks)
n_mips = imgs_s.shape[0]

np.savez_compressed(OUTPUT_PATH, imgs_s=imgs_s, imgs_f=imgs_f,
                    v_levels=v_levels, ids=ids, thicks=thicks, n_mips=n_mips)

  return array(a, dtype, copy=False, order=order)
