In [2]:
import SimpleITK as sitk
import jax.numpy as jnp
import itertools


def join_ct_suv(ct: sitk.Image, suv: sitk.Image,ct1: sitk.Image, suv1: sitk.Image) -> sitk.Image:
    '
    Resample a CT image to the same size as a SUV image
    '
   
    ct_arr=sitk.GetArrayFromImage(ct)
    suv_arr=sitk.GetArrayFromImage(suv)

    ct_arr_1=sitk.GetArrayFromImage(ct1)
    suv_arr_1=sitk.GetArrayFromImage(suv1)
    
    res=jnp.stack([jnp.array(suv_arr),jnp.array(ct_arr),jnp.array(ct_arr_1),jnp.array(suv_arr_1)],axis=-1)
    return res

def load_landmark_data(folder_path:str):
    '
    given path to folder with landmarks files and images after general registaration we load the data
    we want to first load the suv and ct images resample them to the same size and then load the landmarks
    we need to load separately study 0 and 1 
    the output should be in form of a dictionary with keys 'study_0','study_1','From`,`To`' where `From` and `To` are the landmarks
    all the data should be in form of jnp.arrays
    '
    ct_0=sitk.ReadImage(folder_path+'/study_0_ct_soft.nii.gz')
    suv_0=sitk.ReadImage(folder_path+'/study_0_SUVS.nii.gz')
    # Resample ct_0 to match ct_1
            
    ct_1=sitk.ReadImage(folder_path+'/study_1_ct_soft.nii.gz')
    suv_1=sitk.ReadImage(folder_path+'/study_1_SUVS.nii.gz')    
    arr_0 = join_ct_suv(ct_0, suv_0,ct_1, suv_1)

    return {'study':arr_0, 'From':jnp.load(folder_path+'/From.npy'),'To':jnp.load(folder_path+'/To.npy')}

In [3]:
import os
import numpy as np
import numpy as np
folder_path='/root/data/prepared_registered'
# folder_path='/root/data/prepared_registered/pat_2/general_transform'
# load_landmark_data(folder_path)





def reshape_image(arr, img_size):
    # Get the current shape of the input array
    img_size=(img_size[1],img_size[2],img_size[3],img_size[4])
    current_shape = arr.shape
    
    # Check if the current shape is already equal to the desired shape
    if current_shape == img_size:
        print("The input array already has the desired shape.")
        return arr
    
    # Check if the current shape is larger than the desired shape in any dimension
    if any(cs > ds for cs, ds in zip(current_shape, img_size)):
        # Crop the input array from the end of the dimension where it occurs
        arr = arr[:img_size[0], :img_size[1], :img_size[2], :img_size[3]]
        print("The input array has been cropped to the desired shape.")
    
    # Check if the current shape is smaller than the desired shape in any dimension
    if any(cs < ds for cs, ds in zip(current_shape, img_size)):
        # Pad the input array with zeros at the end of the dimension where it occurs

        arr = np.pad(arr, ((0, np.max(img_size[0] - current_shape[0],0)),
                                  (0, np.max(img_size[1] - current_shape[1],0)),
                                  (0, np.max(img_size[2] - current_shape[2],0)),
                                  (0, 0)), mode='constant')
        print("The input array has been padded to the desired shape.")
    
    # If none of the above conditions are met, return the input array as is
    return arr

batch_size=2
img_size = (batch_size,488, 200, 200,2)

def stack_with_pad(arr_0,arr_1):
    if arr_0.shape[0] > arr_1.shape[0]:
        pad_length = arr_0.shape[0] - arr_1.shape[0]
        padding = jnp.full((pad_length, arr_1.shape[1]), -1)
        arr_1 = jnp.concatenate((arr_1, padding), axis=0)
    elif arr_1.shape[0] > arr_0.shape[0]:
        pad_length = arr_1.shape[0] - arr_0.shape[0]
        padding = jnp.full((pad_length, arr_0.shape[1]), -1)
        arr_0 = jnp.concatenate((arr_0, padding), axis=0)
    
    return jnp.stack([arr_0, arr_1])

    


def get_batched(folder_tuple):
    folder_0=load_landmark_data(f"{folder_tuple[0]}/general_transform")
    folder_1=load_landmark_data(f"{folder_tuple[1]}/general_transform")
    arr=jnp.stack([reshape_image(folder_0['study'],img_size),reshape_image(folder_1['study'],img_size)])
    From=stack_with_pad(folder_0['From'],folder_1['From'])
    To=stack_with_pad(folder_0['To'],folder_1['To'])
    return {'study':arr, 'From':From,'To':To}


# folder_tuples = list(itertools.zip_longest(*[iter(folder_names)] * 2))
# tt=list(map(get_batched,folder_tuples))

# tt=list(map(lambda el: reshape_image(load_landmark_data(f"{el}/general_transform")['study'],img_size) ,folder_names))

# create a function that given input array'arr' will change it shape to shape given as 'img_size' if the given image is bigger than 'img_size' in any dimension image should be cropped from the end of dimension where it happend in case when image is bigger than 'img_size' image should be padded with zeros at the end of dimension where it happens ; check weather the resulting image has shape required


In [25]:
import pandas as pd

radiomics_full_data_path="/workspaces/pilot_lymphoma/data/extracted_features_pet_full_curr.csv"
radiomics_full_data=pd.read_csv(radiomics_full_data_path)
radiomics_full_data = radiomics_full_data.loc[:, ~radiomics_full_data.columns.str.contains('Unnamed', case=False)]
radiomics_full_data = radiomics_full_data[radiomics_full_data['lesion_num'] == 1000]
radiomics_full_data["pat_id"]=radiomics_full_data["pat_id"].astype(int)
radiomics_full_data.columns

Index(['pat_id', 'lesion_num', 'study_0_or_1', 'Deauville', 'lab_path',
       'mod_name', 'vol_in_mm3', 'original_firstorder_10Percentile_pet',
       'original_firstorder_90Percentile_pet',
       'original_firstorder_Energy_pet',
       ...
       'wavelet-LLL_glszm_SmallAreaHighGrayLevelEmphasis_ct',
       'wavelet-LLL_glszm_SmallAreaLowGrayLevelEmphasis_ct',
       'wavelet-LLL_glszm_ZoneEntropy_ct',
       'wavelet-LLL_glszm_ZonePercentage_ct',
       'wavelet-LLL_glszm_ZoneVariance_ct', 'wavelet-LLL_ngtdm_Busyness_ct',
       'wavelet-LLL_ngtdm_Coarseness_ct', 'wavelet-LLL_ngtdm_Complexity_ct',
       'wavelet-LLL_ngtdm_Contrast_ct', 'wavelet-LLL_ngtdm_Strength_ct'],
      dtype='object', length=2425)

In [26]:
full_data_table_path="/workspaces/pilot_lymphoma/data/full_table_data_for_delta.csv"
full_data_table= pd.read_csv(full_data_table_path)
full_data_table["pat_id"]=full_data_table["Unnamed: 0"].astype(int)
full_data_table["outcome"]=full_data_table["Unnamed: 12"]
np.unique(full_data_table["outcome"].to_numpy)


array([<bound method IndexOpsMixin.to_numpy of 0     PR
       1     CR
       2     PR
       3     CR
       4     PD
       5     CR
       6     CR
       7     SD
       8     CR
       9     CR
       10    PD
       11    PD
       12    PR
       13    PD
       14    PR
       15    SD
       16    CR
       17    CR
       18    CR
       19    CR
       20    CR
       21    CR
       22    SD
       23    PD
       24    CR
       25    PD
       26    CR
       27    CR
       28    PD
       29    CR
       30    CR
       31    PR
       Name: outcome, dtype: object>                   ], dtype=object)

In [47]:

def subtract_dicts(dict1, dict2,dict_sums):
    # Create a new dictionary with the absolute difference of each entry
    result = {key: abs(dict1[key] - dict2[key])/dict_sums[key] for key in dict1}
    return result



def get_delta_radiomics(full_data_table_row, radiomics_full_data):

    """a function 'get_delta_radiomics' that would have two arguments 'full_data_table_row' and 'radiomics_full_data'.  'full_data_table_row' is a row from main table and contains columns like '[pat_id,outcome]'   'radiomics_full_data' contains multiple column including'[pat_id,study_0_or_1]'  Function should perform all steps:
    1) find 2 rows from 'radiomics_full_data' where  value of column 'pat_id' would be the same as value of column 'pat_id' in 'full_data_table_row' 
    2) From those 2 rows you found drop columns with names: ```['pat_id', 'lesion_num', 'study_0_or_1', 'Deauville', 'lab_path', 'mod_name']```
    3) Save the sum of both rows so each column should have sum of 2 rows
    4) calculate the absolute value of the diffrence between two rows and divide it by the saved sum save information as dictionary called 'res'
    5) add outcome variable to 'res' that you will find in column 'outcome' in 'full_data_table_row' encode the  'outcome' as integer as in the dictionary {'CR':0, 'PD':1, 'PR':2, 'SD':2, }
    6) return calculated dictionary res"""
    full_data_table_row=full_data_table_row[1]
    # print(f"pppp {full_data_table_row['pat_id']}")
    # Step 1
    rows = radiomics_full_data[radiomics_full_data['pat_id'] == full_data_table_row['pat_id']]
    
    # Step 2
    rows = rows.drop(columns=['pat_id', 'lesion_num', 'study_0_or_1', 'Deauville', 'lab_path', 'mod_name'])
    # Step 3
    row_sum = rows.sum().to_dict()
   
    print(len(rows))
    # Step 4
    res = subtract_dicts(rows.iloc[0].to_dict(),rows.iloc[1].to_dict(),row_sum )
    print(res)
    # Step 5
    outcome_dict = {'CR':0, 'PD':1, 'PR':2, 'SD':2}
    res['outcome'] = outcome_dict[full_data_table_row['outcome']]

    # Step 6
    return res

rows = list(full_data_table.iterrows())


get_delta_radiomics(rows[2], radiomics_full_data)



1


IndexError: single positional indexer is out-of-bounds

# Check what is the sum of euclidean distances for diffrent registrations


In [4]:
folder_names = [os.path.join(folder_path, name) for name in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, name))]
folder_names= list(filter(lambda el: "pat" in el, folder_names))
folder_names

['/root/data/prepared_registered/pat_12',
 '/root/data/prepared_registered/pat_16',
 '/root/data/prepared_registered/pat_31',
 '/root/data/prepared_registered/pat_21',
 '/root/data/prepared_registered/pat_26',
 '/root/data/prepared_registered/pat_2',
 '/root/data/prepared_registered/pat_19',
 '/root/data/prepared_registered/pat_13',
 '/root/data/prepared_registered/pat_10',
 '/root/data/prepared_registered/pat_28',
 '/root/data/prepared_registered/pat_8',
 '/root/data/prepared_registered/pat_24',
 '/root/data/prepared_registered/pat_15',
 '/root/data/prepared_registered/pat_4',
 '/root/data/prepared_registered/pat_29',
 '/root/data/prepared_registered/pat_14',
 '/root/data/prepared_registered/pat_20',
 '/root/data/prepared_registered/pat_22',
 '/root/data/prepared_registered/pat_5',
 '/root/data/prepared_registered/pat_18',
 '/root/data/prepared_registered/pat_11',
 '/root/data/prepared_registered/pat_9',
 '/root/data/prepared_registered/pat_27',
 '/root/data/prepared_registered/pat_3'

In [7]:
### checking from linear folder the distance
def get_dist_0(fold_name):
  fold=f"{fold_name}/lin_transf"
  fromm=np.load(f"{fold}/From.npy")
  too=np.load(f"{fold}/To.npy")

  res=(fromm-too)
  res=res*(fromm>0)
  res=np.sqrt(np.sum(res**2,axis=-1))
  return np.sum(res.flatten())

np.mean(list(map(get_dist_0,folder_names)))

27.419910917593374