In [2]:
import os
import pandas as pd
import json
import monai
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from glob import glob
from plotly.subplots import make_subplots

 missing cuda symbols while dynamic loading
 cuFile initialization failed


In [3]:
figures_dir = "/home/jaalzate/Prostate_Cancer_TFM/Data_Analysis/Figures/BIMCV/"
extension = "png"
colors=(
            "#efc86e",
            "#6f9969",
            "#97c684",
            "#aab5d5",
            "#808fe1",
            "#5c66a8",
            "#454a74",
        )

## Plot Functions

In [4]:
def plot_size_variability_plotly(dimensions, means, std_devs, colors, title, save_as_pdf=False):
    x_pos = [str(dimension) for dimension in dimensions]  # Ensure dimensions are string for Plotly

    # Create traces
    trace = go.Bar(
        x=x_pos,
        y=means,
        error_y=dict(
            type='data',  # or 'percent' for percentage-based errors
            array=std_devs,
            visible=True
        ),
        marker_color=colors  # Set bar colors
    )

    # Create layout
    layout = go.Layout(
        #title='Image Size Variability '+title,
        # xaxis=dict(
        #     title='Dimensions'
        # ),
        yaxis=dict(
            title='Size (pixels)'
        ),
        template='plotly_white',  # Use a white background for the plot
        font=dict(
            size=15,  # Only change the font size
            # color and family are omitted to use defaults
        )

    )

    # Create figure and add traces
    fig = go.Figure(data=[trace], layout=layout)

    # Update layout for a tighter appearance and grid lines
    fig.update_layout(
        autosize=False,
        width=1000,  # Custom width, adjust as needed
        height=600,  # Custom height, adjust as needed
        margin=dict(
            l=20,
            r=20,
            b=40,
            t=20,
            pad=0
        ),
        # plot_bgcolor='rgba(0,0,0,0)',  # Transparent background
        # paper_bgcolor='rgba(0,0,0,0)',  # Transparent paper background
        yaxis=dict(gridcolor='rgba(128,128,128,0.5)'),  # Custom grid color (light grey) and transparency
    )

    
    # Show the figure
    fig.show()
    # Save figure as pdf
    if save_as_pdf:
        fig.write_image(os.path.join(figures_dir,"image_size_variability_plotly_"+title+"."+extension))

In [5]:
def plot_spacing_variability_plotly(dimensions, means, std_devs, colors, title, save_as_pdf=False):
    x_pos = [str(dimension) for dimension in dimensions]  # Ensure dimensions are string for Plotly

    # Create traces
    trace = go.Bar(
        x=x_pos,
        y=means,
        error_y=dict(
            type='data',  # or 'percent' for percentage-based errors
            array=std_devs,
            visible=True
        ),
        marker_color=colors  # Set bar colors
    )

    # Create layout
    layout = go.Layout(
        #title='Image Size Variability '+title,
        # xaxis=dict(
        #     title='Dimensions'
        # ),
        yaxis=dict(
            title='Size (pixels)'
        ),
        template='plotly_white',  # Use a white background for the plot
        font=dict(
            size=15,  # Only change the font size
            # color and family are omitted to use defaults
        )

    )

    # Create figure and add traces
    fig = go.Figure(data=[trace], layout=layout)

    # Update layout for a tighter appearance and grid lines
    fig.update_layout(
        autosize=False,
        width=1000,  # Custom width, adjust as needed
        height=600,  # Custom height, adjust as needed
        margin=dict(
            l=20,
            r=20,
            b=40,
            t=20,
            pad=0
        ),
        # plot_bgcolor='rgba(0,0,0,0)',  # Transparent background
        # paper_bgcolor='rgba(0,0,0,0)',  # Transparent paper background
        yaxis=dict(gridcolor='rgba(128,128,128,0.5)'),  # Custom grid color (light grey) and transparency
    )

    
    # Show the figure
    fig.show()
    # Save figure as pdf
    if save_as_pdf:
        fig.write_image(os.path.join(figures_dir,"image_spacing_variability_plotly_"+title+"."+extension))

In [6]:
def plot_variability_plotly_grouped(means, stds, datasets, colors, title,x_ticks,y_axis_title, save_as_pdf=False):
    width = 0.25  # Equivalent to the bar width in Plotly terms

    # Create a figure
    fig = go.Figure()

    # Add bars for each dataset
    for i, dataset in enumerate(datasets):
        means_dataset = means[dataset]
        stds_dataset = stds[dataset]
        x_positions = [x + i * width for x in range(len(x_ticks))]  # Adjust x positions for grouping
        
        fig.add_trace(go.Bar(
            x=x_positions,
            y=means_dataset,
            error_y=dict(
                type='data',  # Specify that the errors are given explicitly
                array=stds_dataset,
                visible=True
            ),
            name=dataset,
            marker_color=colors[i]  # Set the color for each dataset
        ))

    # Update layout for the figure
    fig.update_layout(
        xaxis=dict(
            tickmode='array',
            tickvals=[x + width for x in range(len(x_ticks))],  # Position x-ticks in the center of groups
            ticktext=x_ticks,  # Label x-ticks with dimension names
            #title='Dimension'
        ),
        yaxis=dict(
            title=y_axis_title
        ),
        # legend=dict(
        #     title='Ori'
        # ),
        barmode='group',  # Group bars instead of stacking
        width=1000,  # Customizable figure size
        height=600,
        template='plotly_white',  # Use a white background for cleaner appearance
        font=dict(size=20),  # Set global font size
        margin=dict(
            l=20,
            r=20,
            b=40,
            t=20,
            pad=0
        )
    )

    # Show the figure
    fig.show()
    if save_as_pdf:
        fig.write_image(os.path.join(figures_dir,"image_variability_plotly_"+title+"."+extension),scale=6)

In [7]:
def plot_orientation_distribution(orientation_counts):
    orientation_labels = list(orientation_counts.keys())
    orientation_values = list(orientation_counts.values())
    
    # Define colors for the pie chart
    colors = ['lightseagreen', 'gold', 'lightpink', 'lightblue', 'lightgreen', 'violet']

    fig = go.Figure(data=[go.Pie(labels=orientation_labels, values=orientation_values, textinfo='label+percent', insidetextorientation='radial', marker_colors=colors)])
    
    fig.update_layout(
        title_text='Orientation Distribution',
        title_font_size=30,
        #font=dict(size=25, weight='bold'),
        showlegend=True
    )
    
    fig.show()

def plot_separate_orientations(orientation_counts):
    # Assuming orientation_counts is a dictionary like {'Axial': count, 'Coronal': count, 'Sagittal': count}
    subplots_titles = ['Axial', 'Coronal', 'Sagittal']
    colors = ['lightseagreen', 'gold', 'lightpink']  # Colors for each pie chart
    
    fig = go.Figure()

    for i, title in enumerate(subplots_titles):
        fig.add_trace(go.Pie(labels=[title], values=[orientation_counts[title]], name=title, marker_colors=[colors[i]]))
    
    fig.update_traces(textinfo='label+percent', insidetextorientation='radial')
    fig.update_layout(
        title_text='Separate Orientation Distributions',
        title_font_size=30,
        #font=dict(size=25, weight='bold')
    )
    
    # Adjust layout to fit three pie charts
    fig.update_layout(
        grid={'rows': 1, 'columns': 3, 'pattern': 'independent'},
        annotations=[dict(text=text, x=0.5, y=0.5, font_size=20, showarrow=False) for text in subplots_titles]
    )
    
    fig.show()

In [8]:
def plot_pie_simple(label, count, title = "Pie Chart", size = (400, 400), out_filename = None):
    # Create traces
    trace = go.Pie(labels=label, values=count, textinfo='label+percent', textposition="outside", marker=dict(colors=colors))
    # Create layout
    w, h = size
    layout = go.Layout(
        width=w,
        height=h,
        title=title,
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        showlegend=False
    )
    # Create figure and add traces
    fig = go.Figure(data=trace, layout=layout)
    # Show figure
    fig.show()
    # Save figure
    if out_filename:
        fig.write_image(os.path.join(figures_dir,f"{out_filename}.{extension}"),scale=6)

In [9]:
def plot_bar_simple(label, count, title = "Bar Chart", x_axis = "Variable", variable_names = [], size = (800, 400), out_filename = None):
    # Create traces
    trace = go.Bar(x=label, y=count, text=count, marker_color=colors)
    
    # Create layout
    layout = go.Layout(
        title=title,
        xaxis_title=x_axis,
        yaxis_title='Frequency',
        plot_bgcolor='rgba(0,0,0,0)',
    )
    
    # Create figure and add traces
    fig = go.Figure(data=trace, layout=layout)

    if not variable_names:
        variable_names = label
    fig.update_layout(
        xaxis=dict(
            tickmode='array',
            tickvals=[x for x in label],
            ticktext=[(v if str(v) != "nan" else "N/A") for v in variable_names],  # Label x-ticks with dimension names
            #title='Dimension'
        ),
        template='plotly_white',
    )
    
    # Show figure
    fig.show()
    # Save figure
    if out_filename:
        fig.write_image(os.path.join(figures_dir,f"{out_filename}.{extension}"),scale=6)

In [10]:
def plot_box_simple(label, values, name = "", title = "Box Plot", x_axis = "", size = (800, 400), out_filename = None):
    # Create traces
    trace = go.Box(x=values, name=label, marker_color=colors[0], boxmean='sd', orientation="h")
    
    # Create figure and add traces
    fig = go.Figure(data=trace)

    w, h = size
    if not name:
        name = label
    fig.update_layout(
        title_text=title,
        template='plotly_white',
        height=h,
        width=w,
        xaxis_title=x_axis,
        yaxis=dict(tickmode='array',
            tickvals=[0],
            ticktext=[name],
        )
    )
    
    # Show figure
    fig.show()
    # Save figure
    if out_filename:
        fig.write_image(os.path.join(figures_dir,f"{out_filename}.{extension}"),scale=6)

# Getting Images Tables 

In [10]:
# Creating  tables
path = "/mnt/ceib/datalake/FISABIO_datalake/prueba/p0052021_reborn"
derivatives_path = "/mnt/ceib/datalake/FISABIO_datalake/prueba/p0052021_reborn/derivatives/creating_adc"
t2_list = []
adc_list = []
dwi_list = []

# Creating a list of all the files in the directory starting with sub-*
subjects = [f for f in os.listdir(path) if f.startswith("sub-")]
derivatives_subs = [f for f in os.listdir(derivatives_path) if f.startswith("sub-")]

for sub in subjects:
    derivative_sessions=None
    if sub in derivatives_subs:
        derivative_sessions = [f for f in os.listdir(os.path.join(derivatives_path, sub)) if f.startswith("ses-")]
    sessions = [f for f in os.listdir(os.path.join(path, sub)) if f.startswith("ses-")]
    for ses in sessions:
        # Check if the anat and dwi paths exist
        anat_path = os.path.join(path, sub, ses,'mim-mr','anat')
        dwi_path = os.path.join(path, sub, ses,'mim-mr','dwi')

        if os.path.exists(anat_path):
            images_anat = [f for f in os.listdir(anat_path) if f.endswith(".nii.gz")]
            for img in images_anat:
                if 'T2w' in img and 'chunk' not in img:
                    json_path = os.path.join(path, sub, ses,'mim-mr','anat', img.replace('.nii.gz', '.json'))
                    with open(json_path) as f:
                        data = json.load(f)
                    #Add json data to the dict image
                    img_dict = {'subject': sub, 'session': ses, 'image': img, 'modality': 'T2w'}
                    img_dict.update(data)
                    t2_list.append(img_dict)
        if os.path.exists(dwi_path):
            images_dwi = [f for f in os.listdir(dwi_path) if f.endswith(".nii.gz")]
            for img in images_dwi:
                if 'bvalue' in img and 'chunk' not in img:
                    json_path = os.path.join(path, sub, ses,'mim-mr','dwi', img.replace('.nii.gz', '.json'))
                    with open(json_path) as f:
                        data = json.load(f)
                    #Add json data to the dict image
                    img_dict = {'subject': sub, 'session': ses, 'image': img, 'modality': 'dwi'}
                    img_dict.update(data)
                    dwi_list.append(img_dict)
                elif 'adc' in img and 'chunk' not in img:
                    json_path = os.path.join(path, sub, ses,'mim-mr','dwi', img.replace('.nii.gz', '.json'))
                    with open(json_path) as f:
                        data = json.load(f)
                    #Add json data to the dict image
                    img_dict = {'subject': sub, 'session': ses, 'image': img, 'modality': 'adc'}
                    img_dict.update(data)
                    adc_list.append(img_dict)
    if derivative_sessions:
        for ses in derivative_sessions:
            der_dwi_path = os.path.join(derivatives_path, sub, ses,'mim-mr','dwi')
            images_dwi = [f for f in os.listdir(der_dwi_path) if f.endswith(".nii.gz")]
            for img in images_dwi:
                json_path = os.path.join(derivatives_path, sub, ses,'mim-mr','dwi', img.replace('.nii.gz', '.json'))
                with open(json_path) as f:
                    data = json.load(f)
                #Add json data to the dict image
                img_dict = {'subject': sub, 'session': ses, 'image': img, 'modality': 'derivative/adc'}
                img_dict.update(data)
                adc_list.append(img_dict)


t2w_df = pd.DataFrame(t2_list)
dwi_df = pd.DataFrame(dwi_list)
adc_df = pd.DataFrame(adc_list)


In [18]:
t2w_df

Unnamed: 0,subject,session,image,modality,AccessionNumber,AcquisitionDate,AcquisitionDuration,AcquisitionMatrix,AcquisitionNumber,AcquisitionTime,...,PixelAspectRatio,AcquisitionDateTime,BurnedInAnnotation,PatientIdentityRemoved,PixelPaddingValue,ContentQualification,ContrastBolusVolume,WindowCenterWidthExplanation,TimeOfLastCalibration,PlanarConfiguration
0,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-801_bp-prost...,T2w,003863,20190429,167.241257,"[0, 240, 176, 0]",8.0,110304.12,...,,,,,,,,,,
1,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-601_bp-prost...,T2w,003863,20190429,156.429977,"[0, 240, 181, 0]",6.0,105601.79,...,,,,,,,,,,
2,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-301_bp-prost...,T2w,003863,20190429,276.815826,"[256, 0, 0, 190]",3.0,104739.43,...,,,,,,,,,,
3,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-501_bp-prost...,T2w,003863,20190429,125.143990,"[0, 224, 165, 0]",5.0,105347.51,...,,,,,,,,,,
4,sub-005730,ses-006206,sub-005730_ses-006206_acq-propeller_run-7_bp-p...,T2w,006206,20160129,,"[0, 288, 288, 0]",1.0,134830.999486,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32109,sub-003671,ses-006159,sub-003671_ses-006159_acq-fse_run-7_bp-prostat...,T2w,006159,20131202,,"[0, 332, 272, 0]",1.0,084240,...,,,,,,,,,,
32110,sub-003671,ses-006159,sub-003671_ses-006159_acq-fse_run-6_bp-prostat...,T2w,006159,20131202,,"[0, 332, 288, 0]",1.0,083539,...,,,,,,,,,,
32111,sub-003671,ses-006159,sub-003671_ses-006159_acq-fse_run-4_bp-prostat...,T2w,006159,20131202,,"[0, 352, 272, 0]",1.0,082427,...,,,,,,,,,,
32112,sub-003671,ses-006159,sub-003671_ses-006159_acq-fse_run-5_bp-prostat...,T2w,006159,20131202,,"[300, 0, 0, 256]",1.0,082911,...,,,,,,,,,,


In [11]:
# Creating  tables
path = "/mnt/ceib/datalake/FISABIO_datalake/prueba/p0042021"
derivatives_path = "/mnt/ceib/datalake/FISABIO_datalake/prueba/p0042021/derivatives/creating_adc"
t2_list = []
adc_list = []
dwi_list = []

# Creating a list of all the files in the directory starting with sub-*
subjects = [f for f in os.listdir(path) if f.startswith("sub-")]
derivatives_subs = [f for f in os.listdir(derivatives_path) if f.startswith("sub-")]

for sub in subjects:
    derivative_sessions=None
    if sub in derivatives_subs:
        derivative_sessions = [f for f in os.listdir(os.path.join(derivatives_path, sub)) if f.startswith("ses-")]
    sessions = [f for f in os.listdir(os.path.join(path, sub)) if f.startswith("ses-")]
    for ses in sessions:
        # Check if the anat and dwi paths exist
        anat_path = os.path.join(path, sub, ses,'mim-mr','anat')
        dwi_path = os.path.join(path, sub, ses,'mim-mr','dwi')

        if os.path.exists(anat_path):
            images_anat = [f for f in os.listdir(anat_path) if f.endswith(".nii.gz")]
            for img in images_anat:
                if 'T2w' in img and 'chunk' not in img:
                    json_path = os.path.join(path, sub, ses,'mim-mr','anat', img.replace('.nii.gz', '.json'))
                    with open(json_path) as f:
                        data = json.load(f)
                    #Add json data to the dict image
                    img_dict = {'subject': sub, 'session': ses, 'image': img, 'modality': 'T2w'}
                    img_dict.update(data)
                    t2_list.append(img_dict)
        if os.path.exists(dwi_path):
            images_dwi = [f for f in os.listdir(dwi_path) if f.endswith(".nii.gz")]
            for img in images_dwi:
                if 'bvalue' in img and 'chunk' not in img:
                    json_path = os.path.join(path, sub, ses,'mim-mr','dwi', img.replace('.nii.gz', '.json'))
                    with open(json_path) as f:
                        data = json.load(f)
                    #Add json data to the dict image
                    img_dict = {'subject': sub, 'session': ses, 'image': img, 'modality': 'dwi'}
                    img_dict.update(data)
                    dwi_list.append(img_dict)
                elif 'adc' in img and 'chunk' not in img:
                    json_path = os.path.join(path, sub, ses,'mim-mr','dwi', img.replace('.nii.gz', '.json'))
                    with open(json_path) as f:
                        data = json.load(f)
                    #Add json data to the dict image
                    img_dict = {'subject': sub, 'session': ses, 'image': img, 'modality': 'adc'}
                    img_dict.update(data)
                    adc_list.append(img_dict)
    if derivative_sessions:
        for ses in derivative_sessions:
            der_dwi_path = os.path.join(derivatives_path, sub, ses,'mim-mr','dwi')
            images_dwi = [f for f in os.listdir(der_dwi_path) if f.endswith(".nii.gz")]
            for img in images_dwi:
                json_path = os.path.join(derivatives_path, sub, ses,'mim-mr','dwi', img.replace('.nii.gz', '.json'))
                with open(json_path) as f:
                    data = json.load(f)
                #Add json data to the dict image
                img_dict = {'subject': sub, 'session': ses, 'image': img, 'modality': 'derivative/adc'}
                img_dict.update(data)
                adc_list.append(img_dict)


t2w_df_val = pd.DataFrame(t2_list)
dwi_df_val = pd.DataFrame(dwi_list)
adc_df_val = pd.DataFrame(adc_list)


In [14]:
adc_df_val

Unnamed: 0,subject,session,image,modality,AccessionNumber,AcquisitionDate,AcquisitionMatrix,AcquisitionTime,BitsAllocated,BitsStored,...,BeatRejectionFlag,BurnedInAnnotation,NumberOfStudyRelatedInstances,PatientIdentityRemoved,PatientSize,PixelPaddingValue,ContentQualification,ContributingEquipmentSequence,TriggerWindow,VOILUTFunction
0,sub-000063,ses-000063,sub-000063_ses-000063_acq-bvalue_run-14_bp-pro...,derivative/adc,000063,20160707,"[64, 0, 0, 64]",153703,16,16,...,,,,,,,,,,
1,sub-000063,ses-000063,sub-000063_ses-000063_acq-bvalue_run-13_bp-pro...,derivative/adc,000063,20160707,"[64, 0, 0, 64]",153130,16,16,...,,,,,,,,,,
2,sub-000066,ses-000066,sub-000066_ses-000066_acq-bvalue_run-10_bp-pro...,derivative/adc,000066,20170127,"[64, 0, 0, 64]",155603,16,16,...,,,,,,,,,,
3,sub-000066,ses-000066,sub-000066_ses-000066_acq-bvalue_run-11_bp-pro...,derivative/adc,000066,20170127,"[64, 0, 0, 64]",160240,16,16,...,,,,,,,,,,
4,sub-000184,ses-000184,sub-000184_ses-000184_acq-adc_run-700_bp-abdom...,adc,000184,20180201,"[92, 0, 0, 128]",214945,16,16,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337,sub-000003,ses-000003,sub-000003_ses-000003_acq-bvalue_run-12_bp-arm...,derivative/adc,000003,20140307,"[64, 0, 0, 64]",084434,16,16,...,,,,,,,,,,
338,sub-000111,ses-000111,sub-000111_ses-000111_acq-bvalue_run-10_bp-abd...,derivative/adc,000111,20150624,"[64, 0, 0, 64]",200541,16,16,...,,,,,,,,,,
339,sub-000111,ses-000111,sub-000111_ses-000111_acq-bvalue_run-11_bp-abd...,derivative/adc,000111,20150624,"[64, 0, 0, 64]",201117,16,16,...,,,,,,,,,,
340,sub-000120,ses-000120,sub-000120_ses-000120_acq-bvalue_run-10_bp-abd...,derivative/adc,000120,20140924,"[64, 0, 0, 64]",082807,16,16,...,,,,,,,,,,


In [15]:
t2w_df_complete = pd.concat([t2w_df, t2w_df_val])
dwi_df_complete = pd.concat([dwi_df, dwi_df_val])
adc_df_complete = pd.concat([adc_df, adc_df_val])

In [17]:
t2w_df_complete.to_csv("/home/jaalzate/Prostate_Cancer_TFM/Data_Analysis/Tables/t2w_df_complete.csv", index=False)
dwi_df_complete.to_csv("/home/jaalzate/Prostate_Cancer_TFM/Data_Analysis/Tables/dwi_df_complete.csv", index=False)
adc_df_complete.to_csv("/home/jaalzate/Prostate_Cancer_TFM/Data_Analysis/Tables/adc_df_complete.csv", index=False)

In [11]:
t2w_df = pd.read_csv("/home/jaalzate/Prostate_Cancer_TFM/Data_Analysis/Tables/t2w_df_complete.csv")
dwi_df = pd.read_csv("/home/jaalzate/Prostate_Cancer_TFM/Data_Analysis/Tables/dwi_df_complete.csv")
adc_df = pd.read_csv("/home/jaalzate/Prostate_Cancer_TFM/Data_Analysis/Tables/adc_df_complete.csv")

Columns (130) have mixed types. Specify dtype option on import or set low_memory=False.
Columns (82,84,131) have mixed types. Specify dtype option on import or set low_memory=False.


In [12]:
t2w_df.subject.nunique(), dwi_df.subject.nunique(), adc_df.subject.nunique()

(7714, 4146, 5631)

In [13]:
t2w_df.session.nunique(), dwi_df.session.nunique(), adc_df.session.nunique()

(8506, 4524, 6199)

In [14]:
# nunique of all three dataframes merged
merged_df = pd.concat([t2w_df, dwi_df, adc_df])
merged_df.subject.nunique(), merged_df.session.nunique()

(8441, 9351)

In [15]:
merged_df

Unnamed: 0,subject,session,image,modality,AccessionNumber,AcquisitionDate,AcquisitionDuration,AcquisitionMatrix,AcquisitionNumber,AcquisitionTime,...,VOILUTFunction,BluePaletteColorLookupTableData,BluePaletteColorLookupTableDescriptor,GreenPaletteColorLookupTableData,GreenPaletteColorLookupTableDescriptor,RedPaletteColorLookupTableData,RedPaletteColorLookupTableDescriptor,ContrastBolusIngredient,ContrastBolusIngredientConcentration,PatientOrientation
0,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-801_bp-prost...,T2w,3863,20190429.0,167.241257,"[0, 240, 176, 0]",8.0,110304.120000,...,,,,,,,,,,
1,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-601_bp-prost...,T2w,3863,20190429.0,156.429977,"[0, 240, 181, 0]",6.0,105601.790000,...,,,,,,,,,,
2,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-301_bp-prost...,T2w,3863,20190429.0,276.815826,"[256, 0, 0, 190]",3.0,104739.430000,...,,,,,,,,,,
3,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-501_bp-prost...,T2w,3863,20190429.0,125.143990,"[0, 224, 165, 0]",5.0,105347.510000,...,,,,,,,,,,
4,sub-005730,ses-006206,sub-005730_ses-006206_acq-propeller_run-7_bp-p...,T2w,6206,20160129.0,,"[0, 288, 288, 0]",1.0,134830.999486,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11162,sub-000003,ses-000003,sub-000003_ses-000003_acq-bvalue_run-12_bp-arm...,derivative/adc,3,20140307.0,,"[64, 0, 0, 64]",,84434.000000,...,,,,,,,,,,
11163,sub-000111,ses-000111,sub-000111_ses-000111_acq-bvalue_run-10_bp-abd...,derivative/adc,111,20150624.0,,"[64, 0, 0, 64]",,200541.000000,...,,,,,,,,,,
11164,sub-000111,ses-000111,sub-000111_ses-000111_acq-bvalue_run-11_bp-abd...,derivative/adc,111,20150624.0,,"[64, 0, 0, 64]",,201117.000000,...,,,,,,,,,,
11165,sub-000120,ses-000120,sub-000120_ses-000120_acq-bvalue_run-10_bp-abd...,derivative/adc,120,20140924.0,,"[64, 0, 0, 64]",,82807.000000,...,,,,,,,,,,


In [17]:
t2w_df.shape, dwi_df.shape, adc_df.shape

((32662, 131), (8036, 131), (11167, 140))

In [18]:
#Calculate percentage of t2w, dwi and adc images based on merged
t2w_perc = (t2w_df.shape[0]/merged_df.shape[0])*100
dwi_perc = (dwi_df.shape[0]/merged_df.shape[0])*100
adc_perc = (adc_df.shape[0]/merged_df.shape[0])*100

t2w_perc, dwi_perc, adc_perc

(62.97503133134098, 15.494071146245059, 21.53089752241396)

# DICOM Metadata Analysis

## T2w Images

In [37]:
manufacturer_counts = t2w_df["Manufacturer"].value_counts(normalize=True, dropna=True).to_dict()
manufacturer_counts = {key: manufacturer_counts[key] for key in sorted(manufacturer_counts)}
manufacturer_counts["GE"] = manufacturer_counts.pop("GE MEDICAL SYSTEMS")
manufacturer_counts["Philips"] += manufacturer_counts.pop("Philips Medical Systems")
manufacturer_counts["Siemens"] += manufacturer_counts.pop("SIEMENS") + manufacturer_counts.pop("Siemens HealthCare GmbH")
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_pie_simple(label,
                count, 
                title="Distribution of images by Manufacturer",
                size=(600,600),
                out_filename="images_by_manufacturer_t2w")

In [38]:
manufacturer_counts = t2w_df["ManufacturerModelName"].value_counts(normalize=False, dropna=True).to_dict()
manufacturer_counts["SIGNA Voyager"] += manufacturer_counts.pop("Signa Voyager")
manufacturer_counts = {key: manufacturer_counts[key] for key in sorted(manufacturer_counts, key=lambda x: manufacturer_counts.get(x), reverse=True)}
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_bar_simple(label, count, 
                title="Distribution of images by Manufacturer's Model Name",
                size=(600, 600),
                x_axis = "Model",
                out_filename="images_by_manufacturermodelname_t2w")

In [39]:
manufacturer_counts = t2w_df["MagneticFieldStrength"].value_counts(normalize=True, dropna=True).to_dict()
manufacturer_counts = {f"{key}T": manufacturer_counts[key] for key in sorted(manufacturer_counts)}
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_pie_simple(label, count, 
                title="Distribution of images by Magnetic Field Strength", 
                size=(600, 600),
                out_filename="images_by_fieldstrength_t2w")

## DWI Images

In [41]:
manufacturer_counts = dwi_df["Manufacturer"].value_counts(normalize=True, dropna=True).to_dict()
manufacturer_counts = {key: manufacturer_counts[key] for key in sorted(manufacturer_counts)}
manufacturer_counts["GE"] = manufacturer_counts.pop("GE MEDICAL SYSTEMS")
manufacturer_counts["Philips"] += manufacturer_counts.pop("Philips Medical Systems")
manufacturer_counts["Siemens"] += manufacturer_counts.pop("SIEMENS") + manufacturer_counts.pop("Siemens HealthCare GmbH")
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_pie_simple(label, count, title="Distribution of images by Manufacturer",  size=(600, 600), out_filename="images_by_manufacturer_dwi")

In [43]:
manufacturer_counts = dwi_df["ManufacturerModelName"].value_counts(normalize=False, dropna=True).to_dict()
manufacturer_counts = {key: manufacturer_counts[key] for key in sorted(manufacturer_counts, key=lambda x: manufacturer_counts.get(x), reverse=True)}
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_bar_simple(label, count, 
                title="Distribution of images by Manufacturer's Model Name",
                size=(600, 600),
                x_axis = "Model",
                out_filename="images_by_manufacturermodelname_dwi")

In [44]:
manufacturer_counts = dwi_df["MagneticFieldStrength"].value_counts(normalize=True, dropna=True).to_dict()
manufacturer_counts = {f"{key}T": manufacturer_counts[key] for key in sorted(manufacturer_counts)}
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_pie_simple(label, count, 
                title="Distribution of images by Magnetic Field Strength", 
                size=(600, 600),
                out_filename="images_by_fieldstrength_dwi")

## ADC Images

In [47]:
manufacturer_counts = adc_df["Manufacturer"].value_counts(normalize=True, dropna=True).to_dict()
manufacturer_counts = {key: manufacturer_counts[key] for key in sorted(manufacturer_counts)}
manufacturer_counts["GE"] = manufacturer_counts.pop("GE MEDICAL SYSTEMS")
manufacturer_counts["Philips"] += manufacturer_counts.pop("Philips Medical Systems")
manufacturer_counts["Siemens"] += manufacturer_counts.pop("SIEMENS")# + manufacturer_counts.pop("Siemens HealthCare GmbH")
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_pie_simple(label, count, title="Distribution of images by Manufacturer",  size=(600, 600), out_filename="images_by_manufacturer_adc")

In [48]:
manufacturer_counts = adc_df["ManufacturerModelName"].value_counts(normalize=False, dropna=True).to_dict()
manufacturer_counts = {key: manufacturer_counts[key] for key in sorted(manufacturer_counts, key=lambda x: manufacturer_counts.get(x), reverse=True)}
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_bar_simple(label, count, 
                title="Distribution of images by Manufacturer's Model Name",
                size=(600, 600),
                x_axis = "Model",
                out_filename="images_by_manufacturermodelname_adc")

In [49]:
manufacturer_counts = adc_df["MagneticFieldStrength"].value_counts(normalize=True, dropna=True).to_dict()
manufacturer_counts = {f"{key}T": manufacturer_counts[key] for key in sorted(manufacturer_counts)}
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_pie_simple(label, count, 
                title="Distribution of images by Magnetic Field Strength", 
                size=(600, 600),
                out_filename="images_by_fieldstrength_adc")

## T2w+DWI+ADC images

In [50]:
complete_df = pd.concat([t2w_df, dwi_df, adc_df])

In [52]:
manufacturer_counts = complete_df["Manufacturer"].value_counts(normalize=True, dropna=True).to_dict()
manufacturer_counts = {key: manufacturer_counts[key] for key in sorted(manufacturer_counts)}
manufacturer_counts["GE"] = manufacturer_counts.pop("GE MEDICAL SYSTEMS")
manufacturer_counts["Philips"] += manufacturer_counts.pop("Philips Medical Systems")
manufacturer_counts["Siemens"] += manufacturer_counts.pop("SIEMENS") + manufacturer_counts.pop("Siemens HealthCare GmbH")
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_pie_simple(label, count, title="Distribution of images by Manufacturer",  size=(600, 600), out_filename="images_by_manufacturer_complete")

In [53]:
manufacturer_counts = complete_df["ManufacturerModelName"].value_counts(normalize=False, dropna=True).to_dict()
manufacturer_counts = {key: manufacturer_counts[key] for key in sorted(manufacturer_counts, key=lambda x: manufacturer_counts.get(x), reverse=True)}
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_bar_simple(label, count, 
                title="Distribution of images by Manufacturer's Model Name",
                size=(600, 600),
                x_axis = "Model",
                out_filename="images_by_manufacturermodelname_complete")

In [54]:
manufacturer_counts = complete_df["MagneticFieldStrength"].value_counts(normalize=True, dropna=True).to_dict()
manufacturer_counts = {f"{key}T": manufacturer_counts[key] for key in sorted(manufacturer_counts)}
label, count = list(manufacturer_counts.keys()), list(manufacturer_counts.values())
plot_pie_simple(label, count, 
                title="Distribution of images by Magnetic Field Strength", 
                size=(600, 600),
                out_filename="images_by_fieldstrength_complete")

# Images Analysis

## T2w

In [57]:
path_p0052021 = "/mnt/ceib/datalake/FISABIO_datalake/prueba/p0052021_reborn"
path_p0042021 = "/mnt/ceib/datalake/FISABIO_datalake/prueba/p0042021"

images_paths = t2w_df.copy().apply(lambda x: os.path.join(path_p0052021, x['subject'], x['session'],'mim-mr','anat', x['image']) if 'p0052021' in x["PatientComments"] else os.path.join(path_p0042021, x['subject'], x['session'],'mim-mr','anat', x['image']), axis=1)

# Divide into axial, coronal and sagital images
data_list = []
for path in images_paths.values:
    files = {}
    image_name = path.split('/')[-1]
    if 'sag' in image_name:
        files['sagital'] = path
    elif 'cor' in image_name:
        files['coronal'] = path
    elif 'ax' in image_name:
        files['axial'] = path
    data_list.append(files)

transforms = monai.transforms.Compose(
            [
                monai.transforms.LoadImaged(keys=["sagital", "coronal", "axial"],allow_missing_keys=True,image_only=False),
                ]
)

train_ds = monai.data.Dataset(
    data=data_list,
    transform=transforms,
)

In [59]:
t2w_df

Unnamed: 0,subject,session,image,modality,AccessionNumber,AcquisitionDate,AcquisitionDuration,AcquisitionMatrix,AcquisitionNumber,AcquisitionTime,...,PixelAspectRatio,AcquisitionDateTime,BurnedInAnnotation,PatientIdentityRemoved,PixelPaddingValue,ContentQualification,ContrastBolusVolume,WindowCenterWidthExplanation,TimeOfLastCalibration,PlanarConfiguration
0,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-801_bp-prost...,T2w,3863,20190429.0,167.241257,"[0, 240, 176, 0]",8.0,110304.120000,...,,,,,,,,,,
1,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-601_bp-prost...,T2w,3863,20190429.0,156.429977,"[0, 240, 181, 0]",6.0,105601.790000,...,,,,,,,,,,
2,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-301_bp-prost...,T2w,3863,20190429.0,276.815826,"[256, 0, 0, 190]",3.0,104739.430000,...,,,,,,,,,,
3,sub-003629,ses-003863,sub-003629_ses-003863_acq-fse_run-501_bp-prost...,T2w,3863,20190429.0,125.143990,"[0, 224, 165, 0]",5.0,105347.510000,...,,,,,,,,,,
4,sub-005730,ses-006206,sub-005730_ses-006206_acq-propeller_run-7_bp-p...,T2w,6206,20160129.0,,"[0, 288, 288, 0]",1.0,134830.999486,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32657,sub-000003,ses-000003,sub-000003_ses-000003_acq-fse_run-8_bp-arm_vp-...,T2w,3,20140307.0,,"[0, 320, 256, 0]",1.0,82739.000000,...,,,,,,,,,,
32658,sub-000111,ses-000111,sub-000111_ses-000111_acq-fse_run-5_bp-abdomen...,T2w,111,20150624.0,,"[0, 320, 256, 0]",1.0,194350.000000,...,,,,,,,,,,
32659,sub-000111,ses-000111,sub-000111_ses-000111_acq-fse_run-8_bp-abdomen...,T2w,111,20150624.0,,"[320, 0, 0, 224]",1.0,195541.000000,...,,,,,,,,,,
32660,sub-000120,ses-000120,sub-000120_ses-000120_acq-fse_run-8_bp-abdomen...,T2w,120,20140924.0,,"[320, 0, 0, 224]",1.0,81911.000000,...,,,,,,,,,,


### Size Variability

In [62]:
image_sizes_axial = []
image_sizes_coronal = []
image_sizes_sagital = []
for image in tqdm(train_ds):
    if 'sagital' in image.keys():
        image_sizes_sagital.append(list(image['sagital'].shape))
    if 'coronal' in image.keys():
        image_sizes_coronal.append(list(image['coronal'].shape))
    if 'axial' in image.keys():
        image_sizes_axial.append(list(image['axial'].shape))

  0%|          | 0/32662 [00:00<?, ?it/s]

In [63]:
widths_axial = [size[0] for size in image_sizes_axial]
heights_axial = [size[1] for size in image_sizes_axial]
depths_axial = [size[2] for size in image_sizes_axial]

widths_coronal = [size[0] for size in image_sizes_coronal]
heights_coronal = [size[1] for size in image_sizes_coronal]
depths_coronal = [size[2] for size in image_sizes_coronal]

widths_sagital = [size[0] for size in image_sizes_sagital]
heights_sagital = [size[1] for size in image_sizes_sagital]
depths_sagital = [size[2] for size in image_sizes_sagital]

In [64]:
mean_width_axial = np.mean(widths_axial)
std_width_axial = np.std(widths_axial)
mean_height_axial = np.mean(heights_axial)
std_height_axial = np.std(heights_axial)
mean_depth_axial = np.mean(depths_axial)
std_depth_axial = np.std(depths_axial)

mean_width_coronal = np.mean(widths_coronal)
std_width_coronal = np.std(widths_coronal)
mean_height_coronal = np.mean(heights_coronal)
std_height_coronal = np.std(heights_coronal)
mean_depth_coronal = np.mean(depths_coronal)
std_depth_coronal = np.std(depths_coronal)

mean_width_sagital = np.mean(widths_sagital)
std_width_sagital = np.std(widths_sagital)
mean_height_sagital = np.mean(heights_sagital)
std_height_sagital = np.std(heights_sagital)
mean_depth_sagital = np.mean(depths_sagital)
std_depth_sagital = np.std(depths_sagital)

In [66]:
x_ticks = ['Width', 'Height', 'Depth']  # Base positions for each group of bars
y_axis_title = 'Size (pixels)'
#random order of colors
colors_sub = np.random.choice(colors, 3, replace=False)
plot_variability_plotly_grouped( {'Axial': [mean_width_axial, mean_height_axial, mean_depth_axial],
                                      'Coronal': [mean_width_coronal, mean_height_coronal, mean_depth_coronal],
                                      'Sagital': [mean_width_sagital, mean_height_sagital, mean_depth_sagital]},
                                    {'Axial': [std_width_axial, std_height_axial, std_depth_axial],
                                      'Coronal': [std_width_coronal, std_height_coronal, std_depth_coronal],
                                      'Sagital': [std_width_sagital, std_height_sagital, std_depth_sagital]},
                                    ['Axial', 'Coronal', 'Sagital'],
                                    colors_sub,
                                    'size_T2w_grouped',
                                    x_ticks,
                                    y_axis_title,
                                    save_as_pdf=True)

### In Plane Reslution (Spacing)

In [67]:
image_spacings_axial = []
image_spacings_coronal = []
image_spacings_sagital = []

for image in tqdm(train_ds):
    if 'sagital' in image.keys():
        image_spacings_sagital.append(list(image['sagital_meta_dict']['pixdim'][1:4]))
    if 'coronal' in image.keys():
        image_spacings_coronal.append(list(image['coronal_meta_dict']['pixdim'][1:4]))
    if 'axial' in image.keys():
        image_spacings_axial.append(list(image['axial_meta_dict']['pixdim'][1:4]))

  0%|          | 0/32662 [00:00<?, ?it/s]

In [68]:
xs_axial = [dim[0] for dim in image_spacings_axial]
ys_axial = [dim[1] for dim in image_spacings_axial]
zs_axial = [dim[2] for dim in image_spacings_axial]

xs_coronal = [dim[0] for dim in image_spacings_coronal]
ys_coronal = [dim[1] for dim in image_spacings_coronal]
zs_coronal = [dim[2] for dim in image_spacings_coronal]

xs_sagital = [dim[0] for dim in image_spacings_sagital]
ys_sagital = [dim[1] for dim in image_spacings_sagital]
zs_sagital = [dim[2] for dim in image_spacings_sagital]

In [69]:
mean_x_axial = np.mean(xs_axial)
std_x_axial = np.std(xs_axial)
mean_y_axial = np.mean(ys_axial)
std_y_axial = np.std(ys_axial)
mean_z_axial = np.mean(zs_axial)
std_z_axial = np.std(zs_axial)

mean_x_coronal = np.mean(xs_coronal)
std_x_coronal = np.std(xs_coronal)
mean_y_coronal = np.mean(ys_coronal)
std_y_coronal = np.std(ys_coronal)
mean_z_coronal = np.mean(zs_coronal)
std_z_coronal = np.std(zs_coronal)

mean_x_sagital = np.mean(xs_sagital)
std_x_sagital = np.std(xs_sagital)
mean_y_sagital = np.mean(ys_sagital)
std_y_sagital = np.std(ys_sagital)
mean_z_sagital = np.mean(zs_sagital)
std_z_sagital = np.std(zs_sagital)

In [70]:
import random
x_ticks = ['X', 'Y', 'Z']  # Base positions for each group of bars
y_axis_title = 'Spacing [mm]'
colors_sub = np.random.choice(colors, 3, replace=False)
plot_variability_plotly_grouped( {'Axial': [mean_x_axial, mean_y_axial, mean_z_axial],
                                'Coronal': [mean_x_coronal, mean_y_coronal, mean_z_coronal],
                                'Sagital': [mean_x_sagital, mean_y_sagital, mean_z_sagital]},
                                {'Axial': [std_x_axial, std_y_axial, std_z_axial],
                                'Coronal': [std_x_coronal, std_y_coronal, std_z_coronal],
                                'Sagital': [std_x_sagital, std_y_sagital, std_z_sagital]},
                                ['Axial', 'Coronal', 'Sagital'],
                                colors_sub,
                                'spacing_T2w_grouped',
                                x_ticks,
                                y_axis_title,
                                save_as_pdf=True)

## DWI and ADC

In [77]:
def construct_adc_path(row, path_p0052021, path_p0042021):
    if row["modality"] == "adc":
        if 'p0052021' in row["PatientComments"]:
            return os.path.join(path_p0052021, row['subject'], row['session'], 'mim-mr', 'dwi', row['image']) 
        else:
            return os.path.join(path_p0042021, row['subject'], row['session'], 'mim-mr', 'dwi', row['image']) 
    else:
        if 'p0052021' in row["PatientComments"]:
            return os.path.join(path_p0052021, "derivatives", "creating_adc", row['subject'], row['session'], 'mim-mr', 'dwi', row['image'])  
        else:
            return os.path.join(path_p0042021, "derivatives", "creating_adc", row['subject'], row['session'], 'mim-mr', 'dwi', row['image'])

In [80]:
path_p0052021 = "/mnt/ceib/datalake/FISABIO_datalake/prueba/p0052021_reborn"
path_p0042021 = "/mnt/ceib/datalake/FISABIO_datalake/prueba/p0042021"

images_paths_dwi = dwi_df.copy().apply(lambda x: os.path.join(path_p0052021, x['subject'], x['session'],'mim-mr','dwi', x['image']) if 'p0052021' in x["PatientComments"] else os.path.join(path_p0042021, x['subject'], x['session'],'mim-mr','dwi', x['image']), axis=1)

images_paths_adc = adc_df.copy().apply(
    lambda x: construct_adc_path(x, path_p0052021, path_p0042021),
    axis=1
)

images_paths = pd.concat([images_paths_dwi, images_paths_adc])

# Divide into axial, coronal and sagital images
data_list = []
for path in images_paths.values:
    files = {}
    image_name = path.split('/')[-1]
    if 'adc' in image_name:
        files['adc'] = path
    elif 'bvalue' in image_name:
        files['dwi'] = path
    data_list.append(files)

transforms = monai.transforms.Compose(
            [
                monai.transforms.LoadImaged(keys=["adc", "dwi"],allow_missing_keys=True,image_only=False),
                ]
)

train_ds = monai.data.Dataset(
    data=data_list,
    transform=transforms,
)

### Size Variability

In [82]:
image_sizes_dwi = []
image_sizes_adc = []
for image in tqdm(train_ds):
    if 'dwi' in image.keys():
        image_sizes_dwi.append(list(image['dwi'].shape))
    if 'adc' in image.keys():
        image_sizes_adc.append(list(image['adc'].shape))

  0%|          | 0/19203 [00:00<?, ?it/s]

In [83]:
widths_dwi = [size[0] for size in image_sizes_dwi]
heights_dwi = [size[1] for size in image_sizes_dwi]
depths_dwi = [size[2] for size in image_sizes_dwi]

widths_adc = [size[0] for size in image_sizes_adc]
heights_adc = [size[1] for size in image_sizes_adc]
depths_adc = [size[2] for size in image_sizes_adc]

In [84]:
mean_width_dwi = np.mean(widths_dwi)
std_width_dwi = np.std(widths_dwi)
mean_height_dwi = np.mean(heights_dwi)
std_height_dwi = np.std(heights_dwi)
mean_depth_dwi = np.mean(depths_dwi)
std_depth_dwi = np.std(depths_dwi)

mean_width_adc = np.mean(widths_adc)
std_width_adc = np.std(widths_adc)
mean_height_adc = np.mean(heights_adc)
std_height_adc = np.std(heights_adc)
mean_depth_adc = np.mean(depths_adc)
std_depth_adc = np.std(depths_adc)



In [85]:
x_ticks = ['Width', 'Height', 'Depth']  # Base positions for each group of bars
y_axis_title = 'Size (pixels)'
#random order of colors
colors_sub = np.random.choice(colors, 3, replace=False)
plot_variability_plotly_grouped( {'DWI': [mean_width_dwi, mean_height_dwi, mean_depth_dwi],
                                      'ADC': [mean_width_adc, mean_height_adc, mean_depth_adc]},
                                    {'DWI': [std_width_dwi, std_height_dwi, std_depth_dwi],
                                      'ADC': [std_width_adc, std_height_adc, std_depth_adc]},
                                    ['DWI', 'ADC'],
                                    colors_sub,
                                    'size_DWI_ADC_grouped',
                                    x_ticks,
                                    y_axis_title,
                                    save_as_pdf=True)

### In plane Resolution

In [86]:
image_spacings_adc = []
image_spacings_dwi = []

for image in tqdm(train_ds):
    if 'dwi' in image.keys():
        image_spacings_dwi.append(list(image['dwi_meta_dict']['pixdim'][1:4]))
    if 'adc' in image.keys():
        image_spacings_adc.append(list(image['adc_meta_dict']['pixdim'][1:4]))
    

  0%|          | 0/19203 [00:00<?, ?it/s]

In [87]:
xs_dwi = [dim[0] for dim in image_spacings_dwi]
ys_dwi = [dim[1] for dim in image_spacings_dwi]
zs_dwi = [dim[2] for dim in image_spacings_dwi]

xs_adc = [dim[0] for dim in image_spacings_adc]
ys_adc = [dim[1] for dim in image_spacings_adc]
zs_adc = [dim[2] for dim in image_spacings_adc]

In [88]:
mean_x_dwi = np.mean(xs_dwi)
std_x_dwi = np.std(xs_dwi)
mean_y_dwi = np.mean(ys_dwi)
std_y_dwi = np.std(ys_dwi)
mean_z_dwi = np.mean(zs_dwi)
std_z_dwi = np.std(zs_dwi)

mean_x_adc = np.mean(xs_adc)
std_x_adc = np.std(xs_adc)
mean_y_adc = np.mean(ys_adc)
std_y_adc = np.std(ys_adc)
mean_z_adc = np.mean(zs_adc)
std_z_adc = np.std(zs_adc)


In [89]:
plot_variability_plotly_grouped( {'DWI': [mean_x_dwi, mean_y_dwi, mean_z_dwi],
                                'ADC': [mean_x_adc, mean_y_adc, mean_z_adc]},
                                {'DWI': [std_x_dwi, std_y_dwi, std_z_dwi],
                                'ADC': [std_x_adc, std_y_adc, std_z_adc]},
                                ['DWI', 'ADC'],
                                colors_sub,
                                'spacing_DWI_ADC_grouped',
                                x_ticks,
                                y_axis_title,
                                save_as_pdf=True)

## Histogram by Manufacturer

# Clinical Variables Analysis

In [110]:
clinical = pd.read_csv("/home/jaalzate/Prostate_Cancer_TFM/Files/Own_data/Clinical_Variables.tsv", sep="\t")

In [114]:
clinical['label_session'].nunique()

10308

In [91]:
csPC_counts = clinical["csPC"].value_counts(dropna=False).to_dict()
csPC_counts = {key: csPC_counts[key] for key in sorted(csPC_counts)}
label, count = [i for i in range(len(csPC_counts.keys()))], list(csPC_counts.values())
plot_bar_simple(label, count, 
                title="Distribution of Clinically Significant Prostate Cancer label",
                x_axis = "Value", 
                variable_names = list(csPC_counts.keys()),
                size=(800, 400),
                out_filename="distribution_csPC")

In [92]:
pir_counts = clinical["PIR"].value_counts(dropna=False).to_dict()
pir_counts = {key: pir_counts[key] for key in sorted(pir_counts)}
label, count = [i for i in range(len(pir_counts.keys()))], list(pir_counts.values())
plot_bar_simple(label, count, 
                title="Distribution of PI-RADS level",
                x_axis = "Level", 
                variable_names = list(pir_counts.keys()),
                size=(800, 400),
                out_filename="distribution_pirads")

In [93]:
tb_counts = clinical["TB"].value_counts(dropna=False).to_dict()
tb_counts = {key: tb_counts[key] for key in sorted(tb_counts)}
label, count = [i for i in range(len(tb_counts.keys()))], list(tb_counts.values())
plot_bar_simple(label, count, 
                title="Distribution of Post-MRI Biopsies",
                x_axis = "Amount", 
                variable_names = list(tb_counts.keys()),
                size=(800, 400),
                out_filename="distribution_tb")

In [94]:
plot_box_simple("Age", clinical["ED"], 
                title="Box Plot for Patient Age", 
                x_axis = "Years",
                size=(800, 400), 
                out_filename="boxplot_age")

In [95]:
plot_box_simple("PSA", clinical["PSA"], 
                title="Box Plot for Prostate-specific Antigen",
                x_axis = "ng/mL",
                size=(800, 400), 
                out_filename="boxplot_psa")

In [96]:
filtered_psa = clinical["PSA"].loc[clinical["PSA"] < clinical["PSA"].quantile(0.99)]
plot_box_simple("PSA", filtered_psa, 
                title="Box Plot for Prostate-specific Antigen",
                x_axis = "ng/mL",
                size=(800, 400), 
                out_filename="boxplot_filtered_psa")

In [97]:
dep_counts = clinical["dep"].value_counts(normalize=True, dropna=True).to_dict()
dep_counts = {key: dep_counts[key] for key in sorted(dep_counts)}
label, count = list(dep_counts.keys()), list(dep_counts.values())
plot_pie_simple(label, count, 
                title="Distribution of images by Source Department", 
                size=(600, 600),
                out_filename="images_by_sourcedepartment")

In [98]:
import numpy as np
import plotly.express as px
from datetime import datetime, date

dates = list(clinical["F_RM"].map(lambda x: datetime.strptime(x,"%Y-%m-%d").date()).values)

to_timestamp = np.vectorize(lambda x: (x - date(1970, 1, 1)).total_seconds())
from_timestamp = np.vectorize(lambda x: datetime.utcfromtimestamp(x))

## Compute the histogram
hist, bin_edges = np.histogram(to_timestamp(dates), bins=20)

bins = 0.5 * (bin_edges[:-1] + bin_edges[1:])

# Create traces
trace = go.Bar(x=from_timestamp(bins), y=hist, text=hist, textposition="outside", marker_color=colors[0])

# Create layout
layout = go.Layout(
    title='Images by Acquisition Date',
    xaxis_title='Date',
    yaxis_title='Frequency'
)

# Create figure and add traces
fig = go.Figure(data=trace, layout=layout)

fig.update_layout(
    template='plotly_white',
    height=500
)

# Show figure
fig.show()

fig.write_image(os.path.join(figures_dir,"images_by_acq_date."+extension),scale=6)

In [99]:
filtered_vp = clinical["VP"].loc[clinical["VP"] < clinical["VP"].quantile(0.9)]
plot_box_simple("PV", filtered_vp, 
                title="Box Plot for Prostate Volume",
                x_axis = f"cm\N{SUPERSCRIPT THREE}",
                size=(800, 400), 
                out_filename="boxplot_filtered_vp")

## Extract prostate volume from masks

In [104]:
def get_seg_prostate_volume(subject, session):
    transforms = monai.transforms.Compose([monai.transforms.LoadImaged(keys=["seg"],allow_missing_keys=True,image_only=False)])
    
    #base_path_p0042021 = f"/mnt/ceib/datalake/FISABIO_datalake/p0042021/derivatives/prostate_segmentation/sub-{subject:06d}/ses-{session:06d}/mim-mr/anat/sub-{subject:06d}_ses-{session:06d}_*-ax_mod-T2w_desc-SEG.nii.gz"
    base_path = f"/mnt/ceib/datalake/FISABIO_datalake/prueba/p0052021_reborn/derivatives/prostate_segmentation/sub-{subject:06d}/ses-{session:06d}/mim-mr/anat/sub-{subject:06d}_ses-{session:06d}_*-ax_mod-T2w_desc-SEG.nii.gz"

    data = [{"seg": filepath} for filepath in [path for path in glob(base_path)]]
    if not data:
        return None
    volume = []
    for segmentation in data:
        transformed_data = transforms(segmentation)
        seg = transformed_data['seg']
        res = np.prod(list(transformed_data['seg_meta_dict']['pixdim'][1:4])) / (10**3) # cm3
        volume.append(res * np.sum(seg))
    return np.mean(volume)

In [105]:
clinical["VP_segmentation"] = clinical.apply(lambda x: get_seg_prostate_volume(x.label_subject, x.label_session), axis=1)
# plot_box_simple("PV", clinical["VP_segmentation"], 
#                 title="Box Plot for Prostate Volume (computed from automatic segmentation)",
#                 x_axis = f"cm\N{SUPERSCRIPT THREE}",
#                 size=(800, 400), 
#                 out_filename="boxplot_vp_segmentation")

In [108]:
plot_box_simple("PV", clinical["VP_segmentation"], 
                title="Box Plot for Prostate Volume (computed from automatic segmentation)",
                x_axis = f"cm\N{SUPERSCRIPT THREE}",
                size=(800, 400), 
                out_filename="boxplot_vp_segmentation")