Agreement between the Dice similarity coefficient (DSC) and subjective ratings

Managing dataframes

In [None]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np

In [None]:
# Per-subject similarity table used throughout the notebook (atlas run, judging by the file name).
# Alternative input (nnU-Net run, same folder):
#   '/mnt/sda1/Repos/a-eye/Data/new_manual_annotations/similarity/sim_manual_nnunet_N43.csv'
dsc_csv_path = '/mnt/sda1/Repos/a-eye/Data/new_manual_annotations/similarity/sim_manual_atlas_N43.csv'
dsc_df = pd.read_csv(dsc_csv_path)

In [None]:
# Keep the first four rows of the similarity table as a separate frame.
dsc_N4_df = dsc_df.head(4)

In [None]:
# The first four rows are kept. (A disabled earlier step dropped them with
# dsc_df.iloc[4:] followed by reset_index(drop=True), because no ratings were
# available for those four subjects.)
# Subject ids are cast to str so they can be matched against the string ids of the ratings table.
dsc_df = dsc_df.astype({'Subject': str})

Statistics from dsc

In [None]:
# Mean, standard deviation and variance of the overall DSC column
# (pandas defaults, i.e. the sample estimators with ddof=1).
dsc_all = dsc_df['DSC_all']
dsc_mean, dsc_std, dsc_var = dsc_all.mean(), dsc_all.std(), dsc_all.var()
for label, value in (
    ('Mean DSC_all:', dsc_mean),
    ('Std DSC_all:', dsc_std),
    ('Variance DSC_all:', dsc_var),
):
    print(label, value)

Add those 4 subjects to the dataframe

In [None]:
# NOTE(review): unfinished placeholder, left commented out. pd.concat expects a list of
# DataFrames as its first argument, so this line would not run as written.
# dsc_df = pd.concat(dsc_df, )

In [None]:
# Load the 'brainmask_avg_data' sheet of the QC scores workbook.
ratings_df = pd.read_excel('/mnt/sda1/Repos/a-eye/Output/mri_qc/scores.xlsx', sheet_name='brainmask_avg_data')
# Strip the '.nii.gz' extension from every entry of the 'subject' column.
# regex=False forces a literal match: with regex matching (the default before pandas 2.0)
# each '.' is a wildcard, so a fragment such as 'Xnii_gz' inside an id would be removed too.
ratings_df['subject'] = ratings_df['subject'].str.replace('.nii.gz', '', regex=False)

Add the 4 subjects that have no rating in the spreadsheet to the ratings dataframe

In [None]:
# Ratings entered by hand for four subjects. Only 'subject' and 'rating' are set;
# every other column of ratings_df stays NaN for these rows.
manual_ratings = pd.DataFrame(
    [
        {'subject': '814997', 'rating': 3},
        {'subject': '814999', 'rating': 3},
        {'subject': '815173', 'rating': 2.5},
        {'subject': '815262', 'rating': 3.3},
    ]
)
# Skip subjects that are already present, so re-running this cell does not duplicate
# rows (duplicates would be repeated again by the merge on the subject id).
manual_ratings = manual_ratings[~manual_ratings['subject'].isin(ratings_df['subject'])]
if not manual_ratings.empty:
    # DataFrame.append was removed in pandas 2.0; a single pd.concat is the supported equivalent.
    ratings_df = pd.concat([ratings_df, manual_ratings], ignore_index=True)

In [None]:
# Join the similarity table with the ratings on the subject id
# (dsc_df['Subject'] against ratings_df['subject']). The default inner join
# keeps only subjects present in both tables.
merged_df = pd.merge(dsc_df, ratings_df, left_on='Subject', right_on='subject')

In [None]:
# Show subject id, overall DSC and rating side by side; merged_df itself is left unchanged.
print(merged_df.loc[:, ['Subject', 'DSC_all', 'rating']])

Plot

In [None]:
import plotly.express as px
from scipy.stats import shapiro

# normalize 'rating' column
# merged_df['rating'] = merged_df['rating'] / 4

# Significance level shared by every Shapiro-Wilk test below.
alpha = 0.05

# Tissue suffixes; each one selects the column 'DSC_<suffix>' of merged_df.
tissues = ['all','lens','globe','nerve','int_fat','ext_fat','lat_mus','med_mus','inf_mus','sup_mus']


def _report_normality(label, stat, p_value, alpha):
    """Print a Shapiro-Wilk result and state whether normality is rejected.

    Args:
        label: Sample name used in the printed message.
        stat: Test statistic returned by ``shapiro``.
        p_value: p-value returned by ``shapiro``.
        alpha: Significance level the p-value is compared against.

    Returns:
        True when ``p_value > alpha`` (H0 of normality is not rejected), False otherwise.
    """
    print(f'Statistics={stat}, p={p_value}')
    looks_gaussian = p_value > alpha
    if looks_gaussian:
        print(f'{label} sample looks Gaussian (fail to reject H0)')
    else:
        print(f'{label} sample does not look Gaussian (reject H0)')
    return looks_gaussian


# The rating sample is identical for every tissue, so it is tested once, outside the loop.
rating_stat, p2 = shapiro(merged_df['rating'])

for tissue in tissues:
    dsc_col = f'DSC_{tissue}'

    # Shapiro-Wilk normality check of this tissue's DSC values, then of the ratings.
    dsc_stat, p1 = shapiro(merged_df[dsc_col])
    dsc_is_gaussian = _report_normality(dsc_col, dsc_stat, p1, alpha)
    rating_is_gaussian = _report_normality('Rating', rating_stat, p2, alpha)

    # Pearson only when both samples pass the normality check; otherwise rank-based Spearman.
    # The printed value is the SQUARED coefficient (its sign is lost), so the label says so.
    if dsc_is_gaussian and rating_is_gaussian:
        pearson_r = merged_df[dsc_col].corr(merged_df['rating'])
        print(f'Squared Pearson correlation (r^2) between {dsc_col} and rating: {pearson_r**2}')
    else:
        spearman_rho = merged_df[dsc_col].corr(merged_df['rating'], method='spearman')
        print(f'Squared Spearman correlation (rho^2) between {dsc_col} and rating: {spearman_rho**2}')

    print('\n')

    # Scatter plot of DSC against rating on fixed axes (rating 0-4, DSC 0-1).
    fig = px.scatter(merged_df, x='rating', y=dsc_col)
    fig.update_layout(title=f'{dsc_col} vs Rating N={len(merged_df)}', xaxis_title='Rating', yaxis_title=dsc_col, xaxis_range=[0, 4], yaxis_range=[0, 1])
    # save as png
    # fig.write_image(f'/home/jaimebarranco/Desktop/new_manual_annotations/agreement_with_ratings/N43/atlas/{dsc_col}_vs_Rating.png')

    # fig.show()


Correlation ($R^2$) between `DSC_all` and rating

In [None]:
# Squared Pearson correlation (pandas' default method) between the overall DSC and the rating.
pearson_r = merged_df['DSC_all'].corr(merged_df['rating'])
r2 = pearson_r ** 2
print('R2:', r2)