In [10]:
import pandas as pd
from scipy.stats import zscore
import re
import seaborn as sns
import matplotlib.pyplot as plt
% pylab inline

Populating the interactive namespace from numpy and matplotlib


In [11]:
# Analysis parameters that select which pre-computed feature table to load.
# NOTE(review): "tp" presumably means MRI time point and the bin widths are
# presumably intensity-discretisation settings -- confirm with the data owner.
the_mri_tp = 2
the_mri_bin_width = 5
the_pet_bin_width = 0.1

# Build the path to the combined PET/MRI table and read it.
rootdir = '/Users/shuang/Documents/Proj_Radiomics/Data/her2'
im_dir = '{}/her2_Analysis/PETMRI/PETbinwidth{:.1f}_MRItp{}_binwidth{}'.format(
    rootdir,
    the_pet_bin_width,
    the_mri_tp,
    the_mri_bin_width,
)
fname = '{}/data_all.csv'.format(im_dir)
df_data = pd.read_csv(fname)

# Feature columns are the ones whose name *starts* with one of these prefixes
# (pat.match anchors at the beginning of the string).
pat = re.compile('texture_|FOstats_|ShapeSize_')
feat_names = [col for col in df_data.columns if pat.match(col)]
feat_tag = 'pet_mr_radiomics'

# Standardise every feature column to a z-score; df_data is overwritten
# in place, so the raw feature values are no longer available afterwards.
df_data[feat_names] = df_data[feat_names].apply(zscore)

# Outcome column names for years 1 through 5: 'DF_1yr' ... 'DF_5yr'.
df_yr = range(1, 6)
outcome_name_lst = ['DF_{}yr'.format(yr) for yr in df_yr]

In [4]:
# Scratch cell for exploring a correlation matrix. Note that the data used
# here is synthetic Gaussian noise (100 rows x 26 columns labelled 'A'..'Z'),
# not the PET/MR feature table.
sns.set(style='white')

from string import ascii_letters

# Fixed seed so the synthetic frame is the same on every run.
rs = np.random.RandomState(33)
d = pd.DataFrame(
    data=rs.normal(loc=0.0, scale=1.0, size=(100, 26)),
    columns=list(ascii_letters)[26:],
)

# Pairwise correlation between all columns of the synthetic frame.
corr = d.corr()

In [35]:
# Reshape the feature table into two wide frames, df_mri and df_pet, that
# share the same row index and the same feature-name columns, so that
# PET and MR versions of a feature can be compared column by column.

# reset_index() (not inplace) returns a new frame: the column slice of df_data
# is never mutated, and re-running the cell gives the same result. It also
# materialises the row index as an 'index' column used as the melt/pivot key.
the_df = df_data[feat_names].reset_index()

# Long format: one row per (row index, feature column) pair.
df_melt = pd.melt(the_df, id_vars=['index'], value_vars=feat_names)

# Split each column name into '<feat_name>_<modality>'. The first group is
# greedy, so the split happens at the last underscore that is followed by a
# word character. Names that do not match get NaN in both new columns.
name_parts = df_melt['variable'].str.extract(r'(.+)_(\w+)', expand=True)
df_melt['feat_name'] = name_parts[0]
df_melt['modality'] = name_parts[1]

# MRI features, back to wide format (rows: sample index, columns: feat_name).
df_mri = df_melt.loc[df_melt['modality'] == 'mri', :]
df_mri = df_mri.pivot(index='index', columns='feat_name', values='value')
radiomic_feats = df_mri.columns.tolist()
print(df_mri.columns)

# PET features, same layout.
df_pet = df_melt.loc[df_melt['modality'] == 'pet', :]
df_pet = df_pet.pivot(index='index', columns='feat_name', values='value')
# Put the PET columns in the MRI column order. reindex() keeps the behavior
# of the removed `.ix[:, labels]` accessor: a feature that has no PET
# counterpart becomes an all-NaN column instead of raising KeyError.
df_pet = df_pet.reindex(columns=radiomic_feats)
print(df_pet.columns)


Index(['FOstats_energy', 'FOstats_entropy', 'FOstats_kurtosis', 'FOstats_max',
       'FOstats_mean', 'FOstats_min', 'FOstats_skewness', 'FOstats_uniformity',
       'FOstats_variance', 'ShapeSize_compactness1', 'ShapeSize_compactness2',
       'ShapeSize_max_euc_dis', 'ShapeSize_spherical_disproportion',
       'ShapeSize_sphericity', 'ShapeSize_surf_area_cm2',
       'ShapeSize_surface2volratio', 'ShapeSize_vol_cm3',
       'texture_autocorrelation_avg', 'texture_avg_intensity_avg',
       'texture_cluster_prominence_avg', 'texture_cluster_shade_avg',
       'texture_cluster_tendency_avg', 'texture_contrast_avg',
       'texture_correlation_avg', 'texture_diff_avg_avg',
       'texture_diff_entropy_avg', 'texture_diff_var_avg',
       'texture_dissimilarity_avg', 'texture_energy_avg',
       'texture_entropy_avg', 'texture_homogeneity1_avg',
       'texture_homogeneity2_avg', 'texture_idmn_avg', 'texture_idn_avg',
       'texture_imc1_avg', 'texture_imc2_avg', 'texture_inv_var_avg',


In [37]:
# Plot styling and colour map for the (currently disabled) heatmap below.
sns.set(style='white')
cmap = sns.diverging_palette(h_neg=220, h_pos=10, as_cmap=True)

# Correlate each MRI feature column with the PET column of the same name.
# The result is a Series with one coefficient per feature, not a matrix.
corr = df_mri.corrwith(df_pet)
print(corr)

# TODO: consider plotting a PET-vs-MR subset of the full df.corr() matrix
# instead of the per-feature values from df_mri.corrwith(df_pet).


# Disabled heatmap call, kept for reference:
# sns.heatmap(corr, cmap=cmap, vmax=.3, center=0,square=True, linewidths=.5, cbar_kws={"shrink": .5})

feat_name
FOstats_energy                      -0.041510
FOstats_entropy                      0.140521
FOstats_kurtosis                    -0.015174
FOstats_max                          0.090279
FOstats_mean                        -0.006514
FOstats_min                         -0.120271
FOstats_skewness                     0.011396
FOstats_uniformity                   0.125474
FOstats_variance                    -0.040290
ShapeSize_compactness1               0.676526
ShapeSize_compactness2               0.182927
ShapeSize_max_euc_dis                0.446434
ShapeSize_spherical_disproportion    0.235068
ShapeSize_sphericity                 0.227526
ShapeSize_surf_area_cm2              0.563796
ShapeSize_surface2volratio           0.329349
ShapeSize_vol_cm3                    0.630621
texture_autocorrelation_avg         -0.063262
texture_avg_intensity_avg           -0.010339
texture_cluster_prominence_avg      -0.011068
texture_cluster_shade_avg            0.105885
texture_cluster_tendency

In [31]:
# Inspect the column labels of df_mri.
# NOTE(review): the saved output for this cell lists the melted-frame columns
# ('index', 'variable', 'value', 'feat_name', 'modality'), which suggests it was
# produced before df_mri was pivoted -- re-run after the pivot cell to refresh.
df_mri.columns

Index(['index', 'variable', 'value', 'feat_name', 'modality'], dtype='object')