In [1]:
import pandas as pd
import numpy as np
import glob
import re
from math import log, sqrt

In [2]:
# Gather the PET radiomics of the primary tumors (one CSV) and of the
# recurrent tumors (one JSON file per tumor) into a single table, ``df_all``,
# with one row per tumor and the columns: radiomic features, 'ptid_side'
# and 'tumor_type'.
# NOTE(review): rootdir is an absolute, machine-specific path.
rootdir = '/Users/shuang/Documents/Proj_Radiomics/Data/her2'

# Raw-string patterns: '\w' inside a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
pet_pat = re.compile(r'([\w.]+)_pet')
avg_pat = re.compile(r'([\w.]+)_avg')


def _strip_avg(name):
    """Return the part of ``name`` captured before '_avg', or ``name`` unchanged if there is no match."""
    found = avg_pat.search(name)
    return found.group(1) if found else name


# ---- primary tumors -------------------------------------------------------
fname1 = '{}/her2_Analysis/PETMRI/PETbinwidth0.1_MRItp2_binwidth5/data_all.csv'.format(rootdir)
df_prim_all = pd.read_csv(fname1)

# keep only the columns matching '<feature>_pet' and remember the captured
# feature name (each column is matched once instead of twice)
pet_matches = [(col, pet_pat.search(col)) for col in df_prim_all.columns.tolist()]
feat_names = [col for col, found in pet_matches if found]
new_feat_names = [found.group(1) for col, found in pet_matches if found]
newer_feat_names = [_strip_avg(ss) for ss in new_feat_names]

the_col_names = feat_names + ['ptid_side']
# explicit copy: the frame is modified below, and modifying a .loc slice of
# df_prim_all raises SettingWithCopyWarning
df_prim = df_prim_all.loc[:, the_col_names].copy()

# change feature name: '<feature>[_avg]_pet' -> '<feature>'
col_dict = dict(zip(feat_names, newer_feat_names))
df_prim = df_prim.rename(columns=col_dict)
df_prim['tumor_type'] = 'Primary'

# ---- recurrent tumors -----------------------------------------------------
json_dir = '{}/her2_ImageFeatures/IsoVoxelSize'.format(rootdir)
# sorted() makes the row order (and therefore the order of the patient list
# derived from it later) independent of the file-system listing order
all_jsons = sorted(glob.glob('{}/*.json'.format(json_dir)))
if not all_jsons:
    raise IOError('No JSON feature files found in {}'.format(json_dir))

# read every file first and concatenate once; growing a frame with
# DataFrame.append inside a loop is quadratic and append() no longer exists
# in pandas >= 2.0
df_recur = pd.concat([pd.read_json(jj) for jj in all_jsons], ignore_index=True)

# split the (min, max) pair into two scalar columns
df_recur['FOstats_min'] = df_recur['FOstats_minmax'].apply(lambda x: x[0])
df_recur['FOstats_max'] = df_recur['FOstats_minmax'].apply(lambda x: x[1])
df_recur = df_recur.drop(columns=['FOstats_minmax'])

# get the average of texture features: every 'texture_*' column is reduced
# with np.mean per row and stored as '<name>_avg'; the original columns are
# dropped afterwards
pat = re.compile('texture_')
texture_cols = [ss for ss in df_recur.columns.tolist() if pat.match(ss)]
for tc in texture_cols:
    df_recur[tc + '_avg'] = df_recur[tc].apply(np.mean)
df_recur = df_recur.drop(columns=texture_cols)

df_recur['tumor_type'] = df_recur['tumor_tag'].map(lambda x: '_'.join(['Recur', x]))
# label-based access (row['pt_id']) instead of positional x[0] on a
# label-indexed row
df_recur['ptid_side'] = df_recur[['pt_id', 'breast_side']].apply(
    lambda row: '{}_{}'.format(row['pt_id'], row['breast_side']), axis=1)

# drop the '_avg' suffix so the names line up with the primary-tumor columns
newer_feat_names = [_strip_avg(ss) for ss in df_recur.columns.tolist()]
col_dict = dict(zip(df_recur.columns.tolist(), newer_feat_names))
df_recur = df_recur.rename(columns=col_dict)

# restrict both frames to the same columns, in the same order
col_of_interest = df_prim.columns.tolist()
df_recur_oi = df_recur.loc[:, col_of_interest]
df_prim_oi = df_prim.loc[:, col_of_interest]

# combine primary and recur tumor DFs (row-wise stack, not a join)
df_all = pd.concat([df_prim_oi, df_recur_oi], ignore_index=True)
print(df_all)


     FOstats_energy  FOstats_entropy  FOstats_kurtosis  FOstats_mean  \
0     197984.031250         5.536868          5.234993      3.525060   
1     211952.109375         5.470107          5.270049      3.174406   
2         57.455063         1.763697          2.370392      1.091445   
3       7258.604492         5.099894          2.806678      2.700307   
4      57107.488281         5.084719          7.894037      2.232625   
5        983.184448         3.355637          3.422115      1.594757   
6       8460.021484         5.427511          5.978713      2.856201   
7      17500.703125         4.632359          5.544690      2.255796   
8      48165.585938         5.266244          2.678233      2.761572   
9       2454.211670         4.033516          4.632780      1.364780   
10      2425.740234         2.297966          5.669534      0.857146   
11      1980.467773         4.311808          3.249635      1.943283   
12       173.288330         2.303275          3.030634      0.60

In [3]:
# Radial scale of the circular plot: a value v is drawn at the radius
#     a * sqrt(log(v * 1E4)) + b
# where a and b are fitted so that v = 0.001 lands (up to rounding) on
# outer_radius and v = 100000 lands on inner_radius, i.e. larger values are
# drawn closer to the centre.
inner_radius = 90
outer_radius = 300 - 10

# sqrt-log transform of the two ends of the value range
minr, maxr = [sqrt(log(bound * 1E4)) for bound in (.001, 100000)]

# slope and intercept of the linear map from transformed value to radius
a = (outer_radius - inner_radius) / (minr - maxr)
b = inner_radius - a * maxr


def rad(mic):
    """Map value(s) ``mic`` to plot radius using the sqrt-log scale defined by ``a`` and ``b``.

    ``mic`` is passed through numpy functions, so a scalar or an array-like
    of values is accepted.
    """
    transformed = np.sqrt(np.log(mic * 1E4))
    return a * transformed + b

#### to do list
- get all the recur tumor radiomics into one data frame keyed by pt_id, tumor_tag, and tumor_type, with tumor_type set to 'recur_xxxx' where xxxx is the site of recurrence?
- get the appropriate data from data_all.csv and mark the tumor_type as 'prim' based on the pt_id from the recur tumor list (only take the PET radiomics)
- combine the two data frames by concatenation (this is NOT a JOIN or MERGE operation)
- use this as the input data for the bokeh circle plot to compare the radiomics across all tumors for each radiomic feature category

In [4]:
# Reshape the radiomics of ONE patient/side into the table used by the bokeh
# plot: one row per radiomic feature, one column per tumor type.
ptid_sides = list(df_recur_oi.ptid_side.unique())
print(ptid_sides)

# NOTE(review): the patient is selected by a hard-coded position (second
# entry of the list above).
ptidsd_oi = ptid_sides[1]
df1 = df_all[df_all['ptid_side'] == ptidsd_oi]

# every column except the two identifier columns is a radiomic feature.
# A plain ordered difference is used here: symmetric_difference would ADD an
# identifier name to the feature list if that column were ever missing, and a
# set has no defined order.
id_cols = ['ptid_side', 'tumor_type']
val_vars = [col for col in df1.columns.tolist() if col not in id_cols]

# long format: one row per (tumor, feature) pair
df2 = pd.melt(df1, id_vars=id_cols, value_vars=val_vars, var_name='Radiomics')

# wide format again, but transposed: features as rows, tumor types as columns
df3 = df2.pivot(index='Radiomics', columns='tumor_type', values='value')
df3 = df3.reset_index()

# categorize each feature by the prefix before its first run of underscores
# (the captured output shows 'FOstats', 'ShapeSize', 'texture').
# re.split always returns at least one element, so no fallback is needed.
df3['Radiomics_type'] = df3['Radiomics'].apply(lambda x: re.split('_+', x)[0])
print(df3)

# for tt in list(df1.tumor_type.unique()):
#     df3['{}_rad'.format(tt)] = df3[tt].apply(rad)
# print(df3)


['116_L', '117_L', '123_L', '13_R', '25_L', '30_L', '69_L', '99_R']
tumor_type                          Radiomics        Primary     Recur_Lung  \
0                              FOstats_energy   11532.794922    6372.353516   
1                             FOstats_entropy       5.128689       4.575003   
2                            FOstats_kurtosis       2.448121       6.384124   
3                                 FOstats_max       5.376147       6.113467   
4                                FOstats_mean       2.952009       1.400479   
5                                 FOstats_min       1.334765       0.470101   
6                            FOstats_skewness       0.545987       1.887966   
7                          FOstats_uniformity       0.031618       0.063987   
8                            FOstats_variance       0.896303       1.055872   
9                      ShapeSize_compactness1       0.539565       0.707344   
10                     ShapeSize_compactness2       0.324010   

In [5]:
# Configure bokeh's notebook output so that later show() calls display in this notebook.
from bokeh.io import output_notebook
output_notebook()

In [6]:
from bokeh.plotting import figure, show, output_file

In [7]:
# Keyword arguments for the bokeh figure: a square canvas without cartesian
# axes, spanning the same symmetric range in x and y.
width = height = 800
axis_span = (-420, 420)

PLOT_OPTS = {
    'plot_width': width,
    'plot_height': height,
    'title': "",
    'x_axis_type': None,
    'y_axis_type': None,
    'x_range': axis_span,
    'y_range': axis_span,
    'min_border': 0,
    'outline_line_color': "black",
    'background_fill_color': "#f0e1d2",
}

In [8]:
# Angular layout of the circle.
# The full turn is divided into one slice per radiomic feature (row of df3)
# plus one additional slice.
n_slices = len(df3) + 1
big_angle = 2.0 * np.pi / n_slices

# Each slice is subdivided into 2 * N + 1 equal parts, N being the number of
# distinct tumor types of the selected patient (e.g. 7 parts for 3 types), so
# the subdivision follows the number of categories drawn inside each slice.
N_tumor_type = len(df1.tumor_type.unique())
ndiv_small_angle = 2 * N_tumor_type + 1
small_angle = big_angle / ndiv_small_angle

p = figure(**PLOT_OPTS)

In [9]:
# no cartesian grid on the circular plot
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

from bokeh.palettes import Spectral3, Spectral4, Spectral5
from collections import OrderedDict

# one colour per tumor type; the palette is picked by the number of types
# (only 2 to 5 types have a palette entry)
tumor_types = df1.tumor_type.unique().tolist()
N_tumor_type = len(tumor_types)
tp_pal_dict = {
    2: ['#c64737', 'black'],
    3: Spectral3,
    4: Spectral4,
    5: Spectral5,
}
print(tp_pal_dict)

tumor_colors = tp_pal_dict[N_tumor_type]
tumor_color_dict = OrderedDict(zip(tumor_types, tumor_colors))
print(tumor_color_dict)

# one background colour per radiomic feature family (three colours available)
radiomics_colors = ['#deebf7','#e69584','#bdbdbd']
radiomics_types = df3.Radiomics_type.unique().tolist()
radiomics_color_dict = OrderedDict(zip(radiomics_types, radiomics_colors))
print(radiomics_color_dict)

# annular wedges: one background slice per feature, coloured by its family;
# `angles` holds the end angle of every slice, the start is one big_angle before
angles = np.pi/2 - big_angle/2 - df3.index.to_series()*big_angle
colors = [radiomics_color_dict[family] for family in df3.Radiomics_type]
slice_start = -big_angle+angles
p.annular_wedge(0, 0, inner_radius, outer_radius, slice_start, angles, color=colors)


{2: ['#c64737', 'black'], 3: ['#99d594', '#ffffbf', '#fc8d59'], 4: ['#2b83ba', '#abdda4', '#fdae61', '#d7191c'], 5: ['#2b83ba', '#abdda4', '#ffffbf', '#fdae61', '#d7191c']}
OrderedDict([('Primary', '#c64737'), (u'Recur_Lung', 'black')])
OrderedDict([('FOstats', '#deebf7'), ('ShapeSize', '#e69584'), ('texture', '#bdbdbd')])


In [10]:
# small wedges: inside every feature slice, one thin bar per tumor type.
# Bars take the sub-slots ending at 2N, 2N-2, ..., 2 (in units of
# small_angle), each one small_angle wide, and extend from inner_radius to
# the radius given by rad() for the feature value.
bar_slots = range(ndiv_small_angle - 1, 1, -2)
for slot, tumor in zip(bar_slots, df1.tumor_type.tolist()):
    bar_start = -big_angle + angles + (slot - 1) * small_angle
    bar_end = -big_angle + angles + slot * small_angle
    p.annular_wedge(0, 0, inner_radius, rad(df3[tumor]),
                    bar_start, bar_end,
                    color=tumor_color_dict[tumor])

# circular axes and labels: one white circle per decade from 1e-3 to 1e5,
# labelled with its value (the last decade gets a circle but no label)
labels = np.power(10.0, np.arange(-3, 6))
radii = rad(labels)
p.circle(0, 0, radius=radii, fill_color=None, line_color="white")
decade_text = list(map(str, labels[:-1]))
p.text(0, radii[:-1], decade_text,
       text_font_size="8pt", text_align="center", text_baseline="middle")

# radial axes: zero-width wedges (start angle == end angle) drawn as black
# lines at the start of every feature slice
axis_angles = -big_angle + angles
p.annular_wedge(0, 0, inner_radius - 10, outer_radius + 10,
                axis_angles, axis_angles, color="black")

  


In [11]:
# Scratch check of a candidate y-coordinate sequence for the tumor-type legend
# rows; this expression yields N_tumor_type + 1 values, spaced 18 apart.
range(18, 18-(N_tumor_type+1)*18, -18)

[18, 0, -18]

In [12]:
# recurred tumor labels
# xr = radii[0]*np.cos(np.array(-big_angle/2 + angles))
# yr = radii[0]*np.sin(np.array(-big_angle/2 + angles))
# label_angle=np.array(-big_angle/2+angles)
# label_angle[label_angle < -np.pi/2] += np.pi # easier to read labels on the left side
# p.text(xr, yr, df3.bacteria, angle=label_angle,
#        text_font_size="9pt", text_align="center", text_baseline="middle")

# OK, these hand drawn legends are pretty clunky, will be improved in future release

# radiomics type legend: one coloured dot plus its label per feature family.
# The positions are derived from the number of families instead of being
# hard-coded for exactly three, so all columns passed to a glyph always have
# the same length (three families give y = -340, -360, -380 as before).
n_radiomics_type = len(radiomics_color_dict)
legend_yy = [-340 - 20 * kk for kk in range(n_radiomics_type)]
p.circle([-40] * n_radiomics_type, legend_yy,
         color=list(radiomics_color_dict.values()), radius=5)
# list(...) around keys(): a dict view is not a sequence on Python 3
p.text([-30] * n_radiomics_type, legend_yy, text=list(radiomics_color_dict.keys()),
       text_font_size="8pt", text_align="left", text_baseline="middle")

# tumor type legend: one coloured rectangle plus its label per tumor type,
# stacked downwards from y = 18 in steps of 18.
# The range stops after exactly N_tumor_type rows; the previous expression
# produced N_tumor_type + 1 y-values for N_tumor_type x-values/colours/labels.
rect_xx = [-40]*N_tumor_type
rect_yy = list(range(18, 18 - N_tumor_type*18, -18))

txt_xx = [-15]*N_tumor_type
txt_yy = rect_yy
p.rect(rect_xx, rect_yy, width=30, height=13,
       color=list(tumor_color_dict.values()))
p.text(txt_xx, txt_yy, text=list(tumor_color_dict),
       text_font_size="9pt", text_align="left", text_baseline="middle")


# from bokeh.models import CategoricalColorMapper
# radiomics_colors = ['#deebf7','#e69584','#bdbdbd']
# radiomics_color_mapper = CategoricalColorMapper(
#     factors = list(df3.Radiomics_type.unique()),
#     palette = radiomics_colors
# )

# tumor_colors = ['#0d3362','#c64737','black']
# tumor_color_mapper = CategoricalColorMapper(
#     factors = list(df1.tumor_type.unique()),
#     palette = tumor_colors
# )

# from bokeh.models import ColumnDataSource
# source = ColumnDataSource(dict(
#     circ_x=[-40, -40, -40],
#     circ_y=[-340, -360, -380],
#     radiomics_type=list(df3.Radiomics_type.unique()),
#     rect_x=[-40, -40, -40],
#     rect_y=[18, 0, -18],
#     tumor_type=list(df1.tumor_type.unique())))

# print(source.column_names)

# p.circle(x='circ_x',
#          y='circ_y',
#          radius=0,
#          color={'field':'radiomics_type','transform':radiomics_color_mapper},
#          source=source,
#          legend='radiomics_type')
# p.legend.border_line_color=None
# p.legend.background_fill_color=None
# # p.legend.location=(0,-50)
# # p.right.append(p.legend[0])

# p.rect(
#     x = 'rect_x',
#     y = 'rect_y',
#     width=0,
#     height=0,
#     color={'field':'tumor_type','transform': tumor_color_mapper},
#     source=source,
#     legend='tumor_type')
# p.legend.border_line_color=None
# p.legend.background_fill_color=None
# # p.legend.location=(-15,18)
# # p.left.append(p.legend[1])

# output_file() only configures where a LATER show()/save() writes, so it has
# to come before show(); called afterwards it had no effect on this figure.
# NOTE(review): with notebook output also enabled, show() is expected to both
# display inline and write/open the HTML file - confirm this is wanted.
output_file("radiomics_primaryVSrecur.html", title="Radiomics_PrimVsRecurTumors_Viz.py example")
show(p)

# show(p, notebook_handle=True)



In [13]:
# from bokeh.io import export_svgs
# p.output_backend = 'svg'
# export_svgs(p, filename='plot.svg')
# NOTE: SVG export didn't seem to work with this plot but PNG works fine

# Write the figure to a static PNG file; the value returned by export_png
# (shown as the cell output) is the path of the written file.
from bokeh.io import export_png
export_png(p, filename='plot.png')

'/Users/shuang/code/python/PETCTomics/notebooks/plot.png'

In [14]:
# from ipwidgets import interact
# from bokeh.io import push_notebook

# def update(pt_id):
#     new_data = dict()
    
#     source.data = new_data
#     p.title.text = str(ptid)
#     push_notebook()
    
# interact(update, ptid=)