In [2]:
import copy
import glob
import importlib
import itertools
import math
import os
import random
import shutil
import sys
from math import degrees, pi, radians
from os.path import *

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import SimpleITK as sitk
import skimage.feature as skf
from skimage.morphology import ball
import seaborn as sns
import cnn_builder as cbuild
import config
import lipiodol_analysis as lan
import lipiodol_methods as lm
import lipiodol_vis as lvis
import niftiutils.helper_fxns as hf
import niftiutils.masks as masks
import niftiutils.registration as reg
import niftiutils.transforms as tr
from config import Config
sns.set(style="ticks")
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
importlib.reload(config)
C = config.Config()
target_dir = "D:\\Lipiodol\\Data"
lesions = [basename(fn) for fn in glob.glob(join(target_dir,"*"))]
num_lesions = len(lesions)
liplvls = [0, 87, 155, 241]

In [4]:
pattern_df = pd.read_excel(C.data_xls_path, "Patterns")

In [141]:
def set_g_bar(g, save_path):
    g.set(yticks=[0.,.2,.4,.6,.8,1.], ylim=(0.,1.));
    """topbar = plt.Rectangle((0,0),1,1,fc="#C3C3C3", edgecolor = 'none')
    midbar = plt.Rectangle((0,0),1,1,fc='#A3A3A3',  edgecolor = 'none')
    bottombar = plt.Rectangle((0,0),1,1,fc='#737373',  edgecolor = 'none')
    l = plt.legend([bottombar, midbar, topbar],
                   ['High Lipiodol', 'Medium Lipiodol', 'Low Lipiodol'], loc='upper left')#, ncol=2, prop={'size':16})
    l.draw_frame(False)"""
    
    for gax in g.axes[0]:
        gax.set_xlabel("")
        gax.set_ylabel("")
        #gax.tick_params('x',width=0)
        gax.set_xticks([], minor=False)
        gax.set_yticks([], minor=False)
        plt.setp(gax.patches, linewidth=1, edgecolor='k')
        
    g.set_titles(visible=False)
    #g.axes[0][0].set_yticklabels(["0%", "20%", "40%", "60%", "80%", "100%"]);
    
    sns.despine(top=True, right=True, left=True)
    g.fig.subplots_adjust(left=.2, top=.95)
    
    #g.fig.tight_layout(w_pad=1)
    #plt.setp(g.ax.lines,linewidth=1);
    g.fig.savefig(save_path, dpi=150, pad_inches=0, transparent=True) #, width=5
    plt.close()

In [92]:
def print_subset_stats(subset):
    print("%.1f%%+-%.1f%% (s.e.=%.1f%%, n=%d)" % (np.mean(subset)*100,
                np.std(subset)*100, np.std(subset)*100/(len(subset)**.5), len(subset)))

### Colorful graph

In [78]:
importlib.reload(lm)
Vasc = [[],[],[],[]]
NonResp = [[],[],[],[]]

for lesion_id in lesions:
    P = lm.get_paths_dict(lesion_id, target_dir)
    M = masks.get_mask(P['ct24Tx']['crop']['tumor'], img_path=P['ct24Tx']['crop']['img'], overlaid=True)
    I,D = hf.nii_load(P['ct24Tx']['crop']['img'])
    
    if not exists(P['ct24Tx']['mrbl']['enh']+".off"):
        mrblM = np.zeros(M.shape)
    else:
        mrblM = masks.get_mask(P['ct24Tx']['mrbl']['enh'], D, I.shape)
        
    if not exists(P['ct24Tx']['mr30']['enh']+".off"):
        mr30M = np.zeros(M.shape)
    else:
        mr30M = masks.get_mask(P['ct24Tx']['mr30']['enh'], D, I.shape)
        
    Masks = [(M!=0) & (M<liplvls[1]),
            (M>liplvls[1]) & (M<liplvls[2]),
            (M>liplvls[2]) & (M<liplvls[3]),
            M>liplvls[3]]
    
    for ix,M in enumerate(Masks):
        if M.sum() > 0:
            Vasc[ix].append((M*mrblM!=0).sum()/M.sum())
            NonResp[ix].append((M*mrblM*mr30M!=0).sum()/(M*mrblM!=0).sum())
        else:
            Vasc[ix].append(np.nan)
            NonResp[ix].append(np.nan)
    #lm.reg_to_ct24(lesion_id, target_dir)



In [88]:
np.isnan(Vasc[3]).sum()

16

In [80]:
np.nanmean(Vasc,1)

array([0.29625662, 0.27145347, 0.22075392, 0.18174122])

## Vascularization statistics

In [None]:
sum(master_df['selective=0']==1)

In [130]:
vasc_depo_df = pd.read_excel(C.data_xls_path, "Perfusion-Deposition Data")

In [131]:
def get_dvasc_df(vasc_depo_df, mode="density"):
    dvasc_df = copy.deepcopy(vasc_depo_df)
    if mode == "density":
        for l in ["N", "V", "A"]:
            for L_ix in range(3):
                dvasc_df[str(liplvls[L_ix])+l] = dvasc_df[str(liplvls[L_ix])+l] - dvasc_df[str(liplvls[L_ix+1])+l]
    elif mode == "V-N":
        dvasc_df["%ddVN"%liplvls[1]] = dvasc_df["%dV"%liplvls[1]] - dvasc_df["%dN"%liplvls[1]]
    return dvasc_df

### Upper graph

In [133]:
dvasc_df = get_dvasc_df(vasc_depo_df)

In [120]:
print(scipy.stats.wilcoxon(vasc_depo_df["%dV"%liplvls[1]], vasc_depo_df["%dN"%liplvls[1]]))

8.6%+-14.7% (s.e.=1.9%)
WilcoxonResult(statistic=307.0, pvalue=1.593651093256237e-06)


  r_plus = np.sum((d > 0) * r, axis=0)
  r_minus = np.sum((d < 0) * r, axis=0)


In [124]:
for i in range(4):
    subset = (dvasc_df["%dV" % liplvls[i]] - dvasc_df["%dN" % liplvls[i]]).dropna()
    print("%.1f%%+-%.1f%% (s.e.=%.1f%%)" % (np.mean(subset)*100,
                                    np.std(subset)*100, np.std(subset)*100/(len(subset)**.5)))

#DV = dvasc_df.dropna()
#[scipy.stats.wilcoxon(DV["%dV" % liplvls[i]], DV["%dN" % liplvls[i]]) for i in range(4)]

for i in range(4):
    subset = dvasc_df[["%dV" % liplvls[i], "%dN" % liplvls[i]]].dropna()
    print(scipy.stats.wilcoxon(subset["%dV" % liplvls[i]], subset["%dN" % liplvls[i]]))

-8.6%+-14.7% (s.e.=1.9%)
-6.9%+-14.4% (s.e.=1.9%)
3.5%+-14.3% (s.e.=1.9%)
11.9%+-19.3% (s.e.=2.6%)
WilcoxonResult(statistic=307.0, pvalue=0.00010477134927041804)
WilcoxonResult(statistic=385.0, pvalue=0.000754754105706784)
WilcoxonResult(statistic=527.0, pvalue=0.06352457698465036)
WilcoxonResult(statistic=168.0, pvalue=9.798519283523615e-06)


In [135]:
master_df = pd.read_excel(r"D:\Lipiodol\MASTER SOPHIE.xlsx", "Lesions analyzed", index_col="Lesion_ID")#"C:\Users\Clinton\Box\FOR CLINTON BOX FOLDER\MASTER SS SOPHIE.xlsx")
#dvasc_df = dvasc_df.join(master_df, how='inner')
dvasc_df = vasc_depo_df.join(master_df, how='inner')
i=1
subset1 = dvasc_df.loc[dvasc_df["0=well delineated, 1=infiltrative"] == 0, "%dA" % liplvls[i]].dropna()
subset2 = dvasc_df.loc[dvasc_df["0=well delineated, 1=infiltrative"] == 1, "%dA" % liplvls[i]].dropna()
print(scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.00012339292808787657
71.7%+-27.9% (s.e.=3.9%, n=52)
37.5%+-22.9% (s.e.=6.4%, n=13)


In [20]:
i=1
subset1 = dvasc_df.loc[dvasc_df["selective=0"] == 0, "%dA" % liplvls[i]].dropna()
subset2 = dvasc_df.loc[dvasc_df["selective=0"] == 1, "%dA" % liplvls[i]].dropna()
print(scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.07171699708950603
72.5%+-25.6% (s.e.=4.5%)
57.0%+-32.6% (s.e.=5.8%)


In [21]:
i=1
subset1 = dvasc_df.loc[dvasc_df["HCC(0), ICC(1), other(2)"] == 0, "%dA" % liplvls[i]].dropna()
subset3 = dvasc_df.loc[dvasc_df["HCC(0), ICC(1), other(2)"] == 1, "%dA" % liplvls[i]].dropna()
subset2 = dvasc_df.loc[dvasc_df["HCC(0), ICC(1), other(2)"] == 2, "%dA" % liplvls[i]].dropna()
print(scipy.stats.kruskal(subset1, subset2, subset3).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)
print_subset_stats(subset3)

3.471764729349071e-05
76.9%+-27.4% (s.e.=4.6%)
66.7%+-22.8% (s.e.=6.1%)
34.4%+-19.9% (s.e.=5.1%)


In [143]:
kwargs = {"data":dvasc_df, "size":3, "kind":"bar", "color":"#C3C3C3", "legend":False} #, "aspect":.8

g = sns.factorplot(x="0=well delineated, 1=infiltrative", y="%dA"%liplvls[1], aspect=1., **kwargs)
set_g_bar(g, join(C.fig_dir, "Vascularization figures", "Upper graph", "well-del vs infilt.png"))

g = sns.factorplot(x="selective=0", y="%dA"%liplvls[1], aspect=1., **kwargs)
set_g_bar(g, join(C.fig_dir, "Vascularization figures", "Upper graph", "selective vs lobar.png"))

g = sns.factorplot(x="HCC(0), ICC(1), other(2)", y="%dA"%liplvls[1], order=[0,2,1], aspect=1.5, **kwargs)
set_g_bar(g, join(C.fig_dir, "Vascularization figures", "Upper graph", "tumor entity.png"))

g = sns.factorplot(x="0A", y="%dA"%liplvls[1], aspect=.5, **kwargs)
set_g_bar(g, join(C.fig_dir, "Vascularization figures", "Upper graph", "all tumors.png"))

### Middle Graph (Necro to Viable diff, no Lip breakdown)

In [52]:
Vdf = vasc_depo_df.dropna()
dvasc_df = get_dvasc_df(vasc_depo_df, "V-N")
master_df = pd.read_excel(r"D:\Lipiodol\MASTER SOPHIE.xlsx", "Lesions analyzed", index_col="Lesion_ID")#"C:\Users\Clinton\Box\FOR CLINTON BOX FOLDER\MASTER SS SOPHIE.xlsx")
#master_df = master_df.join(pattern_df)
dvasc_df = dvasc_df.join(master_df)

In [53]:
i = 1
subset1 = dvasc_df.loc[dvasc_df["0=well delineated, 1=infiltrative"] == 0, "%ddVN" % liplvls[i]].dropna()
subset2 = dvasc_df.loc[dvasc_df["0=well delineated, 1=infiltrative"] == 1, "%ddVN" % liplvls[i]].dropna()
print("%.2f" % scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.40
8.9%+-15.7% (s.e.=2.4%)
7.5%+-10.5% (s.e.=2.9%)


In [30]:
i = 1
subset1 = dvasc_df.loc[dvasc_df["selective=0"] == 0, "%ddVN" % liplvls[i]].dropna()
subset2 = dvasc_df.loc[dvasc_df["selective=0"] == 1, "%ddVN" % liplvls[i]].dropna()
print("%.2f" % scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.43
9.5%+-17.6% (s.e.=3.1%)
7.4%+-9.3% (s.e.=1.9%)


In [31]:
i = 1
subset1 = dvasc_df.loc[dvasc_df["HCC(0), ICC(1), other(2)"] == 0, "%ddVN" % liplvls[i]].dropna()
subset3 = dvasc_df.loc[dvasc_df["HCC(0), ICC(1), other(2)"] == 1, "%ddVN" % liplvls[i]].dropna()
subset2 = dvasc_df.loc[dvasc_df["HCC(0), ICC(1), other(2)"] == 2, "%ddVN" % liplvls[i]].dropna()
print("%.2f" % scipy.stats.kruskal(subset1, subset2, subset3).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)
print_subset_stats(subset3)

0.30
10.9%+-15.0% (s.e.=2.6%)
7.1%+-16.1% (s.e.=4.3%)
3.1%+-8.6% (s.e.=2.7%)


In [32]:
df = pd.DataFrame(columns=["Any Coverage", "Lesion_id", "Tissue Type",
                       "Tumor Growth", "Tumor Type", "TACE Type"])

master_df = pd.read_excel(r"D:\Lipiodol\MASTER SOPHIE.xlsx", "Lesions analyzed", index_col="Lesion_ID")#"C:\Users\Clinton\Box\FOR CLINTON BOX FOLDER\MASTER SS SOPHIE.xlsx")
master_df = master_df.join(pattern_df)
modality = "mrbl"

In [33]:
importlib.reload(lvis)
ix = 0
for lesion_id, row in Vdf.iterrows():
    const = lvis.get_df_entry(lesion_id, master_df, modality)
    
    df.loc[ix] = [row["%dN"%liplvls[1]], lesion_id, "Necrosis"] + const
    df.loc[ix+1] = [row["%dV"%liplvls[1]], lesion_id, "Viable"] + const
    ix += 2

In [34]:
def set_g_bar(g, save_path):
    g.set(yticks=[0.,.2,.4,.6,.8,1.], ylim=(0.,1.));
    
    for gax in g.axes[0]:
        gax.set_xlabel("")
        gax.set_ylabel("")
        #gax.tick_params('x',width=0)
        gax.set_xticks([], minor=False)
        gax.set_yticks([], minor=False)
        plt.setp(gax.patches, linewidth=1, edgecolor='k')
        
    g.set_titles(visible=False)
    #g.axes[0][0].set_yticklabels(["0%", "20%", "40%", "60%", "80%", "100%"]);
    
    sns.despine(top=True, right=True, left=True)
    g.fig.subplots_adjust(left=.2, top=.95)
    
    #g.fig.tight_layout(w_pad=1)
    #plt.setp(g.ax.lines,linewidth=1);
    g.fig.savefig(save_path, width=5, dpi=150, pad_inches=0, transparent=True)
    plt.close()

In [35]:
kwargs = {"x":"Tissue Type", "data":df, "size":3, "aspect":.8, "kind":"bar", "legend":False}#, "ci":None

In [36]:
g1 = sns.factorplot(y="Any Coverage", color="#D3D3D3", **kwargs)
set_g_bar(g1, join(C.fig_dir, "Vascularization figures", "Mid graph", "Mean.png"))

for category, order in [("Tumor Growth", None), ("Tumor Type", None),
                        ("TACE Type", ["Selective", "Lobar"])]: #, ("Sparsity", ["Sparse", "Non"])
    order = lan.get_actual_order(category, df, order)
    g1 = sns.factorplot(y="Any Coverage", col=category, color="#D3D3D3", col_order=order, **kwargs)
    set_g_bar(g1, join(C.fig_dir, "Vascularization figures", "Mid graph", "%s.png" % category))

### Alternative Mid Graph (Lip density, no Necro/Viable separation)

In [332]:
Vdf = vasc_depo_df.dropna()

In [331]:
dvasc_df = get_dvasc_df(vasc_depo_df)

In [333]:
master_df = pd.read_excel(r"D:\Lipiodol\MASTER SOPHIE.xlsx", "Lesions analyzed", index_col="Lesion_ID")#"C:\Users\Clinton\Box\FOR CLINTON BOX FOLDER\MASTER SS SOPHIE.xlsx")
#master_df = master_df.join(pattern_df)
dvasc_df = dvasc_df.join(master_df)

In [None]:
for i in range(4):
    print(liplvls[i])
    subset1 = dvasc_df.loc[dvasc_df["0=well delineated, 1=infiltrative"] == 0, "%dA" % liplvls[i]].dropna()
    subset2 = dvasc_df.loc[dvasc_df["0=well delineated, 1=infiltrative"] == 1, "%dA" % liplvls[i]].dropna()
    print(scipy.stats.mannwhitneyu(subset1, subset2))
    print("%.1f%%+-%.1f%% (s.e.=%.1f%%)" % (np.mean(subset1)*100, np.std(subset1)*100, np.std(subset1)*100/(len(subset1)**.5)))
    print("%.1f%%+-%.1f%% (s.e.=%.1f%%)" % (np.mean(subset2)*100, np.std(subset2)*100, np.std(subset2)*100/(len(subset2)**.5)))

In [None]:
for i in range(4):
    print(liplvls[i])
    subset1 = dvasc_df.loc[dvasc_df["selective=0"] == 0, "%dA" % liplvls[i]].dropna()
    subset2 = dvasc_df.loc[dvasc_df["selective=0"] == 1, "%dA" % liplvls[i]].dropna()
    print(scipy.stats.mannwhitneyu(subset1, subset2))
    print("%.1f%%+-%.1f%% (s.e.=%.1f%%)" % (np.mean(subset1)*100, np.std(subset1)*100, np.std(subset1)*100/(len(subset1)**.5)))
    print("%.1f%%+-%.1f%% (s.e.=%.1f%%)" % (np.mean(subset2)*100, np.std(subset2)*100, np.std(subset2)*100/(len(subset2)**.5)))

In [338]:
for i in range(4):
    subset1 = dvasc_df.loc[dvasc_df["HCC(0), ICC(1), other(2)"] == 0, "%dA" % liplvls[i]].dropna()
    subset3 = dvasc_df.loc[dvasc_df["HCC(0), ICC(1), other(2)"] == 1, "%dA" % liplvls[i]].dropna()
    subset2 = dvasc_df.loc[dvasc_df["HCC(0), ICC(1), other(2)"] == 2, "%dA" % liplvls[i]].dropna()
    print(scipy.stats.kruskal(subset1, subset2, subset3))
    print("%.1f%%+-%.1f%% (s.e.=%.1f%%)" % (np.mean(subset1)*100, np.std(subset1)*100, np.std(subset1)*100/(len(subset1)**.5)))
    print("%.1f%%+-%.1f%% (s.e.=%.1f%%)" % (np.mean(subset2)*100, np.std(subset2)*100, np.std(subset2)*100/(len(subset2)**.5)))
    print("%.1f%%+-%.1f%% (s.e.=%.1f%%)" % (np.mean(subset3)*100, np.std(subset3)*100, np.std(subset3)*100/(len(subset3)**.5)))

KruskalResult(statistic=18.14321160363808, pvalue=0.00011488191286503579)
19.8%+-25.4% (s.e.=4.2%)
29.1%+-23.0% (s.e.=7.3%)
60.4%+-12.7% (s.e.=3.8%)
KruskalResult(statistic=15.098642258431852, pvalue=0.0005264674091710209)
28.9%+-16.2% (s.e.=2.7%)
52.0%+-15.2% (s.e.=4.8%)
35.3%+-10.5% (s.e.=3.2%)
KruskalResult(statistic=16.318025146008512, pvalue=0.00028614480244862323)
16.4%+-10.6% (s.e.=1.7%)
12.7%+-10.4% (s.e.=3.3%)
3.0%+-3.4% (s.e.=1.0%)
KruskalResult(statistic=24.268648160106817, pvalue=5.371925862881901e-06)
34.9%+-27.5% (s.e.=4.5%)
6.3%+-5.9% (s.e.=1.9%)
1.3%+-2.0% (s.e.=0.6%)


In [316]:
def get_row(row):
    char="A"
    return [row["%d%s"%(liplvls[1],char)],
            row["%d%s"%(liplvls[1],char)] - row["%d%s"%(liplvls[3],char)],
            row["%d%s"%(liplvls[1],char)] - row["%d%s"%(liplvls[2],char)]]

In [317]:
df = pd.DataFrame(columns=["Any Coverage", "Low-Mid Coverage", "Low Coverage", "Lesion_id",
                       "Tumor Growth", "Tumor Type", "TACE Type"])

importlib.reload(lvis)
ix = 0
modality = "mrbl"
for lesion_id, row in Vdf.iterrows():
    const = lvis.get_df_entry(lesion_id, master_df, modality)
    
    df.loc[ix] = get_row(row) + [lesion_id] + const
    ix += 1

In [318]:
def set_g_bar(g, save_path):
    g.set(yticks=[0.,.2,.4,.6,.8,1.], ylim=(0.,1.));
    
    for gax in g.axes[0]:
        gax.set_xlabel("")
        gax.set_ylabel("")
        #gax.tick_params('x',width=0)
        gax.set_xticks([], minor=False)
        gax.set_yticks([], minor=False)
        plt.setp(gax.patches, linewidth=1, edgecolor='k')
        
    g.set_titles(visible=False)
    #g.axes[0][0].set_yticklabels(["0%", "20%", "40%", "60%", "80%", "100%"]);
    
    sns.despine(top=True, right=True, left=True)
    g.fig.subplots_adjust(left=.2, top=.95)
    
    #g.fig.tight_layout(w_pad=1)
    #plt.setp(g.ax.lines,linewidth=1);
    g.fig.savefig(save_path, width=5, dpi=150, pad_inches=0, transparent=True)
    plt.close()

In [319]:
kwargs = {"data":df, "size":3, "aspect":.8, "kind":"bar", "ci":None, "legend":False}

In [320]:
g1 = sns.factorplot(y="Any Coverage", color="#D3D3D3", **kwargs)
set_g_bar(g1, join(C.fig_dir, "Vascularization figures", "Mid graph", "Mean1.png"))
g2 = sns.factorplot(y="Low-Mid Coverage", color="#939393", **kwargs)
set_g_bar(g2, join(C.fig_dir, "Vascularization figures", "Mid graph", "Mean2.png"))
g3 = sns.factorplot(y="Low Coverage", color="#333333", **kwargs)
set_g_bar(g3, join(C.fig_dir, "Vascularization figures", "Mid graph", "Mean3.png"))

for category, order in [("Tumor Growth", None), ("Tumor Type", None),
                        ("TACE Type", ["Selective", "Lobar"])]: #, ("Sparsity", ["Sparse", "Non"])

    order = lm.get_actual_order(category, df, order)
    g1 = sns.factorplot(y="Any Coverage", col=category, color="#D3D3D3", col_order=order, **kwargs)
    set_g_bar(g1, join(C.fig_dir, "Vascularization figures", "Mid graph", "%s1.png" % category))
    g2 = sns.factorplot(y="Low-Mid Coverage", col=category, color="#939393", col_order=order, **kwargs)
    set_g_bar(g2, join(C.fig_dir, "Vascularization figures", "Mid graph", "%s2.png" % category))
    g3 = sns.factorplot(y="Low Coverage", col=category, color="#333333", col_order=order, **kwargs)
    set_g_bar(g3, join(C.fig_dir, "Vascularization figures", "Mid graph", "%s3.png" % category))

## Response statistics

In [89]:
depo_resp_df = pd.read_excel(C.data_xls_path, "Deposition-Response Data")

In [64]:
Rdf = depo_resp_df.dropna()

In [42]:
scipy.stats.friedmanchisquare(*[Rdf[l] for l in liplvls])

FriedmanchisquareResult(statistic=19.628571428571473, pvalue=0.0002026439953540675)

In [None]:
for l in liplvls[1:]:
    print(0,l,scipy.stats.wilcoxon(Rdf[0], Rdf[l]))
print(liplvls[1],liplvls[2],scipy.stats.wilcoxon(Rdf[liplvls[1]], Rdf[liplvls[2]]))
print(liplvls[3],liplvls[2],scipy.stats.wilcoxon(Rdf[liplvls[3]], Rdf[liplvls[2]]))

In [48]:
dresp_df = copy.deepcopy(Rdf)
for L in liplvls[3:0:-1]:
    dresp_df[L] = dresp_df[L] - dresp_df[0]
dresp_df[0] = 0

In [49]:
for l in liplvls:
    subset=dresp_df[l]
    print("%.1f%%+-%.1f%% (s.e.=%.1f%%)" % (np.mean(subset)*100, np.std(subset)*100, np.std(subset)*100/(len(subset)**.5)))

0.0%+-0.0% (s.e.=0.0%)
3.1%+-12.0% (s.e.=2.0%)
10.1%+-18.5% (s.e.=3.0%)
15.2%+-23.2% (s.e.=3.8%)


### Top graph

In [103]:
pattern_df = pd.read_excel(C.data_xls_path, "Patterns")
df = pd.DataFrame(columns=["Response", "Lesion_id", "Tumor Growth", "Tumor Type", "TACE Type", "Homogeneity", "Sparsity", "Rim Presence"])

master_df = pd.read_excel(r"D:\Lipiodol\MASTER SOPHIE.xlsx", "Lesions analyzed", index_col="Lesion_ID")#"C:\Users\Clinton\Box\FOR CLINTON BOX FOLDER\MASTER SS SOPHIE.xlsx")
master_df = master_df.join(pattern_df)
modality = "ct24"
master_df["lipcoverage_vol"] = master_df["lipcoverage_vol"].astype(float)
master_df["high_lip"] = master_df["high_lip"].astype(float)
master_df["rim_lipiodol"] = master_df["rim_lipiodol"].astype(float)
master_df["low_peripheral"] = master_df["low_peripheral"].astype(float)
master_df["mid_peripheral"] = master_df["mid_peripheral"].astype(float)

importlib.reload(lvis)
ix = 0
for lesion_id, row in depo_resp_df.iterrows():
    const = lvis.get_df_entry(lesion_id, master_df, modality)
    df.loc[ix] = [row["Avg"], lesion_id] + const
    ix += 1

In [94]:
#master_df = pd.read_excel(r"D:\Lipiodol\MASTER SOPHIE.xlsx", "Lesions analyzed", index_col="Lesion_ID")#"C:\Users\Clinton\Box\FOR CLINTON BOX FOLDER\MASTER SS SOPHIE.xlsx")
#dvasc_df = dvasc_df.join(master_df, how='inner')
Rdf = depo_resp_df.join(master_df)
subset1 = Rdf.loc[Rdf["0=well delineated, 1=infiltrative"] == 0, "Avg"].dropna()
subset2 = Rdf.loc[Rdf["0=well delineated, 1=infiltrative"] == 1, "Avg"].dropna()
print(scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.014722741820050197
-64.7%+-37.5% (s.e.=5.4%, n=48)
-44.3%+-25.8% (s.e.=7.2%, n=13)


In [95]:
master_df = pd.read_excel(r"D:\Lipiodol\MASTER SOPHIE.xlsx", "Lesions analyzed", index_col="Lesion_ID")#"C:\Users\Clinton\Box\FOR CLINTON BOX FOLDER\MASTER SS SOPHIE.xlsx")
#dvasc_df = dvasc_df.join(master_df, how='inner')
Rdf = depo_resp_df.join(master_df)
subset1 = Rdf.loc[Rdf["selective=0"] == 0, "Avg"].dropna()
subset2 = Rdf.loc[Rdf["selective=0"] == 1, "Avg"].dropna()
print(scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.2618928611239476
-64.5%+-34.8% (s.e.=6.1%, n=33)
-55.5%+-37.4% (s.e.=7.1%, n=28)


In [96]:
master_df = pd.read_excel(r"D:\Lipiodol\MASTER SOPHIE.xlsx", "Lesions analyzed", index_col="Lesion_ID")#"C:\Users\Clinton\Box\FOR CLINTON BOX FOLDER\MASTER SS SOPHIE.xlsx")
#dvasc_df = dvasc_df.join(master_df, how='inner')
Rdf = depo_resp_df.join(master_df)
subset1 = Rdf.loc[Rdf["HCC(0), ICC(1), other(2)"] == 0, "Avg"].dropna()
subset3 = Rdf.loc[Rdf["HCC(0), ICC(1), other(2)"] == 1, "Avg"].dropna()
subset2 = Rdf.loc[Rdf["HCC(0), ICC(1), other(2)"] == 2, "Avg"].dropna()
print(scipy.stats.kruskal(subset1, subset2, subset3).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)
print_subset_stats(subset3)

0.01912642478962076
-68.2%+-32.3% (s.e.=5.4%, n=36)
-64.8%+-36.9% (s.e.=9.9%, n=14)
-29.0%+-30.9% (s.e.=9.3%, n=11)


In [104]:
subdf = df.dropna(subset=["Sparsity"])
subset1 = subdf.loc[subdf["Sparsity"].str.contains("Sparse"), "Response"]
subset2 = subdf.loc[subdf["Sparsity"].str.contains("Non"), "Response"]
print(scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.00021631674693271154
-37.7%+-35.8% (s.e.=6.8%, n=28)
-69.4%+-29.6% (s.e.=5.7%, n=27)


In [106]:
importlib.reload(lan)
focal_df = df.dropna(subset=["Tumor Growth"])
focal_df = focal_df[focal_df["Tumor Growth"].str.contains("Well")]

subdf = focal_df.dropna(subset=["Homogeneity"])
subset1 = subdf.loc[(subdf["Homogeneity"].str.contains("Homo")), "Response"]
subset2 = subdf.loc[(subdf["Homogeneity"].str.contains("Hetero")), "Response"]
print(scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.002190506540975477
-90.3%+-15.8% (s.e.=5.0%, n=10)
-58.0%+-38.7% (s.e.=6.0%, n=42)


In [107]:
subdf = focal_df.dropna(subset=["Rim Presence"])
subset1 = subdf.loc[(subdf["Rim Presence"].str.contains("Rim")) & (subdf["Sparsity"].str.contains("Sparse")), "Response"]
subset2 = subdf.loc[(subdf["Rim Presence"].str.contains("Non")) & (subdf["Sparsity"].str.contains("Sparse")), "Response"]
print(scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.02616259535699922
-68.6%+-29.3% (s.e.=10.4%, n=8)
-16.0%+-36.9% (s.e.=11.1%, n=11)


In [108]:
subdf = focal_df.dropna(subset=["Rim Presence"])
subset1 = subdf.loc[(subdf["Rim Presence"].str.contains("Rim")) & ~(subdf["Sparsity"].str.contains("Sparse")), "Response"]
subset2 = subdf.loc[(subdf["Rim Presence"].str.contains("Non")) & ~(subdf["Sparsity"].str.contains("Sparse")), "Response"]
print(scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.4860232665286046
-68.1%+-27.0% (s.e.=11.0%, n=6)
-69.9%+-32.1% (s.e.=7.8%, n=17)


In [109]:
subdf = focal_df.dropna(subset=["Sparsity"])
subset1 = subdf.loc[subdf["Sparsity"].str.contains("Sparse"), "Response"]
subset2 = subdf.loc[subdf["Sparsity"].str.contains("Non"), "Response"]
print(scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

importlib.reload(lan)
infil_df = df.dropna(subset=["Tumor Growth"])
infil_df = infil_df[infil_df["Tumor Growth"].str.contains("Infilt")]
subdf = infil_df.dropna(subset=["Sparsity"])
subset1 = subdf.loc[subdf["Sparsity"].str.contains("Sparse"), "Response"]
subset2 = subdf.loc[subdf["Sparsity"].str.contains("Non"), "Response"]
print(scipy.stats.mannwhitneyu(subset1, subset2).pvalue)
print_subset_stats(subset1)
print_subset_stats(subset2)

0.002606776336451984
-40.5%+-42.6% (s.e.=9.8%, n=19)
-69.4%+-30.9% (s.e.=6.4%, n=23)
0.018621272297989672
-33.1%+-19.0% (s.e.=6.3%, n=9)
-69.5%+-20.9% (s.e.=10.4%, n=4)


In [77]:
pattern_df = pd.read_excel(C.data_xls_path, "Patterns")
df = pd.DataFrame(columns=["Response", "Lesion_id", "Tumor Growth", "Tumor Type", "TACE Type", "Homogeneity", "Sparsity", "Rim Presence"])

master_df = pd.read_excel(r"D:\Lipiodol\MASTER SOPHIE.xlsx", "Lesions analyzed", index_col="Lesion_ID")#"C:\Users\Clinton\Box\FOR CLINTON BOX FOLDER\MASTER SS SOPHIE.xlsx")
master_df = master_df.join(pattern_df)
modality = "ct24"
master_df["lipcoverage_vol"] = master_df["lipcoverage_vol"].astype(float)
master_df["high_lip"] = master_df["high_lip"].astype(float)
master_df["rim_lipiodol"] = master_df["rim_lipiodol"].astype(float)
master_df["low_peripheral"] = master_df["low_peripheral"].astype(float)
master_df["mid_peripheral"] = master_df["mid_peripheral"].astype(float)

importlib.reload(lvis)
ix = 0
for lesion_id, row in depo_resp_df.iterrows():
    const = lvis.get_df_entry(lesion_id, master_df, modality)
    df.loc[ix] = [row["Avg"], lesion_id] + const
    ix += 1

In [78]:
#kwargs = {"x":"Lipiodol Deposition", "y":"Response", "data":df, "size":3, "markers":["s", "o", "^"], "legend":False}
kwargs = {"y":"Response", "data":df, "size":3, "kind":"bar", "legend":False}

def set_g_bar(g, save_path):
    g.set(yticks=[0.,.2,.4,.6,.8,1.], ylim=(0.,1.));
    #g.set(yticks=[-1.,-.8,-.6,-.4,-.2,0.], ylim=(-1.,0.));
    
    for gax in g.axes[0]:
        gax.set_xlabel("")
        gax.set_ylabel("")
        #gax.tick_params('x',width=0)
        gax.set_xticks([], minor=False)
        gax.set_yticks([], minor=False)
        plt.setp(gax.patches, linewidth=1, edgecolor='k')
        
    g.set_titles(visible=False)
    #g.axes[0][0].set_yticklabels(["0%", "20%", "40%", "60%", "80%", "100%"]);
    
    sns.despine(top=True, right=True, left=True, bottom=False)
    g.fig.subplots_adjust(left=.2, top=.95)
    
    #g.fig.tight_layout(w_pad=1)
    #plt.setp(g.ax.lines,linewidth=1);
    g.fig.savefig(save_path, width=5, dpi=150, pad_inches=0, transparent=True)
    plt.close()

In [79]:
df["Response"] = -df["Response"]

In [80]:
importlib.reload(lan)
for category, order in [("Tumor Growth", None), ("Tumor Type", None),
                        ("TACE Type", ["Selective", "Lobar"]), ("Sparsity", ["Sparse", "Non"])]:
    g = sns.factorplot(x=category, order=lan.get_actual_order(category, df, order), **kwargs)
    set_g_bar(g, join(C.fig_dir, "Deposition figures", "Top graph", "%s.png" % category));
    
#for category, order in [("Tumor Growth", None)]:
#    g = sns.factorplot(x=category, order=lan.get_actual_order(category, df, order), **kwargs)
#    set_g_bar(g, join(C.fig_dir, "Deposition figures", "Top graph", "%s with percentage.png" % category));

In [81]:
importlib.reload(lan)
focal_df = df.dropna(subset=["Tumor Growth"])
focal_df = focal_df[focal_df["Tumor Growth"].str.contains("Well")]
for ix, row in focal_df.iterrows():
    focal_df.loc[ix, "Tumor Type"] = lvis.check_column(row["Lesion_id"], master_df, "HCC(0), ICC(1), other(2)", 
                                                  {0: "HCCs", 1: "ICCs", 2: "Metastases"}, "WD")
    focal_df.loc[ix, "Sparsity"] = lvis.check_sparse(row["Lesion_id"], master_df, modality, "WD")
kwargs["data"] = focal_df

for category, order in [("Sparsity", ["Sparse", "Non"]), ("Homogeneity", ["Homo", "Hetero"])]:
    g = sns.factorplot(x=category, order=lan.get_actual_order(category, focal_df, order), **kwargs)
    set_g_bar(g, join(C.fig_dir, "Deposition figures", "Top graph", "Focal_%s.png" % category));
    
focal_df = focal_df.dropna(subset=["Sparsity"])

kwargs["data"] = focal_df[focal_df["Sparsity"].str.startswith("Sparse")]
for category, order in [("Rim Presence", ["Rim", "Non"])]:
    g = sns.factorplot(x=category, order=lan.get_actual_order(category, focal_df, order), **kwargs)
    set_g_bar(g, join(C.fig_dir, "Deposition figures", "Top graph", "Focal_Sparse_%s.png" % category));
kwargs["data"] = focal_df[focal_df["Sparsity"].str.startswith("Non")]
for category, order in [("Rim Presence", ["Rim", "Non"])]:
    g = sns.factorplot(x=category, order=lan.get_actual_order(category, focal_df, order), **kwargs)
    set_g_bar(g, join(C.fig_dir, "Deposition figures", "Top graph", "Focal_Non-Sparse_%s.png" % category));

infil_df = df.dropna(subset=["Tumor Growth"])
infil_df = infil_df[infil_df["Tumor Growth"].str.contains("Infiltrative")]
for ix, row in infil_df.iterrows():
    infil_df.loc[ix, "Tumor Type"] = lvis.check_column(row["Lesion_id"], master_df, "HCC(0), ICC(1), other(2)", 
                                                  {0: "HCCs", 1: "ICCs", 2: "Metastases"}, "Infiltrative")
    infil_df.loc[ix, "Sparsity"] = lvis.check_sparse(row["Lesion_id"], master_df, modality, "Infiltrative")
kwargs["data"] = infil_df

for category, order in [("Sparsity", ["Sparse", "Non"])]:
    g = sns.factorplot(x=category, order=lan.get_actual_order(category, infil_df, order), **kwargs)
    set_g_bar(g, join(C.fig_dir, "Deposition figures", "Top graph", "Infil_%s.png" % category));

## Prediction of Lipiodol deposition

In [None]:
pattern_df = pd.read_excel(C.data_xls_path, "Patterns")

In [277]:
lesion_id, lesions.index(lesion_id)

('BM-09', 6)

In [5]:
cols = ["T_art", "DICE_art", "T_sub", "DICE_sub"]
T_df = pd.DataFrame(columns=cols)

In [None]:
importlib.reload(lan)
for lesion_id in lesions[0:]:
    print(lesion_id)
    T_df.loc[lesion_id] = lan.get_best_T_lip(lesion_id, target_dir, liplvls[2])

In [380]:
T_df["DICE_art"].mean(), T_df["DICE_sub"].mean()

(0.8375604683744453, 0.832073280132539)

In [279]:
lesion_id = "BM-01"

In [None]:
P = lm.get_paths_dict(lesion_id, target_dir)

In [None]:
art.min()

In [None]:
art = hf.nii_load(P['ct24Tx']['mrbl']['art'])[0]

In [None]:
art[M != 0].min()

In [None]:
img = masks.crop_img_to_mask_vicinity(P['ct24Tx']['mrbl']['art'], P['ct24Tx']['crop']['tumor'])

In [None]:
img = masks.draw_mask(P['ct24Tx']['crop']['tumor'], P['ct24Tx']['mrbl']['art']);

In [None]:
img.min()

In [None]:
hf.draw_slices(img)

In [238]:
ct = hf.nii_load(P['ct24Tx']['crop']['img'])[0]
M = masks.get_mask(P['ct24Tx']['crop']['tumor'])[0]
ct[M != 0] = np.nan
ct_U = ct >= T_lip
ct_L = ct < T_lip
art = hf.nii_load(P['ct24Tx']['mrbl']['art'])[0].astype(int)
sub = hf.nii_load(P['ct24Tx']['mrbl']['sub'])[0].astype(int)

  after removing the cwd from sys.path.
  """


In [None]:
M.sum()/M.max()

In [None]:
ct[M != 0] = np.nan

In [None]:
(~np.isnan(ct)).sum()

In [None]:
(ct < 99999).sum()

In [None]:
T_df

In [161]:
lm.reg_to_ct24(lesion_id, target_dir)

In [204]:
lesion_id

'PK-03B'