In [2]:
%load_ext autoreload 
%autoreload 2

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

# Data classes

In [None]:
from cbviz.cbviz.utils import DataNum

In [None]:
# Load the metabolite logFC signature table (first CSV column becomes the index).
# The frame is named `logfc_all` rather than `all` so the builtin all() stays usable.
logfc_all = pd.read_csv('./Metabolite-logFC-Signatures.csv', index_col=0)
# Keep the 1st and 3rd signature columns; .copy() detaches the slice from the full table.
df = logfc_all.iloc[:, [0,2]].copy()

# Wrap the two-column frame in the DataNum data class (ncols=2 matches the selection above).
dn = DataNum(df, ncols=2)

In [None]:
# infer_dtype is not imported anywhere else in the notebook; without this import the
# cell raises NameError on a fresh kernel.
from pandas.api.types import infer_dtype

# Dtype label to compare against.
# NOTE(review): only one 'floating' entry is built while df has two columns — confirm
# whether this relies on broadcasting or should be repeated per column.
expected = np.repeat('floating', 1)
# Dtype label pandas infers for every column of df.
observed = df.apply(infer_dtype).values
type(expected)

In [None]:
# Synthetic mixed-type frame: one numeric column (x) plus two grouping columns (s1, s2).
# A locally seeded generator makes the cell give the same data on every run.
rng = np.random.default_rng(0)

# s1: four labels with 30/80/50/30 members, shuffled across rows.
s1 = rng.permutation(np.repeat(['WT', 'Balanced', 'Minor', 'Major'], (30, 80, 50, 30)))
# s2: 57 'Basal-like' rows followed by 133 'Classical' rows.
s2 = np.repeat(['Basal-like', "Classical"], (57, 133))
# x: 57 normal draws centred on +2 followed by 133 centred on -2, aligned with s2.
x = np.concatenate([rng.normal(loc=loc, size=size) for loc, size in zip((2, -2), (57, 133))])

df2 = pd.DataFrame({"x" : x, "s1" : s1, "s2" : s2})
# Only s1 is converted to a categorical dtype; s2 keeps its default string/object dtype.
df2['s1'] = df2['s1'].astype('category')

In [None]:
from cbviz.cbviz.utils import DataMix

In [None]:
# Build a DataMix from df2 with ncat=2; as the last expression its repr is the cell output.
# NOTE(review): presumably ncat is the number of categorical columns (s1, s2) — confirm in cbviz.utils.
DataMix(df2, ncat=2)

In [None]:
# Small synthetic frame: integer ages in [40, 90) and two groups of 25 rows each.
# A locally seeded generator makes the cell give the same data on every run.
rng = np.random.default_rng(0)
df = pd.DataFrame({
    'Age': rng.integers(low=40, high=90, size=50),
    'group': np.repeat(('ctrl', 'polyp'), (25, 25)),
})

In [None]:
# Build a DataMix from df with default arguments; its repr is the cell output.
DataMix(df)

# KDE related 

## Split violin 

In [None]:
# Synthetic input for the split violin: numeric x with two grouping columns.
# A locally seeded generator makes the cell give the same data on every run.
rng = np.random.default_rng(0)

# s1: four labels with 30/80/50/30 members, shuffled across rows.
s1 = rng.permutation(np.repeat(['WT', 'Balanced', 'Minor', 'Major'], (30, 80, 50, 30)))
# s2: 57 'Basal-like' rows followed by 133 'Classical' rows.
s2 = np.repeat(['Basal-like', "Classical"], (57, 133))
# x: 57 normal draws centred on +2 followed by 133 centred on -2, aligned with s2.
x = np.concatenate([rng.normal(loc=loc, size=size) for loc, size in zip((2, -2), (57, 133))])

data = pd.DataFrame({"x" : x, "s1" : s1, "s2" : s2})

In [None]:
from cbviz.cbviz.kdetools import SplitViolin

In [None]:
# Explicit level order for both grouping variables, passed to SplitViolin as s1_order / s2_order.
s1_levels = ['WT', 'Balanced', 'Minor', 'Major']
s2_levels = ['Classical', 'Basal-like']
sp = SplitViolin(data, s1_order=s1_levels, s2_order=s2_levels)

In [None]:
plt.style.use('cviz')
fig, ax = plt.subplots(figsize=(3,2.5))

# One colour per s2 level, defined once so violins and legend cannot drift apart.
s2_colors = ('cornflowerblue', 'salmon')

for kde in sp.get_violins(colors=s2_colors):
    # fill_betweenx(y, x1): density runs along y and the value grid along x,
    # so the violins are drawn horizontally.
    ax.fill_betweenx(kde.density, kde.grid, facecolor=kde.color, alpha=0.5)
    ax.plot(kde.mode.ycoords, kde.mode.xcoords, lw=0.5, c='k')

# (tick positions, tick labels) for the s1 groups; fetched once and reused for the guide lines.
s1_ticks = sp.get_s1_ticks()
ax.set_yticks(*s1_ticks)

leg = ax.legend(handles=sp.get_s2_legend(colors=s2_colors),
                loc='upper left', handlelength=0.4,
                bbox_to_anchor=(0, 1.0, 0, 0.15), ncol=2, fontsize='x-small')
leg.get_frame().set_linewidth(0.2)

# Dotted guide line at each s1 tick position (plain loop: the calls are made for
# their side effect, no list of artists is needed).
for tick in s1_ticks[0]:
    ax.axhline(tick, lw=0.5, ls=":", c='0.5')
# The values lie along x in this orientation, hence sp.ylabel labels the x axis.
ax.set_xlabel(sp.ylabel)
# fig.savefig('./test/Test-SplitViolin.pdf')

## Ridge plot

In [None]:
# Synthetic input for the ridge plot: four groups whose means increase from 0 to 3.
# A locally seeded generator makes the cell give the same data on every run.
rng = np.random.default_rng(0)
group_sizes = (30, 80, 50, 30)

s1 = np.repeat(['WT', 'Balanced', 'Minor', 'Major'], group_sizes)
# One block of normal draws per group, centred on 0, 1, 2, 3, concatenated in s1 order.
x = np.concatenate([rng.normal(loc=loc, size=size) for loc, size in zip(range(4), group_sizes)])

data = pd.DataFrame({"x" : x, "s1" : s1})
data

In [None]:
from cbviz.cbviz.kdetools import Ridge

In [None]:
# Explicit s1 level order for the ridges; scale_factor is forwarded to Ridge unchanged.
ridge_levels = ['WT','Balanced', 'Minor', 'Major']
rp = Ridge(data, s1_order=ridge_levels, scale_factor=1.2)

In [None]:
plt.style.use('cviz')
fig, ax = plt.subplots(figsize=(3,2.5))

for kde in rp.get_kdes():
    # fill_betweenx(y, x1): density runs along y and the value grid along x.
    ax.fill_betweenx(kde.density, kde.grid, facecolor=kde.color, alpha=0.3)
    ax.plot(kde.mode.ycoords, kde.mode.xcoords, lw=0.1, c='k')

# (tick positions, tick labels) for the s1 groups; fetched once and reused for the guide lines.
s1_ticks = rp.get_s1_ticks()
ax.set_yticks(*s1_ticks)
# The values lie along x in this orientation, hence rp.ylabel labels the x axis.
ax.set_xlabel(rp.ylabel)
# Dotted guide line at each s1 tick position. A plain loop avoids building a throwaway
# list whose repr would otherwise be printed as the cell output.
for tick in s1_ticks[0]:
    ax.axhline(tick, ls=':', lw=0.25, c='0.15')

# Scatterplots

## XYview

In [None]:
# Index labels of the metabolites to highlight in the scatter plots below.
metabolites = [
    'L-Cystine',
    'Cysteine',
    'NADH',
]

In [None]:
# Load the metabolite logFC signature table (first CSV column becomes the index).
# The frame is named `logfc_all` rather than `all` so the builtin all() stays usable.
logfc_all = pd.read_csv('./Metabolite-logFC-Signatures.csv', index_col=0)
# Keep the 1st and 3rd signature columns as the x/y pair; .copy() detaches the slice.
df = logfc_all.iloc[:, [0,2]].copy()

In [None]:
from cbviz.cbviz.xytools import XYview
from adjustText import adjust_text

In [None]:
# Boolean mask over df's index: True for the metabolites to highlight.
cond = df.index.isin(metabolites)
# Highlighted points are drawn larger (40 vs 10) and red instead of mid-grey.
marker_sizes = np.where(cond, 40, 10)
marker_colors = np.where(cond, 'r', '.5')
xv = XYview(df, highlight=metabolites, s=marker_sizes, c=marker_colors, alpha=1)

In [None]:
!pip install adjustText

In [None]:
from pathlib import Path

plt.style.use('cviz')

fig, ax = plt.subplots(figsize=(3,3))

# Scatter with the per-point styling stored on the XYview object.
ax.scatter(xv.x, xv.y, **xv.scatter_kw)
# The XYview helpers below are called without an explicit axes.
# NOTE(review): presumably they draw on the current axes (the one just created) — confirm in cbviz.xytools.
xv.add_correlation(fontsize='small')
xv.add_reg_line(color='cornflowerblue', lw=1)
xv.label_dots(adjust=True, fontsize='small')
xv.label_xy()

# Create the output folder first: savefig raises FileNotFoundError if it is missing.
out_path = Path('./test/Test-XYview.pdf')
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path)

## XYpairs

In [None]:
from cbviz.cbviz.xytools import XYpairs

In [None]:
# Index labels of the metabolites to highlight in the pair plots below.
metabolites = [
    'L-Cystine',
    'Cysteine',
    'NADH',
]

In [None]:
# Load the full metabolite logFC signature table (first CSV column becomes the index).
# NOTE(review): `all` shadows the Python builtin; the following cells read this
# variable, so the name is left unchanged here.
all = pd.read_csv('./Metabolite-logFC-Signatures.csv', index_col=0)

In [None]:
# 2 % of the overall value range (max - min) taken over every cell of the table.
# NOTE(review): presumably used to choose a plot margin/offset — confirm.
flat_values = all.values.ravel()
np.ptp(flat_values) * 0.02

In [None]:
# Swap columns 0<->1 and 2<->3 by position.
# NOTE(review): this rebinds `all` from itself, so the cell is not idempotent —
# running it a second time swaps the columns back to the original order.
column_order = [1,0,3,2]
all = all.iloc[:, column_order]

In [None]:
# Boolean mask over the table's index: True for the metabolites to highlight.
cond = all.index.isin(metabolites)
# Pairwise scatter layout over all columns of the table.
# NOTE(review): presumably lower_tri=False places the panels in the upper triangle — confirm in cbviz.xytools.
xp = XYpairs(all, lower_tri=False)

In [None]:
plt.style.use('cviz')

fig = plt.figure(figsize=(5, 5))
gs = xp.add_gridspec(hspace=0.3, wspace=0.4)

# Per-point styling: highlighted metabolites are larger, red and fully opaque.
scatter_style = dict(
    s=np.where(cond, 40, 10),
    c=np.where(cond, 'r', '0.5'),
    alpha=np.where(cond, 1, 0.5),
)
# The single column pair whose correlation text is placed with loc=1; all others use loc=4.
# NOTE(review): the comparison below only matches if pair.combo is a list — confirm its type.
loc1_combo = ['FG_effect_sgNT', 'sgARNTL2_effect_FG']

pairs = xp.get_pairs(highlight=metabolites, **scatter_style)
for pair in pairs:
    # One subplot per pair, placed at the grid position the pair carries.
    ax = fig.add_subplot(gs[pair.position])
    ax.tick_params(labelsize='xx-small', pad=0)

    xv = pair.XYview
    ax.scatter(xv.x, xv.y, **xv.scatter_kw)
    xv.add_reg_line()
    xv.label_dots(adjust=True, fontsize='x-small')
    xv.add_correlation(loc=1 if pair.combo == loc1_combo else 4)
    xv.label_xy(fontsize='x-small', outer=True)

## XYzoom

In [None]:
# Index labels of the metabolites of interest.
metabolites = ['L-Cystine', 'Cysteine', 'NADH']
# Load the metabolite logFC signature table (first CSV column becomes the index).
# The frame is named `logfc_all` rather than `all` so the builtin all() stays usable.
logfc_all = pd.read_csv('./Metabolite-logFC-Signatures.csv', index_col=0)
# Keep the 1st and 3rd signature columns as the x/y pair; .copy() detaches the slice.
df = logfc_all.iloc[:, [0,2]].copy()

In [None]:
from cbviz.cbviz.xytools import XYzoom

In [None]:
# The same ('min', 0) specification is passed for both the 2nd and 3rd argument.
# NOTE(review): presumably these are the x and y zoom ranges, from the minimum up to 0 — confirm in cbviz.xytools.
zoom_spec = ('min', 0)
xz = XYzoom(df, zoom_spec, zoom_spec)

In [None]:
# Row-wise mean over the columns of the zoomed data.
row_means = xz.zoom_data.mean(axis=1)
# Index labels of the five rows with the smallest mean.
top5 = row_means.nsmallest(5).index.to_list()
# Boolean mask over the full data index marking those five rows.
cond = xz.data.df.index.isin(top5)

In [None]:
# Display the five selected index labels.
top5

In [None]:
plt.style.use('cviz')

# xz, top5 and cond are rebuilt here exactly as in the cells above, so this cell
# only needs df to be defined.
zoom_spec = ('min', 0)
xz = XYzoom(df, zoom_spec, zoom_spec)

row_means = xz.zoom_data.mean(axis=1)
top5 = row_means.nsmallest(5).index.to_list()
cond = xz.data.df.index.isin(top5)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(4,2))

# Right-hand axes: scatter with the object's own styling, plus the x=y line and rectangle helpers.
ax2.scatter(xz.x, xz.y, **xz.scatter_kw)
xz.add_xy_line(ax2)
xz.add_rect(ax=ax2)

# Left-hand axes: same coordinates, with the five selected rows drawn larger and in red.
left_style = dict(s=np.where(cond, 20, 5), c=np.where(cond, 'r', '.5'), alpha=0.5, linewidths=0)
ax1.scatter(xz.x, xz.y, **left_style)
xz.connect(ax2, ax1, 'left', lw=0.5, ls=':')
xz.label_xy(ax=ax1, outer=False, fontsize='x-small')

# Thin plain line (no arrow head) between each label and its point.
label_arrows = {'arrowprops':{'arrowstyle':'-', 'lw':0.2}}
xz.label_dots(top5, ax=ax1, adjust=True, adjust_kwargs=label_arrows, fontsize=4)

# Boxplots

## Box plus strip plot

In [None]:
# Synthetic input for the box/strip plot: four groups whose means increase from 0 to 3.
# A locally seeded generator makes the cell give the same data on every run.
rng = np.random.default_rng(0)
group_sizes = (30, 80, 50, 30)

s1 = np.repeat(['WT', 'Balanced', 'Minor', 'Major'], group_sizes)
# One block of normal draws per group, centred on 0, 1, 2, 3, concatenated in s1 order.
x = np.concatenate([rng.normal(loc=loc, size=size) for loc, size in zip(range(4), group_sizes)])

data = pd.DataFrame({"x" : x, "s1" : s1})
data

In [None]:
from cbviz.cbviz.bptools import StripBox

In [None]:
# Explicit s1 level order for the boxes; 'Anova' is passed as the global p-value method.
group_order = ['WT', 'Balanced', 'Minor', 'Major']
sb = StripBox(data, p_method_global='Anova', s1_order=group_order)

In [None]:
plt.style.use('cviz')

fig, ax = plt.subplots(figsize=(2.5,2.5))

# Styling forwarded to add_strips for the individual points.
strip_style = dict(s=20, alpha=0.50, linewidths=0)

# The StripBox helpers are called without an explicit axes.
# NOTE(review): presumably they draw on the current axes (the one just created) — confirm in cbviz.bptools.
sb.boxplt()
sb.add_strips(**strip_style)
sb.add_global_p(fontsize='xx-small')

In [None]:
# Compute pairwise p-values on the StripBox object, passing 'fdr_bh' as the adjustment method.
# NOTE(review): presumably the result is what add_pair_p in the next plotting cell reads — confirm.
sb.calc_pairwise_p(adj_method='fdr_bh')

In [None]:
plt.style.use('cviz')

fig, ax = plt.subplots(figsize=(3, 2.5))

# Styling for the individual points and the single pairwise comparison to annotate.
strip_style = dict(s=20, alpha=0.50, linewidths=0)
pair_annotation = dict(groupA='WT', groupB='Minor', yoffset=0.5, cut_p=True)

sb.boxplt()
sb.add_strips(**strip_style)
sb.add_pair_p(**pair_annotation)

In [None]:
from scipy.stats import f_oneway, ttest_ind

In [None]:
# Two samples of 100 normal draws: x centred on 0, y centred on 1.
# A locally seeded generator makes the cell give the same data on every run.
rng = np.random.default_rng(0)
x, y = (rng.normal(loc, size=100) for loc in range(2))

In [None]:
# Convert the grouping column to a categorical dtype, overwriting the column in place.
# NOTE(review): this mutates the `data` frame created by an earlier cell; objects
# built from it before this point may or may not see the change.
data['s1'] = data['s1'].astype('category')

In [None]:
def cat_cleaner(series):
    """Drop unused categories from a categorical Series.

    Parameters
    ----------
    series : pandas.Series
        Any Series; meant to be used with ``DataFrame.apply`` so every column is
        passed through in turn.

    Returns
    -------
    pandas.Series
        A Series with unused categories removed if ``series`` is categorical,
        otherwise ``series`` itself, unchanged.
    """
    # isinstance on the dtype replaces pandas.api.types.is_categorical_dtype, which
    # is deprecated since pandas 2.1 and emits a warning on every call.
    # getattr keeps objects without a dtype passing through untouched, as before.
    if isinstance(getattr(series, 'dtype', None), pd.CategoricalDtype):
        return series.cat.remove_unused_categories()
    return series

In [None]:
# s1 values of the rows belonging to 'WT' or 'Balanced', without any category clean-up:
# a categorical column still lists all original categories here.
wt_or_balanced = data['s1'].isin(['WT', 'Balanced'])
data.loc[wt_or_balanced, 's1']

In [None]:
# Same row subset, with cat_cleaner applied to every column before s1 is selected.
wt_or_balanced = data['s1'].isin(['WT', 'Balanced'])
subset_cleaned = data[wt_or_balanced].apply(cat_cleaner)
subset_cleaned['s1']

# Dotplot

In [None]:
# Input table for the dot plot; the cells below use its 'x', 'y', 'fdr' and 'med_tstat' columns.
# NOTE(review): rebinds `df`, which earlier sections use for other tables.
df = pd.read_csv('./Dotplot_input.csv')

In [None]:
from cbviz.cbviz.xytools import Dotplot

In [None]:
import os

# Location of the Hallmark category table. The default is the original author's local
# path; set the HALLMARK_SETS_TSV environment variable to run the cell on another machine.
hallmark_tsv = os.environ.get(
    'HALLMARK_SETS_TSV',
    '/home/carlo/Documents/Data/HALLMARK_related/Hallmark_Sets_PerCategory.tsv',
)
hm = pd.read_table(hallmark_tsv)
# Keep only the gene sets that occur in the dot-plot input.
hm = hm[hm['Name'].isin(df['y'])]
# Make y categorical with its categories in the table's row order.
# reorder_categories raises ValueError unless the list holds exactly the existing categories.
df['y'] = df['y'].astype('category').cat.reorder_categories(hm['Name'].to_list())

In [None]:
# Show the Hallmark table sorted by its 'Process' column (result is displayed, not stored).
# The stray second positional argument `[]` was removed: sort_values takes only `by`
# positionally in current pandas, so the original call raised instead of sorting.
hm.sort_values(['Process'])

In [None]:
# Display the y column to check its categorical dtype and category order.
df['y']

In [None]:
# Build the Dotplot helper from df and four column names.
# NOTE(review): presumably 'x'/'y' give the dot positions, 'fdr' the dot size and
# 'med_tstat' the dot colour — confirm against cbviz.xytools.Dotplot.
dp = Dotplot(df, 'x', 'y', 'fdr', 'med_tstat')

In [None]:
# Five bin edges define four intervals; one marker size is supplied per interval.
# NOTE(review): presumably reverse=True gives the largest size to the smallest values — confirm.
size_bins = [0, 1e-20, 1e-5, 0.05, 1]
marker_areas = (0, 5, 20, 50)
dp.cut_size(bins=size_bins, sizes_out=marker_areas, reverse=True)

In [None]:
# Display the `bins` attribute of the Dotplot object after the cut_size call above.
dp.bins

In [None]:
from matplotlib.colors import CenteredNorm

In [None]:
from pathlib import Path

plt.style.use('cviz')
fig, ax = plt.subplots(figsize=(1.5, 5))

# Dot area comes from the binned sizes; colour is the raw statistic on a diverging
# colormap centred on 0.
# NOTE(review): `sp` is also the name of the SplitViolin object in an earlier section.
sp = ax.scatter(dp.x, dp.y, s=dp.size_cut, c=dp.color_raw, cmap=plt.cm.coolwarm, norm=CenteredNorm(0))
dp.set_ticklabels(fontsize='x-small')
ax.set_xlim(-0.5, 5.5)

# Size legend, anchored to the right of the axes.
leg = ax.legend(handles=dp.get_size_handles(reverse=True, marker_sizes=(4, 7, 9)), bbox_to_anchor=(1, 1, 0.2, 0), fontsize='x-small')
leg.get_frame().set_linewidth(0.4)

# Colorbar in its own axes; in figure coordinates x=1 is the right edge of the figure.
cax = fig.add_axes([1, 0.7, 0.4, 0.03])
# Build the colorbar first: creating it resets the tick locator of `cax`, so ticks
# set on the bare axes beforehand would be discarded.
cbar = fig.colorbar(sp, cax=cax, orientation='horizontal')
cbar.set_ticks([-2.5, 0, 2.5], labels=[-2.5, 0, 2.5], fontsize='x-small')
cax.set_title('Progression stat', loc='left', fontsize='xx-small')

# Create the output folder first: savefig raises FileNotFoundError if it is missing.
out_path = Path('Figures/DotPlot-HALLMARKvsRPcluster.pdf')
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path)

# Corrplot

In [16]:
# Input table for the correlation plot.
# NOTE(review): this file is written by the `lab.to_csv` cell further down, so on a
# fresh checkout that cell has to run first.
df = pd.read_csv('./Corrplot_input.csv')

In [19]:
# Pairwise Spearman rank correlation between the columns of df (displayed, not stored).
df.corr(method='spearman')

Unnamed: 0,Albumin,AP,Bilirubin,Harnstoff,Ca,Cholesterin,Krea,CRP,GGT,Glucose,...,HDL,K,LDH,LDL,Mg,Na,Ph,Eiweiß,Triglyceride,GFR
Albumin,1.0,0.076034,0.212077,0.178567,0.560017,0.40208,0.187759,-0.138873,0.102578,0.096729,...,0.39987,-0.029486,0.01944,0.283477,0.50634,0.226556,0.223297,0.686272,-0.04578,-0.068425
AP,0.076034,1.0,0.03577,0.24493,0.048402,0.026146,0.127143,0.315335,0.300764,0.26399,...,-0.147838,-0.033569,0.087214,0.058111,0.064255,0.068347,0.202417,0.253042,0.241931,-0.228731
Bilirubin,0.212077,0.03577,1.0,0.028606,0.192427,-0.024652,0.222985,0.083903,0.33616,0.079386,...,0.010779,-0.044543,0.167542,0.019142,0.138174,-0.000204,-0.094205,0.179917,-0.065004,-0.101946
Harnstoff,0.178567,0.24493,0.028606,1.0,0.036841,0.151802,0.607021,0.185489,0.159764,0.223356,...,0.078934,0.112081,0.127898,0.107721,0.049889,0.125243,0.023701,0.097734,0.131096,-0.642785
Ca,0.560017,0.048402,0.192427,0.036841,1.0,0.353741,-0.037106,-0.070424,0.06515,0.010031,...,0.203992,0.190663,0.001358,0.244891,0.241927,0.305719,0.285881,0.601331,-0.050764,0.06389
Cholesterin,0.40208,0.026146,-0.024652,0.151802,0.353741,1.0,0.076844,0.009114,0.125092,0.114806,...,0.244159,0.164351,-0.088293,0.905024,0.3598,0.200927,0.060853,0.456088,0.24566,-0.121166
Krea,0.187759,0.127143,0.222985,0.607021,-0.037106,0.076844,1.0,0.099329,0.272895,0.108529,...,0.135056,0.15009,0.098182,0.046728,0.181645,0.062578,-0.121822,0.0309,0.150596,-0.827931
CRP,-0.138873,0.315335,0.083903,0.185489,-0.070424,0.009114,0.099329,1.0,0.283574,0.24532,...,-0.089744,-0.020259,0.047983,-0.037004,-0.06293,-0.111295,0.110545,0.127321,0.162654,-0.181747
GGT,0.102578,0.300764,0.33616,0.159764,0.06515,0.125092,0.272895,0.283574,1.0,0.21402,...,-0.221981,0.0411,0.135209,0.125207,0.002218,0.086436,-0.102864,0.234131,0.401616,-0.166622
Glucose,0.096729,0.26399,0.079386,0.223356,0.010031,0.114806,0.108529,0.24532,0.21402,1.0,...,-0.041467,0.044744,0.220963,0.093657,-0.008635,0.010976,-0.063787,0.112233,0.273941,-0.13758


In [16]:
# Keep only the part of each column name before the first '('.
# NOTE(review): `lab` is not defined in any visible cell, so this fails on a fresh kernel;
# presumably the dropped suffix is a unit such as "(mg/dl)" — confirm.
lab.columns = lab.columns.str.split('(').str[0]

In [17]:
# Remove the 'Cl' column. errors='ignore' keeps the cell re-runnable: the original
# in-place drop raised KeyError on a second run because the column was already gone.
lab = lab.drop(columns='Cl', errors='ignore')

In [19]:
# Write the cleaned table to the CSV that the Corrplot cell above reads.
# The index is written as the first (unnamed) column because index=False is not passed.
lab.to_csv('./Corrplot_input.csv')

In [None]:
# Input table for the dot plot; the cells below use its 'x', 'y', 'fdr' and 'med_tstat' columns.
# NOTE(review): this cell and the ones that follow repeat the Dotplot section above verbatim.
df = pd.read_csv('./Dotplot_input.csv')

In [None]:
from cbviz.cbviz.xytools import Dotplot

In [None]:
import os

# Location of the Hallmark category table. The default is the original author's local
# path; set the HALLMARK_SETS_TSV environment variable to run the cell on another machine.
hallmark_tsv = os.environ.get(
    'HALLMARK_SETS_TSV',
    '/home/carlo/Documents/Data/HALLMARK_related/Hallmark_Sets_PerCategory.tsv',
)
hm = pd.read_table(hallmark_tsv)
# Keep only the gene sets that occur in the dot-plot input.
hm = hm[hm['Name'].isin(df['y'])]
# Make y categorical with its categories in the table's row order.
# reorder_categories raises ValueError unless the list holds exactly the existing categories.
df['y'] = df['y'].astype('category').cat.reorder_categories(hm['Name'].to_list())

In [None]:
# Show the Hallmark table sorted by its 'Process' column (result is displayed, not stored).
# The stray second positional argument `[]` was removed: sort_values takes only `by`
# positionally in current pandas, so the original call raised instead of sorting.
hm.sort_values(['Process'])

In [None]:
# Display the y column to check its categorical dtype and category order.
df['y']

In [None]:
# Build the Dotplot helper from df and four column names.
# NOTE(review): presumably 'x'/'y' give the dot positions, 'fdr' the dot size and
# 'med_tstat' the dot colour — confirm against cbviz.xytools.Dotplot.
dp = Dotplot(df, 'x', 'y', 'fdr', 'med_tstat')

In [None]:
# Five bin edges define four intervals; one marker size is supplied per interval.
# NOTE(review): presumably reverse=True gives the largest size to the smallest values — confirm.
size_bins = [0, 1e-20, 1e-5, 0.05, 1]
marker_areas = (0, 5, 20, 50)
dp.cut_size(bins=size_bins, sizes_out=marker_areas, reverse=True)

In [None]:
# Display the `bins` attribute of the Dotplot object after the cut_size call above.
dp.bins

In [None]:
from matplotlib.colors import CenteredNorm

In [None]:
from pathlib import Path

plt.style.use('cviz')
fig, ax = plt.subplots(figsize=(1.5, 5))

# Dot area comes from the binned sizes; colour is the raw statistic on a diverging
# colormap centred on 0.
# NOTE(review): `sp` is also the name of the SplitViolin object in an earlier section.
sp = ax.scatter(dp.x, dp.y, s=dp.size_cut, c=dp.color_raw, cmap=plt.cm.coolwarm, norm=CenteredNorm(0))
dp.set_ticklabels(fontsize='x-small')
ax.set_xlim(-0.5, 5.5)

# Size legend, anchored to the right of the axes.
leg = ax.legend(handles=dp.get_size_handles(reverse=True, marker_sizes=(4, 7, 9)), bbox_to_anchor=(1, 1, 0.2, 0), fontsize='x-small')
leg.get_frame().set_linewidth(0.4)

# Colorbar in its own axes; in figure coordinates x=1 is the right edge of the figure.
cax = fig.add_axes([1, 0.7, 0.4, 0.03])
# Build the colorbar first: creating it resets the tick locator of `cax`, so ticks
# set on the bare axes beforehand would be discarded.
cbar = fig.colorbar(sp, cax=cax, orientation='horizontal')
cbar.set_ticks([-2.5, 0, 2.5], labels=[-2.5, 0, 2.5], fontsize='x-small')
cax.set_title('Progression stat', loc='left', fontsize='xx-small')

# Create the output folder first: savefig raises FileNotFoundError if it is missing.
out_path = Path('Figures/DotPlot-HALLMARKvsRPcluster.pdf')
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path)

In [None]:
# Input table for the dot plot; the cells below use its 'x', 'y', 'fdr' and 'med_tstat' columns.
# NOTE(review): this cell and the ones that follow repeat the Dotplot section above verbatim.
df = pd.read_csv('./Dotplot_input.csv')

In [None]:
from cbviz.cbviz.xytools import Dotplot

In [None]:
import os

# Location of the Hallmark category table. The default is the original author's local
# path; set the HALLMARK_SETS_TSV environment variable to run the cell on another machine.
hallmark_tsv = os.environ.get(
    'HALLMARK_SETS_TSV',
    '/home/carlo/Documents/Data/HALLMARK_related/Hallmark_Sets_PerCategory.tsv',
)
hm = pd.read_table(hallmark_tsv)
# Keep only the gene sets that occur in the dot-plot input.
hm = hm[hm['Name'].isin(df['y'])]
# Make y categorical with its categories in the table's row order.
# reorder_categories raises ValueError unless the list holds exactly the existing categories.
df['y'] = df['y'].astype('category').cat.reorder_categories(hm['Name'].to_list())

In [None]:
# Show the Hallmark table sorted by its 'Process' column (result is displayed, not stored).
# The stray second positional argument `[]` was removed: sort_values takes only `by`
# positionally in current pandas, so the original call raised instead of sorting.
hm.sort_values(['Process'])

In [None]:
# Display the y column to check its categorical dtype and category order.
df['y']

In [None]:
# Build the Dotplot helper from df and four column names.
# NOTE(review): presumably 'x'/'y' give the dot positions, 'fdr' the dot size and
# 'med_tstat' the dot colour — confirm against cbviz.xytools.Dotplot.
dp = Dotplot(df, 'x', 'y', 'fdr', 'med_tstat')

In [None]:
# Five bin edges define four intervals; one marker size is supplied per interval.
# NOTE(review): presumably reverse=True gives the largest size to the smallest values — confirm.
size_bins = [0, 1e-20, 1e-5, 0.05, 1]
marker_areas = (0, 5, 20, 50)
dp.cut_size(bins=size_bins, sizes_out=marker_areas, reverse=True)

In [None]:
# Display the `bins` attribute of the Dotplot object after the cut_size call above.
dp.bins

In [None]:
from matplotlib.colors import CenteredNorm

In [None]:
from pathlib import Path

plt.style.use('cviz')
fig, ax = plt.subplots(figsize=(1.5, 5))

# Dot area comes from the binned sizes; colour is the raw statistic on a diverging
# colormap centred on 0.
# NOTE(review): `sp` is also the name of the SplitViolin object in an earlier section.
sp = ax.scatter(dp.x, dp.y, s=dp.size_cut, c=dp.color_raw, cmap=plt.cm.coolwarm, norm=CenteredNorm(0))
dp.set_ticklabels(fontsize='x-small')
ax.set_xlim(-0.5, 5.5)

# Size legend, anchored to the right of the axes.
leg = ax.legend(handles=dp.get_size_handles(reverse=True, marker_sizes=(4, 7, 9)), bbox_to_anchor=(1, 1, 0.2, 0), fontsize='x-small')
leg.get_frame().set_linewidth(0.4)

# Colorbar in its own axes; in figure coordinates x=1 is the right edge of the figure.
cax = fig.add_axes([1, 0.7, 0.4, 0.03])
# Build the colorbar first: creating it resets the tick locator of `cax`, so ticks
# set on the bare axes beforehand would be discarded.
cbar = fig.colorbar(sp, cax=cax, orientation='horizontal')
cbar.set_ticks([-2.5, 0, 2.5], labels=[-2.5, 0, 2.5], fontsize='x-small')
cax.set_title('Progression stat', loc='left', fontsize='xx-small')

# Create the output folder first: savefig raises FileNotFoundError if it is missing.
out_path = Path('Figures/DotPlot-HALLMARKvsRPcluster.pdf')
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path)