In [None]:
%load_ext autoreload 
%autoreload 2

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

# Data classes

In [None]:
from cbviz.cbviz.utils import DataNum

In [None]:
# Load the metabolite log-fold-change signature table; the first CSV column becomes the index.
# NOTE(review): the name `all` shadows Python's builtin all() for the rest of the kernel session.
all = pd.read_csv('./Metabolite-logFC-Signatures.csv', index_col=0)
# Keep only the columns at positions 0 and 2, as an independent copy.
df = all.iloc[:, [0,2]].copy()

# Wrap the two-column frame in the project's DataNum container.
# presumably `ncols=2` states the expected number of numeric columns — TODO confirm in cbviz.utils.
dn = DataNum(df, ncols=2)

In [None]:
# Compare the dtype label pandas infers for each column of `df` with the
# label expected for floating-point data.
# `infer_dtype` is imported in this cell because no earlier cell brings it
# into scope; without the import a fresh kernel raises NameError here.
from pandas.api.types import infer_dtype

expected = np.repeat('floating', 1)
observed = df.apply(infer_dtype).values
type(expected)

In [None]:
# Synthetic mixed-type test frame: one numeric column and two grouping columns.
# A locally seeded generator makes the draws identical on every run.
rng = np.random.default_rng(0)

# 190 group labels (30/80/50/30 per level) in shuffled order.
s1 = rng.permutation(np.repeat(['WT', 'Balanced', 'Minor', 'Major'], (30, 80, 50, 30)))
# 57 'Basal-like' rows followed by 133 'Classical' rows.
s2 = np.repeat(['Basal-like', "Classical"], (57, 133))
# Normal draws centred at +2 for the first 57 rows and at -2 for the remaining 133.
x = np.concatenate([rng.normal(loc=mu, size=n_obs) for mu, n_obs in zip((2, -2), (57, 133))])

df2 = pd.DataFrame({"x" : x, "s1" : s1, "s2" : s2})
# s1 is stored as a categorical column, s2 stays a plain string column.
df2['s1'] = df2['s1'].astype('category')

In [None]:
from cbviz.cbviz.utils import DataMix

In [None]:
DataMix(df2, ncat=2)

# KDE related 

## Split violin 

In [None]:
# Synthetic input for the split violin: a numeric column `x` plus two grouping columns.
# The generator is seeded so the figure is reproducible from a fresh kernel.
rng = np.random.default_rng(0)

# 190 shuffled s1 labels (30/80/50/30 per level).
s1 = rng.permutation(np.repeat(['WT', 'Balanced', 'Minor', 'Major'], (30, 80, 50, 30)))
# 57 'Basal-like' rows followed by 133 'Classical' rows.
s2 = np.repeat(['Basal-like', "Classical"], (57, 133))
# One block of normal draws per s2 level: mean +2 (57 values), then mean -2 (133 values).
x = np.concatenate([rng.normal(loc=mu, size=n_obs) for mu, n_obs in zip((2, -2), (57, 133))])

data = pd.DataFrame({"x" : x, "s1" : s1, "s2" : s2})

In [None]:
from cbviz.cbviz.kdetools import SplitViolin

In [None]:
sp = SplitViolin(data, s1_order=['WT', 'Balanced', 'Minor', 'Major'], s2_order=['Classical', 'Basal-like'])

In [None]:
# Draw every KDE returned by `sp.get_violins` as a filled shape and add the s2 legend.
plt.style.use('cviz')
fig, ax = plt.subplots(figsize=(3,2.5))

# One colour per s2 level; a single name feeds both the violins and the
# legend so the two cannot drift apart.
violin_colors = ('cornflowerblue', 'salmon')

for kde in sp.get_violins(colors=violin_colors):
    ax.fill_betweenx(kde.density, kde.grid, facecolor=kde.color, alpha=0.5)
    # presumably marks the mode of each density — TODO confirm in cbviz.kdetools
    ax.plot(kde.mode.ycoords, kde.mode.xcoords, lw=0.5, c='k')

# get_s1_ticks() is unpacked into (positions, labels) for set_yticks.
ax.set_yticks(*sp.get_s1_ticks())

leg = ax.legend(handles=sp.get_s2_legend(colors=violin_colors),
                loc='upper left', handlelength=0.4,
                bbox_to_anchor=(0, 1.0, 0, 0.15), ncol=2, fontsize='x-small')
leg.get_frame().set_linewidth(0.2)

# Dotted guide line at every s1 tick position; a plain loop is used because
# the calls are made only for their drawing side effect.
for tick in sp.get_s1_ticks()[0]:
    ax.axhline(tick, lw=0.5, ls=":", c='0.5')
ax.set_xlabel(sp.ylabel)
# fig.savefig('./test/Test-SplitViolin.pdf')

## Ridge plot

In [None]:
# Synthetic input for the ridge plot: four groups whose means are 0, 1, 2 and 3.
# The generator is seeded so the data are identical on every run.
rng = np.random.default_rng(0)

s1 = np.repeat(['WT', 'Balanced', 'Minor', 'Major'], (30, 80, 50, 30))
# One block of normal draws per group, concatenated in group order.
x = np.concatenate([rng.normal(loc=mu, size=n_obs) for mu, n_obs in zip(range(4), (30, 80, 50, 30))])

data = pd.DataFrame({"x" : x, "s1" : s1})
# Preview only the first rows instead of echoing the whole frame.
data.head()

In [None]:
from cbviz.cbviz.kdetools import Ridge

In [None]:
rp = Ridge(data, s1_order=['WT','Balanced', 'Minor', 'Major'], scale_factor=1.2)

In [None]:
# Draw every KDE returned by `rp.get_kdes` as a filled shape on one axes.
plt.style.use('cviz')
fig, ax = plt.subplots(figsize=(3,2.5))

for kde in rp.get_kdes():
    ax.fill_betweenx(kde.density, kde.grid, facecolor=kde.color, alpha=0.3)
    # presumably marks the mode of each density — TODO confirm in cbviz.kdetools
    ax.plot(kde.mode.ycoords, kde.mode.xcoords, lw=0.1, c='k')

# get_s1_ticks() is unpacked into (positions, labels) for set_yticks.
ax.set_yticks(*rp.get_s1_ticks())
ax.set_xlabel(rp.ylabel)

# Dotted guide line at each s1 tick. A plain loop is used instead of a list
# comprehension so the cell does not echo a list of Line2D objects.
for tick in rp.get_s1_ticks()[0]:
    ax.axhline(tick, ls=':', lw=0.25, c='0.15')

# Scatterplots

## XYview

In [None]:
metabolites = ['L-Cystine', 'Cysteine', 'NADH']

In [None]:
all = pd.read_csv('./Metabolite-logFC-Signatures.csv', index_col=0)
df = all.iloc[:, [0,2]].copy()

In [None]:
from cbviz.cbviz.xytools import XYview
from adjustText import adjust_text

In [None]:
# Boolean mask over the rows of `df`: True where the index label is one of the selected metabolites.
cond = df.index.isin(metabolites)
# Selected metabolites get larger (40 vs 10) red markers; every other row is mid-grey ('.5').
# presumably the s/c/alpha keywords are kept on the object and exposed as `xv.scatter_kw` — TODO confirm in cbviz.xytools.
xv = XYview(df, highlight=metabolites, s=np.where(cond, 40, 10), c=np.where(cond, 'r', '.5'), alpha=1)

In [None]:
!pip install adjustText

In [None]:
# Scatter the x/y values held by `xv` and add the XYview annotations, then save the figure as PDF.
plt.style.use('cviz')

fig, ax = plt.subplots(figsize=(3,3))

ax.scatter(xv.x, xv.y, **xv.scatter_kw)
# None of the helper calls below receives an axes; presumably they draw on the
# current axes created above — TODO confirm in cbviz.xytools.
xv.add_correlation(fontsize='small')
xv.add_reg_line(color='cornflowerblue', lw=1)
# presumably adjust=True repositions the labels via adjustText — TODO confirm.
xv.label_dots(adjust=True, fontsize='small')
xv.label_xy()
# NOTE(review): savefig does not create missing directories; './test' must already exist.
fig.savefig('./test/Test-XYview.pdf')

## XYpairs

In [None]:
from cbviz.cbviz.xytools import XYpairs

In [None]:
metabolites = ['L-Cystine', 'Cysteine', 'NADH']

In [None]:
all = pd.read_csv('./Metabolite-logFC-Signatures.csv', index_col=0)

In [None]:
np.ptp(all.values.ravel()) * 0.02

In [None]:
# Select the first four columns in the order 1, 0, 3, 2 for the pair grid.
# The frame is read from the CSV rather than permuted in place: the
# permutation [1,0,3,2] is its own inverse, so an in-place reassignment would
# undo itself whenever this cell is re-run.
all = pd.read_csv('./Metabolite-logFC-Signatures.csv', index_col=0).iloc[:, [1,0,3,2]]

In [None]:
cond = all.index.isin(metabolites)
xp = XYpairs(all, lower_tri=False)

In [None]:
# Pair grid: one scatter panel per pair yielded by `xp.get_pairs`, placed via the grid spec from `xp`.
plt.style.use('cviz')

fig = plt.figure(figsize=(5, 5))

gs = xp.add_gridspec(hspace=0.3, wspace=0.4)

# Highlighted metabolites: larger, red, fully opaque markers; all other rows: small, grey, half transparent.
pairs = xp.get_pairs(highlight=metabolites, s=np.where(cond, 40, 10), c=np.where(cond, 'r', '0.5'), alpha=np.where(cond, 1, 0.5))
for pair in pairs:
    # `pair.position` indexes the grid spec to pick the panel for this pair.
    ax = fig.add_subplot(gs[pair.position])
    ax.tick_params(labelsize='xx-small', pad=0)
    xv = pair.XYview
    ax.scatter(xv.x, xv.y, **xv.scatter_kw)
    xv.add_reg_line()
    xv.label_dots(adjust=True, fontsize='x-small')
    # One specific pair gets its correlation text at loc=1, all others at loc=4.
    # NOTE(review): `==` against a list literal is only True when `pair.combo` is itself a list
    # (a tuple with the same items compares unequal) — confirm the type produced by get_pairs.
    # presumably loc follows matplotlib's location codes (1 = upper right, 4 = lower right) — TODO confirm.
    if pair.combo == ['FG_effect_sgNT', 'sgARNTL2_effect_FG']:
        xv.add_correlation(loc=1)
    else:
        xv.add_correlation(loc=4)
    xv.label_xy(fontsize='x-small', outer=True)

## XYzoom

In [None]:
metabolites = ['L-Cystine', 'Cysteine', 'NADH']
all = pd.read_csv('./Metabolite-logFC-Signatures.csv', index_col=0)
df = all.iloc[:, [0,2]].copy()

In [None]:
from cbviz.cbviz.xytools import XYzoom

In [None]:
xz = XYzoom(df, ('min', 0), ('min', 0))

In [None]:
top5 = xz.zoom_data.mean(1).nsmallest(5).index.to_list()
cond = xz.data.df.index.isin(top5)

In [None]:
top5

In [None]:
# Two-panel zoom figure: ax2 shows the full scatter, ax1 the same points with the selected rows emphasised.
plt.style.use('cviz')

# `xz`, `top5` and `cond` are rebuilt here with the same expressions as the preceding cells,
# so this cell does not depend on those cells having been run.
xz = XYzoom(df, ('min', 0), ('min', 0))

# Index labels of the five rows with the smallest row-wise mean in the zoomed data.
top5 = xz.zoom_data.mean(1).nsmallest(5).index.to_list()
cond = xz.data.df.index.isin(top5)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(4,2))

ax2.scatter(xz.x, xz.y, **xz.scatter_kw)
xz.add_xy_line(ax2)
# presumably outlines the zoom region on the overview panel — TODO confirm in cbviz.xytools.
xz.add_rect(ax=ax2)
# ax1 plots the same x/y values; the five selected rows are larger and red.
# NOTE(review): no axis limits are set on ax1 in this cell — presumably a later helper call restricts it to the zoom window; verify.
ax1.scatter(xz.x, xz.y, s=np.where(cond, 20, 5), c=np.where(cond, 'r', '.5'), alpha=0.5, linewidths=0)
# presumably draws connector lines between the two panels — TODO confirm.
xz.connect(ax2, ax1, 'left', lw=0.5, ls=':')
xz.label_xy(ax=ax1, outer=False, fontsize='x-small')
xz.label_dots(top5, ax=ax1, adjust=True, adjust_kwargs={'arrowprops':{'arrowstyle':'-', 'lw':0.2}}, fontsize=4)

# Boxplots

## Box plus strip plot

In [None]:
# Synthetic input for the box/strip plot: four groups whose means are 0, 1, 2 and 3.
# A seeded generator keeps the data (and the p-values derived from them) reproducible.
rng = np.random.default_rng(0)

s1 = np.repeat(['WT', 'Balanced', 'Minor', 'Major'], (30, 80, 50, 30))
# One block of normal draws per group, concatenated in group order.
x = np.concatenate([rng.normal(loc=mu, size=n_obs) for mu, n_obs in zip(range(4), (30, 80, 50, 30))])

data = pd.DataFrame({"x" : x, "s1" : s1})
# Preview only the first rows instead of echoing the whole frame.
data.head()

In [None]:
from cbviz.cbviz.bptools import StripBox

In [None]:
sb = StripBox(data, p_method_global='Anova', s1_order=['WT', 'Balanced', 'Minor', 'Major'])

In [None]:
# Box plot with overlaid strip points and a global p-value annotation.
plt.style.use('cviz')

fig, ax = plt.subplots(figsize=(2.5,2.5))

# The StripBox helpers receive no axes argument; presumably they draw on the
# current axes created above — TODO confirm in cbviz.bptools.
sb.boxplt()
sb.add_strips(s=20, alpha=0.50, linewidths=0)
# presumably reports the test selected at construction (p_method_global='Anova') — TODO confirm.
sb.add_global_p(fontsize='xx-small')

In [None]:
sb.calc_pairwise_p(adj_method='fdr_bh')

In [None]:
# Box plot with overlaid strip points and one pairwise comparison (WT vs Minor).
plt.style.use('cviz')

fig, ax = plt.subplots(figsize=(3, 2.5))

# The StripBox helpers receive no axes argument; presumably they draw on the
# current axes created above — TODO confirm in cbviz.bptools.
sb.boxplt()
sb.add_strips(s=20, alpha=0.50, linewidths=0)
# NOTE(review): the preceding cell calls sb.calc_pairwise_p(adj_method='fdr_bh'); presumably
# add_pair_p reads those results, so that cell has to run first — verify.
sb.add_pair_p(groupA='WT', groupB='Minor', yoffset=0.5, cut_p=True)

In [None]:
from scipy.stats import f_oneway, ttest_ind

In [None]:
x, y = [np.random.normal(i, size=100) for i in range(2)]

In [None]:
data['s1'] = data['s1'].astype('category')

In [None]:
from pandas.api.types import is_categorical_dtype
def cat_cleaner(series):
    """Drop unused category levels from a categorical Series.

    Parameters
    ----------
    series : pandas.Series
        Column to clean; any dtype is accepted.

    Returns
    -------
    pandas.Series
        A Series with unused categories removed when `series` has a
        categorical dtype; otherwise `series` itself, unchanged.
    """
    # Test the dtype object directly: `is_categorical_dtype` is deprecated in
    # recent pandas releases and emits a warning on every call.
    if isinstance(series.dtype, pd.CategoricalDtype):
        return series.cat.remove_unused_categories()
    return series

In [None]:
data[data['s1'].isin(['WT', 'Balanced'])]['s1']#.apply(cat_cleaner)

In [None]:
data[data['s1'].isin(['WT', 'Balanced'])].apply(cat_cleaner)['s1']

# Dotplot

In [10]:
df = pd.read_csv('./Dotplot_input.csv')
df.columns

Index(['x', 'y', 'fdr', 'med_tstat'], dtype='object')

In [None]:
# Read the Hallmark table and keep only the rows whose 'Name' occurs in df['y'].
# NOTE(review): absolute, user-specific path — this cell fails on any other machine;
# consider a path relative to a configurable data directory.
hm = pd.read_table('/home/carlo/Dokumente/Data/HALLMARK_related/Hallmark_Sets_PerCategory.tsv')
hm = hm[hm['Name'].isin(df['y'])]
# Make 'y' categorical with its levels in the row order of the filtered Hallmark table.
# NOTE(review): reorder_categories needs the new list to hold exactly the existing categories,
# so every distinct df['y'] value must appear once in hm['Name'] — TODO confirm.
df['y'] = df['y'].astype('category').cat.reorder_categories(hm['Name'].to_list())

In [15]:
fdr = df.pivot(index='y', columns='x', values='fdr')

In [16]:
fdr

x,G1,G2,G3,G4,G5,G6
y,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ADIPOGENESIS,0.0003767007,0.01940215,0.001124407,,0.0001320872,
ANDROGEN_RESPONSE,,,6.162847e-07,0.007194684,3.189602e-07,
ANGIOGENESIS,0.01919173,0.03733542,0.04377584,,0.001157737,0.013327
APICAL_JUNCTION,0.0001538726,4.630886e-08,6.96565e-10,2.064868e-06,9.833112e-06,0.006818
APICAL_SURFACE,,,3.122347e-05,0.02643221,0.001920122,
APOPTOSIS,0.0003238057,0.002585901,6.048752e-06,0.0001838375,8.58318e-05,
CHOLESTEROL_HOMEOSTASIS,,,0.0008666621,0.02643221,0.01527341,0.024813
COAGULATION,,0.02689477,1.751366e-08,0.03912711,3.1378e-05,0.013327
COMPLEMENT,,0.006335349,6.371552e-06,,0.0001900031,
E2F_TARGETS,1.462642e-16,6.544159e-05,5.661871e-06,2.523119e-41,3.672083e-18,7e-06


In [None]:
fdr.values.ravel()

In [None]:
xx, yy = np.meshgrid(range(6), range(34))

In [None]:
progr = df.pivot(index='y', columns='x', values='med_tstat')
cls = progr.values.ravel()

In [None]:
# Flattened grid coordinates: one (xx, yy) pair per cell of the pivoted table,
# in the same row-major order as `fdr.values.ravel()`.
# The extents are taken from `fdr` instead of being hard-coded, so the grid
# follows the table when the input changes.
n_rows, n_cols = fdr.shape
xx, yy = [arr.ravel() for arr in np.meshgrid(range(n_cols), range(n_rows))]

In [None]:
df.nunique()

In [None]:
from itertools import product

In [36]:
df[list(('x', 'y'))]

Unnamed: 0,x,y
0,G3,APICAL_JUNCTION
1,G2,APICAL_JUNCTION
2,G4,APICAL_JUNCTION
3,G5,APICAL_JUNCTION
4,G1,APICAL_JUNCTION
...,...,...
152,G4,TGF_BETA_SIGNALING
153,G3,TNFA_SIGNALING_VIA_NFKB
154,G2,TNFA_SIGNALING_VIA_NFKB
155,G4,TNFA_SIGNALING_VIA_NFKB


In [33]:
sum([var is None for var in ['x', 'y', 'fdr', None]])

1

In [21]:
[(*x, *y) for x in list(product(['string', 'categorical'], repeat=2))for y in list(product(['floating', 'integer'], repeat=1))]

[('string', 'string', 'floating'),
 ('string', 'string', 'integer'),
 ('string', 'categorical', 'floating'),
 ('string', 'categorical', 'integer'),
 ('categorical', 'string', 'floating'),
 ('categorical', 'string', 'integer'),
 ('categorical', 'categorical', 'floating'),
 ('categorical', 'categorical', 'integer')]

In [24]:
from pandas.api.types import infer_dtype

In [25]:
infer_dtype(pd.Series(range(100)))

'integer'

In [None]:
# Pivot with keyword arguments, matching the other pivot calls in this notebook;
# pandas >= 2.0 rejects positional `index`/`columns` arguments to DataFrame.pivot.
df[['x', 'y']].reset_index().pivot(index='y', columns='x')

In [None]:
# Bin -log10(FDR) of every pivot cell into five ordered classes labelled 0, 20, 40, 60, 80.
# The bin edges are the -log10 of the FDR cut-offs listed below.
fdr_cutoffs = [1, 0.05, 1e-5, 1e-10, 1e-40, 1e-100]
neg_log_edges = [-np.log10(cutoff) for cutoff in fdr_cutoffs]
neg_log_fdr = -np.log10(fdr.values.ravel())
sz = pd.cut(
    neg_log_fdr,
    bins=neg_log_edges,
    include_lowest=True,
    labels=[0, 20, 40, 60, 80],
)

In [None]:
from matplotlib.lines import Line2D

In [None]:
sz

In [None]:
# Legend proxies for the four visible FDR size classes.
# scatter's `s` is a marker area in points**2, whereas Line2D's `markersize`
# is a length in points, so each proxy uses sqrt(area) to match the dots that
# are drawn with s = 20, 40, 60 and 80.
size_classes = ((20, '<= 0.05'), (40, '<= 1e-5'), (60, '<= 1e-10'), (80, '<= 1e-40'))
handles = [
    Line2D([0], [0], marker='o', color='w', markerfacecolor='k',
           markersize=np.sqrt(area), label=text)
    for area, text in size_classes
]

In [None]:
from matplotlib.colors import CenteredNorm

In [None]:
# Dot plot: one dot per cell of the pivoted table, sized by FDR class (`sz`)
# and coloured by `cls` on a diverging map centred at zero.
# Axis extents and tick positions follow the shape of `fdr` rather than
# hard-coded counts.
n_rows, n_cols = fdr.shape

fig, ax = plt.subplots(figsize=(2,6))
# sz = -np.log10(fdr.values.ravel())*10
# NOTE(review): `sp` is also the name of the SplitViolin object created earlier in the notebook.
sp = ax.scatter(xx, yy, s=sz, c=cls, cmap=plt.cm.coolwarm, norm=CenteredNorm(vcenter=0))
ax.set_xlim(-0.5, n_cols - 0.5)
ax.set_ylim(-0.5, n_rows)
ax.set_yticks(np.arange(n_rows), fdr.index.to_list());
ax.set_xticks(np.arange(n_cols), fdr.columns.to_list());
ax.legend(handles = handles, bbox_to_anchor=(1, 0.8, 0.1, 0.2), fontsize='small', handletextpad=0)
# Separate small axes to the right of the main panel for the horizontal colour bar.
cax = fig.add_axes([0.95, 0.65, 0.35, 0.03])
# Trailing semicolon keeps the Colorbar repr out of the cell output.
fig.colorbar(sp, cax=cax, orientation='horizontal');