In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn import preprocessing

import seaborn as sns
import matplotlib.pyplot as plt

from bioinf_common.plotting import corrplot

In [None]:
# Apply seaborn's 'talk' plotting context to every figure created below.
sns.set_context('talk')

# Parameters

In [None]:
# `snakemake` is not defined anywhere in this notebook; presumably it is
# injected by Snakemake's notebook integration at run time -- TODO confirm.
expr_fname = snakemake.input.expr_fname
info_fname = snakemake.input.info_fname

out_dir = Path(snakemake.output.out_dir)
# Figures are written below `out_dir` later on, and savefig does not create
# missing directories, so make sure the directory exists up front.
out_dir.mkdir(parents=True, exist_ok=True)

# Load data

In [None]:
# Expression table indexed by node; the 'node' column is forced to str so
# that node names are never parsed as numbers.
df_expr = (
    pd.read_csv(expr_fname, dtype={'node': str})
    .set_index('node')
)
df_expr.head()

In [None]:
# Sample annotation table. Later cells read its 'sample' and 'condition'
# columns; 'sample' values are used as column labels of df_expr.
df_info = pd.read_csv(info_fname)
df_info.head()

# Correlation clustermap

In [None]:
# Nodes are grouped by the part of their name before the first underscore;
# every group is encoded as an integer and mapped to one palette colour.
node_groups = [node.split('_')[0] for node in df_expr.index]
group_idx = preprocessing.LabelEncoder().fit_transform(node_groups)

pal = sns.color_palette('tab10')

# Wrap around the palette instead of indexing it directly: 'tab10' only holds
# ten colours, so more than ten groups would otherwise raise an IndexError.
# (With more than ten groups some colours are shared between groups.)
df_node_colors = pd.DataFrame(
    {'color': [pal[i % len(pal)] for i in group_idx]},
    index=df_expr.index.rename('node'),
)

df_node_colors.head()

In [None]:
# Node-by-node correlation: DataFrame.corr() works column-wise, so transpose
# first to put the nodes (rows of df_expr) on the columns.
node_corr = df_expr.T.corr()

g = sns.clustermap(
    node_corr,
    row_colors=df_node_colors,
    col_colors=df_node_colors,
    xticklabels=True,
    yticklabels=True,
)

# Re-apply the tick labels of the heatmap with a smaller font size.
heatmap_ax = g.ax_heatmap
heatmap_ax.set_xticklabels(heatmap_ax.get_xmajorticklabels(), fontsize=8)
heatmap_ax.set_yticklabels(heatmap_ax.get_ymajorticklabels(), fontsize=8)

clustermap_path = out_dir / 'clustermap.pdf'
g.savefig(clustermap_path)

# Compare expression levels between conditions

In [None]:
# Node whose expression is compared between conditions below.
# The last row of df_expr is used; per the original author's note this is a
# downstream node which should exhibit a large effect. This depends on the row
# order of the expression file -- TODO confirm.
# Alternative (disabled): pick a random node instead, e.g.
#   node_selection = np.random.choice(df_expr.index, 1, replace=False)[0]
node_selection = df_expr.index[-1]

In [None]:
# Draw into a dedicated figure/axes instead of relying on pyplot's implicit
# "current axes", which may still belong to an earlier figure (e.g. the
# clustermap) when the notebook is not run with the inline backend.
fig, ax = plt.subplots()

# One histogram per condition, using the expression values of the selected
# node in the samples that belong to that condition.
for condition, group in df_info.groupby('condition'):
    values = df_expr.loc[node_selection, group['sample']].values.ravel()
    # NOTE(review): sns.distplot is deprecated since seaborn 0.11 in favour of
    # sns.histplot -- switch once the seaborn version in use is confirmed.
    sns.distplot(values, kde=False, label=condition, ax=ax)

ax.set_title(node_selection)
ax.set_xlabel('expression')
ax.set_ylabel('count')  # kde=False makes distplot show raw counts
ax.legend(loc='best');