In [None]:
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import seaborn as sns

In [None]:
# Single colour lookup for every categorical label drawn in the annotation
# bars of the clustermaps further down. Label Series are coloured with
# `.map(color_map)`, so string labels and the integer flags 0/1 share one dict.
color_map = {
    'acarbose': 'goldenrod',
    'control': 'darkblue',
    'UM': 'darkblue',
    'UT': 'darkgreen',
    'male': 'blue',
    'female': 'magenta',
    'C2013': 'blue',
    'Glenn': 'red',
    'B1A': 'lightblue',
    'B1B': 'lightgreen',
    'untrusted': 'grey',
    0: 'white',
    1: 'black',
}

In [None]:
# Histogram of the `coverage_ratio` values stored for genome 'B1A' in
# `variant_cross_coverage`, drawn with logarithmically spaced bins.
con = sqlite3.connect('data/core.muri.2.denorm.db')

# Squeeze the column axis only: the result is a Series indexed by feature_id
# even when the query returns a single row (a bare .squeeze() would collapse
# that case to a scalar).
cvrg_ratio_B1A = pd.read_sql(
    """
    SELECT feature_id, coverage_ratio
    FROM variant_cross_coverage
    WHERE genome_id = 'B1A'
    """,
    index_col=['feature_id'],
    con=con).squeeze('columns')

# Bin edges from 10**_min to 10**_max, ten bins per decade.
_min = -3.5
_max = 1
bins = np.logspace(_min, _max, num=int((_max - _min) * 10 + 1))

fig, ax = plt.subplots()
# Adding the lowest bin edge as a pseudocount puts a ratio of exactly 0 in the
# first bin instead of below the binned range.
ax.hist(cvrg_ratio_B1A + bins[0], bins=bins)
# NOTE(review): 1/5 is presumably the ratio cutoff used elsewhere — confirm.
ax.axvline(1/5, lw=1, linestyle='--', color='k')

ax.set_xscale('log')
# ax.set_yscale('symlog')  # optional: compress tall bins
ax.set_xlabel('coverage ratio + pseudocount')
ax.set_ylabel('count')
ax.set_title('B1A cross-coverage ratio');

In [None]:
# Histogram of the `coverage_ratio` values stored for genome 'B1B' in
# `variant_cross_coverage`, drawn with logarithmically spaced bins.
# Reuses `con`, the connection to 'data/core.muri.2.denorm.db' opened in the
# B1A histogram cell above, rather than opening a second connection to the
# same file and abandoning the first.

# Squeeze the column axis only: the result is a Series indexed by feature_id
# even when the query returns a single row (a bare .squeeze() would collapse
# that case to a scalar).
cvrg_ratio_B1B = pd.read_sql(
    """
    SELECT feature_id, coverage_ratio
    FROM variant_cross_coverage
    WHERE genome_id = 'B1B'
    """,
    index_col=['feature_id'],
    con=con).squeeze('columns')

# Bin edges from 10**_min to 10**_max, ten bins per decade.
_min = -3.5
_max = 1
bins = np.logspace(_min, _max, num=int((_max - _min) * 10 + 1))

fig, ax = plt.subplots()
# Adding the lowest bin edge as a pseudocount puts a ratio of exactly 0 in the
# first bin instead of below the binned range.
ax.hist(cvrg_ratio_B1B + bins[0], bins=bins)
# NOTE(review): 1/5 is presumably the ratio cutoff used elsewhere — confirm.
ax.axvline(1/5, lw=1, linestyle='--', color='k')
ax.set_xscale('log')
# ax.set_yscale('symlog')  # optional: compress tall bins
ax.set_xlabel('coverage ratio + pseudocount')
ax.set_ylabel('count')
ax.set_title('B1B cross-coverage ratio');

In [None]:
# Library metadata: every row of `library` joined to its extraction, sample
# and mouse records, indexed by library_id.
library = pd.read_sql(
    """
    SELECT *
    FROM library
    JOIN extraction USING (extraction_id)
    JOIN sample USING (sample_id)
    JOIN mouse USING (mouse_id)
    """,
    index_col='library_id',
    con=con
)

# Load the per-genome library lists here so this cell does not depend on
# names that are only assigned in a later cell (breaks Restart & Run All).
with open('data/core.a.mags/B1A.g.library.list') as f:
    library_list_B1A = [line.strip() for line in f]

with open('data/core.a.mags/B1B.g.library.list') as f:
    library_list_B1B = [line.strip() for line in f]

# Default every library to 'untrusted', then overwrite the listed ones.
# This must be a column assignment: `library.loc['trusted'] = ...` would add
# a *row* labelled 'trusted' and leave unlisted libraries as NaN in the
# column created by the next two lines.
library['trusted'] = 'untrusted'
library.loc[library_list_B1A, 'trusted'] = 'B1A'
library.loc[library_list_B1B, 'trusted'] = 'B1B'

In [None]:
# Features of genomes B1A and B1B with the sequence and genome each one
# belongs to, indexed by feature_id.
feature = pd.read_sql(
    """
    SELECT feature_id, sequence_id, genome_id
    FROM feature
    JOIN sequence USING (sequence_id)
    WHERE genome_id IN ('B1A', 'B1B')
    """,
    index_col=['feature_id'],
    con=con
)

# 0/1 flag that flips from one distinct sequence_id to the next.
# pd.factorize numbers the distinct sequence_ids in order of first appearance
# (the same numbering the previous unique()/reset_index() chain produced);
# taking that number modulo 2 gives the alternating flag. Unlike the squeeze()
# based chain, this also works when only one distinct sequence_id is present.
feature['sequence_alternating'] = pd.factorize(feature.sequence_id)[0] % 2

In [None]:
def _load_feature_coverage(con, genome_id):
    """Return a feature-by-library coverage table for one genome.

    Parameters
    ----------
    con : DB-API connection passed straight to ``pd.read_sql``.
    genome_id : value matched against ``genome_id`` in the query; bound as a
        query parameter instead of being written into the SQL text.

    Returns
    -------
    pandas.DataFrame with ``feature_id`` as the index and one column per
    ``library_id``; feature/library pairs absent from the query result are
    filled with 0.
    """
    coverage_long = pd.read_sql(
        """
        SELECT feature_id, library_id, coverage
        FROM feature_library_coverage
        JOIN feature USING (feature_id)
        JOIN sequence USING (sequence_id)
        WHERE genome_id = ?
        """,
        index_col=['feature_id', 'library_id'],
        params=(genome_id,),
        con=con,
    )
    # Select the column by name so the result is always a Series, even for a
    # one-row result (a bare .squeeze() would return a scalar there).
    return coverage_long['coverage'].unstack(fill_value=0)


cvrg_B1A = _load_feature_coverage(con, 'B1A')

cvrg_B1B = _load_feature_coverage(con, 'B1B')



def _read_library_list(path):
    """Return every line of the text file at `path`, whitespace-stripped."""
    entries = []
    with open(path) as handle:
        for raw_line in handle:
            entries.append(raw_line.strip())
    return entries


library_list_B1A = _read_library_list('data/core.a.mags/B1A.g.library.list')
library_list_B1B = _read_library_list('data/core.a.mags/B1B.g.library.list')

# The two lists must not share any entry.
assert set(library_list_B1A).isdisjoint(library_list_B1B)

In [None]:
# Styles

# Keyword arguments shared by the gene-content heatmaps below: the 'copper'
# colormap with a symmetric-log colour scale (linear below 0.1) spanning 0-10.
gene_content_heatmap_kwargs = {
    'cmap': 'copper',
    'norm': mpl.colors.SymLogNorm(linthresh=1e-1, vmin=0, vmax=10, base=10),
}

In [None]:
# Coverage of B1A features in the B1B-listed libraries, each library scaled
# by its own median coverage.

# sorted() makes the column order reproducible; the iteration order of a set
# of strings can change from one kernel session to the next.
_library_list = sorted(set(library_list_B1B) & set(cvrg_B1A.columns))

cvrg_B1A_features_in_B1B_libs = cvrg_B1A[_library_list]
# Column-wise median, i.e. one value per library.
median_cvrg_B1A_in_B1B_libs = cvrg_B1A_features_in_B1B_libs.median()
# NOTE(review): a library whose median is 0 yields inf/NaN ratios — confirm
# that cannot happen for this selection.
ratio_cvrg_B1A_in_B1B_libs = cvrg_B1A_features_in_B1B_libs / median_cvrg_B1A_in_B1B_libs

fig, ax = plt.subplots()
sns.heatmap(ratio_cvrg_B1A_in_B1B_libs.T, ax=ax, **gene_content_heatmap_kwargs)
ax.set_title('B1A features in B1B libraries (coverage / library median)');

In [None]:
# Coverage of B1A features in the B1A-listed libraries, each library scaled
# by its own median coverage.

# sorted() makes the column order reproducible; the iteration order of a set
# of strings can change from one kernel session to the next.
_library_list = sorted(set(library_list_B1A) & set(cvrg_B1A.columns))

cvrg_B1A_features_in_B1A_libs = cvrg_B1A[_library_list]
# Column-wise median, i.e. one value per library.
median_cvrg_B1A_in_B1A_libs = cvrg_B1A_features_in_B1A_libs.median()
# NOTE(review): a library whose median is 0 yields inf/NaN ratios — confirm
# that cannot happen for this selection.
ratio_cvrg_B1A_in_B1A_libs = cvrg_B1A_features_in_B1A_libs / median_cvrg_B1A_in_B1A_libs

fig, ax = plt.subplots()
sns.heatmap(ratio_cvrg_B1A_in_B1A_libs.T, ax=ax, **gene_content_heatmap_kwargs)
ax.set_title('B1A features in B1A libraries (coverage / library median)');

In [None]:
# Coverage of B1B features in the B1A-listed libraries, each library scaled
# by its own median coverage.

# sorted() makes the column order reproducible; the iteration order of a set
# of strings can change from one kernel session to the next.
_library_list = sorted(set(library_list_B1A) & set(cvrg_B1B.columns))

cvrg_B1B_features_in_B1A_libs = cvrg_B1B[_library_list]
# Column-wise median, i.e. one value per library.
median_cvrg_B1B_in_B1A_libs = cvrg_B1B_features_in_B1A_libs.median()
# NOTE(review): a library whose median is 0 yields inf/NaN ratios — confirm
# that cannot happen for this selection.
ratio_cvrg_B1B_in_B1A_libs = cvrg_B1B_features_in_B1A_libs / median_cvrg_B1B_in_B1A_libs

fig, ax = plt.subplots()
sns.heatmap(ratio_cvrg_B1B_in_B1A_libs.T, ax=ax, **gene_content_heatmap_kwargs)
ax.set_title('B1B features in B1A libraries (coverage / library median)');

In [None]:
# Coverage of B1B features in the B1B-listed libraries, each library scaled
# by its own median coverage.

# sorted() makes the column order reproducible; the iteration order of a set
# of strings can change from one kernel session to the next.
_library_list = sorted(set(library_list_B1B) & set(cvrg_B1B.columns))

cvrg_B1B_features_in_B1B_libs = cvrg_B1B[_library_list]
# Column-wise median, i.e. one value per library.
median_cvrg_B1B_in_B1B_libs = cvrg_B1B_features_in_B1B_libs.median()
# NOTE(review): a library whose median is 0 yields inf/NaN ratios — confirm
# that cannot happen for this selection.
ratio_cvrg_B1B_in_B1B_libs = cvrg_B1B_features_in_B1B_libs / median_cvrg_B1B_in_B1B_libs

fig, ax = plt.subplots()
sns.heatmap(ratio_cvrg_B1B_in_B1B_libs.T, ax=ax, **gene_content_heatmap_kwargs)
ax.set_title('B1B features in B1B libraries (coverage / library median)');

In [None]:
# Every column of cvrg_B1A divided by that column's median, drawn without
# reordering.
cvrg_B1A_features = cvrg_B1A
median_cvrg_B1A = cvrg_B1A_features.median(axis='index')
ratio_cvrg_B1A = cvrg_B1A_features.div(median_cvrg_B1A, axis='columns')

# Both clustering switches are off, so rows and columns keep their order.
sns.clustermap(
    ratio_cvrg_B1A.T,
    row_cluster=False,
    col_cluster=False,
    **gene_content_heatmap_kwargs,
)

In [None]:
# Every column of cvrg_B1B divided by that column's median, drawn without
# reordering.
cvrg_B1B_features = cvrg_B1B
median_cvrg_B1B = cvrg_B1B_features.median(axis='index')
ratio_cvrg_B1B = cvrg_B1B_features.div(median_cvrg_B1B, axis='columns')

# Both clustering switches are off, so rows and columns keep their order.
sns.clustermap(
    ratio_cvrg_B1B.T,
    row_cluster=False,
    col_cluster=False,
    **gene_content_heatmap_kwargs,
)

In [None]:
# Combined clustermap of the four per-library-group ratio tables.
# dmat_raw is a 2x2 grid: outer index = library group (B1A libs, B1B libs),
# inner index = feature set (B1A features, B1B features).
dmat_raw = [
    [ratio_cvrg_B1A_in_B1A_libs, ratio_cvrg_B1B_in_B1A_libs],
    [ratio_cvrg_B1A_in_B1B_libs, ratio_cvrg_B1B_in_B1B_libs],
]
# xsplit = number of rows (features) in the first table; ysplit is only needed
# by the disabled axhline below.
xsplit, ysplit = dmat_raw[0][0].shape
# Stack the two feature sets of each library group on the index, then put the
# library groups side by side as columns.
dmat = pd.concat(
    [pd.concat(_group, axis='index') for _group in dmat_raw],
    axis='columns',
)

_row_colors = pd.DataFrame([
    library.site.map(color_map),
    library.trusted.map(color_map),
]).T
_col_colors = pd.DataFrame({
    'MAG': feature.genome_id.map(color_map),
    'contig': feature.sequence_alternating.map(color_map),
})

# Rows (libraries) are clustered by cosine distance; columns keep their order.
cg = sns.clustermap(
    dmat.T,
    xticklabels=0,
    yticklabels=0,
    row_colors=_row_colors,
    col_colors=_col_colors,
    row_cluster=True,
    col_cluster=False,
    metric='cosine',
    cmap='copper',
    norm=mpl.colors.SymLogNorm(1e-1, vmin=0, vmax=10, base=10),
    cbar_kws={'orientation': 'horizontal', 'label': 'relative coverage'},
    cbar_pos=(0.27, 0.02, 0.6, 0.03),
)
ax = cg.ax_heatmap
# White vertical line at the boundary between the two feature sets.
ax.axvline(xsplit, color='w')
# ax.axhline(ysplit, color='w')
ax.set_xlabel('')
ax.set_ylabel('')

# Label each feature set at the horizontal centre of its span.
_n_first = dmat_raw[0][0].shape[0]
_n_second = dmat_raw[0][1].shape[0]
for _text, _xpos in (
    ('B1A Genes', _n_first / 2),
    ('B1B Genes', _n_first + _n_second / 2),
):
    cg.ax_col_colors.annotate(
        _text, xy=(_xpos, 0.5), va='center', ha='center'
    )

# NOTE(review): the row positions 36 and 78 are hard-coded while rows are
# reordered by clustering — confirm the labels still sit on the right groups.
for _text, _ypos in (('B1A Libs.', 36), ('B1B Libs.', 78)):
    cg.ax_row_colors.annotate(
        _text, xy=(1.6, _ypos), va='center', ha='center', rotation=90,
    )

In [None]:
# Combined clustermap of the two all-library ratio tables (B1A features
# followed by B1B features).
dmat_raw = [
    [ratio_cvrg_B1A, ratio_cvrg_B1B],
]
# xsplit = number of rows (features) in the first table; ysplit is only needed
# by the disabled axhline below.
xsplit, ysplit = dmat_raw[0][0].shape
# Stack the feature sets on the index; the outer concat sees a single group.
dmat = pd.concat(
    [pd.concat(_group, axis='index') for _group in dmat_raw],
    axis='columns',
)

_row_colors = pd.DataFrame({
    'site': library.site.map(color_map),
    'trusted': library.trusted.map(color_map),
})
_col_colors = pd.DataFrame({
    'MAG': feature.genome_id.map(color_map),
    'contig': feature.sequence_alternating.map(color_map),
})

# Rows (libraries) are clustered by cosine distance; columns keep their order.
cg = sns.clustermap(
    dmat.T,
    xticklabels=0,
    yticklabels=0,
    row_colors=_row_colors,
    col_colors=_col_colors,
    row_cluster=True,
    col_cluster=False,
    metric='cosine',
    cmap='copper',
    norm=mpl.colors.SymLogNorm(1e-1, vmin=0, vmax=10, base=10),
    cbar_kws={'orientation': 'horizontal', 'label': 'relative coverage'},
    cbar_pos=(0.27, 0.02, 0.6, 0.03),
)
ax = cg.ax_heatmap
# White vertical line at the boundary between the two feature sets.
ax.axvline(xsplit, color='w')
# ax.axhline(ysplit, color='w')
ax.set_xlabel('')
ax.set_ylabel('')

# Label each feature set at the horizontal centre of its span.
_n_first = dmat_raw[0][0].shape[0]
_n_second = dmat_raw[0][1].shape[0]
for _text, _xpos in (
    ('B1A Genes', _n_first / 2),
    ('B1B Genes', _n_first + _n_second / 2),
):
    cg.ax_col_colors.annotate(
        _text, xy=(_xpos, 0.5), va='center', ha='center'
    )

# NOTE(review): the row positions 36 and 78 are hard-coded while rows are
# reordered by clustering — confirm the labels still sit on the right groups.
for _text, _ypos in (('B1A Libs.', 36), ('B1B Libs.', 78)):
    cg.ax_row_colors.annotate(
        _text, xy=(1.6, _ypos), va='center', ha='center', rotation=90,
    )