In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import matplotlib.font_manager
# List the TrueType ('ttf') font files matplotlib can locate on this machine;
# as the cell's last expression, the returned list is shown as cell output.
# NOTE(review): presumably a check that the "Arial" font requested by the
# sns.set(...) calls further down is installed - confirm.
matplotlib.font_manager.findSystemFonts(fontpaths=None, fontext='ttf')

In [None]:
# Lookup table: identifier string found in the input CSVs -> short label used
# in the figures. It is applied with DataFrame.replace() right after each CSV
# is read, so any cell whose value equals one of these keys is relabelled.
# NOTE(review): the keys look like per-dataset file/sample names - confirm
# against the CSV contents.
cell_type_names = {
    "somatosensory_converted_into_tpm": "Somatosensory N",
    "Dopaminergic_TPM_clean": "Dopaminergic N",
    "kakadarov_tpm": "CD8+ T cell",
    "Cheng_ES_TPM": "Isolated ESC",
    "Gaublomme_GSE75109_TPM_clean": "Th17 A",
    "Gaublomme_GSE75110_TPM_clean": "Th17 B",
    "Gaublomme_GSE75111_TPM_clean": "Th17 C",
    "Rbp4_positive_cells": "Corticostriatal N",
    "Alveolar_cells_Type_I_Merged_Batches": "Lung ACI",
    "Alveolar_cells_Type_II_Merged_Batches": "Lung ACII",
    # Disabled entry (it has no display label assigned):
#     "Alveolar_cells_both_types",
    "klein": "Cultured ESC",
    "hepat_TPM_yang_clean": "Liver HB/HC",
    "Yu_First_wave_endocrine_cells": "Pancreatic EC",
}

In [None]:
# Read the per-family table, then swap every value that matches a key of
# cell_type_names for its display label.
fam_df = pd.read_csv("lower_families_to_plot.csv")
fam_df = fam_df.replace(cell_type_names)

In [None]:
# Show the family table as loaded (identifiers already replaced by labels).
fam_df

In [None]:
# Initialise the family x dataset matrix of `ic` values; it is built from
# fam_df in the next cell.
m_fam_df = pd.DataFrame()

In [None]:
# Build the family x dataset matrix of `ic` values in one vectorised step
# instead of enlarging the frame one cell at a time with .loc inside a loop.
#
# The result matches what the loop produced:
#   * keep="last"  - if a (family_name, dataset_name) pair occurs several
#                    times, the last row wins, as repeated assignment did;
#   * reindex      - rows and columns appear in order of first occurrence
#                    in fam_df;
#   * rename_axis  - both axes are left unnamed, like the loop-built frame.
# Pairs that never occur in fam_df stay NaN.
m_fam_df = (
    fam_df
    .drop_duplicates(subset=["family_name", "dataset_name"], keep="last")
    .pivot(index="family_name", columns="dataset_name", values="ic")
    .reindex(
        index=fam_df["family_name"].unique(),
        columns=fam_df["dataset_name"].unique(),
    )
    .rename_axis(index=None, columns=None)
)

In [None]:
# Inspect the family x dataset matrix of `ic` values.
m_fam_df

In [None]:
# Annotation matrix for the family heatmap: "*" where ic_upper >= 1, empty
# string everywhere else (including pairs that never occur in fam_df).
#
# Built in one vectorised step instead of cell-by-cell .loc enlargement.
# As with the loop, the last row wins for a repeated
# (family_name, dataset_name) pair, rows and columns follow order of first
# occurrence in fam_df, and both axes are left unnamed.
annot_fam_df = (
    fam_df
    .assign(_annotation=lambda df: (df["ic_upper"] >= 1).map({True: "*", False: ""}))
    .drop_duplicates(subset=["family_name", "dataset_name"], keep="last")
    .pivot(index="family_name", columns="dataset_name", values="_annotation")
    .reindex(
        index=fam_df["family_name"].unique(),
        columns=fam_df["dataset_name"].unique(),
    )
    .rename_axis(index=None, columns=None)
    .fillna("")
)

In [None]:
# Inspect the annotation matrix with its columns sorted by label - the same
# column order the heatmap cell uses. sort_index returns a new frame, so
# annot_fam_df itself is left untouched.
annot_fam_df.sort_index(axis=1)

In [None]:
# Heatmap of `ic` with one row per family and one column per dataset
# (columns sorted by label). Each cell is overlaid with the matching text
# from annot_fam_df, which is sorted the same way so the two frames line up.
sns.set(font_scale=1.4, style="ticks", font="Arial")
fig, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    m_fam_df.sort_index(axis=1),
    ax=ax,
    cmap="YlOrRd_r",
    center=0.8,
    square=True,
    annot=annot_fam_df.sort_index(axis=1),
    annot_kws={"size": 22, "va": "center_baseline"},
    fmt="",  # annotations are ready-made strings, not numbers to format
    xticklabels=True,
    yticklabels=True,
    linewidth=1,
    linecolor="grey",
)
plt.show()

In [None]:
# Read the per-chromosome-region table, then swap every value that matches a
# key of cell_type_names for its display label.
chr_df = pd.read_csv("lower_chromosomes_to_plot.csv")
chr_df = chr_df.replace(cell_type_names)

In [None]:
# Show the chromosome table as loaded (identifiers already replaced by labels).
chr_df

In [None]:
def get_combined_coordinates(x):
    """Build the "Combined coordinates" label for one group of rows.

    Parameters
    ----------
    x : pandas.DataFrame
        One group, as handed over by ``groupby(...).apply``. It must provide
        the columns ``chromosome``, ``start_gene`` and ``end_gene`` and carry
        the group key as a string in ``x.name``.

    Returns
    -------
    pandas.Series
        A single entry, ``"Combined coordinates"``, holding
        ``"<chromosome>: <min start>-<max end>"``. The group key is appended
        in parentheses unless it starts with ``"#"``.
    """
    # The chromosome is taken from the group's first row and used verbatim
    # (no zero padding).
    chrom = str(x.chromosome.iloc[0])
    key_is_hidden = x.name.startswith("#")
    lowest_start = x.start_gene.min()
    highest_end = x.end_gene.max()

    if key_is_hidden:
        label = "{}: {}-{}".format(chrom, lowest_start, highest_end)
    else:
        label = "{}: {}-{} ({})".format(chrom, lowest_start, highest_end, x.name)
    return pd.Series({"Combined coordinates": label})
# Attach the per-group "Combined coordinates" label (one label per distinct
# "Combined name") to every row of chr_df.
#
# A "Combined coordinates" column left over from an earlier run of this cell
# is dropped first. Without that, re-running the cell would merge onto a
# frame that already has the column, pandas would emit
# "Combined coordinates_x"/"Combined coordinates_y" instead, and the later
# cells that read chr_df["Combined coordinates"] would fail.
chr_df = chr_df.drop(columns="Combined coordinates", errors="ignore")
chr_df = chr_df.merge(
    chr_df.groupby(["Combined name"]).apply(get_combined_coordinates),
    left_on="Combined name",
    right_index=True,
)

In [None]:
# Distinct "Combined coordinates" labels, in order of first appearance; the
# same array is used as the row index of the matrices built below.
chr_df.loc[:,"Combined coordinates"].unique()

In [None]:
# Region x dataset matrix holding the minimum `ic` of each
# ("Combined coordinates", "dataset") group.
#
# Changes from the loop-based version:
#   * Series.iteritems() no longer exists in pandas >= 2.0, so the loop is
#     replaced by an unstack of the grouped minimum;
#   * only the `ic` column is aggregated, rather than taking .min() over
#     every column of chr_df (including non-numeric ones) and discarding
#     all but one.
# Rows follow the order of first appearance of the labels in chr_df, columns
# the order in which datasets are first met in the sorted groups, and both
# axes are left unnamed - the same layout the loop produced.
ic_min = chr_df.groupby(["Combined coordinates", "dataset"])["ic"].min()
m_chr_df = (
    ic_min
    .unstack("dataset")
    .reindex(
        index=chr_df.loc[:, "Combined coordinates"].unique(),
        columns=ic_min.index.get_level_values("dataset").unique(),
    )
    .rename_axis(index=None, columns=None)
)

In [None]:
# Annotation matrix for the chromosome heatmap, derived from the per-group
# minima of `ic` and `ic_upper` (each minimum is taken independently):
#   ""   when min(ic_upper) < 1 (or is missing),
#   "*"  when min(ic_upper) >= 1 but min(ic) < 1,
#   "**" when both min(ic_upper) >= 1 and min(ic) >= 1.
# (region, dataset) pairs that never occur get "".
#
# Only the two columns actually used are aggregated, rather than taking
# .min() over every column of chr_df, and the marks are computed
# vectorised instead of enlarging the frame cell by cell. Row order, column
# order and the unnamed axes match the loop-built frame.
ci_min = chr_df.groupby(["Combined coordinates", "dataset"])[["ic", "ic_upper"]].min()
upper_at_least_one = ci_min["ic_upper"] >= 1
both_at_least_one = upper_at_least_one & (ci_min["ic"] >= 1)
annot_chr_df = (
    upper_at_least_one
    .map({True: "*", False: ""})
    .where(~both_at_least_one, "**")
    .unstack("dataset")
    .reindex(
        index=chr_df.loc[:, "Combined coordinates"].unique(),
        columns=ci_min.index.get_level_values("dataset").unique(),
    )
    .rename_axis(index=None, columns=None)
    .fillna("")
)

In [None]:
# Heatmap of the per-region minimum `ic`, one row per "Combined coordinates"
# label and one column per dataset (columns sorted by label). Each cell is
# overlaid with the matching text from annot_chr_df, which is sorted the same
# way so the two frames line up.
sns.set(font_scale=1.7, style="ticks", font="Arial")
fig, ax = plt.subplots(figsize=(7, 10))
sns.heatmap(
    m_chr_df.sort_index(axis=1),
    ax=ax,
    cmap="YlOrRd_r",
    square=True,
    center=0.8,
    annot=annot_chr_df.sort_index(axis=1),
    annot_kws={"size": 22, "va": "center_baseline"},
    fmt="",  # annotations are ready-made strings, not numbers to format
    xticklabels=True,
    yticklabels=True,
    linewidth=1,
    linecolor="grey",
)
plt.show()