In [None]:
import pandas as pd
import os

from maca import clean_annotation, clean_labels

# Metadata tables are kept one directory above the notebook's working directory.
metadata_folder = os.path.join(os.pardir, 'metadata')

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Seaborn defaults for every figure below: 'whitegrid' style, 'paper' context.
sns.set(style='whitegrid', context='paper')

In [None]:
# Load the 10x metadata table; the first CSV column becomes the index.
csv = os.path.join(metadata_folder, 'MACA_10x.csv')
tenx_metadata = pd.read_csv(csv, index_col=0)
# Show the (rows, columns) shape and preview the first rows.
print(tenx_metadata.shape)
tenx_metadata.head()

In [None]:
# Rename the "Mammary" label to 'Mammary_Gland' wherever it appears as a cell
# value (the replacement is applied to the whole table, not a single column).
tenx_metadata = tenx_metadata.replace(to_replace="Mammary", value='Mammary_Gland')

In [None]:
# Keep only the rows whose 'mouse.age' value equals 3.
is_3_month = tenx_metadata['mouse.age'] == 3
tenx_metadata_3month = tenx_metadata.loc[is_3_month]
print(tenx_metadata_3month.shape)
tenx_metadata_3month.head()

In [None]:
# Count metadata rows per tissue in the 3-month subset
# (presumably one row per 10x channel, going by the variable name -- confirm).
tissue_n_channels = tenx_metadata_3month.groupby('tissue').size()
tissue_n_channels

In [None]:
# Tissue names (the group keys of the counts above); reused further down as the
# row/bar order of the plots.
tissues = tissue_n_channels.index

In [None]:
# Number of distinct tissues in the 3-month subset.
len(tissue_n_channels)

In [None]:
# Load the per-cell annotation table; the first CSV column becomes the index.
csv = os.path.join(metadata_folder, 'maca_3month_annotations_10x.csv')
cell_annotations = pd.read_csv(csv, index_col=0)
# Show the (rows, columns) shape and preview the first rows.
print(cell_annotations.shape)
cell_annotations.head()

In [None]:
# Root output directory for the manuscript figures.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
figure_folder = '/Users/olgabot/googledrive/MACA_3mo_manuscript/Main figures'

figure1_folder = f'{figure_folder}/figure1/10x'
figure2_folder = f'{figure_folder}/figure2/10x'

print(f'figure1_folder: "{figure1_folder}"')
print(f'figure2_folder: "{figure2_folder}"')

# Create the output directories from Python instead of shelling out to
# `mkdir -p`: this also works where no POSIX shell is available, and a failure
# raises here instead of surfacing later when a figure is saved.
for output_folder in (figure1_folder, figure2_folder):
    os.makedirs(output_folder, exist_ok=True)

In [None]:
# Folder holding the "*_nreads_ngenes.csv" tables that are listed and loaded below.
# NOTE(review): absolute, machine-specific path -- must be changed to run elsewhere.
folder = '/Users/olgabot/code/maca/metadata/number_of_cells_reads_genes_10x/'

In [None]:
# List the input tables via the IPython `ls` automagic ($folder is expanded by IPython).
ls $folder/*_nreads_ngenes.csv

In [None]:
import hermione as hm

In [None]:
# Peek at the first lines of the file(s) whose name starts with "Bladder".
! head $folder/Bladder*

In [None]:
import glob
import os 
 
# Input tables are named "<tissue>_nreads_ngenes.csv" inside `folder`.
suffix = '_nreads_ngenes.csv'
globber = os.path.join(folder, f'*{suffix}')

# glob yields files in arbitrary, filesystem-dependent order; sorting keeps the
# row order of the concatenated table (and the preview below) reproducible.
filenames = sorted(glob.glob(globber))
if not filenames:
    # Fail with an explicit message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError(f'No files matching "{globber}"')

dfs = []

for filename in filenames:
    df = pd.read_csv(filename, index_col=0)
    # The tissue name is the file name with the common suffix stripped.
    df['tissue'] = os.path.basename(filename).split(suffix)[0]
    dfs.append(df)

# Stack all per-tissue tables into a single long table.
nreads_ngenes = pd.concat(dfs)
print(nreads_ngenes.shape)
print('number of tissues: ', nreads_ngenes['tissue'].nunique())
nreads_ngenes.head()

In [None]:
import numpy as np

In [None]:
# Add the log10-transformed 'nUMI' column under the name 'log10_nUMI'.
nreads_ngenes = nreads_ngenes.assign(
    log10_nUMI=np.log10(nreads_ngenes['nUMI']),
)

In [None]:
# Read the color table (first CSV column as index) and collapse it to a Series.
# `read_csv(..., squeeze=True)` was removed in pandas 2.0; squeezing the
# one-column frame with `.squeeze('columns')` is the documented replacement and
# gives the same result on older pandas as well.
colors = pd.read_csv(
    os.path.join(metadata_folder, 'tissue_colors.csv'), index_col=0
).squeeze('columns')

# Convert to dictionary to make sure the colors match up
colors = colors.to_dict()
colors

In [None]:
# Display the tissue order in use (taken from the 10x metadata earlier).
# Disabled alternative: derive the order from the loaded per-cell tables instead.
# tissues = sorted(nreads_ngenes['tissue'].unique())
tissues

In [None]:
# Keyword arguments shared by the horizon plots below.
kwargs = {
    'data': nreads_ngenes,
    'row': 'tissue',
    'row_order': tissues,
    'palette': colors,
    'xlabel_suffix': '',
}

In [None]:
# Horizon plot of the 'nGene' column, using the shared `kwargs` settings.
g = hm.horizonplot(x='nGene', **kwargs)
g.set_xlabels('Genes per cell')

# Keep x ticks on the bottom row only. `Axes.is_last_row()` was removed in
# matplotlib 3.6; the same check is available on the axes' SubplotSpec.
for ax in g.axes.flatten():
    if not ax.get_subplotspec().is_last_row():
        ax.set(xticks=[])
g.savefig(f'{figure1_folder}/horizonplot_genes_per_cell.pdf')

In [None]:
# hm.horizonplot(x='nReads', **kwargs)

In [None]:
# Horizon plot of the 'log10_nUMI' column, using the shared `kwargs` settings.
g = hm.horizonplot(x='log10_nUMI', **kwargs)
g.set_xlabels('log10(UMI per cell)')

# Keep x ticks on the bottom row only. `Axes.is_last_row()` was removed in
# matplotlib 3.6; the same check is available on the axes' SubplotSpec.
for ax in g.axes.flatten():
    if not ax.get_subplotspec().is_last_row():
        ax.set(xticks=[])
g.savefig(f'{figure1_folder}/horizonplot_log10_UMI_per_cell.pdf')

In [None]:
# Number of rows (cells) per tissue as a two-column table: 'tissue', 'n_cells'.
n_cells_per_tissue = (
    nreads_ngenes
    .groupby('tissue')
    .size()
    .reset_index(name='n_cells')
)
n_cells_per_tissue

In [None]:
# Horizontal bar chart of the number of cells per tissue, drawn in the shared
# tissue order and tissue colors, then saved with the figure 1 panels.
fig, ax = plt.subplots()
sns.barplot(
    data=n_cells_per_tissue,
    x='n_cells',
    y='tissue',
    order=tissues,
    palette=colors,
    ax=ax,
)
ax.set_xlabel('Number of cells')
fig.tight_layout()
fig.savefig(f'{figure1_folder}/barplot_n_cells_per_tissue.pdf')

In [None]:
# Stacked column of bar charts: one panel per tissue, one bar per annotation,
# bar length = log10(number of cells with that annotation).
width = 2.40157   # ~61mm
# height = 7.87402  # ~200mm
height = 9.72441  # ~247mm

col = 'annotation'

# Size each panel by the number of bars it will hold. `nunique` ignores missing
# annotations, matching `groupby(col)` below, which draws no bar for them;
# the clip keeps a tissue without any annotation from getting a zero-height panel.
height_ratios = cell_annotations.groupby('tissue')[col].nunique().clip(lower=1).tolist()

# Take the number of panels from the same grouping that is iterated below, so
# the panels, their height ratios and the plotted groups cannot disagree.
# squeeze=False keeps `axes` an array even when there is a single tissue.
fig, axes = plt.subplots(figsize=(width, height), nrows=len(height_ratios), sharex=True,
                         squeeze=False,
                         gridspec_kw=dict(hspace=.15, height_ratios=height_ratios))
axes = axes[:, 0]
last_row = len(axes) - 1

for row, ((tissue, df), ax) in enumerate(zip(cell_annotations.groupby('tissue'), axes)):
    annotation_subannotation = np.log10(df.groupby(col).size()).reset_index(name='n_cells')

    sns.barplot(x='n_cells', y=col, data=annotation_subannotation, palette='husl', ax=ax)
    ax.set(xlabel='')

    # Annotation tick labels on the right, tissue name as the y label.
    ax.yaxis.set_ticks_position("right")
    ax.set_ylabel(tissue, rotation=0, ha='right')

    # Turn the y-axis grid off. The string 'off' is truthy and switches the
    # grid ON in current matplotlib, so pass a real boolean.
    ax.grid(False, axis='y')

    # Hide the spines between vertically adjacent panels. Row positions come
    # from the loop index because Axes.is_first_row()/is_last_row() were
    # removed in matplotlib 3.6.
    if row == 0:
        ax.spines['bottom'].set_visible(False)
    elif row == last_row:
        ax.set(xlabel='log10(# cells)')
        ax.spines['top'].set_visible(False)
    else:
        ax.spines['top'].set_visible(False)
        ax.spines['bottom'].set_visible(False)

# The x axis is shared, so inverting it once applies to every panel.
axes[-1].invert_xaxis()
fig.tight_layout()
# NOTE(review): a later cell in this notebook saves to this same path and
# therefore overwrites this figure.
fig.savefig(f'{figure2_folder}/barplot_n_cells_per_annotation_all.pdf')


In [None]:
# Stacked column of bar charts: one titled panel per tissue, one bar per
# annotation, bar length = log10(number of cells), x axis labelled in powers of ten.
mm_to_inch = 0.03937007874

width = 76 * mm_to_inch
height = 196 * mm_to_inch

print(f'width: {width}, height: {height}')

col = 'annotation'

# Size each panel by the number of bars it will hold. `nunique` ignores missing
# annotations, matching `groupby(col)` below, which draws no bar for them;
# the clip keeps a tissue without any annotation from getting a zero-height panel.
height_ratios = cell_annotations.groupby('tissue')[col].nunique().clip(lower=1).tolist()

# Take the number of panels from the same grouping that is iterated below, so
# the panels, their height ratios and the plotted groups cannot disagree.
# squeeze=False keeps `axes` an array even when there is a single tissue.
fig, axes = plt.subplots(figsize=(width, height), nrows=len(height_ratios), sharex=True,
                         squeeze=False,
                         gridspec_kw=dict(height_ratios=height_ratios))
axes = axes[:, 0]
last_row = len(axes) - 1

for row, ((tissue, df), ax) in enumerate(zip(cell_annotations.groupby('tissue'), axes)):
    annotation_subannotation = np.log10(df.groupby(col).size()).reset_index(name='n_cells')
    # Show annotation names with spaces instead of underscores.
    annotation_subannotation[col] = annotation_subannotation[col].str.replace('_', ' ')

    sns.barplot(x='n_cells', y=col, data=annotation_subannotation, palette='husl', ax=ax, zorder=-1)
    ax.set(xlabel='', ylabel='')

    # Remove "FACS"
    title = tissue.replace('_FACS', '').replace('_', ' ')
    ax.set_title(title, va='top', fontweight='bold', size=8)

    ax.yaxis.set_ticks_position("right")

    # White x grid lines drawn above the bars; no y grid. The string 'off' is
    # truthy and switches the grid ON in current matplotlib, so pass a boolean.
    ax.grid(axis='x', zorder=100, color='white')
    ax.grid(False, axis='y')

    ax.spines['left'].set_visible(False)

    # Row position comes from the loop index because Axes.is_last_row() was
    # removed in matplotlib 3.6.
    if row == last_row:
        # Put ticks on whole exponents only and label them as powers of ten.
        # Truncating arbitrary tick positions with int() would give wrong or
        # duplicated labels (e.g. 0.5 -> 10^0), and setting labels without
        # pinning the tick positions lets the two drift apart. The braces keep
        # multi-character exponents valid in mathtext.
        low, high = sorted(ax.get_xlim())
        exponents = np.arange(np.ceil(low), np.floor(high) + 1)
        ax.set_xticks(exponents)
        ax.set_xticklabels([f'$10^{{{int(e)}}}$' for e in exponents], va='center')
        ax.set_xlabel('Number of cells', va='center')

    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)

# The x axis is shared, so inverting it once applies to every panel.
axes[-1].invert_xaxis()
fig.tight_layout(h_pad=0)
# NOTE(review): an earlier cell in this notebook saves to this same path, so
# this figure overwrites that one.
fig.savefig(f'{figure2_folder}/barplot_n_cells_per_annotation_all.pdf')


In [None]:
# Debug: x tick locations of `ax`, the last axes left over from the loop above.
ax.get_xticks()

In [None]:
# Debug: per-annotation table left over from the last loop iteration above.
annotation_subannotation

In [None]:
# Debug: same leftover table displayed again (repeats the previous cell).
annotation_subannotation