In [1]:
import os
import sys
import re
from pathlib import Path
from itertools import combinations

from IPython.display import display, HTML, Markdown
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import linkage, dendrogram

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Project level imports
from larval_gonad.notebook import Nb

In [2]:
# Setup notebook: build the shared `nbconfig` object through the project's `Nb`
# helper, pointing it at the Seurat output directory given by `seurat_dir`.
# Later cells read `nbconfig.fbgn2chrom` from it.
nbconfig = Nb.setup_notebook(seurat_dir='../scrnaseq-wf/data/scrnaseq_combine_force')

last updated: 2018-08-31 
Git hash: 51c49c85b2065474dac37643efa2cfec2abb6b4f


## Chromosomal distribution of differentially expressed genes (DEGs) in the germline

In [3]:
def diffs(up='gonia', down='cytes'):
    """Count significant DEGs per chromosome for one ``up`` vs ``down`` comparison.

    Reads ``../scrnaseq-wf/data/{up}_vs_{down}.tsv`` (tab separated, first
    column used as the index), keeps rows with ``p_val_adj <= 0.01`` and joins
    ``nbconfig.fbgn2chrom`` onto them to obtain a ``chrom`` column.

    Parameters
    ----------
    up : str
        Label of the first group; rows with ``avg_logFC > 0`` are counted
        under this label.
    down : str
        Label of the second group; rows with ``avg_logFC < 0`` are counted
        under this label.

    Returns
    -------
    pandas.DataFrame
        One row per chromosome in a fixed order (chrX, chr2L, chr2R, chr3L,
        chr3R, chr4, chrY, chrM) and two columns under a two-level header:
        ``({up}_vs_{down}, up)`` and ``({up}_vs_{down}, down)``. Chromosomes
        with no significant rows are reported as 0.
    """
    comparison = f'{up}_vs_{down}'
    chrom_order = ['chrX', 'chr2L', 'chr2R', 'chr3L', 'chr3R', 'chr4', 'chrY', 'chrM']

    # Significant rows only, annotated with their chromosome.
    table = pd.read_csv(f'../scrnaseq-wf/data/{up}_vs_{down}.tsv', sep='\t', index_col=0)
    sig = table.query('p_val_adj <= 0.01').join(nbconfig.fbgn2chrom)

    # Boolean direction flags; summing them per chromosome yields counts.
    sig[up] = sig.avg_logFC > 0
    sig[down] = sig.avg_logFC < 0

    per_chrom = sig[[up, down, 'chrom']].groupby('chrom').sum()

    # Reindexing fixes the row order and inserts NaN rows for chromosomes
    # that had no significant genes; those become 0 on return.
    counts = per_chrom.reindex(chrom_order)
    counts.columns = pd.MultiIndex.from_arrays([[comparison, comparison], [up, down]])
    return counts.fillna(0)

### Lineage comparisons

In [5]:
# Per-chromosome DEG counts for each consecutive pair of stages along the
# lineage (gonia -> early -> mid -> late), placed side by side.
lincomp = pd.concat(
    [diffs(first, second) for first, second in (('gonia', 'early'), ('early', 'mid'), ('mid', 'late'))],
    axis=1,
)
# Save the combined table, then show it as the cell output.
lincomp.to_csv('../output/2018-08-31_lineage_comparison.tsv', sep='\t')
lincomp

Unnamed: 0_level_0,gonia_vs_early,gonia_vs_early,early_vs_mid,early_vs_mid,mid_vs_late,mid_vs_late
Unnamed: 0_level_1,gonia,early,early,mid,mid,late
chrom,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
chrX,171.0,266.0,111.0,10.0,0.0,1.0
chr2L,207.0,468.0,158.0,103.0,2.0,6.0
chr2R,265.0,433.0,142.0,106.0,5.0,5.0
chr3L,206.0,416.0,126.0,75.0,6.0,3.0
chr3R,257.0,506.0,174.0,104.0,4.0,7.0
chr4,14.0,5.0,7.0,0.0,0.0,0.0
chrY,0.0,5.0,0.0,0.0,0.0,0.0
chrM,0.0,6.0,0.0,0.0,0.0,7.0


In [29]:
def diffs2(up='gonia', down='cytes', index=None):
    """Flag genes that are significantly biased in an ``up`` vs ``down`` comparison.

    Reads ``../scrnaseq-wf/data/{up}_vs_{down}.tsv`` (tab separated, first
    column used as the index) and keeps rows with ``p_val_adj <= 0.01``.

    Parameters
    ----------
    up : str
        Label of the first group. A ``gonia_bias`` column (``avg_logFC > 0``)
        is emitted only when ``up == 'gonia'``.
    down : str
        Label of the second group; the ``{down}_bias`` column holds
        ``avg_logFC < 0``.
    index : pandas.Index or list-like, optional
        Row labels of the returned frame. When omitted, the index of
        ``nbconfig.fbgn2chrom`` is used.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``index``. Rows present in the significant table carry
        True/False flags; all other rows are NaN (left join).
    """
    dat = pd.read_csv(f'../scrnaseq-wf/data/{up}_vs_{down}.tsv', sep='\t', index_col=0).query('p_val_adj <= 0.01')

    if index is None:
        # The previous version read a notebook-level `df` here, which is not
        # defined before this function is first called, so a fresh kernel
        # raised NameError.
        # NOTE(review): falling back to nbconfig.fbgn2chrom.index assumes that
        # index is the intended gene set -- confirm, or pass `index` explicitly.
        index = nbconfig.fbgn2chrom.index
    _df = pd.DataFrame(index=index)

    # Only the first comparison contributes an "up" column; in later
    # comparisons only the `down` column is produced.
    if up == 'gonia':
        gonia = dat.avg_logFC > 0
        gonia.name = 'gonia_bias'
        _df = _df.join(gonia)

    cyte = dat.avg_logFC < 0
    cyte.name = f'{down}_bias'
    return _df.join(cyte)

In [30]:
# One flag table per consecutive stage comparison along the lineage.
dfs = []
for up, down in zip(('gonia', 'early', 'mid'), ('early', 'mid', 'late')):
    dfs.append(diffs2(up, down))

# Place the per-comparison flags side by side; entries with no flag (NaN)
# are set to False.
df = pd.concat(dfs, axis=1).fillna(value=False)

In [33]:
# Cast the flag table to integers and write it out as a tab-separated file.
df.astype(int).to_csv('../output/2018-08-31_flag_lineage_comparison.tsv', sep='\t')