In [1]:
import os
import sys
import re
from pathlib import Path

from IPython.display import display, HTML, Markdown
import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Project level imports
from larval_gonad.notebook import Nb

In [2]:
# Notebook setup: hand the Seurat output directory to the project helper and
# keep the returned config object; later cells read cluster annotations,
# cluster order and the FBgn-to-symbol mapping from it.
seurat_output_dir = '../output/scrnaseq-wf/scrnaseq_combine_force'
nbconfig = Nb.setup_notebook(seurat_dir=seurat_output_dir)

last updated: 2019-03-14 
Git hash: bf23248e80ec1d59e7af8108be5753020c616dbe


In [10]:
# Spearman correlation between the M1º and L1º clusters on per-gene TPM.
tpm_annotated = (
    pd.read_parquet('../output/scrnaseq-wf/tpm.parquet')
    .assign(cluster=lambda frame: frame.cluster.map(nbconfig.short_cluster_annot))
)

# Keep only the two clusters of interest and spread them into one column each
# (genes as rows) so that .corr() compares the clusters gene by gene.
tpm_m1_l1 = (
    tpm_annotated
    .query('cluster == ["M1º", "L1º"]')
    .pivot_table(index='FBgn', columns='cluster', values='TPM')
)

tpm_m1_l1.corr(method='spearman')

cluster,L1º,M1º
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1
L1º,1.0,0.973628
M1º,0.973628,1.0


In [3]:
# Biomarker overlap between M1º and L1º.
# Every cluster is kept in the pivot, so the table's index holds each gene
# that has an avg_logFC value in at least one cluster; the False/False cell of
# the crosstab therefore counts genes that are biomarkers only elsewhere.
biomarkers = (
    pd.read_csv(
        '../output/scrnaseq-wf/scrnaseq_combine_force/biomarkers_res.0.6.tsv',
        sep='\t',
        index_col=0,
    )
    .rename_axis('FBgn')
    .assign(cluster=lambda frame: frame.cluster.map(nbconfig.short_cluster_annot))
)
logfc_by_cluster = biomarkers.pivot_table(index='FBgn', columns='cluster', values='avg_logFC')

# True where the gene has an avg_logFC entry for that cluster.
df = logfc_by_cluster.notnull()

pd.crosstab(df["M1º"], df["L1º"])

L1º,False,True
M1º,Unnamed: 1_level_1,Unnamed: 2_level_1
False,3269,29
True,778,966


In [55]:
# Load the male-sterile gene list: the first column of a header-less text file,
# returned as a plain Python list (presumably FBgn ids, since it is later used
# to reindex an FBgn-indexed frame -- TODO confirm).
#
# The file location can be overridden with the MALE_STERILE_GENES environment
# variable so the notebook is not tied to one user's Downloads folder; the
# original path is kept as the default, so existing runs are unaffected.
male_sterile_path = os.environ.get(
    'MALE_STERILE_GENES',
    '/home/fearjm/Downloads/male_sterile_genes.txt',
)
male_sterile = (
    pd.read_csv(male_sterile_path, header=None)
    .iloc[:, 0]
    .tolist()
)

In [104]:
# Genes that are expressed
df = (
    pd.read_parquet('../output/scrnaseq-wf/raw_by_cluster.parquet')
    .assign(cluster=lambda df: pd.Categorical(df.cluster.map(nbconfig.short_cluster_annot), ordered=True, categories=nbconfig.short_cluster_order))
    .dropna()
    .pivot_table(index='FBgn', columns='cluster', values='UMI')
    .fillna(0)
    .pipe(lambda df: df > 0)
)

In [109]:
# For each male-sterile gene present in df, count the clusters in which it is
# flagged True, then tabulate how many genes share each count.
#
# reindex() inserts all-NaN rows for listed genes that are absent from df, and
# that upcasts the boolean columns (the tally previously displayed as 9.0, 6.0,
# ...). After dropping those rows, cast back to bool so the per-gene tally is
# an integer count.
(
    df.reindex(male_sterile)
    .dropna()
    .astype(bool)
    .sum(axis=1)
    .value_counts()
)

9.0    360
6.0      5
8.0      5
3.0      4
4.0      4
5.0      3
1.0      2
7.0      1
2.0      1
dtype: int64

In [110]:
# Pairwise contingency tables of the per-cluster flags in df for the
# E1º, M1º and L1º columns, shown in the same order as before.
cluster_pairs = [
    ("E1º", "M1º"),
    ("E1º", "L1º"),
    ("M1º", "L1º"),
]
for row_cluster, col_cluster in cluster_pairs:
    display(pd.crosstab(df[row_cluster], df[col_cluster]))

M1º,False,True
E1º,Unnamed: 1_level_1,Unnamed: 2_level_1
False,713,1247
True,149,12367


L1º,False,True
E1º,Unnamed: 1_level_1,Unnamed: 2_level_1
False,1011,949
True,579,11937


L1º,False,True
M1º,Unnamed: 1_level_1,Unnamed: 2_level_1
False,490,372
True,1100,12514


In [130]:
# FBgn ids of the genes inspected in the cells below. The symbol next to each
# id is the row label the notebook renders for it after mapping through
# nbconfig.fbgn2symbol.
dcc = [
    'FBgn0002774',  # mle
    'FBgn0002775',  # msl-3
    'FBgn0005616',  # msl-2
    'FBgn0005617',  # msl-1
    'FBgn0014340',  # mof
    'FBgn0019660',  # roX2
    'FBgn0019661',  # roX1
    'FBgn0283442',  # vas
]



In [139]:
# Replicate-level TPM z-scores for FBgn0283442, with clusters shown under
# their short annotation names.
zscores_annotated = (
    pd.read_parquet('../output/scrnaseq-wf/tpm_zscore_w_rep.parquet')
    .assign(cluster=lambda frame: frame.cluster.map(nbconfig.short_cluster_annot))
)

# Move the index into ordinary columns before filtering on FBgn.
zscores_flat = zscores_annotated.reset_index()
zscores_flat.query('FBgn == "FBgn0283442"')



Unnamed: 0,FBgn,cluster,rep,tpm_zscore
14372,FBgn0283442,L1º,rep1,-0.211667
28785,FBgn0283442,L1º,rep2,-0.30001
43198,FBgn0283442,L1º,rep3,-0.165666
57611,FBgn0283442,MC,rep1,-0.328423
72024,FBgn0283442,MC,rep2,-0.270219
86437,FBgn0283442,MC,rep3,-0.150829
100850,FBgn0283442,M1º,rep1,-0.291477
115263,FBgn0283442,M1º,rep2,-0.263817
129676,FBgn0283442,M1º,rep3,-0.230109
144089,FBgn0283442,E1º,rep1,0.272393


In [152]:
# UMI by cluster for the genes listed in dcc.
umi_annotated = (
    pd.read_parquet('../output/scrnaseq-wf/raw_by_cluster.parquet')
    .assign(cluster=lambda frame: frame.cluster.map(nbconfig.short_cluster_annot))
)

# Drop rows labelled "UNK", then spread clusters into columns (genes as rows).
umi_wide = (
    umi_annotated
    .query('cluster != "UNK"')
    .pivot_table(index='FBgn', columns='cluster', values='UMI')
)

# Columns follow the configured cluster order, rows follow dcc, and the row
# labels are mapped from FBgn ids through nbconfig.fbgn2symbol.
(
    umi_wide
    .reindex(index=dcc, columns=nbconfig.short_cluster_order)
    .rename(nbconfig.fbgn2symbol)
)




cluster,SP,E1º,M1º,L1º,EC,MC,LC,TE,PC
FBgn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
mle,594,1060,132,65,147,58,94,124,99
msl-3,310,946,145,53,26,22,10,17,24
msl-2,93,360,49,25,46,27,27,43,34
msl-1,500,199,22,13,81,23,44,85,52
mof,170,200,61,16,15,7,6,30,17
roX2,215,129,85,38,2334,171,1065,2627,5258
roX1,1147,285,166,38,7167,389,4983,6925,3772
vas,2164,213,71,24,58,29,18,7,7


Unnamed: 0_level_0,cluster,rep,tpm_zscore
FBgn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
FBgn0000008,0,rep1,2.152876
FBgn0000014,0,rep1,-0.415816
FBgn0000015,0,rep1,-0.249634
FBgn0000017,0,rep1,-0.657166
FBgn0000018,0,rep1,-0.826455
FBgn0000024,0,rep1,0.834762
FBgn0000028,0,rep1,0.355698
FBgn0000032,0,rep1,-0.854282
FBgn0000036,0,rep1,-0.616638
FBgn0000037,0,rep1,-0.601090


In [5]:
# Column sums of df, displayed in the configured cluster order. For a boolean
# frame each sum is the number of True rows in that column.
# NOTE(review): this cell relies on `df` from an earlier cell and its execution
# count is out of sequence -- confirm which frame is intended on a fresh run.
cluster_totals = df.sum()
cluster_totals[nbconfig.short_cluster_order]

cluster
SP     12886
E1º    14136
M1º    13614
L1º    12516
EC     12290
MC     12255
LC     12263
TE     11465
PC     11667
dtype: int64

In [10]:
# Tally the mid-vs-late results with p_val_adj <= 0.05 by the sign of
# avg_logFC: mid_bias is True when avg_logFC is positive.
# NOTE(review): presumably a positive avg_logFC means higher in the mid
# cluster -- confirm against how the comparison was set up.
mid_vs_late = (
    pd.read_csv('../output/scrnaseq-wf/germcell_deg/mid_vs_late.tsv', sep='\t', index_col=0)
    .rename_axis('FBgn')
)
significant = mid_vs_late.query('p_val_adj <= 0.05')
flagged = significant.assign(mid_bias=lambda frame: frame.avg_logFC > 0)
flagged['mid_bias'].value_counts()

False    36
True     17
Name: mid_bias, dtype: int64