In [9]:
import os

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

from larval_gonad.stats import run_chisq

# Let pandas render up to 200 rows before truncating a displayed frame.
pd.set_option("display.max_rows", 200)


 # Adult Bulk

In [11]:
# Adult w1118 bulk RNA-Seq counts; the columns used below are tissue, chrom and Count.
adult_bulk = pd.read_feather("../output/expression-atlas-wf/w1118_gene_counts.feather")

# Tissue x chromosome contingency table. A row of `adult_bulk` is flagged "on"
# when Count >= 5; summing the boolean flag tallies the flagged rows per group.
# NOTE(review): adult_bulk appears to hold one row per gene per sample, so these
# tallies would be gene x sample observations rather than distinct genes --
# confirm that this is the intended unit for the chi-square test.
adult_ct = (
    adult_bulk
    .assign(flag_on=lambda x: x.Count >= 5)
    .groupby(["tissue", "chrom"])
    .flag_on.sum()
    # A tissue/chromosome pair that has no rows at all means "zero flagged
    # rows", not a missing value; fill it so NaN never reaches run_chisq.
    .unstack(fill_value=0)
    # Fix the row/column order used for display.
    .reindex(index=["testis", "ovary"], columns="X,2L,2R,3L,3R,4,Y".split(","))
)  # type: pd.DataFrame

# Show the selected statistics for every tissue; replace slice(None) with
# "testis" to list the testis rows only.
run_chisq(adult_ct).loc[(slice(None), ["observed", "adj std residual", "fdr q-value"]),:]


𝛘^2: 62.2273, p-value: 0.0000, df: 6


Unnamed: 0_level_0,chrom,X,2L,2R,3L,3R,4,Y
tissue,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ovary,observed,4634.0,4818.0,5395.0,5142.0,6364.0,246.0,0.0
ovary,adj std residual,5.0496,-3.577,0.2172,0.2919,-1.7339,1.8987,-4.9609
ovary,fdr q-value,0.0,0.0016,0.3896,0.3896,0.1242,0.1151,0.0
testis,observed,5499.0,6662.0,6994.0,6657.0,8489.0,271.0,32.0
testis,adj std residual,-5.0496,3.577,-0.2172,-0.2919,1.7339,-1.8987,4.9609
testis,fdr q-value,0.0,0.0016,0.3896,0.3896,0.1242,0.1151,0.0


In [12]:
# FBgn -> gene_symbol lookup built from the r6-26 annotation table.
fbgn2symbol = (
    pd.read_feather(
        "../references/gene_annotation_dmel_r6-26.feather",
        columns=["FBgn", "gene_symbol"],
    )
    .set_index("FBgn")
    .squeeze()
)

# Adult ovary rows on the Y chromosome that have at least one count.
ovary_y_fbgns = adult_bulk.query("tissue == 'ovary' & chrom == 'Y' & Count > 0")

# First output: the matching rows labelled with gene symbols, highest Count first.
display(
    ovary_y_fbgns
    .merge(fbgn2symbol, on="FBgn")
    .set_index(["FBgn", "gene_symbol"])
    .sort_values(by="Count", ascending=False)
)

# Second output (cell value): one line per distinct FBgn with its symbol.
fbgn2symbol.reindex(ovary_y_fbgns["FBgn"].unique()).to_frame()


Unnamed: 0_level_0,Unnamed: 1_level_0,sample_ID,Count,stage,tissue,rep,data_source,chrom
FBgn,gene_symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
FBgn0058064,ARY,adult_ovary_r4,1,adult,ovary,4,RNA-Seq,Y
FBgn0267433,kl-5,adult_ovary_r2,1,adult,ovary,2,RNA-Seq,Y
FBgn0267433,kl-5,adult_ovary_r3,1,adult,ovary,3,RNA-Seq,Y
FBgn0267592,CCY,adult_ovary_r1,1,adult,ovary,1,RNA-Seq,Y


Unnamed: 0_level_0,gene_symbol
FBgn,Unnamed: 1_level_1
FBgn0058064,ARY
FBgn0267433,kl-5
FBgn0267592,CCY


 # Larval Bulk

In [13]:
# Larval bulk RNA-Seq counts; the columns used below are tissue, chrom and Count.
larval_bulk = pd.read_feather("../output/bulk2-rnaseq-wf/testis_ovary_counts.feather")

# Tissue x chromosome contingency table. A row of `larval_bulk` is flagged "on"
# when Count >= 5; summing the boolean flag tallies the flagged rows per group.
# NOTE(review): larval_bulk appears to hold one row per gene per sample, so these
# tallies would be gene x sample observations rather than distinct genes --
# confirm that this is the intended unit for the chi-square test.
larval_ct = (
    larval_bulk
    .assign(flag_on=lambda x: x.Count >= 5)
    .groupby(["tissue", "chrom"])
    .flag_on.sum()
    # A tissue/chromosome pair that has no rows at all means "zero flagged
    # rows", not a missing value; fill it so NaN never reaches run_chisq.
    .unstack(fill_value=0)
    # Fix the row/column order used for display.
    .reindex(index=["testis", "ovary"], columns="X,2L,2R,3L,3R,4,Y".split(","))
)  # type: pd.DataFrame

# Only the testis rows of the selected statistics are shown.
run_chisq(larval_ct).loc[("testis", ["observed", "adj std residual", "fdr q-value"]),:]


𝛘^2: 145.1302, p-value: 0.0000, df: 6


Unnamed: 0_level_0,chrom,X,2L,2R,3L,3R,4,Y
tissue,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
testis,observed,7373.0,9194.0,9291.0,9294.0,11419.0,335.0,80.0
testis,adj std residual,-7.5711,6.5045,-2.1991,1.8733,1.0686,-3.0093,6.8151
testis,fdr q-value,0.0,0.0,0.0498,0.0805,0.2254,0.0075,0.0


In [14]:
# FBgn -> gene_symbol lookup built from the r6-26 annotation table.
fbgn2symbol = (
    pd.read_feather(
        "../references/gene_annotation_dmel_r6-26.feather",
        columns=["FBgn", "gene_symbol"],
    )
    .set_index("FBgn")
    .squeeze()
)

# Larval ovary rows on the Y chromosome that have at least one count.
ovary_y_fbgns = larval_bulk.query("tissue == 'ovary' & chrom == 'Y' & Count > 0")

# First output: the matching rows labelled with gene symbols, highest Count first.
display(
    ovary_y_fbgns
    .merge(fbgn2symbol, on="FBgn")
    .set_index(["FBgn", "gene_symbol"])
    .sort_values(by="Count", ascending=False)
)

# Total Count summed over all of those rows.
print(ovary_y_fbgns["Count"].sum())

# Final output (cell value): one line per distinct FBgn with its symbol.
fbgn2symbol.reindex(ovary_y_fbgns["FBgn"].unique()).to_frame()


Unnamed: 0_level_0,Unnamed: 1_level_0,sample_ID,Count,stage,tissue,rep,data_source,chrom
FBgn,gene_symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
FBgn0085789,CR41506,A4_OCP,15,L3,ovary,4,RNA-Seq,Y
FBgn0085789,CR41506,A3_OCP,15,L3,ovary,3,RNA-Seq,Y
FBgn0085789,CR41506,A2_OCP,13,L3,ovary,2,RNA-Seq,Y
FBgn0265047,FDY,A1_OCP,12,L3,ovary,1,RNA-Seq,Y
FBgn0085789,CR41506,A1_OCP,7,L3,ovary,1,RNA-Seq,Y
FBgn0085792,CR41509,A3_OCP,5,L3,ovary,3,RNA-Seq,Y
FBgn0085790,CR41507,A1_OCP,4,L3,ovary,1,RNA-Seq,Y
FBgn0085792,CR41509,A2_OCP,3,L3,ovary,2,RNA-Seq,Y
FBgn0085520,CR40801,A2_OCP,3,L3,ovary,2,RNA-Seq,Y
FBgn0085790,CR41507,A2_OCP,3,L3,ovary,2,RNA-Seq,Y


106


Unnamed: 0_level_0,gene_symbol
FBgn,Unnamed: 1_level_1
FBgn0001313,kl-2
FBgn0046698,Pp1-Y2
FBgn0058064,ARY
FBgn0085520,CR40801
FBgn0085789,CR41506
FBgn0085790,CR41507
FBgn0085792,CR41509
FBgn0261399,Pp1-Y1
FBgn0265047,FDY
FBgn0267489,PRY


 # Larval scRNA-Seq

In [15]:
# Larval scRNA-Seq counts with cell types collapsed to two labels: "Cyst Lineage"
# and "Other Somatic" are both renamed to "Somatic"; "Germline" is unchanged.
# NOTE(review): the path is spelled "aggegated"; presumably that matches the file
# written upstream -- confirm before correcting the spelling here.
# NOTE(review): the rename only changes labels, it does not sum Count, so a gene
# can keep one row per original lineage under "Somatic" for the same replicate --
# confirm this is intended before comparing Somatic tallies with Germline.
sc = (
    pd.read_feather("../output/seurat3-cluster-wf/aggegated_gene_counts_by_germ_soma.feather")
    .assign(cell_type=lambda x: x.cell_type.replace({"Germline": "Germline", "Cyst Lineage": "Somatic", "Other Somatic": "Somatic"}))
)

# Cell type x chromosome contingency table. A row of `sc` is flagged "on" when
# Count >= 5; summing the boolean flag tallies the flagged rows per group.
sc_ct = (
    sc
    .assign(flag_on=lambda x: x.Count >= 5)
    .groupby(["cell_type", "chrom"])
    .flag_on.sum()
    # A cell-type/chromosome pair that has no rows at all means "zero flagged
    # rows", not a missing value; fill it so NaN never reaches run_chisq.
    .unstack(fill_value=0)
    # Fix the row/column order used for display.
    .reindex(index=["Germline", "Somatic"], columns="X,2L,2R,3L,3R,4,Y".split(","))
) # type: pd.DataFrame

# Only the Germline rows of the selected statistics are shown.
run_chisq(sc_ct).loc[("Germline", ["observed", "adj std residual", "fdr q-value"]),:]


𝛘^2: 31.5933, p-value: 0.0000, df: 6


Unnamed: 0_level_0,chrom,X,2L,2R,3L,3R,4,Y
cell_type,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Germline,observed,5123.0,6330.0,6425.0,6409.0,7791.0,228.0,71.0
Germline,adj std residual,-2.8791,1.5344,-0.8913,1.1611,0.5722,-0.72,4.5056
Germline,fdr q-value,0.0221,0.2868,0.3387,0.3387,0.3387,0.3387,0.0001


In [16]:
# FBgn -> gene_symbol lookup built from the r6-26 annotation table.
fbgn2symbol = (
    pd.read_feather(
        "../references/gene_annotation_dmel_r6-26.feather",
        columns=["FBgn", "gene_symbol"],
    )
    .set_index("FBgn")
    .squeeze()
)

# Somatic rows on the Y chromosome that have at least one count.
somatic_y_fbgns = sc.query("cell_type == 'Somatic' & chrom == 'Y' & Count > 0")

# First output: the matching rows labelled with gene symbols, highest Count first.
display(
    somatic_y_fbgns
    .merge(fbgn2symbol, on="FBgn")
    .set_index(["FBgn", "gene_symbol"])
    .sort_values(by="Count", ascending=False)
)

# Total Count summed over all of those rows.
print(somatic_y_fbgns["Count"].sum())

# Final output (cell value): one line per distinct FBgn with its symbol.
fbgn2symbol.reindex(somatic_y_fbgns["FBgn"].unique()).to_frame()



Unnamed: 0_level_0,Unnamed: 1_level_0,rep,cell_type,Count,tissue,stage,sample_ID,data_source,chrom
FBgn,gene_symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
FBgn0267422,CR45775,rep3,Somatic,175,testis,L3,L3_scRNAseq_rep3,scRNA-Seq,Y
FBgn0267422,CR45775,rep2,Somatic,113,testis,L3,L3_scRNAseq_rep2,scRNA-Seq,Y
FBgn0267412,CG45765,rep3,Somatic,87,testis,L3,L3_scRNAseq_rep3,scRNA-Seq,Y
FBgn0058064,ARY,rep3,Somatic,86,testis,L3,L3_scRNAseq_rep3,scRNA-Seq,Y
FBgn0267422,CR45775,rep3,Somatic,78,testis,L3,L3_scRNAseq_rep3,scRNA-Seq,Y
FBgn0267412,CG45765,rep1,Somatic,72,testis,L3,L3_scRNAseq_rep1,scRNA-Seq,Y
FBgn0267412,CG45765,rep2,Somatic,68,testis,L3,L3_scRNAseq_rep2,scRNA-Seq,Y
FBgn0058064,ARY,rep3,Somatic,54,testis,L3,L3_scRNAseq_rep3,scRNA-Seq,Y
FBgn0046698,Pp1-Y2,rep3,Somatic,53,testis,L3,L3_scRNAseq_rep3,scRNA-Seq,Y
FBgn0058064,ARY,rep2,Somatic,50,testis,L3,L3_scRNAseq_rep2,scRNA-Seq,Y


1575


Unnamed: 0_level_0,gene_symbol
FBgn,Unnamed: 1_level_1
FBgn0001313,kl-2
FBgn0046323,ORY
FBgn0046697,Ppr-Y
FBgn0046698,Pp1-Y2
FBgn0058064,ARY
FBgn0085644,CR41423
FBgn0085789,CR41506
FBgn0085790,CR41507
FBgn0085831,CR42201
FBgn0259845,Su(Ste):CR42414
