In [1]:
import os

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from larval_gonad.stats import run_chisq


 # Adult Bulk

In [3]:
# Adult bulk: for every (tissue, chrom) pair, tally the rows whose Count is at
# least 5, then lay the tallies out as a tissue-by-chromosome table.
adult_ct: pd.DataFrame = (
    pd.read_feather("../output/expression-atlas-wf/w1118_gene_counts.feather")
    .assign(flag_on=lambda counts: counts["Count"].ge(5))
    .groupby(["tissue", "chrom"])["flag_on"]
    .sum()
    .unstack("chrom")
    # Pin the row/column order; any label missing from the data shows up as NaN.
    .reindex(index=["testis", "ovary"], columns="X,2L,2R,3L,3R,4,Y".split(","))
)

# Display only the testis rows of the table returned by run_chisq
# (run_chisq is project code; see larval_gonad.stats for its exact contract).
run_chisq(adult_ct).loc[
    ("testis", ["observed", "adj std residual", "fdr q-value"]), :
]


𝛘^2: 62.2273, p-value: 0.0000, df: 6


Unnamed: 0_level_0,chrom,X,2L,2R,3L,3R,4,Y
tissue,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
testis,observed,5499.0,6662.0,6994.0,6657.0,8489.0,271.0,32.0
testis,adj std residual,-5.0496,3.577,-0.2172,-0.2919,1.7339,-1.8987,4.9609
testis,fdr q-value,0.0,0.0016,0.3896,0.3896,0.1242,0.1151,0.0


 # Larval Bulk

In [4]:
# Larval bulk: for every (tissue, chrom) pair, tally the rows whose Count is at
# least 5, then lay the tallies out as a tissue-by-chromosome table.
larval_ct: pd.DataFrame = (
    pd.read_feather("../output/bulk2-rnaseq-wf/testis_ovary_counts.feather")
    .assign(flag_on=lambda counts: counts["Count"].ge(5))
    .groupby(["tissue", "chrom"])["flag_on"]
    .sum()
    .unstack("chrom")
    # Pin the row/column order; any label missing from the data shows up as NaN.
    .reindex(index=["testis", "ovary"], columns="X,2L,2R,3L,3R,4,Y".split(","))
)

# Display only the testis rows of the table returned by run_chisq
# (run_chisq is project code; see larval_gonad.stats for its exact contract).
run_chisq(larval_ct).loc[
    ("testis", ["observed", "adj std residual", "fdr q-value"]), :
]


𝛘^2: 145.1302, p-value: 0.0000, df: 6


Unnamed: 0_level_0,chrom,X,2L,2R,3L,3R,4,Y
tissue,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
testis,observed,7373.0,9194.0,9291.0,9294.0,11419.0,335.0,80.0
testis,adj std residual,-7.5711,6.5045,-2.1991,1.8733,1.0686,-3.0093,6.8151
testis,fdr q-value,0.0,0.0,0.0498,0.0805,0.2254,0.0075,0.0


 # Larval scRNA-Seq

In [5]:
# Larval scRNA-Seq: collapse the cell-type labels to Germline vs. Somatic, tally
# per (cell_type, chrom) the rows whose Count is at least 5, and lay the tallies
# out as a cell-type-by-chromosome table.
sc_ct: pd.DataFrame = (
    pd.read_feather("../output/seurat3-cluster-wf/aggegated_gene_counts_by_germ_soma.feather")
    .assign(
        # Labels not listed in the mapping are left as they are by `replace`.
        cell_type=lambda counts: counts["cell_type"].replace(
            {"Germline": "Germline", "Cyst Lineage": "Somatic", "Other Somatic": "Somatic"}
        ),
        flag_on=lambda counts: counts["Count"].ge(5),
    )
    .groupby(["cell_type", "chrom"])["flag_on"]
    .sum()
    .unstack("chrom")
    # Pin the row/column order; any label missing from the data shows up as NaN.
    .reindex(index=["Germline", "Somatic"], columns="X,2L,2R,3L,3R,4,Y".split(","))
)

# Display only the Germline rows of the table returned by run_chisq
# (run_chisq is project code; see larval_gonad.stats for its exact contract).
run_chisq(sc_ct).loc[
    ("Germline", ["observed", "adj std residual", "fdr q-value"]), :
]


𝛘^2: 31.5933, p-value: 0.0000, df: 6


Unnamed: 0_level_0,chrom,X,2L,2R,3L,3R,4,Y
cell_type,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Germline,observed,5123.0,6330.0,6425.0,6409.0,7791.0,228.0,71.0
Germline,adj std residual,-2.8791,1.5344,-0.8913,1.1611,0.5722,-0.72,4.5056
Germline,fdr q-value,0.0221,0.2868,0.3387,0.3387,0.3387,0.3387,0.0001
