In [1]:
import pandas as pd
import scanpy as sc
import anndata as ad
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import scipy.sparse as sps
from scipy.stats import ranksums, spearmanr, kendalltau
import pickle as pkl

import os
import sys

# Put the directory two levels above the working directory on the import
# path so that the project-local `tools` package imported below resolves.
module_path = os.path.abspath('../..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import tools.util_probe as up
import tools.util as ut
import tools.NB_est as nb
import tools.countsplit as cs
import tools.ClusterDE as cd

import importlib

import warnings

# Install a blanket "ignore" filter: no action/category restriction is given,
# so every warning raised after this point is suppressed.
# NOTE(review): this also hides deprecation and numerical warnings from the
# libraries imported above — confirm that is intended for this notebook.
warnings.filterwarnings("ignore")

# Output directory (relative to the working directory), presumably where the
# figures produced by this notebook are saved — verify against later cells.
figure_path = "data_summary_figures"

# exist_ok=True makes the cell idempotent without a separate existence check
# (no window between "check" and "create"), and still raises if the path
# exists but is not a directory instead of silently continuing.
os.makedirs(figure_path, exist_ok=True)

# Input .h5ad files, grouped by the top-level data directory they live in.
# The overall order matters: it is the order in which the files are loaded,
# and it appears to line up entry-by-entry with `dataset_names`.
dataset_paths = (
    # MOBA_scRNAseq
    [
        "../../data/MOBA_scRNAseq/outs_S3/filtered_data_maxpool_processed.h5ad",
        "../../data/MOBA_scRNAseq/outs_S2/filtered_data_maxpool_processed.h5ad",
        "../../data/MOBA_scRNAseq/outs_S1/filtered_data_maxpool_processed.h5ad",
    ]
    # probe_Bac_scRNAseq_Rosenthal
    + [
        "../../data/probe_Bac_scRNAseq_Rosenthal/B subtilis minimal media/filtered_data_maxpool_processed.h5ad",
        "../../data/probe_Bac_scRNAseq_Rosenthal/B_subilis_DNA-damage-30min/filtered_data_maxpool_processed.h5ad",
        # "../../data/probe_Bac_scRNAseq_Rosenthal/Sporulation/filtered_data_maxpool_processed.h5ad",
        "../../data/probe_Bac_scRNAseq_Rosenthal/MPA_energy_stress/filtered_data_maxpool_processed.h5ad",
    ]
    # BacDrop
    + [
        "../../data/BacDrop/antibiotics_data_processed_3.h5ad",
        "../../data/BacDrop/untreated_data_processed.h5ad",
        "../../data/BacDrop/klebsiella_BIDMC35_data_processed_2.h5ad",
        "../../data/BacDrop/klebsiella_4species_data_processed.h5ad",
        "../../data/BacDrop/ecoli_data_processed.h5ad",
        "../../data/BacDrop/pseudomonas_data_processed.h5ad",
        "../../data/BacDrop/enterococcus_data_processed.h5ad",
    ]
)

# Short labels for the datasets, split by their name suffix. The order
# appears to mirror `dataset_paths` entry-by-entry, so keep the two in sync
# when adding, removing, or commenting out a dataset.
dataset_names = [
    # labels ending in "_PB"
    "Pseudomonas_balanced_PB",
    "Pseudomonas_li_PB",
    "Ecoli_balanced_PB",
    "Bsub_minmed_PB",
    "Bsub_damage_PB",
    # "Bsub_sporulation_PB",
    "Bsub_MPA_PB",
] + [
    # labels ending in "_BD"
    "Klebs_antibiotics_BD",
    "Klebs_untreated_BD",
    "Klebs_BIDMC35_BD",
    "Klebs_4species_BD",
    "Ecoli_4species_BD",
    "Pseudomonas_4species_BD",
    "Efaecium_4species_BD",
]

# dataset_paths and dataset_names are maintained by hand as two separate
# lists that appear to correspond entry-by-entry. Fail fast, before any file
# is read, if an entry was added or commented out in only one of them.
if len(dataset_paths) != len(dataset_names):
    raise ValueError(
        f"dataset_paths has {len(dataset_paths)} entries but dataset_names "
        f"has {len(dataset_names)}; the two lists must stay aligned."
    )

# Read every file with scanpy's read_h5ad; the resulting list has one object
# per path, in the same order as dataset_paths.
datasets = [sc.read_h5ad(p) for p in dataset_paths]


In [11]:
# Read the 10x-format HDF5 matrix ("filtered_feature_bc_matrix.h5") stored in
# the "B subtilis minimal media" directory, using scanpy's default options.
h5 = sc.read_10x_h5(
    "../../data/probe_Bac_scRNAseq_Rosenthal/B subtilis minimal media/filtered_feature_bc_matrix.h5"
)

In [19]:
# Convert the loaded object to a pandas DataFrame. In the displayed result
# the rows are barcodes and the columns are per-probe features (e.g. aadK_1).
# NOTE(review): presumably this densifies a sparse matrix — confirm memory
# use is acceptable before applying it to larger inputs.
df = h5.to_df()

In [21]:
# Display the frame as the cell's output (pandas truncates the rendering to
# the first/last rows and columns).
df

Unnamed: 0,aadK_1,aadK_2,aadK_3,aadK_4,aadK_5,aadK_6,aadK_7,aadK_8,aadK_9,aadK_10,...,zwf_6,zwf_7,zwf_8,zwf_9,zwf_10,zwf_11,zwf_12,zwf_13,zwf_14,zwf_15
AAACCTGAGCTTATCG-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
AAACCTGAGGATGCGT-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAACCTGAGGATGGAA-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
AAACCTGCAAATTGCC-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAACCTGCAATGAAAC-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGGTTCACAACGCC-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TTTGGTTTCTACTCAT-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TTTGTCAAGACAAAGG-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TTTGTCACAAGCCATT-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
