In [1]:
import pandas as pd
import pyarrow.dataset as ds
import gcsfs
import os
import subprocess

# Initialize GCS file system
fs = gcsfs.GCSFileSystem()

# GCS base path
gcs_base_path = "gs://arc-ctc-scbasecamp/2025-02-25/"

# STARsolo feature type
feature_type = "GeneFull_Ex50pAS"

# Metadata path
metadata_path = os.path.join(gcs_base_path, "metadata", feature_type)

# Get the sample metadata file path for Homo sapiens
sample_metadata_path = os.path.join(metadata_path, "Homo_sapiens", "sample_metadata.parquet")

# Load the metadata
sample_metadata = ds.dataset(sample_metadata_path, filesystem=fs, format="parquet").to_table().to_pandas()

# Filter for A549 cell line
a549_samples = sample_metadata[sample_metadata["cell_line"].str.contains("A549", na=False)]

# Print the results
print(f"Found {len(a549_samples)} samples with A549 cell line:")
# print(a549_samples[["srx_accession", "tissue", "disease", "perturbation"]])

Found 34 samples with A549 cell line:


In [2]:


# Create a directory to save the files
output_dir = "a549_data"
os.makedirs(output_dir, exist_ok=True)

# Download the files using gsutil
for i, row in a549_samples.iterrows():
    file_path = row["file_path"]
    output_file = os.path.join(output_dir, os.path.basename(file_path))
    
    print(f"Downloading {file_path} to {output_file}...")
    
    # Using subprocess to call gsutil
    fs.get(file_path, output_file)

print(f"All A549 h5ad files downloaded to {output_dir}")

Downloading gs://arc-ctc-scbasecamp/2025-02-25/h5ad/GeneFull_Ex50pAS/Homo_sapiens/ERX8792190.h5ad to a549_data/ERX8792190.h5ad...
Downloading gs://arc-ctc-scbasecamp/2025-02-25/h5ad/GeneFull_Ex50pAS/Homo_sapiens/SRX21897869.h5ad to a549_data/SRX21897869.h5ad...
Downloading gs://arc-ctc-scbasecamp/2025-02-25/h5ad/GeneFull_Ex50pAS/Homo_sapiens/SRX25289894.h5ad to a549_data/SRX25289894.h5ad...
Downloading gs://arc-ctc-scbasecamp/2025-02-25/h5ad/GeneFull_Ex50pAS/Homo_sapiens/SRX17488180.h5ad to a549_data/SRX17488180.h5ad...
Downloading gs://arc-ctc-scbasecamp/2025-02-25/h5ad/GeneFull_Ex50pAS/Homo_sapiens/SRX17915869.h5ad to a549_data/SRX17915869.h5ad...
Downloading gs://arc-ctc-scbasecamp/2025-02-25/h5ad/GeneFull_Ex50pAS/Homo_sapiens/SRX19004457.h5ad to a549_data/SRX19004457.h5ad...
Downloading gs://arc-ctc-scbasecamp/2025-02-25/h5ad/GeneFull_Ex50pAS/Homo_sapiens/SRX24227811.h5ad to a549_data/SRX24227811.h5ad...
Downloading gs://arc-ctc-scbasecamp/2025-02-25/h5ad/GeneFull_Ex50pAS/Homo_sapi

In [5]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import anndata

# Directory containing the downloaded A549 files
data_dir = "a549_data"

# List all h5ad files in the directory
a549_files = [file for file in os.listdir(data_dir) if file.endswith('.h5ad')]
print(f"Found {len(a549_files)} A549 h5ad files")

# Create output directories
results_dir = "a549_perturbation_analysis"
os.makedirs(results_dir, exist_ok=True)
fig_dir = os.path.join(results_dir, "figures")
os.makedirs(fig_dir, exist_ok=True)

# Load all datasets
adatas = []
for file in a549_files:
    file_path = os.path.join(data_dir, file)
    print(f"Loading {file}...")
    try:
        adata = sc.read_h5ad(file_path)
        # Extract accession ID from filename
        accession = file.replace('.h5ad', '')
        adata.obs['sample_id'] = accession
        adatas.append(adata)
    except Exception as e:
        print(f"Error loading {file}: {e}")


# Combine the data
print("Combining data...")
combined = adatas[0].concatenate(adatas[1:], join='outer', index_unique='-')
print(f"Combined data shape: {combined.shape}")

# Basic preprocessing
sc.pp.filter_cells(combined, min_genes=200)
sc.pp.filter_genes(combined, min_cells=10)
# sc.pp.normalize_total(combined, target_sum=1e4)
# sc.pp.log1p(combined)

Found 34 A549 h5ad files
Loading SRX25289881.h5ad...
Loading SRX19215443.h5ad...
Loading SRX25289889.h5ad...
Loading SRX21897872.h5ad...
Loading SRX25289894.h5ad...
Loading SRX25289888.h5ad...
Loading SRX17150747.h5ad...
Loading SRX17915869.h5ad...
Loading SRX21897873.h5ad...
Loading SRX17150748.h5ad...
Loading SRX17941757.h5ad...
Loading SRX26771412.h5ad...
Loading SRX17941758.h5ad...
Loading ERX8792190.h5ad...
Loading SRX25289892.h5ad...
Loading SRX24227811.h5ad...
Loading SRX25289893.h5ad...
Loading SRX25289890.h5ad...
Loading SRX25289884.h5ad...
Loading SRX17488180.h5ad...
Loading SRX22159982.h5ad...
Loading SRX21897869.h5ad...
Loading SRX17915870.h5ad...
Loading SRX25289887.h5ad...
Loading SRX17150750.h5ad...
Loading SRX19004457.h5ad...
Loading SRX25289886.h5ad...
Loading SRX25289891.h5ad...
Loading SRX25289880.h5ad...
Loading SRX17150749.h5ad...
Loading SRX25289879.h5ad...
Loading SRX25289885.h5ad...
Loading SRX19215444.h5ad...
Loading SRX25289882.h5ad...
Combining data...


  combined = adatas[0].concatenate(adatas[1:], join='outer', index_unique='-')


Combined data shape: (252338, 36601)


In [6]:
import pyarrow.dataset as ds
import gcsfs

fs = gcsfs.GCSFileSystem()
gcs_base_path = "gs://arc-ctc-scbasecamp/2025-02-25/"
feature_type = "GeneFull_Ex50pAS"
metadata_path = os.path.join(gcs_base_path, "metadata", feature_type)
sample_metadata_path = os.path.join(metadata_path, "Homo_sapiens", "sample_metadata.parquet")
sample_metadata = ds.dataset(sample_metadata_path, filesystem=fs, format="parquet").to_table().to_pandas()

# Filter for A549 cell line and extract perturbation information
a549_metadata = sample_metadata[sample_metadata["cell_line"].str.contains("A549", na=False)]

# Map sample_id to perturbation information
id_to_perturbation = dict(zip(a549_metadata["srx_accession"], a549_metadata["perturbation"]))

# Add perturbation information to the anndata object
combined.obs['perturbation'] = combined.obs['sample_id'].map(id_to_perturbation).fillna('unknown')

In [7]:
unique_drugs = combined.obs['perturbation'].unique()
# unique_drugs = [drug for drug in unique_drugs if drug != 'control']

print(f"Analyzing differential expression for {len(unique_drugs)} drugs. Drug list: {unique_drugs}")

Analyzing differential expression for 28 drugs. Drug list: ['Infected with H1N1 (A/California/07/2009), 8 hours post infection'
 'glyconanomaterials for combating bacterial infections'
 'infected with H3N2 (A/Perth/16/2009)'
 'CAR T cell therapy, SUV39H1 knockout' 'uninfected (mock treatment)'
 'irradiation' 'NS1 4xstop (mutant)'
 'CAR T cell therapy with SUV39H1 knockout'
 'irradiated A549 cells (6 Gy γ-ray treatment)'
 'lentiviral pool for expression of 120 gRNAs, tamoxifen, puromycin'
 'mixed sample treatments include DMSO, ARS-1620, and Vemurafenib'
 'Ritonavir, gemcitabine, cisplatin'
 'Infected with H3N2 (A/Perth/16/2009), 16 hours post infection'
 'ACME HS dissociation'
 'infected with H3N2, Cetuximab (anti-EGFR therapy)'
 'Infected (Cal07, 16 hours, Replicate 2)' 'in vitro culture of iPSC-RPE'
 'antineoplastic agents, mixed micelles for drug delivery'
 'CAR T cell therapy, SUV39H1 knockout, CD19 tumor cell treatment'
 'wild-type (WT) virus'
 'infected with H3N2 (A/Perth/16/2009

In [9]:
# 假设 combined 是你的 AnnData 对象
# 将 'uninfected (mock treatment)' 和 'uninfected (Mock)' 替换为 'control'
combined.obs['perturbation'] = combined.obs['perturbation'].replace(
    ['uninfected (mock treatment)', 'uninfected (Mock)'], 'control'
)

# 验证修改后的唯一值
unique_drugs = combined.obs['perturbation'].unique()
print(f"Analyzing differential expression for {len(unique_drugs)} drugs. Drug list: {unique_drugs}。 length: {len(unique_drugs)}")

Analyzing differential expression for 27 drugs. Drug list: ['Infected with H1N1 (A/California/07/2009), 8 hours post infection'
 'glyconanomaterials for combating bacterial infections'
 'infected with H3N2 (A/Perth/16/2009)'
 'CAR T cell therapy, SUV39H1 knockout' 'control' 'irradiation'
 'NS1 4xstop (mutant)' 'CAR T cell therapy with SUV39H1 knockout'
 'irradiated A549 cells (6 Gy γ-ray treatment)'
 'lentiviral pool for expression of 120 gRNAs, tamoxifen, puromycin'
 'mixed sample treatments include DMSO, ARS-1620, and Vemurafenib'
 'Ritonavir, gemcitabine, cisplatin'
 'Infected with H3N2 (A/Perth/16/2009), 16 hours post infection'
 'ACME HS dissociation'
 'infected with H3N2, Cetuximab (anti-EGFR therapy)'
 'Infected (Cal07, 16 hours, Replicate 2)' 'in vitro culture of iPSC-RPE'
 'antineoplastic agents, mixed micelles for drug delivery'
 'CAR T cell therapy, SUV39H1 knockout, CD19 tumor cell treatment'
 'wild-type (WT) virus'
 'infected with H3N2 (A/Perth/16/2009), 16 hours post infe

In [10]:
print(combined.obs['perturbation'].value_counts())

perturbation
ACME HS dissociation                                                 38909
irradiation                                                          17272
irradiation (IR 6h)                                                  16167
infected with H3N2 (A/Perth/16/2009)                                 15949
6 Gy γ-ray treatment                                                 15126
CAR T cell therapy, SUV39H1 knockout, CD19 tumor cell treatment      13063
irradiated A549 cells (6 Gy γ-ray treatment)                         12216
mixed sample treatments include DMSO, ARS-1620, and Vemurafenib      11968
CAR T cell therapy, SUV39H1 knockout                                 10886
infected with H1N1 (A/California/07/2009)                             9998
control                                                               8649
in vitro culture of iPSC-RPE                                          8117
CAR T cell therapy with SUV39H1 knockout                              7693
Infected wit

In [12]:
combined

AnnData object with n_obs × n_vars = 246262 × 33388
    obs: 'gene_count', 'umi_count', 'SRX_accession', 'sample_id', 'batch', 'n_genes', 'perturbation'
    var: 'gene_symbols', 'feature_types', 'n_cells'

In [13]:
output_file = "a549_perturbation_analysis_loka/a549_combined_data_loka.h5ad"
combined.write_h5ad(output_file)

In [None]:
a = combined.X[0].toarray().flatten().tolist()  # 如果 .X 是 sparse matrix
# a = [i for i in a if i > 0] 
# a, len(a)

# Below is previous code, above all is the modified code.

In [3]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import anndata

# Directory containing the downloaded A549 files
data_dir = "a549_data"

# List all h5ad files in the directory
a549_files = [file for file in os.listdir(data_dir) if file.endswith('.h5ad')]
print(f"Found {len(a549_files)} A549 h5ad files")

# Create output directories
results_dir = "a549_perturbation_analysis"
os.makedirs(results_dir, exist_ok=True)
fig_dir = os.path.join(results_dir, "figures")
os.makedirs(fig_dir, exist_ok=True)

# Load all datasets
adatas = []
for file in a549_files:
    file_path = os.path.join(data_dir, file)
    print(f"Loading {file}...")
    try:
        adata = sc.read_h5ad(file_path)
        # Extract accession ID from filename
        accession = file.replace('.h5ad', '')
        adata.obs['sample_id'] = accession
        adatas.append(adata)
    except Exception as e:
        print(f"Error loading {file}: {e}")


# Combine the data
print("Combining data...")
combined = adatas[0].concatenate(adatas[1:], join='outer', index_unique='-')
print(f"Combined data shape: {combined.shape}")

# Basic preprocessing
sc.pp.filter_cells(combined, min_genes=200)
sc.pp.filter_genes(combined, min_cells=10)
sc.pp.normalize_total(combined, target_sum=1e4)
sc.pp.log1p(combined)

import pyarrow.dataset as ds
import gcsfs

fs = gcsfs.GCSFileSystem()
gcs_base_path = "gs://arc-ctc-scbasecamp/2025-02-25/"
feature_type = "GeneFull_Ex50pAS"
metadata_path = os.path.join(gcs_base_path, "metadata", feature_type)
sample_metadata_path = os.path.join(metadata_path, "Homo_sapiens", "sample_metadata.parquet")
sample_metadata = ds.dataset(sample_metadata_path, filesystem=fs, format="parquet").to_table().to_pandas()

# Filter for A549 cell line and extract perturbation information
a549_metadata = sample_metadata[sample_metadata["cell_line"].str.contains("A549", na=False)]

# Map sample_id to perturbation information
id_to_perturbation = dict(zip(a549_metadata["srx_accession"], a549_metadata["perturbation"]))

# Add perturbation information to the anndata object
combined.obs['perturbation'] = combined.obs['sample_id'].map(id_to_perturbation).fillna('unknown')

# Identify control samples (no perturbation) - look for keywords in perturbation column
control_keywords = ['control', 'untreated', 'dmso', 'vehicle', 'none', 'mock']
combined.obs['is_control'] = combined.obs['perturbation'].str.lower().apply(
    lambda x: any(keyword in str(x).lower() for keyword in control_keywords) if pd.notna(x) else False
)

# If no explicit controls are found, try to identify them from the metadata context
if combined.obs['is_control'].sum() == 0:
    print("No explicit control samples found. Analyzing perturbation patterns...")
    
    # Create a summary of perturbation conditions
    perturbation_summary = combined.obs['perturbation'].value_counts()
    print(perturbation_summary)
    
    # Ask user to specify control condition if automatic detection fails
    print("Please manually review perturbation conditions and define controls.")

# Print summary of control vs treatment samples
print(f"Control samples: {combined.obs['is_control'].sum()}")
print(f"Treatment samples: {(~combined.obs['is_control']).sum()}")

# Group perturbations by drug name where possible
# This requires some text processing as perturbation descriptions vary
def extract_drug_name(perturbation_text):
    if pd.isna(perturbation_text):
        return 'unknown'
    
    perturbation_text = str(perturbation_text).lower()
    
    # Skip control conditions
    if any(keyword in perturbation_text for keyword in control_keywords):
        return 'control'
    
    # Try to extract drug names - this would need refinement based on actual data
    # Example logic - this should be adjusted based on your actual data format
    if 'treated with' in perturbation_text:
        parts = perturbation_text.split('treated with')
        if len(parts) > 1:
            drug_part = parts[1].strip()
            # Take the first word as potential drug name
            drug_name = drug_part.split()[0].strip(',.:;')
            return drug_name
    
    # Add more extraction rules as needed
    
    # Default return the first 30 chars if no pattern matched
    return perturbation_text[:30]

combined.obs['drug'] = combined.obs['perturbation'].apply(extract_drug_name)

# For each drug, perform hypothesis testing to identify differentially expressed genes
control_cells = combined[combined.obs['is_control']]
if len(control_cells) == 0:
    print("Error: No control cells identified. Cannot perform differential expression analysis.")
    exit()

# Create results dataframe to store findings
results = pd.DataFrame()

# Get unique drugs (excluding control)
unique_drugs = combined.obs['drug'].unique()
unique_drugs = [drug for drug in unique_drugs if drug != 'control']

print(f"Analyzing differential expression for {len(unique_drugs)} drugs...")

for drug in unique_drugs:
    print(f"Processing drug: {drug}")
    
    # Get cells treated with this drug
    drug_cells = combined[combined.obs['drug'] == drug]
    
    if len(drug_cells) < 10:
        print(f"  Skipping {drug}: too few cells ({len(drug_cells)})")
        continue
    
    # Perform differential expression analysis
    try:
        sc.tl.rank_genes_groups(combined, 'drug', groups=[drug], reference='control', method='wilcoxon')
        
        # Extract results for this drug
        de_genes = sc.get.rank_genes_groups_df(combined, group=drug)
        de_genes['drug'] = drug
        
        # Filter for significantly differentially expressed genes
        significant_genes = de_genes[de_genes['pvals_adj'] < 0.05]
        
        # Add to results
        results = pd.concat([results, significant_genes])
        
        # Create volcano plot for top genes
        plt.figure(figsize=(10, 8))
        plt.scatter(de_genes['logfoldchanges'], -np.log10(de_genes['pvals']), alpha=0.5)
        
        # Highlight significant genes
        significant = (de_genes['pvals_adj'] < 0.05)
        plt.scatter(
            de_genes.loc[significant, 'logfoldchanges'],
            -np.log10(de_genes.loc[significant, 'pvals']),
            color='red', alpha=0.8
        )
        
        # Label top genes
        top_genes = de_genes.nsmallest(10, 'pvals')
        for _, gene in top_genes.iterrows():
            plt.annotate(gene['names'], 
                        (gene['logfoldchanges'], -np.log10(gene['pvals'])),
                        xytext=(5, 5), textcoords='offset points')
        
        plt.axhline(-np.log10(0.05), linestyle='--', color='gray')
        plt.axvline(-1, linestyle='--', color='gray')
        plt.axvline(1, linestyle='--', color='gray')
        
        plt.xlabel('Log Fold Change')
        plt.ylabel('-log10(p-value)')
        plt.title(f'Differential Expression: {drug} vs Control')
        plt.savefig(os.path.join(fig_dir, f'volcano_plot_{drug}.png'))
        plt.close()
        
        # Save top genes list for this drug
        top_n = min(50, len(significant_genes))
        significant_genes.head(top_n).to_csv(
            os.path.join(results_dir, f'top_genes_{drug}.csv'), index=False)
        
    except Exception as e:
        print(f"  Error analyzing {drug}: {e}")

# Save combined results
if not results.empty:
    # Summary of all drugs and their significant genes
    drug_gene_counts = results.groupby('drug').size().reset_index(name='sig_gene_count')
    drug_gene_counts = drug_gene_counts.sort_values('sig_gene_count', ascending=False)
    
    # Save summary
    drug_gene_counts.to_csv(os.path.join(results_dir, 'drug_affected_gene_counts.csv'), index=False)
    
    # Save full results
    results.to_csv(os.path.join(results_dir, 'all_drug_gene_effects.csv'), index=False)
    
    # Create summary figure of drugs by number of affected genes
    plt.figure(figsize=(12, 8))
    sns.barplot(x='drug', y='sig_gene_count', data=drug_gene_counts)
    plt.xticks(rotation=90)
    plt.title('Number of Significantly Affected Genes by Drug')
    plt.tight_layout()
    plt.savefig(os.path.join(fig_dir, 'drug_gene_count_summary.png'))
    plt.close()
    
    # Identify most frequently affected genes across multiple drugs
    gene_drug_counts = results.groupby('names').size().reset_index(name='drug_count')
    gene_drug_counts = gene_drug_counts.sort_values('drug_count', ascending=False)
    
    # Save genes affected by multiple drugs
    gene_drug_counts.head(100).to_csv(os.path.join(results_dir, 'multi_drug_affected_genes.csv'), index=False)
    
    # Create heatmap of top genes across drugs
    top_genes = gene_drug_counts.head(20)['names'].tolist()
    top_drugs = drug_gene_counts.head(15)['drug'].tolist()
    
    # Filter results for top genes and drugs
    heatmap_data = results[
        (results['names'].isin(top_genes)) & 
        (results['drug'].isin(top_drugs))
    ]
    
    if not heatmap_data.empty:
        # Create pivot table for heatmap
        pivot_data = heatmap_data.pivot_table(
            index='names', 
            columns='drug', 
            values='logfoldchanges',
            fill_value=0
        )
        
        # Create heatmap
        plt.figure(figsize=(15, 10))
        sns.heatmap(pivot_data, cmap='RdBu_r', center=0, annot=False)
        plt.title('Log Fold Changes of Top Genes Across Drugs')
        plt.tight_layout()
        plt.savefig(os.path.join(fig_dir, 'gene_drug_heatmap.png'))
        plt.close()

print(f"Analysis complete. Results saved to {results_dir}")

In [None]:
output_file = "a549_combined_data.h5ad"
combined.write_h5ad(output_file)


In [6]:
metadata = []
output_dir = "a549_combined_data"
os.makedirs(output_dir, exist_ok=True)


# List all h5ad files in the directory
a549_files = [file for file in os.listdir(data_dir) if file.endswith('.h5ad')]
print(f"Found {len(a549_files)} A549 h5ad files")

# Save basic metadata for each file
for file in a549_files:
    file_path = os.path.join(data_dir, file)
    try:
        # Load basic info without reading full data
        adata = sc.read_h5ad(file_path, backed='r')
        
        metadata.append({
            'filename': file,
            'filepath': file_path,
            'n_obs': adata.n_obs,
            'n_vars': adata.n_vars,
            'obs_keys': list(adata.obs.keys()),
            'var_keys': list(adata.var.keys()),
        })
        
        print(f"Processed metadata for {file}")
        
    except Exception as e:
        print(f"Error processing {file}: {e}")

# Save metadata as CSV
metadata_df = pd.DataFrame(metadata)
metadata_df.to_csv(os.path.join(output_dir, "a549_files_metadata.csv"), index=False)
print(f"Saved metadata for {len(metadata_df)} files")

# Create a readme file with loading instructions
with open(os.path.join(output_dir, "README.txt"), 'w') as f:
    f.write("A549 Cell Line Data Collection\n")
    f.write("=============================\n\n")
    f.write(f"This directory contains metadata for {len(metadata_df)} A549 cell line h5ad files.\n\n")
    f.write("To recreate the combined dataset, use the following Python code:\n\n")
    f.write("```python\n")
    f.write("import scanpy as sc\n")
    f.write("import pandas as pd\n")
    f.write("import os\n\n")
    f.write("# Load the metadata\n")
    f.write("metadata = pd.read_csv('a549_files_metadata.csv')\n\n")
    f.write("# Load all datasets\n")
    f.write("adatas = []\n")
    f.write("for file_path in metadata['filepath']:\n")
    f.write("    adata = sc.read_h5ad(file_path)\n")
    f.write("    # Add sample ID from filename\n")
    f.write("    adata.obs['sample_id'] = os.path.basename(file_path).replace('.h5ad', '')\n")
    f.write("    adatas.append(adata)\n\n")
    f.write("# Combine datasets\n")
    f.write("combined = adatas[0].concatenate(adatas[1:], join='outer', index_unique='-')\n")
    f.write("print(f'Combined data shape: {combined.shape}')\n")
    f.write("```\n")

print(f"Created documentation in {output_dir}/README.txt")

Found 34 A549 h5ad files
Processed metadata for SRX17915870.h5ad
Processed metadata for SRX17150748.h5ad
Processed metadata for SRX25289884.h5ad
Processed metadata for SRX21897873.h5ad
Processed metadata for SRX25289889.h5ad
Processed metadata for SRX25289882.h5ad
Processed metadata for SRX17150747.h5ad
Processed metadata for SRX25289890.h5ad
Processed metadata for ERX8792190.h5ad
Processed metadata for SRX19215443.h5ad
Processed metadata for SRX25289894.h5ad
Processed metadata for SRX17915869.h5ad
Processed metadata for SRX25289893.h5ad
Processed metadata for SRX17488180.h5ad
Processed metadata for SRX26771412.h5ad
Processed metadata for SRX17150749.h5ad
Processed metadata for SRX25289891.h5ad
Processed metadata for SRX21897869.h5ad
Processed metadata for SRX19215444.h5ad
Processed metadata for SRX22159982.h5ad
Processed metadata for SRX17150750.h5ad
Processed metadata for SRX25289892.h5ad
Processed metadata for SRX25289886.h5ad
Processed metadata for SRX25289887.h5ad
Processed metada