# Gene expression matrix for each B cell state

In [None]:
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import os

### Define output path for saving CSV files

In [None]:
# Destination directory for the exported CSV files. Exactly one of the two
# assignments below should be active; edit the path to your own directory.
output_path = ".../Data/Raw_Data/Male_Donor"   # multiome run
#output_path = ".../Data/Raw_Data/scRNA_Male_Donor"  # scRNA-seq run

# Make sure the destination exists before anything is written to it;
# exist_ok=True turns this into a no-op when the directory is already there.
os.makedirs(name=output_path, exist_ok=True)

### Load the data

In [None]:
## Multiome Male Donor
# Read the clustered dataset from disk. Keep exactly one of the two reads
# active, matching the output_path selected for this run.
data = sc.read_h5ad(
    filename=".../adata_aggregated_gene.leiden_updated.h5ad"  # multiome
)
#data = sc.read_h5ad(".../human_B_cell_scRNA_seq_230327.umap.leiden_clusters.h5ad") #scRNA

# Draw the UMAP embedding coloured by the 'leiden' annotation, then display it
sc.pl.umap(adata=data, color=['leiden'])
plt.show()

In [None]:
# Define a function to extract subsets based on 'leiden' values
def get_subset(data, leiden_values):
    """Return the part of ``data`` whose 'leiden' label is in ``leiden_values``.

    Parameters
    ----------
    data
        Object exposing an ``obs`` table with a 'leiden' column and supporting
        indexing by a boolean mask built from that column.
    leiden_values
        Label(s) to keep. A single string is treated as one label; any other
        iterable is taken as a collection of labels.

    Returns
    -------
    The result of indexing ``data`` with the mask. It is also printed so the
    selection can be checked when the cell is run.

    Warns
    -----
    UserWarning
        If a requested label does not occur in ``data.obs['leiden']``. Such a
        label selects nothing, so the returned subset may be empty.
    """
    import warnings  # local import keeps this cell self-contained

    # ``isin`` rejects a bare string, so wrap a single label in a list.
    if isinstance(leiden_values, str):
        leiden_values = [leiden_values]
    else:
        leiden_values = list(leiden_values)

    labels = data.obs['leiden']

    # Flag labels that match nothing (wrong type, typo, or labels meant for a
    # different dataset) instead of silently returning an empty selection.
    present = set(labels.unique())
    missing = [value for value in leiden_values if value not in present]
    if missing:
        warnings.warn(
            f"leiden label(s) {missing} not found in data.obs['leiden']; "
            "labels must match the stored values exactly, including their type.",
            stacklevel=2,
        )

    subset = data[labels.isin(leiden_values)]
    print(subset)
    return subset

### Male donor Multiome clusters

In [None]:

# Extract one subset per B cell state; each state is selected by its
# 'leiden' cluster label in the multiome dataset
GC = get_subset(data, ['3'])
PB = get_subset(data, ['7'])
ABC_d4 = get_subset(data, ['2'])
d0 = get_subset(data, ['5'])
ABC_d2 = get_subset(data, ['1'])
MBC = get_subset(data, ['6'])

# Parallel lists: subset_names[i] is the CSV base name for subsets[i]
subsets = [GC, PB, ABC_d4, d0, ABC_d2, MBC]
subset_names = ['GC', 'PB', 'ABC_d4', 'd0', 'ABC_d2', 'MBC']

# Write one CSV per state: rows are obs_names, columns are var_names
for subset, name in zip(subsets, subset_names):
    # .X may be a sparse matrix or already a dense array; only sparse
    # matrices provide .toarray(), so convert only when it is available.
    mtx = subset.X.toarray() if hasattr(subset.X, "toarray") else subset.X
    # Take the column labels from the subset itself rather than the global
    # `data`, so the frame is built from a single object.
    mtx_df = pd.DataFrame(mtx, index=subset.obs_names, columns=subset.var_names)
    mtx_df.to_csv(os.path.join(output_path, f"{name}.csv"), index=True)
    print(f"Saved {name}.csv with shape {mtx.shape}")

### Male donor scRNA-seq clusters (run only when the scRNA-seq data and output path are selected above)

In [None]:
# Extract one subset per B cell state from the scRNA-seq 'leiden' labels;
# PB is the union of clusters '0' and '8'
GC = get_subset(data, ['1'])
PB = get_subset(data, ['0', '8'])


# Parallel lists: subset_names[i] is the CSV base name for subsets[i]
subsets = [GC, PB]
subset_names = ['GC', 'PB']

# Write one CSV per state: rows are obs_names, columns are var_names.
# NOTE(review): this writes GC.csv and PB.csv into output_path, the same file
# names the multiome cell uses - confirm output_path points at the scRNA-seq
# directory before running, or those files will be overwritten.
for subset, name in zip(subsets, subset_names):
    # .X may be a sparse matrix or already a dense array; only sparse
    # matrices provide .toarray(), so convert only when it is available.
    mtx = subset.X.toarray() if hasattr(subset.X, "toarray") else subset.X
    # Take the column labels from the subset itself rather than the global
    # `data`, so the frame is built from a single object.
    mtx_df = pd.DataFrame(mtx, index=subset.obs_names, columns=subset.var_names)
    mtx_df.to_csv(os.path.join(output_path, f"{name}.csv"), index=True)
    print(f"Saved {name}.csv with shape {mtx.shape}")