In [None]:
import pandas as pd
import anndata as ad
from pathlib import Path
import os
import sys
import numpy as np
import scanpy as sc

from omegaconf import OmegaConf, DictConfig

import matplotlib.pyplot as plt
from matplotlib_venn import venn2

import seaborn as sns

In [None]:
# Root of the local Blueprint checkout. Its `src` folder is put on `sys.path`
# so that the project packages imported in the following cell can be resolved.
repo_dir = '/home/projects/amit/annaku/repos/Blueprint'
src_dir = os.path.join(repo_dir, 'src')

# Guard the append so that re-running this cell does not keep adding the same
# entry to `sys.path`.
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [None]:
from data_loading.utils import load_dataframe_from_file, get_updated_disease_col
from clinical_predictions.clinical_data_loading import (
    load_and_process_clinical_data, 
    add_general_response_cens, 
    add_general_response, 
    add_Kydar_response,
    add_CART_response,
    generate_gen_info
)
from pre_processing.utils import count_number_of_annotation_in_neighborhood, add_number_of_patients_in_neighborhood

# vars

In [None]:
# Build the config location from `repo_dir` (defined in the path-setup cell)
# instead of repeating the absolute checkout path a second time.
# The resulting string is identical to the previously hard-coded one.
config_path = os.path.join(repo_dir, 'configs', 'config.yaml')
conf = OmegaConf.load(config_path)

In [None]:
# Settings read from the loaded config that the rest of the notebook uses.
clinical_conf = conf['annotation']['clinical_data']

version = conf['data_loading']['version']
data_path = conf['outputs']['output_dir']
clin_file_path = clinical_conf['clinical_data_file_path']

# Echo the three values, one per line, as a quick sanity check.
print(f"{version} \n {data_path} \n {clin_file_path}")

In [None]:
# Version tag stored as a date-like string.
# NOTE(review): `md_version` is not read anywhere in the visible cells — confirm it is still needed.
md_version = '04-05-2025'

In [None]:
# When True, the PC filtering cell below additionally drops the cells flagged as
# `bad_pcs_all` and the malignant cells with a low log(total_counts).
additional_filt = True # extra PC filtering (leiden clusters, neighbourhood)

# TME + PC together

In [None]:
# Timestamp suffix of the annotated scVI object that should be loaded.
date = '2025-08-25'

# The file name encodes both the data version (from the config) and the timestamp.
annotated_file = f'adata_with_scvi_full_annot_pred_data_v_{version}_ts_{date}.h5ad'
adata_proc = ad.read_h5ad(os.path.join(data_path, annotated_file))

In [None]:
# Force a garbage-collection pass right after loading the large object.
import gc

# The return value (number of unreachable objects found) is shown as the cell output.
gc.collect()

In [None]:
# PC filt
#
# Flag plasma cells (PC) that look misplaced among CD45 cells, either by their
# local neighbourhood composition or by the dominant annotation of their Leiden
# cluster, and (optionally) drop them together with low-UMI malignant cells.

super_pop = conf['annotation']['major_cell_type_column']

# Project helpers. The code below reads the obs columns
# `count_of_CD45_in_neighborhood` / `count_of_PC_in_neighborhood`, which these
# calls are expected to create (TODO confirm against the helper).
count_number_of_annotation_in_neighborhood(adata_proc, super_pop, "CD45")
count_number_of_annotation_in_neighborhood(adata_proc, super_pop, "PC")

is_pc = adata_proc.obs[super_pop] == "PC"

# PCs that have fewer PC than CD45 cells in their neighbourhood.
adata_proc.obs["bad_pcs"] = is_pc & (
    adata_proc.obs['count_of_PC_in_neighborhood'] < adata_proc.obs['count_of_CD45_in_neighborhood']
)
print('bad_pcs: ', adata_proc.obs["bad_pcs"].sum())

##

# Most frequent major cell type per Leiden cluster.
# `cluster_annot` is reused by the CD45 filtering cell that follows.
cluster_annot = {}
for c, group in adata_proc.obs[["leiden", super_pop]].groupby("leiden"):
    cluster_annot[c] = group.value_counts().index[0][1]

# PCs that belong to a cluster dominated by CD45 cells (vectorised membership
# test instead of a per-cell lambda).
cd45_clusters = [cluster for cluster, annot in cluster_annot.items() if annot == "CD45"]
adata_proc.obs["bad_pcs_leiden"] = is_pc & adata_proc.obs['leiden'].isin(cd45_clusters)
print('bad_pcs_leiden: ', adata_proc.obs["bad_pcs_leiden"].sum())

##

adata_proc.obs["bad_pcs_all"] = adata_proc.obs["bad_pcs_leiden"] | adata_proc.obs["bad_pcs"]
print(adata_proc.obs["bad_pcs_all"].sum())

pc_close_other_pc_col = "pc_close_other_pc"
adata_proc.obs[pc_close_other_pc_col] = is_pc & ~adata_proc.obs["bad_pcs_all"]
print('pc_close_other_pc: ', adata_proc.obs["pc_close_other_pc"].sum())

##

if additional_filt:
    # Malignant cells whose log(total_counts) is below this value are treated as noisy.
    min_log_total_counts = 6.5
    pc_populations = ['Malignant', 'Normal_PC', 'Interm', 'Normal_Pb']

    adata_proc = adata_proc[~adata_proc.obs["bad_pcs_all"]]

    # Only two obs columns are needed to find the noisy cells, so work on the
    # obs table instead of copying the whole PC AnnData subset.
    pc_obs = adata_proc.obs.loc[
        adata_proc.obs['Populations'].isin(pc_populations), ['Populations', 'total_counts']
    ]
    is_noisy_malignant = (np.log(pc_obs['total_counts']) < min_log_total_counts) & (
        pc_obs['Populations'] == "Malignant"
    )

    # exclude noisy by umi malignant cells
    noisy_by_umi_malignant_cells = pc_obs.index[is_noisy_malignant]
    print('noisy malignant cells (!): ', len(noisy_by_umi_malignant_cells))

    # Materialise the subset: the next cell writes new obs columns, which would
    # otherwise trigger an implicit copy of a view.
    adata_proc = adata_proc[~adata_proc.obs.index.isin(noisy_by_umi_malignant_cells)].copy()

In [None]:
# same for cd45
#
# Mirror of the PC filter: flag CD45 cells that have fewer CD45 than PC cells in
# their neighbourhood, or that sit in a Leiden cluster annotated as PC, then
# drop every flagged PC / CD45 cell.

is_cd45 = adata_proc.obs[super_pop] == "CD45"
fewer_cd45_than_pc = (
    adata_proc.obs['count_of_CD45_in_neighborhood'] < adata_proc.obs['count_of_PC_in_neighborhood']
)
in_pc_cluster = adata_proc.obs['leiden'].apply(lambda cluster: cluster_annot[cluster] == "PC")

adata_proc.obs["bad_cd45s"] = is_cd45 & fewer_cd45_than_pc
adata_proc.obs["bad_cd45s_leiden"] = is_cd45 & in_pc_cluster
adata_proc.obs["bad_cd45s_all"] = adata_proc.obs["bad_cd45s_leiden"] | adata_proc.obs["bad_cd45s"]
print(adata_proc.obs["bad_cd45s_all"].sum())

flagged_cells = adata_proc.obs["bad_pcs_all"] | adata_proc.obs["bad_cd45s_all"]
adata_proc = adata_proc[~flagged_cells]
adata_proc

In [None]:
# Show the population sizes (missing values included) before filtering.
display(adata_proc.obs['Populations'].value_counts(dropna=False))

# Labels removed from the object; 'nan' is matched as a literal string.
excluded_populations = ['UN', 'nan', 'Erythrocytes']
mask = ~adata_proc.obs['Populations'].isin(excluded_populations)

# Materialise the subset so later cells work on an independent object.
adata_proc = adata_proc[mask].copy()
adata_proc

In [None]:
# long
#
# Neighbour graph on the scVI representation, stored under the 'tme' key,
# followed by a UMAP built from that graph and a quick overview plot.

tme_key = 'tme'

sc.pp.neighbors(adata_proc, n_neighbors=30, use_rep='X_scVI', key_added=tme_key)
sc.tl.umap(adata_proc, min_dist=0.3, neighbors_key=tme_key)

sc.pl.umap(adata_proc, color=['Populations', 'Method'])

In [None]:
#merge with clinical data
#
# Adds per-sample clinical metadata to `adata_proc.obs`, refreshes the `Disease`
# column from the processed clinical table, and plots how the sample codes in
# obs overlap with those in the clinical sheet.


def _plot_sample_code_venn(obs_codes, clinical_codes, obs_label):
    """Draw a two-set Venn diagram of sample codes.

    Parameters
    ----------
    obs_codes : iterable
        Sample codes present in `adata_proc.obs`.
    clinical_codes : iterable
        Sample codes present in the clinical table.
    obs_label : str
        Label shown for the obs set.
    """
    plt.figure(figsize=(10, 6))
    venn2([set(obs_codes), set(clinical_codes)], (obs_label, 'hospital_dataset Sample.Code'))
    plt.title('venn of sample codes')
    plt.show()


# Sample key: lower-cased hospital code + '_' + biopsy sequence number.
adata_proc.obs['Hospital.Code'] = adata_proc.obs['Hospital.Code'].str.lower()
adata_proc.obs['Sample.Code'] = adata_proc.obs['Hospital.Code'].astype(str) + '_' +  adata_proc.obs['Biopsy.Sequence'].astype(int).astype(str)
adata_proc.obs['Populations_with_pc_and_pb'] = adata_proc.obs['Populations'].copy()
adata_proc.obs['Populations_with_pc'] = adata_proc.obs['Populations_with_pc_and_pb'].copy().replace({'Interm':'Healthy_Like',
                                                                                           'Normal_Pb':'Healthy_Like',
                                                                                           'Normal_PC':'Healthy'})

new_hospital_dataset = pd.read_excel(clin_file_path, sheet_name='Clinical Data')
new_hospital_dataset['Sample.Code'] = new_hospital_dataset['Code'].str.lower() + '_' + new_hospital_dataset['Biopsy sequence No.'].astype(int).astype(str)

# merging

# `names='index'` makes the round trip through a column independent of whatever
# name the obs index currently carries; `validate='m:1'` guarantees that the
# left join cannot duplicate cells.
merged_obs = (
    adata_proc.obs
    .reset_index(names='index')
    .merge(
        new_hospital_dataset,
        how='left',
        left_on='Sample.Code',
        right_on='Sample.Code',
        validate='m:1',
    )
    .set_index('index')
)

adata_proc.obs = merged_obs.rename(
    columns={'Cytogenetics Risk (1=standard risk, 2=single hit, 3=2+ hits)': 'Cyto_Risk'}
)

# check disease updating

clinical_disease = load_and_process_clinical_data(Path(clin_file_path), code_lower_case=False, get_hospital_stage=True,
                                                 get_post_treatment=False, get_treatment_history=True,
                                                 get_combination_exposure=False, get_pfs_data=False)

clinical_disease['Code'] = clinical_disease['Code'].str.lower()

adata_proc.obs['Biopsy.Sequence'] = adata_proc.obs['Biopsy.Sequence'].astype(int)
merged = adata_proc.obs.merge(clinical_disease, how='left',
                              left_on=['Hospital.Code', 'Biopsy.Sequence'],
                              right_on=['Code', 'Biopsy sequence No.'])

# The result is re-indexed positionally below, so the join must not have
# changed the number of rows; fail with a clear message if it did.
assert len(merged) == adata_proc.n_obs, (
    "clinical_disease has duplicated (Code, Biopsy sequence No.) keys: "
    f"merge produced {len(merged)} rows for {adata_proc.n_obs} cells"
)

updated_disease_col = get_updated_disease_col(merged, "Disease", "Disease Stage Hospital",
                                              update_non_naive_NDMM_by_treatment_history=False, remove_PRMM=False)
updated_disease_col.index = adata_proc.obs_names
adata_proc.obs['Disease'] = updated_disease_col
print('updaed diseases: ',adata_proc.obs['Disease'].value_counts(dropna = False))

# plotting

# with filtering of number of cells
cells_per_sample = adata_proc.obs['Sample.Code'].value_counts()
samples_with_enough_cells = cells_per_sample[cells_per_sample > 50].index
mask = adata_proc.obs['Sample.Code'].isin(samples_with_enough_cells)
_plot_sample_code_venn(adata_proc.obs['Sample.Code'][mask],
                       new_hospital_dataset['Sample.Code'],
                       'adata.obs Sample.Code\n(samples with > 50 cells)')

# without filtering of number of cells
_plot_sample_code_venn(adata_proc.obs['Sample.Code'],
                       new_hospital_dataset['Sample.Code'],
                       'adata.obs Sample.Code')

In [None]:
# gen cols
#
# Turn the numeric cytogenetics columns coming from the clinical sheet into
# readable string labels and derive a combined deletion/amplification summary.

del_cols = ['1q21+', 'del(1p)', 'del(13q)', 'del(17p)']
translocation_cols = ['t(11:14)', 't(4:14)', 't(14:16)', 't(14:20)']

# 'Cyto_Risk' is the renamed
# 'Cytogenetics Risk (1=standard risk, 2=single hit, 3=2+ hits)' column.
gen_cols = del_cols + translocation_cols + ['Cyto_Risk']
aberration_cols = gen_cols[:-1]  # everything except 'Cyto_Risk'

cyto_risk_labels = {1.: 'Standard Risk', 2.: 'Single Hit', 3.: '2+ Hits'}
adata_proc.obs['Cyto_Risk'] = adata_proc.obs['Cyto_Risk'].replace(cyto_risk_labels).fillna('NA')

# Missing values become the string 'NA'; everything is stored as text.
adata_proc.obs[gen_cols] = adata_proc.obs[gen_cols].fillna('NA').astype(str)

# After stringification the 0/1 flags read '0.0' / '1.0'.
yes_no_labels = {'0.0': 'No', '1.0': 'Yes'}
adata_proc.obs[aberration_cols] = adata_proc.obs[aberration_cols].replace(yes_no_labels).astype(str)

# Per-cell summary built by the project helper from the del/amp columns.
delampl_info = adata_proc.obs.apply(lambda row: generate_gen_info(row, del_cols), axis=1)
adata_proc.obs['delampl_info'] = delampl_info.fillna('NA').replace({'': 'NA'})

In [None]:
# response
#
# Normalise the date columns and add response labels for several PFS
# thresholds using the project helpers.

# Parse mixed-format, day-first dates and store them as MM/DD/YYYY strings.
for date_col in ('Last FU Date', 'Biopsy date'):
    parsed_dates = pd.to_datetime(adata_proc.obs[date_col], format='mixed', dayfirst=True)
    adata_proc.obs[date_col] = parsed_dates.dt.strftime('%m/%d/%Y')

CAR_T_full_clinical_data_path = Path('/home/projects/amit/annaku/data/mm_2023/clinical_prediction/CART MM responder vs non responder 160424.xlsx')

# cart only
for PFS_THR in [3, 6, 9]:

    # Keep the cell ids in a column so the index can be restored after the helper call.
    adata_proc.obs['index'] = adata_proc.obs.index

    # NOTE(review): the file is re-read on every iteration; it could be loaded once
    # before the loop if add_CART_response does not modify its input — confirm.
    CAR_T_full_clinical_data = load_dataframe_from_file(CAR_T_full_clinical_data_path)
    all_metadata_df = add_CART_response(
        adata_proc.obs,
        full_clinical_df=CAR_T_full_clinical_data,
        pfs_policy=f"{PFS_THR}M PFS",
    )
    all_metadata_df.index = all_metadata_df['index']

    # Suffix the helper's output column with the threshold.
    cart_renames = {'CART_response': f'CART_response_{PFS_THR}M'}
    adata_proc.obs = all_metadata_df.rename(columns=cart_renames)

# pfs resp, cens and not cens
for PFS_THR in [3, 4, 6, 9]:

    adata_proc.obs['index'] = adata_proc.obs.index
    all_metadata_df = add_general_response(adata_proc.obs, pfs_thresh_months=PFS_THR)
    all_metadata_df = add_Kydar_response(all_metadata_df, number_of_months=PFS_THR)

    all_metadata_df.index = all_metadata_df['index']

    response_renames = {
        'general_pfs_response': f'general_pfs_response_{PFS_THR}M',
        'Kydar_response': f'Kydar_response_{PFS_THR}M',
    }
    all_metadata_df = all_metadata_df.rename(columns=response_renames)

    # with cens
    all_metadata_df = add_general_response_cens(all_metadata_df, pfs_thresh_months=PFS_THR)

    # The censored Kydar label is the censored general label, kept only for
    # rows whose 'Project' is 'Kydar'.
    general_cens_col = f'general_pfs_response_cens_{PFS_THR}M'
    kydar_cens_col = f'Kydar_response_cens_{PFS_THR}M'
    all_metadata_df[kydar_cens_col] = all_metadata_df[general_cens_col]
    not_kydar = all_metadata_df['Project'] != 'Kydar'
    all_metadata_df.loc[not_kydar, kydar_cens_col] = None

    adata_proc.obs = all_metadata_df

In [None]:
# Sanity check: the metadata frame left over from the response loops must have
# exactly the same index, in the same order, as `adata_proc.obs`.
(all_metadata_df.index == adata_proc.obs.index).all()

In [None]:
# Store the metadata frame as the obs table.
# NOTE(review): the last iteration of the response loop already performs this
# assignment, so this cell looks redundant — confirm before removing.
adata_proc.obs = all_metadata_df

In [None]:
# mark samples doubled between MARS and SPID (to exclude downstream after checking consistency between batches)
#
# For every sample sequenced with exactly two methods, the cells of the method
# with fewer malignant cells get `cells_rem_dupl_between_methods = True`.

# Only these three columns are needed; no need to copy the full obs table.
df = adata_proc.obs[['Sample.Code', 'Method', 'Populations']]
sample_method_counts = df.groupby('Sample.Code', observed=True)['Method'].nunique()
duplicated_samples = sample_method_counts[sample_method_counts > 1].index.tolist()
print(f"{len(duplicated_samples)} samples sequenced with multiple methods:")
print(duplicated_samples)

malignant_df = (df[df['Sample.Code'].isin(duplicated_samples)]
                .groupby(['Sample.Code', 'Method'], observed=True)
                .agg(n_malignant=('Populations', lambda x: (x == 'Malignant').sum()),
                     total_cells=('Populations', 'size'))
                .reset_index())

print("\nMalignant cell counts per sample-method:")
print(malignant_df)

remove_list = []
skipped_samples = []

for sample in duplicated_samples:
    sample_data = malignant_df[malignant_df['Sample.Code'] == sample]

    # The rule below is defined for exactly two methods; anything else is
    # left unmarked, but reported instead of being skipped silently.
    if len(sample_data) != 2:
        skipped_samples.append(sample)
        continue

    # On a tie `idxmin` picks the first row of the pair.
    min_idx = sample_data['n_malignant'].idxmin()
    remove_list.append({
        'Sample.Code': sample,
        'Method': sample_data.loc[min_idx, 'Method']
    })

if skipped_samples:
    print(f"\nWARNING: {len(skipped_samples)} samples do not have exactly two methods and were left unmarked:")
    print(skipped_samples)

# Explicit columns keep the frame well-formed when nothing has to be removed.
remove_df = pd.DataFrame(remove_list, columns=['Sample.Code', 'Method'])

print("\nSamples to remove (method with fewer malignant cells):")
print(remove_df)

adata_proc.obs['cells_rem_dupl_between_methods'] = False

for _, row in remove_df.iterrows():
    mask = (adata_proc.obs['Sample.Code'] == row['Sample.Code']) & \
           (adata_proc.obs['Method'] == row['Method'])
    adata_proc.obs.loc[mask, 'cells_rem_dupl_between_methods'] = True

print(f"\nCells marked: {adata_proc.obs['cells_rem_dupl_between_methods'].sum()}")
print(f"Total cells: {adata_proc.n_obs}")

In [None]:
# save adata
#
# Object-dtype columns are cast to plain strings and datetime columns are
# formatted as text before the object is written to disk.

for text_col in adata_proc.obs.select_dtypes(include=['object']).columns:
    adata_proc.obs[text_col] = adata_proc.obs[text_col].astype(str)

for time_col in adata_proc.obs.select_dtypes(include=['datetime64']).columns:
    adata_proc.obs[time_col] = adata_proc.obs[time_col].dt.strftime('%Y-%m-%d %H:%M:%S')

# TME + PC object including the UMAP / neighbour graphs.
name = os.path.join(data_path, f'adata_PC_and_TME_with_ann_merged_v_{version}_UMAP.h5ad')
adata_proc.write_h5ad(name)
print(f'saved to {name}')

In [None]:
# counts h5ad file
#
# Build an object that carries the expression matrix of the pre-processed file
# but only the cells (and the obs annotations) of the annotated TME + PC object.
filename = f'pp_adata_data_v_{version}.h5ad'
adata_pp = ad.read_h5ad(os.path.join(data_path, filename)) # raw after basic QC

name = os.path.join(data_path, f'adata_PC_and_TME_with_ann_merged_v_{version}_UMAP.h5ad')
adata_proc = ad.read_h5ad(name)

adata = ad.AnnData(X=adata_pp.X.copy(), obs=adata_pp.obs.copy(), var=adata_pp.var.copy(),
                   obsm=adata_pp.obsm, obsp=adata_pp.obsp, uns=adata_pp.uns
                   )
cells_to_left = adata_proc.obs.index

# Materialise the subset explicitly: obs is replaced right below, and assigning
# to a view would otherwise trigger an implicit copy.
adata = adata[cells_to_left, :].copy()

# Copy just the obs table; deep-copying the whole annotated AnnData only to
# read `.obs` is unnecessary.
adata.obs = adata_proc.obs.copy()
adata

In [None]:
# add umap data to adata with counts

# Embeddings.
for embedding_key in ('X_scVI', 'X_umap'):
    adata.obsm[embedding_key] = adata_proc.obsm[embedding_key]

# Neighbour graphs: the default ones and the ones stored under the 'tme' key.
for graph_key in ('distances', 'connectivities', 'tme_distances', 'tme_connectivities'):
    adata.obsp[graph_key] = adata_proc.obsp[graph_key]

adata.uns['neighbors'] = adata_proc.uns['neighbors']

# `sc.pp.neighbors(..., key_added='tme')` keeps its settings under uns['tme'].
# Without that entry the copied 'tme_*' graphs cannot be addressed through
# `neighbors_key='tme'` on the counts object.
if 'tme' in adata_proc.uns:
    adata.uns['tme'] = adata_proc.uns['tme']

# Flag the genes that are present in the annotated object.
adata.var['HVG_scVI'] = adata.var_names.isin(adata_proc.var_names)

In [None]:
# save adata
#
# Same clean-up as for the UMAP object: object columns become strings and
# datetime columns become formatted text before writing.

for text_col in adata.obs.select_dtypes(include=['object']).columns:
    adata.obs[text_col] = adata.obs[text_col].astype(str)

for time_col in adata.obs.select_dtypes(include=['datetime64']).columns:
    adata.obs[time_col] = adata.obs[time_col].dt.strftime('%Y-%m-%d %H:%M:%S')

# TME + PC object carrying the expression matrix of the pre-processed file.
name = os.path.join(data_path, f'adata_PC_and_TME_with_ann_merged_v_{version}.h5ad')
adata.write_h5ad(name)

print(f'saved to {name}')

# PC only

In [None]:
# Reload the saved TME + PC object and keep only the plasma-cell populations.
name = os.path.join(data_path, f'adata_PC_and_TME_with_ann_merged_v_{version}_UMAP.h5ad')
pc_populations = ['Malignant', 'Normal_PC', 'Interm', 'Normal_Pb']

adata_proc = ad.read_h5ad(name)
is_pc_population = adata_proc.obs['Populations'].isin(pc_populations)
adata_proc = adata_proc[is_pc_population].copy()

In [None]:
#slow
#
# Neighbour graph on the scVI representation for the PC-only object, stored
# under the 'pc' key, and the UMAP computed from it.

pc_key = 'pc'

sc.pp.neighbors(adata_proc, n_neighbors=30, use_rep='X_scVI', key_added=pc_key)
sc.tl.umap(adata_proc, neighbors_key=pc_key, min_dist=0.3)

In [None]:
# Side-by-side UMAPs of the PC-only object: sequencing method (left) and
# population (right), each with its own point size.
fig, axs = plt.subplots(1, 2, figsize=(16, 6))

panels = [('Method', 1.5), ('Populations', 1.)]
for ax, (colour_by, point_size) in zip(axs, panels):
    sc.pl.umap(
        adata_proc,
        color=colour_by,
        size=point_size,
        frameon=False,
        ax=ax,
        show=False,  # draw into the shared figure; shown once below
    )

plt.tight_layout()
plt.show()

In [None]:
# # save adata

# object_columns = adata_proc.obs.select_dtypes(include=['object']).columns
# for col in object_columns:
#     adata_proc.obs[col] = adata_proc.obs[col].astype(str)
# datetime_columns = adata_proc.obs.select_dtypes(include=['datetime64']).columns
# for col in datetime_columns:
#     adata_proc.obs[col] = adata_proc.obs[col].dt.strftime('%Y-%m-%d %H:%M:%S')
# name = os.path.join(data_path, f'adata_PC_with_ann_merged_v_{version}_UMAP.h5ad')
# adata_proc.write_h5ad(name)

# print(f'saved to {name}')

In [None]:
# # remove samples doubled between MARS and SPID (forgot to do before)

# name = os.path.join(data_path, f'adata_PC_with_ann_merged_v_{version}_UMAP.h5ad')
# adata_proc = ad.read_h5ad(name)

In [None]:
# sc.pl.umap(adata_proc, color =['FCRL5'], size = 5)

# sc.pl.dotplot(adata_proc,
#               groupby = ['Populations'],
#               var_names=['FCRL5', 'FCRL2', 'GPRC5D', 'TNFRSF17'],
#               mean_only_expressed = True,
#               categories_order=['Normal_Pb', 'Interm', 'Normal_PC', 'Malignant'])

In [None]:
# Mark samples sequenced with more than one method in the PC-only object.
# For every sample with exactly two methods, the cells of the method with fewer
# malignant cells get `cells_rem_dupl_between_methods = True`.

# Only these three columns are needed; no need to copy the full obs table.
df = adata_proc.obs[['Sample.Code', 'Method', 'Populations']]
sample_method_counts = df.groupby('Sample.Code', observed=True)['Method'].nunique()
duplicated_samples = sample_method_counts[sample_method_counts > 1].index.tolist()
print(f"{len(duplicated_samples)} samples sequenced with multiple methods:")
print(duplicated_samples)

malignant_df = (df[df['Sample.Code'].isin(duplicated_samples)]
                .groupby(['Sample.Code', 'Method'], observed=True)
                .agg(n_malignant=('Populations', lambda x: (x == 'Malignant').sum()),
                     total_cells=('Populations', 'size'))
                .reset_index())

print("\nMalignant cell counts per sample-method:")
print(malignant_df)

remove_list = []
skipped_samples = []

for sample in duplicated_samples:
    sample_data = malignant_df[malignant_df['Sample.Code'] == sample]

    # The rule below is defined for exactly two methods; anything else is
    # left unmarked, but reported instead of being skipped silently.
    if len(sample_data) != 2:
        skipped_samples.append(sample)
        continue

    # On a tie `idxmin` picks the first row of the pair.
    min_idx = sample_data['n_malignant'].idxmin()
    remove_list.append({
        'Sample.Code': sample,
        'Method': sample_data.loc[min_idx, 'Method']
    })

if skipped_samples:
    print(f"\nWARNING: {len(skipped_samples)} samples do not have exactly two methods and were left unmarked:")
    print(skipped_samples)

# Explicit columns keep the frame well-formed when nothing has to be removed.
remove_df = pd.DataFrame(remove_list, columns=['Sample.Code', 'Method'])

print("\nSamples to remove (method with fewer malignant cells):")
print(remove_df)

adata_proc.obs['cells_rem_dupl_between_methods'] = False

for _, row in remove_df.iterrows():
    mask = (adata_proc.obs['Sample.Code'] == row['Sample.Code']) & \
           (adata_proc.obs['Method'] == row['Method'])
    adata_proc.obs.loc[mask, 'cells_rem_dupl_between_methods'] = True

print(f"\nCells marked: {adata_proc.obs['cells_rem_dupl_between_methods'].sum()}")
print(f"Total cells: {adata_proc.n_obs}")

In [None]:
# save adata
#
# Object-dtype columns are cast to plain strings and datetime columns are
# formatted as text before the PC-only object is written to disk.

for text_col in adata_proc.obs.select_dtypes(include=['object']).columns:
    adata_proc.obs[text_col] = adata_proc.obs[text_col].astype(str)

for time_col in adata_proc.obs.select_dtypes(include=['datetime64']).columns:
    adata_proc.obs[time_col] = adata_proc.obs[time_col].dt.strftime('%Y-%m-%d %H:%M:%S')

# PC-only object including the UMAP / neighbour graphs.
name = os.path.join(data_path, f'adata_PC_with_ann_merged_v_{version}_UMAP.h5ad')
adata_proc.write_h5ad(name)
print(f'saved to {name}')

In [None]:
# counts h5ad file
#
# Build an object that carries the expression matrix of the pre-processed file
# but only the cells (and the obs annotations) of the annotated PC-only object.
filename = f'pp_adata_data_v_{version}.h5ad'
adata_pp = ad.read_h5ad(os.path.join(data_path, filename)) # raw after basic QC

name = os.path.join(data_path, f'adata_PC_with_ann_merged_v_{version}_UMAP.h5ad')
adata_proc = ad.read_h5ad(name)

adata = ad.AnnData(X=adata_pp.X.copy(), obs=adata_pp.obs.copy(), var=adata_pp.var.copy(),
                   obsm=adata_pp.obsm, obsp=adata_pp.obsp, uns=adata_pp.uns
                   )
cells_to_left = adata_proc.obs.index

# Materialise the subset explicitly: obs is replaced right below, and assigning
# to a view would otherwise trigger an implicit copy.
adata = adata[cells_to_left, :].copy()

# Copy just the obs table; deep-copying the whole annotated AnnData only to
# read `.obs` is unnecessary.
adata.obs = adata_proc.obs.copy()
adata

In [None]:
# add umap data to adata with counts

# Embeddings.
for embedding_key in ('X_scVI', 'X_umap'):
    adata.obsm[embedding_key] = adata_proc.obsm[embedding_key]

# Neighbour graphs: the default ones plus those stored under the 'tme' and 'pc' keys.
for graph_key in ('distances', 'connectivities',
                  'tme_distances', 'tme_connectivities',
                  'pc_distances', 'pc_connectivities'):
    adata.obsp[graph_key] = adata_proc.obsp[graph_key]

adata.uns['neighbors'] = adata_proc.uns['neighbors']

# `sc.pp.neighbors(..., key_added=<key>)` keeps its settings under uns[<key>].
# Without those entries the copied 'tme_*' / 'pc_*' graphs cannot be addressed
# through `neighbors_key` on the counts object.
for neighbors_key in ('tme', 'pc'):
    if neighbors_key in adata_proc.uns:
        adata.uns[neighbors_key] = adata_proc.uns[neighbors_key]

# Flag the genes that are present in the annotated object.
adata.var['HVG_scVI'] = adata.var_names.isin(adata_proc.var_names)

In [None]:
# save adata
#
# Same clean-up as for the UMAP object: object columns become strings and
# datetime columns become formatted text before writing.

for text_col in adata.obs.select_dtypes(include=['object']).columns:
    adata.obs[text_col] = adata.obs[text_col].astype(str)

for time_col in adata.obs.select_dtypes(include=['datetime64']).columns:
    adata.obs[time_col] = adata.obs[time_col].dt.strftime('%Y-%m-%d %H:%M:%S')

# PC-only object carrying the expression matrix of the pre-processed file.
name = os.path.join(data_path, f'adata_PC_with_ann_merged_v_{version}.h5ad')
adata.write_h5ad(name)

print(f'saved to {name}')

# save for zscores calculation in R

In [None]:
# Load the PC-only counts object and drop the cells that were marked as
# duplicated between sequencing methods.
name = os.path.join(data_path, f'adata_PC_with_ann_merged_v_{version}.h5ad')
adata_counts = ad.read_h5ad(name)

# `.copy()` turns the subset into an independent object; the next cell adds an
# obs column, which should not be written onto a view.
adata_counts = adata_counts[adata_counts.obs['cells_rem_dupl_between_methods'] == False].copy()
adata_counts

In [None]:
# Per-cell metadata table for the z-score calculation.

# Sample identifier: hospital code + '_' + biopsy sequence.
# NOTE(review): `.str.lower()` only applies to the biopsy-sequence part here — confirm
# that lower-casing the hospital code is not expected at this point.
adata_counts.obs['Code'] = adata_counts.obs['Hospital.Code'].astype(str) + '_' + adata_counts.obs['Biopsy.Sequence'].astype(str).str.lower()
columns_to_extract = ['Amp.Batch.ID', 'Seq.Batch.ID', 'Method', 'Tissue', 'Disease', 'Hospital.Code', 'Biopsy.Sequence', 'Code', 'Populations'
                      ]
cells_all = adata_counts.obs[columns_to_extract]

# Make sure the output folder exists; writing would otherwise fail on a fresh
# output directory.
zscore_dir = os.path.join(data_path, 'zscore_outputs')
os.makedirs(zscore_dir, exist_ok=True)
cells_all.to_csv(os.path.join(zscore_dir, f'cells_all_v_{version}.csv'))

In [None]:
# Export gene names, cell names and the transposed expression matrix
# (Matrix Market format) for the z-score calculation.
import scipy.io

# Make sure the output folder exists before any file is written.
zscore_dir = os.path.join(data_path, 'zscore_outputs')
os.makedirs(zscore_dir, exist_ok=True)

genes = adata_counts.var_names.to_numpy()
np.savetxt(os.path.join(zscore_dir, f'genes_forz_v_{version}.txt'), genes, fmt='%s')

cells = adata_counts.obs_names.to_numpy()
np.savetxt(os.path.join(zscore_dir, f'cells_forz_v_{version}.txt'), cells, fmt='%s')

# Transposed so that rows follow `var_names` and columns follow `obs_names`.
mat_ds = adata_counts.X.T
scipy.io.mmwrite(os.path.join(zscore_dir, f'matrix_forz_v_{version}.mtx'), mat_ds)