# Generating ParTI-compatible data
ParTI requires its input data in a specific format:
- an expression matrix as a purely numerical matrix (rows = cells, columns = genes)
- a list of gene names (in the order they appear as columns of the expression matrix)
- discrete clinical attributes


In [5]:
import pandas as pd
from scipy.io import mmread, mmwrite
from scipy.sparse import csr_matrix
import os
import scanpy as sc  # For reading AnnData objects
import numpy as np


In [3]:
# Path to the input AnnData (.h5ad) file; the file itself is read in a later cell.
adata_path = (
    r"C:\Users\DG1\Desktop\DALLAB\Experimenting\Data\Antibiotic resistance\BacDrop\further processing\adata_balanced_gentamicin_distribution_min_per_bin.h5ad"
)

In [6]:
# Export the three ParTI input files (expression matrix, gene names, discrete
# attributes) from the AnnData file at `adata_path`.

# Hard-coded date tag appended to every output filename
date = "1507"  # DDMM

# Build each output filename exactly once and reuse it for both writing and
# reporting, so the summary printed at the end names the files actually written.
expr_matrix_file = f"ParTI_expression_matrix_{date}.csv"
gene_names_file = f"ParTI_gene_names_{date}.txt"
discrete_attributes_file = f"ParTI_discrete_attributes_{date}.csv"

# Load the AnnData object
adata = sc.read_h5ad(adata_path)

# 1. Expression matrix: rows = cells, columns = genes (pure numerical matrix)
# A NumPy array is used as-is; anything else is densified via .toarray().
# NOTE(review): assumes a non-ndarray X is a SciPy sparse matrix — confirm.
if isinstance(adata.X, np.ndarray):
    expr_matrix = adata.X
else:
    expr_matrix = adata.X.toarray()

# 2. List of gene names (in the order they appear)
gene_names = adata.var_names.tolist()

# 3. Discrete clinical attributes (from adata.obs)
# Keep every column whose dtype is 'category' or 'object'.
discrete_cols = [col for col in adata.obs.columns if adata.obs[col].dtype.name in ['category', 'object']]
discrete_attributes = adata.obs[discrete_cols].copy()

# Save outputs in ParTI-compatible format, with the date tag in each filename.

# Expression matrix as .csv (cells x genes), without the cell index.
# NOTE(review): to_csv writes a header row of gene names by default; confirm the
# downstream loader expects (or skips) that header.
expr_df = pd.DataFrame(expr_matrix, columns=gene_names)
expr_df.to_csv(expr_matrix_file, index=False)

# Gene names as .txt (one per line).
# Explicit UTF-8 matches the default encoding pandas uses for the .csv files,
# instead of depending on the platform's default text encoding.
with open(gene_names_file, "w", encoding="utf-8") as f:
    f.writelines(f"{g}\n" for g in gene_names)

# Discrete attributes as .csv. The obs index is not written, so rows correspond
# to the expression matrix rows by position only.
discrete_attributes.to_csv(discrete_attributes_file, index=False)

# Report the real (date-tagged) filenames that were written above.
print("ParTI-compatible files generated:")
for written_file in (expr_matrix_file, gene_names_file, discrete_attributes_file):
    print(f" - {written_file}")


ParTI-compatible files generated:
 - ParTI_expression_matrix.csv
 - ParTI_gene_names.txt
 - ParTI_discrete_attributes.csv


Loading cell expression matrix as a csv