In [1]:
import scanpy as sc
from scipy.sparse import csc_matrix
import pandas as pd

In [2]:
# Input locations for the GSE161529 analysis.
# GRN adjacency table consumed further down in the notebook.
ADJACENCIES_FNAME = (
    "/home/igarzonalva/Proyecto_SC_TNBC/GSE161529"
    "/grn/ER_network.tsv"
)
# Despite its name, ``data_dir`` points at a single .h5ad file, not a directory.
data_dir = (
    "/home/igarzonalva/Proyecto_SC_TNBC/GSE161529"
    "/Integration/adata/adata_GenAnno.h5ad"
)

In [3]:
# Load the AnnData object stored at ``data_dir`` into memory.
adata = sc.read_h5ad(filename=data_dir)

In [4]:
def fetch_exp_matrix(adata, tumor):
    """Build a dense cells x genes expression DataFrame for one tumor subtype.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix whose ``obs`` table has a ``'subtype'`` column.
    tumor : str
        Value of ``adata.obs['subtype']`` selecting the cells to keep.

    Returns
    -------
    pandas.DataFrame
        Rows are the selected cells (``obs_names``), columns are the genes
        (``var_names``), and the values come from ``adata.X``.
    """
    from scipy.sparse import issparse  # local import keeps this cell self-contained

    adata_tumor = adata[adata.obs['subtype'] == tumor, :].copy()

    # ``X`` can be stored sparse or dense; only sparse matrices expose
    # ``toarray()``, so densify conditionally instead of assuming sparsity.
    values = adata_tumor.X
    if issparse(values):
        values = values.toarray()

    return pd.DataFrame(
        data=values,
        index=adata_tumor.obs_names.values,
        columns=adata_tumor.var_names.values,
    )

In [5]:
# Expression table restricted to the cells whose subtype is 'ER'.
exp_matrix = fetch_exp_matrix(adata=adata, tumor='ER')

In [6]:
# Orientation check: the index holds cell barcodes and the columns hold gene names.
exp_matrix.shape # should be cells x genes

(61800, 18088)

In [7]:
# Preview the first rows of the cells x genes expression table.
exp_matrix.head()

Unnamed: 0,A1BG,A1BG-AS1,A2M,A2M-AS1,A2ML1,A4GALT,AAAS,AACS,AADAC,AADAT,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1
AAACCCAAGGACGGAG-1-MH0151-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAACCCACAGCGTTTA-1-MH0151-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAACCCAGTATCAAGA-1-MH0151-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.944043,0.0,0.944043,0.0,0.0,0.0,0.944043,0.0,0.0
AAACGAAAGACCAACG-1-MH0151-,0.658898,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.382763,0.0
AAACGAAGTCGAACGA-1-MH0151-,0.0,0.0,2.73564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.105329,0.0


In [8]:
# Column labels are supplied explicitly, so the first line of the file is
# read as data rather than as a header row.
adjacencies = pd.read_csv(
    ADJACENCIES_FNAME,
    sep='\t',
    header=None,
    names=['TF', 'target', 'importance'],
)


In [9]:
# Preview the first rows of the adjacency table (columns: TF, target, importance).
adjacencies.head()

Unnamed: 0,TF,target,importance
0,AEBP1,COL6A3,55.260441
1,AEBP1,COL1A2,53.162171
2,AEBP1,COL1A1,52.131083
3,AEBP1,CALD1,50.072608
4,JUN,FOS,46.435242


In [10]:
from pyscenic.utils import modules_from_adjacencies

In [None]:
# Collect everything produced by modules_from_adjacencies into a list so it
# can be inspected and reused.
modules = [
    module
    for module in modules_from_adjacencies(adjacencies, exp_matrix)
]


2025-01-04 17:40:17,935 - pyscenic.utils - INFO - Calculating Pearson correlations.

	Dropout masking is currently set to [False].


In [None]:
import loompy

In [None]:
# Define file path
loom_file = "expression_matrix.loom"

# loompy stores genes as rows and cells as columns, so transpose the
# cells x genes DataFrame and hand loompy a plain NumPy array (it does not
# accept a DataFrame).
expression = exp_matrix.T.to_numpy()

# Row/column labels taken from the DataFrame itself, as string arrays.
genes = exp_matrix.columns.to_numpy(dtype=str)
cells = exp_matrix.index.to_numpy(dtype=str)

# Prepare row and column attributes.
# 'Gene' and 'CellID' are the attribute names the pySCENIC command line looks
# for by default (--gene_attribute / --cell_id_attribute).
row_attrs = {
    'Gene': genes
}
col_attrs = {
    'CellID': cells
}

# Create Loom file
loompy.create(loom_file, expression, row_attrs=row_attrs, col_attrs=col_attrs)

print(f"Loom file saved at {loom_file}")


In [None]:
from dask_jobqueue import SLURMCluster
from distributed import Client

In [None]:
# Port on which the Dask scheduler dashboard is exposed.
portdash = 40748

# One worker process with 16 cores and 32GB per SLURM job, submitted to the
# "short" queue with a five-hour wall-time limit.
cluster = SLURMCluster(
    queue="short",
    cores=16,
    processes=1,
    memory="32GB",
    walltime="05:00:00",
    scheduler_options={"dashboard_address": f":{portdash}"},
)
cluster.scale(12)

# Connect a client and keep the scheduler address for later shell commands.
client = Client(cluster)
scheduler_addr = client.scheduler.address
print(scheduler_addr)


In [None]:
# Define paths as Python variables for ease of use.
# These are container-side paths (everything lives under /mnt).
adj_path = "/mnt/ER_network.tsv"
f_db_names = "/mnt/Common_files/databases/*.feather"
f_motif_path = "/mnt/Common_files/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl"
# Stored under its own name so the expression DataFrame ``exp_matrix`` is not
# overwritten by a string, which would break re-running earlier cells.
exp_matrix_path = "/mnt/expression_matrix.loom"
output_path = "/mnt/regulons.csv"

In [None]:

!singularity exec \
  -B /home/igarzonalva/Proyecto_SC_TNBC/GSE161529/grn:/mnt \
  aertslab-pyscenic-scanpy-0.12.1-1.9.1.sif \
  pyscenic ctx \
  /mnt/ER_network.tsv \
  /mnt/Common_files/databases/*.feather \
  --annotations_fname /mnt/Common_files/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl \
  --expression_mtx_fname /mnt/some_expression.tsv \
  --output /mnt/regulons.csv \
  --num_workers 6


In [None]:
singularity exec -B /path/on/hpc:/mnt \
    aertslab-pyscenic-0.12.1.sif \
    pyscenic grn /mnt/expr_mat.tsv /mnt/allTFs_hg38.txt \
    -o /mnt/expr_mat.adjacencies.tsv --num_workers 6


In [None]:
# Host-side inputs for the pyscenic ctx run, all under the project's grn directory.
# Adjacency table.
adj_path = (
    "/home/igarzonalva/Proyecto_SC_TNBC/GSE161529/grn"
    "/ER_network.tsv"
)
# Glob matching the .feather database files.
f_db_names = (
    "/home/igarzonalva/Proyecto_SC_TNBC/GSE161529/grn"
    "/Common_files/databases/*.feather"
)
# Motif annotation table.
f_motif_path = (
    "/home/igarzonalva/Proyecto_SC_TNBC/GSE161529/grn"
    "/Common_files/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl"
)

In [None]:

# Run pyscenic ctx from the shell, interpolating notebook variables with {name}:
#   {adj_path}        adjacency table
#   {f_db_names}      glob of .feather databases (left for the shell to expand)
#   {f_motif_path}    motif annotation table
#   {loom_file}       expression matrix loom file
#   {scheduler_addr}  address of the running Dask scheduler
# The result is written to regulons.csv in the current working directory.
!pyscenic ctx {adj_path} \
    {f_db_names} \
    --annotations_fname {f_motif_path} \
    --expression_mtx_fname {loom_file} \
    --output regulons.csv \
    --mask_dropouts \
    --num_workers 20 \
    --mode dask_cluster \
    --client_or_address {scheduler_addr}


In [None]:
# Release the Dask resources: the client first, then the cluster it was attached to.
for handle in (client, cluster):
    handle.close()