In [1]:
import os
import sys
import glob
import time
import yaml
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import sparse
import scanpy as sc
import loompy as lp
sc.settings.verbosity = 3

import PyWGCNA

  def twobit_to_dna(twobit: int, size: int) -> str:
  def dna_to_twobit(dna: str) -> int:
  def twobit_1hamming(twobit: int, size: int) -> List[int]:


In [None]:
def adj_mtx_to_list(adj_mtx):
    """Convert an adjacency matrix into a long-format edge list.

    Every (row label, column label) cell of the matrix becomes one candidate
    edge; cells whose weight equals 0 are dropped. The input frame is not
    modified.

    Parameters
    ----------
    adj_mtx : pd.DataFrame
        Adjacency matrix of the graph. Index and columns are the nodes.
        The index and columns may carry any (even identical) axis names.

    Returns
    -------
    pd.DataFrame
        List of edges with columns 'source', 'target', and 'weight'.
        'source' comes from the index, 'target' from the columns.
    """
    # rename_axis returns a new object, so the caller's frame is left untouched
    # and no defensive copy is needed. Naming the two axes up front also avoids
    # the ValueError that reset_index raises when index and columns share a
    # name (e.g. both called "gene").
    edges = (
        adj_mtx
        .rename_axis(index="source", columns="target")
        .stack()
        .rename("weight")
        .reset_index()
    )
    # Keep only real edges: a weight of exactly 0 means "no connection"
    return edges[edges["weight"] != 0]

In [124]:
# Define run arguments
# NOTE(review): both paths are absolute and user-specific; adjust them when running elsewhere.
# Path to the input .h5ad file
h5ad_in = "/cellar/users/aklie/data/igvf/topic_grn_links/subsets/for_regulon_inference/Bridge_Satpathy/29Aug23/3744376661_20230829-175127/balanced_genotype_microglia/0.05/raw/balanced_genotype_microglia-B6J.0.05.raw.h5ad"
# Directory that the result tables are written into
out_dir = "/cellar/users/aklie/projects/igvf/topic_grn_links/grn_inference/hdwgcna/results/Bridge_Satpathy/scratch"
# Passed to PyWGCNA as networkType / adjacencyType / TOMType in the cells below
network_type = "signed"
# Name of the AnnData layer copied into .X before analysis (None keeps .X as is)
layer = "normalized_counts"

# Make sure the output directory exists before any later cell writes into it
os.makedirs(out_dir, exist_ok=True)

In [125]:
# Read in the h5ad and make sure the correct counts are used.
# Uses `h5ad_in` from the run-arguments cell; the name `h5ad_test` that was
# referenced here before is not defined in any visible cell, so the cell
# failed with a NameError on a fresh kernel.
adata = sc.read_h5ad(h5ad_in)
if layer is not None:
    # Fail with an explicit message instead of a bare KeyError on the layer name
    if layer not in adata.layers:
        raise KeyError(
            f"Layer '{layer}' not found in adata.layers; available: {list(adata.layers.keys())}"
        )
    adata.X = adata.layers[layer]

In [122]:
# Get a dataframe for the expression data
dat = adata.to_df()
dat.head()

gene,Xkr4,Lypla1,Gm37988,Tcea1,Rgs20,Atp6v1h,4732440D04Rik,Rb1cc1,St18,Pcmtd1,...,mt-Rnr1,mt-Rnr2,mt-Nd1,mt-Nd2,mt-Co1,mt-Co2,mt-Atp6,mt-Co3,mt-Nd4,mt-Cytb
AAACATGCAGTTTGGC_igvf_b01_LeftCortex_113,3.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAACATGCATTATGAC_igvf_b01_LeftCortex_113,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0
AAACCAACAACACTTG_igvf_b01_LeftCortex_113,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0
AAACCGAAGTTGGGCC_igvf_b01_LeftCortex_113,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0
AAACCGCGTATTTGCC_igvf_b01_LeftCortex_113,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0


In [62]:
# Pick a soft threshold automatically and save the df
# `power` is the selected soft-threshold power, `sft` the per-power fit table
power, sft = PyWGCNA.WGCNA.pickSoftThreshold(
    data=dat,
    networkType=network_type,
)
# Persist the fit table so the choice of `power` can be inspected later;
# previously the table was computed but never written out despite the comment above.
sft.to_csv(os.path.join(out_dir, "soft_threshold.tsv"), index=False, sep="\t")

[96mpickSoftThreshold: calculating connectivity for given powers...[0m
will use block size  6277
    Power  SFT.R.sq     slope truncated R.sq      mean(k)    median(k)  \
0       1   0.48925 -9.998215       0.929911  3399.893007  3371.748593   
1       1   0.48925 -9.998215       0.929911  3399.893007  3371.748593   
2       2  0.653225 -6.108008       0.954581  1856.003624  1817.194264   
3       3  0.766853 -4.692969       0.965941  1022.019853   982.489046   
4       3  0.766853 -4.692969       0.965941  1022.019853   982.489046   
5       4  0.844691 -3.951521        0.97661   568.350621   533.026146   
6       5  0.889288 -3.482717       0.980309   319.625411   289.999387   
7       5  0.889288 -3.482717       0.980309   319.625411   289.999387   
8       6  0.925922 -3.154624         0.9862   182.059082   158.329399   
9       7  0.956353 -2.905103       0.995037   105.218685    86.749803   
10      7  0.956353 -2.905103       0.995037   105.218685    86.749803   
11      8  0.

In [113]:
# Calculate the gene-gene adjacency matrix using the selected soft-threshold power.
# NOTE(review): for a "signed" network this is presumably ((1 + cor) / 2) ** power,
# i.e. the rescaled correlation raised to `power`, not just (1 + cor) / 2 — confirm against PyWGCNA.
adjacency = PyWGCNA.WGCNA.adjacency(dat, power=power, adjacencyType=network_type)

[96mcalculating adjacency matrix ...[0m
	Done..



In [114]:
# Wrap the raw adjacency values in a DataFrame labelled by gene on both axes
adjacency_df = pd.DataFrame(
    data=adjacency,
    index=dat.columns.values,
    columns=dat.columns.values,
)

In [126]:
# Flatten the labelled adjacency matrix into an edge list and write it out as a TSV
corr_adj_list = adj_mtx_to_list(adj_mtx=adjacency_df)
corr_adj_list.to_csv(
    os.path.join(out_dir, "corr_adj_list.tsv"),
    sep="\t",
    index=False,
)

In [66]:
# Get the topological overlap matrix (TOM) from the adjacency matrix.
# NOTE(review): `network_type` is reused as TOMType here; confirm PyWGCNA accepts the same
# set of values for both (a value such as "signed hybrid" may not be a valid TOMType).
TOM = PyWGCNA.WGCNA.TOMsimilarity(adjacency, TOMType=network_type)

[96mcalculating TOM similarity matrix ...[0m
	Done..



In [117]:
# Label the rows and columns of the TOM with the gene names taken from the expression frame
# (nothing is written to disk in this cell)
TOM.index = TOM.columns = dat.columns.values

In [119]:
# Flatten the labelled TOM into an edge list and write it out as a TSV
tom_adj_list = adj_mtx_to_list(adj_mtx=TOM)
tom_adj_list.to_csv(
    os.path.join(out_dir, "tom_adj_list.tsv"),
    sep="\t",
    index=False,
)

# DONE!

---

# Scratch

In [70]:
# Turn the TOM similarity into a dissimilarity (1 - TOM), rounded to 8 decimal places
dissTOM = (1 - TOM).round(decimals=8)

In [71]:
from scipy.spatial.distance import squareform

In [72]:
# Convert the square dissimilarity matrix into scipy's condensed (1-D) distance form.
# checks=False skips squareform's input validation.
# NOTE(review): this assumes dissTOM is symmetric with a zero diagonal — confirm.
a = squareform(dissTOM.values, checks=False)

In [74]:
# Length of the condensed vector; for an n x n matrix this is n * (n - 1) / 2
a.shape

(19697226,)