### TF - motif bias matrix

1. Download the motif-to-gene name mapping from:
   https://resources.aertslab.org/cistarget/motif_collections/v10nr_clust_public/snapshots/motifs-v10-nr.hgnc-m0.00001-o0.0.tbl
2. Load the motif -> gene enrichment scores, i.e. the file
   `hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.scores.feather`, available from:
   https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/
3. Replace the motif names with gene names.
4. When several motifs map to the same gene, keep the maximum enrichment.
5. Renormalize the scores per gene and save the result as our new attention bias.

---

### PPI bias matrix

1. Get the human PPI matrix from STRING-db:
   https://string-db.org/cgi/download?sessionId=bwVBywlOX6i8&species_text=Homo+sapiens
2. Get the equivalent file for other species (e.g. Mus musculus):
   https://stringdb-downloads.org/download/protein.links.v12.0/10090.protein.links.v12.0.txt.gz
3. Open them and use Ensembl's BioMart to map ENSP ids to ENSG ids. We merge
   all similar pairs.
4. Map the result to a GRN with associated scores.
5. Use it as our new PPI bias matrix.


In [1]:
import pandas as pd
from scdataloader.utils import load_genes
from scdataloader.utils import getBiomartTable
import numpy as np
from tqdm import tqdm
import torch
from scipy import sparse as sp

%load_ext autoreload
%autoreload 2


[92m→[0m connected lamindb: jkobject/scprint2


In [2]:
# Load the gene table through scdataloader's `load_genes`; its `symbol`
# column is used below to filter the motif annotation table.
genedf = load_genes()

In [3]:
# Folder prefix for every input file read and every intermediate file written
# by this notebook (it is concatenated with file names, hence the trailing "/").
LOC = "../../data/main/"  # alternative location: "./data/main/"

In [4]:
# Motif enrichment scores, one row per motif (the "motifs" column becomes the index).
da = pd.read_feather(LOC + "main_scenic+_database.feather").set_index("motifs")

# Column names for the motif annotation table, which is read without a header row.
tbl_columns = [
    "motif_id",
    "motif_name",
    "motif_description",
    "source_name",
    "source_version",
    "gene_name",
    "motif_similarity_qvalue",
    "similar_motif_id",
    "similar_motif_description",
    "orthologous_identity",
    "orthologous_gene_name",
    "orthologous_species",
    "description",
]
tbl = pd.read_csv(
    LOC + "motifs-v10-nr.hgnc-m0.00001-o0.0.tbl",
    sep="\t",
    header=None,
    names=tbl_columns,
).set_index("motif_id")

# Keep only annotations whose gene symbol is listed in the gene table.
tbl = tbl.loc[tbl.gene_name.isin(genedf.symbol)]

  tbl = pd.read_csv(


In [5]:
# Build the motif -> gene translation used to relabel the rows of `da`.
#   rn:    motif id -> first gene symbol annotated for that motif
#   toadd: any further gene symbol of a multi-gene motif -> score row to append
rn = {}
toadd = {}
unmapped = []  # motifs without any annotation in `tbl`; dropped after the loop
for i in tqdm(da.index):
    try:
        res = tbl.loc[i, "gene_name"]
    except KeyError:
        # No gene for this motif: skip it entirely. Without the `continue` the
        # previous iteration's `res` would be recorded for this motif (or a
        # NameError raised if the very first motif is unmapped).
        unmapped.append(i)
        continue
    if not isinstance(res, str):
        # Several annotation rows for this motif: `res` is a Series of symbols.
        genes = res.values
        row = da.loc[i].values
        for v in genes[1:]:
            # Keep the element-wise maximum when several motifs contribute to
            # the same extra gene, instead of silently overwriting.
            toadd[v] = np.maximum(toadd[v], row) if v in toadd else row
        res = genes[0]
    rn[i] = res

# Drop all unmapped motifs in one call; dropping inside the loop copies the
# whole matrix once per missing motif.
da = da.drop(index=unmapped)

100%|██████████| 5876/5876 [00:46<00:00, 125.53it/s]


In [6]:
# Relabel motif rows with their gene symbol and append the rows collected for
# the additional genes of multi-gene motifs.
da = da.rename(index=rn)
da = pd.concat([da, pd.DataFrame(toadd, index=da.columns).T])
# Several motifs can now share one gene label: collapse them by taking the
# maximum enrichment per gene (step 4 of the notes above) rather than keeping
# whichever row happens to come first. `sort=False` preserves first-seen order.
da = da.groupby(level=0, sort=False).max()

In [38]:
# Checkpoint the gene-labelled score matrix to disk so the slow motif -> gene
# mapping above does not have to be recomputed.
da.to_parquet(LOC + "main_scenic+_2.parquet")

In [39]:
# Reload the checkpoint written by the previous cell (lets the notebook resume
# from here without re-running the mapping loop).
da = pd.read_parquet(LOC + "main_scenic+_2.parquet")

In [7]:
# Download the (Ensembl gene id, HGNC symbol) pairs from BioMart; they are used
# below to translate gene symbols into Ensembl gene ids.
# `bypass_attributes=True` is passed so that only the two listed attributes are
# requested (see the printed attribute list in the output).
biomart = getBiomartTable(
    attributes=["ensembl_gene_id", "hgnc_symbol"], bypass_attributes=True
)

downloading gene names from biomart
['ensembl_gene_id', 'hgnc_symbol']


In [8]:
# mp: HGNC symbol -> list of distinct Ensembl gene ids (in order of appearance).
# Missing values are removed with `dropna()`; the previous `x is np.nan` test
# is an identity check and misses NaN objects other than the `np.nan`
# singleton as well as None.
mp = {}
for v, k in biomart.iloc[:, :2].dropna().values:
    ids = mp.setdefault(k, [])
    if v not in ids:
        ids.append(v)

In [9]:
# Target range of the per-row min-max rescaling.
MAX = 5
MIN = 0

# Column vectors (n_rows, 1) so they broadcast across each row of `da`.
row_min = da.min(axis=1).values[:, None]
row_span = da.max(axis=1).values[:, None] - row_min
# A constant row has a zero span; dividing by it would turn the whole row into
# NaN (0 / 0). Using 1 instead maps such rows to MIN.
row_span[row_span == 0] = 1

da = MIN + (da - row_min) * (MAX - MIN) / row_span
da.shape

(1564, 27090)

In [16]:
# Expand every row whose label is a key of `mp` into one row per id listed in
# `mp[label]`; rows whose label is not in `mp` are discarded.
expanded = [
    (mapped_id, da.loc[label].values)
    for label in da.index
    if label in mp
    for mapped_id in mp[label]
]
new_index = [pair[0] for pair in expanded]
new_data = np.array([pair[1] for pair in expanded])
new_data.shape

(1692, 27090)

In [18]:
# Rebuild the matrix from the expanded rows: rows are now labelled with the ids
# collected in `new_index`, columns are unchanged.
da = pd.DataFrame(new_data, index=new_index, columns=da.columns)

In [23]:
# Remap the columns the same way the rows were remapped: transpose so the
# columns become rows, then expand each row through `mp`.
# NOTE: `da` stays transposed after this cell (the next cell relies on it), so
# re-running this cell on its own flips the orientation again.
da = da.T

new_index = []
new_data = []
for idx in da.index:
    if idx in mp:
        # One output row per id mapped to this label; unmapped labels are dropped.
        for mapped_idx in mp[idx]:
            new_index.append(mapped_idx)
            new_data.append(da.loc[idx].values)
new_data = np.array(new_data)
new_data.shape

(26639, 1692)

In [25]:
# `new_data` holds one row per remapped column, so transpose it back: rows are
# labelled with `da.columns` (`da` is still transposed at this point) and
# columns with the ids collected in `new_index`.
da = pd.DataFrame(new_data.T, index=da.columns, columns=new_index)

In [None]:
# Remove repeated labels on both axes, keeping the first occurrence of each
# row label and of each column label.
unique_rows = ~da.index.duplicated(keep="first")
unique_cols = ~da.columns.duplicated(keep="first")
da = da.loc[unique_rows, unique_cols]

## protein


In [None]:
# Load the STRING v12.0 protein-links table for taxon 9606 (the human file
# mentioned in the notes above); the file is space-separated.
string = pd.read_csv(LOC + "9606.protein.links.v12.0.txt.gz", sep=" ")

In [None]:
# Protein ids are dot-separated; keep only the second field (presumably this
# removes the taxon prefix, e.g. "9606." — confirm against the file).
for id_column in ("protein1", "protein2"):
    string[id_column] = string[id_column].str.split(".").str.get(1)

In [None]:
# STRING rows are keyed by protein id, whereas the bias matrix is keyed by
# Ensembl gene id. The only `rn` defined in this notebook maps motif ids to
# gene symbols, so looking proteins up in it matches nothing on a fresh run.
# Build the protein -> gene map explicitly from BioMart instead (step 3 of the
# notes above).
# NOTE(review): confirm that the BioMart release matches the STRING version.
ensp_table = getBiomartTable(
    attributes=["ensembl_gene_id", "ensembl_peptide_id"],
    bypass_attributes=True,
).dropna(subset=["ensembl_gene_id", "ensembl_peptide_id"])
ensp_to_ensg = dict(
    zip(ensp_table["ensembl_peptide_id"], ensp_table["ensembl_gene_id"])
)

# rel: gene id -> set of interacting gene ids, stored in both directions.
rel = {}
for a, b in string.iloc[:, :2].values:
    a, b = ensp_to_ensg.get(a, ""), ensp_to_ensg.get(b, "")
    # Skip pairs where either protein has no gene mapping.
    if a and b:
        rel.setdefault(a, set()).add(b)
        rel.setdefault(b, set()).add(a)
len(rel)

19193

In [None]:
# Square, all-zero gene x gene matrix with one row and one column per key of `rel`.
gene_ids = list(rel.keys())
n_genes = len(gene_ids)
res = pd.DataFrame(np.zeros((n_genes, n_genes)), index=gene_ids, columns=gene_ids)

In [None]:
# Remove the empty-string placeholder row/column if it exists.
# `rel` is built with an `if a and b` guard, so "" is normally not a label;
# `errors="ignore"` keeps this cell from raising KeyError in that case.
# Rebinding instead of `inplace=True` keeps the cell safe to re-run.
res = res.drop(index=[""], columns=[""], errors="ignore")

In [None]:
# Mark every interaction with 1: for each gene, set the columns of all its
# partners in that gene's row.
# `tqdm` is imported as `from tqdm import tqdm`, so it must be called directly;
# `tqdm.tqdm(...)` raises AttributeError on a fresh kernel.
for gene, partners in tqdm(rel.items()):
    res.loc[gene, list(partners)] = 1

19193it [00:20, 917.76it/s] 


In [None]:
# Mean of the column means; since `res` is square and filled with 0/1 at this
# point, this is the fraction of gene pairs marked as interacting.
res.mean().mean()

0.03608647264434039

In [None]:
# Checkpoint the gene x gene interaction matrix to disk.
res.to_parquet(LOC + "stringdb_bias.parquet")

In [None]:
# Reload the checkpoint written by the previous cell.
res = pd.read_parquet(LOC + "stringdb_bias.parquet")

In [36]:
# Add the matrix to its transpose.
# NOTE(review): `rel` already stores every pair in both directions, so `res`
# looks symmetric before this line and the sum turns each 1 into 2 (the stored
# output further down shows 2.0 entries). Re-running the cell doubles the
# values again — confirm the intended weight.
res = res + res.T

In [35]:
# Row sums of the interaction matrix, one value per gene.
# NOTE(review): the execution counts suggest the stored output was produced
# before `res + res.T` was applied — confirm.
res.sum(1)

ENSG00000075292    1221.0
ENSG00000172531    3264.0
ENSG00000180745     466.0
ENSG00000156886     287.0
ENSG00000185069     671.0
                    ...  
ENSG00000178093    1031.0
ENSG00000203950       0.0
ENSG00000126953     852.0
ENSG00000185985     719.0
ENSG00000180210    1276.0
Length: 19193, dtype: float64

In [41]:
# Number of distinct column labels in the motif bias matrix.
len(set(da.columns))

23165

In [None]:
# Displays the set of distinct column labels.
# NOTE(review): the stored output below is a DataFrame preview rather than a
# set, so it appears to come from a different version of this cell — confirm.
set(da.columns)

Unnamed: 0,ENSG00000121410,ENSG00000268895,ENSG00000148584,ENSG00000175899,ENSG00000245105,ENSG00000166535,ENSG00000256069,ENSG00000184389,ENSG00000128274,ENSG00000118017,ENSG00000291836,ENSG00000081760,ENSG00000250420,ENSG00000114771,ENSG00000197953,ENSG00000242908,ENSG00000188984,ENSG00000204518,ENSG00000240602,ENSG00000109576,ENSG00000103591,ENSG00000115977,ENSG00000087884,ENSG00000127837,ENSG00000129673,ENSG00000131043,ENSG00000205002,ENSG00000124608,ENSG00000266967,ENSG00000157426,ENSG00000149313,ENSG00000008311,ENSG00000215458,ENSG00000276072,ENSG00000181409,ENSG00000281376,ENSG00000183044,ENSG00000165029,ENSG00000154263,ENSG00000251595,ENSG00000144452,ENSG00000179869,ENSG00000238098,ENSG00000107331,ENSG00000167972,ENSG00000198691,ENSG00000154265,ENSG00000154262,ENSG00000064687,ENSG00000141338,ENSG00000154258,ENSG00000231749,ENSG00000085563,ENSG00000135776,ENSG00000276582,ENSG00000005471,ENSG00000004846,ENSG00000115657,ENSG00000131269,ENSG00000197150,ENSG00000150967,ENSG00000278183,ENSG00000124574,ENSG00000121270,ENSG00000140798,ENSG00000243064,ENSG00000023839,ENSG00000108846,ENSG00000125257,ENSG00000114770,ENSG00000223882,ENSG00000275331,ENSG00000291057,ENSG00000273808,ENSG00000006071,ENSG00000069431,ENSG00000101986,ENSG00000173208,ENSG00000117528,ENSG00000119688,ENSG00000164163,ENSG00000204574,ENSG00000033050,ENSG00000161204,ENSG00000160179,ENSG00000118777,ENSG00000172350,ENSG00000138075,ENSG00000143921,ENSG00000143994,ENSG00000144827,ENSG00000106077,ENSG00000100997,ENSG00000131969,ENSG00000139826,ENSG00000248487,ENSG00000114786,ENSG00000114779,ENSG00000168792,ENSG00000204427,...,ENSG00000124203,ENSG00000267343,ENSG00000127903,ENSG00000196267,ENSG00000152475,ENSG00000022976,ENSG00000198040,ENSG00000197608,ENSG00000176723,ENSG00000223547,ENSG00000213799,ENSG00000196605,ENSG00000105750,ENSG00000267041,ENSG00000281626,ENSG00000236609,ENSG00000197385,ENSG00000106479,ENSG00000261221,ENSG00000290382,ENSG00000257446,ENSG00000234284,ENSG00000221923,ENSG00000228623,E
NSG00000213793,ENSG00000159904,ENSG00000214029,ENSG00000213988,ENSG00000167232,ENSG00000146757,ENSG00000184635,ENSG00000197360,ENSG00000213973,ENSG00000124201,ENSG00000106400,ENSG00000174276,ENSG00000278574,ENSG00000117174,ENSG00000186187,ENSG00000180233,ENSG00000237004,ENSG00000225264,ENSG00000183579,ENSG00000177993,ENSG00000105428,ENSG00000149506,ENSG00000284588,ENSG00000188372,ENSG00000116996,ENSG00000042813,ENSG00000186075,ENSG00000170044,ENSG00000109917,ENSG00000019995,ENSG00000132485,ENSG00000235079,ENSG00000121988,ENSG00000169249,ENSG00000152467,ENSG00000130182,ENSG00000158691,ENSG00000219891,ENSG00000196812,ENSG00000269293,ENSG00000121413,ENSG00000291625,ENSG00000121903,ENSG00000166529,ENSG00000182318,ENSG00000187987,ENSG00000197037,ENSG00000197062,ENSG00000140265,ENSG00000186814,ENSG00000235109,ENSG00000140987,ENSG00000180532,ENSG00000131848,ENSG00000197213,ENSG00000137185,ENSG00000168612,ENSG00000163012,ENSG00000132801,ENSG00000288360,ENSG00000162415,ENSG00000130449,ENSG00000214941,ENSG00000214655,ENSG00000272589,ENSG00000086827,ENSG00000174442,ENSG00000122952,ENSG00000198205,ENSG00000198455,ENSG00000070476,ENSG00000203995,ENSG00000162378,ENSG00000285443,ENSG00000074755,ENSG00000036549
ENSG00000168214,0.060400,0.060800,0.197400,0.128600,0.082600,0.149600,0.095200,0.150200,0.125000,0.119600,0.147600,0.181400,0.126000,0.090200,0.083600,0.177000,0.156400,0.086200,0.097400,0.100200,0.093000,0.087600,0.130600,0.135800,0.076400,0.150000,0.230000,0.125400,0.141200,0.079000,0.069000,0.154000,0.117000,0.121800,0.162000,0.143400,0.109000,0.164800,0.094600,0.081600,0.146800,0.116800,0.063400,0.102400,0.131800,0.145000,0.202000,0.133000,0.153200,0.089200,0.116800,0.094800,0.191400,0.140200,0.082000,0.076000,0.175000,0.172800,0.128400,0.089000,0.136200,0.130400,0.110600,0.151200,0.112400,0.146600,0.079400,0.138400,0.097200,0.086600,0.111200,0.081800,0.074200,0.067000,0.070000,0.099800,0.187200,0.142800,0.078800,0.116400,0.139400,0.086400,0.136400,0.067200,0.178400,0.187400,0.254000,0.126400,0.122400,0.107600,0.115000,0.155000,0.122400,0.108400,0.143600,0.072600,0.104400,0.084200,0.135400,0.127000,...,0.136800,0.115000,0.145200,0.138200,0.172400,0.128600,0.124000,0.091200,0.065200,0.116600,0.187400,0.073800,3.040000,0.074000,0.166400,0.071200,0.069800,0.124800,0.148600,0.124400,0.119000,0.108400,0.100600,0.085000,0.137200,0.131400,0.100800,0.131000,0.068800,0.152600,0.236000,0.222000,0.117000,0.070400,0.068200,0.058600,0.122800,0.093800,0.136000,0.162400,0.188000,0.222000,0.129800,0.126200,0.070400,0.110600,0.129000,0.143000,0.076000,0.091600,0.111400,0.262000,0.115000,0.085200,0.121200,0.131200,0.076600,0.078200,0.143400,0.176200,0.198200,0.078400,0.111400,0.101800,0.131000,0.166400,0.154200,0.083600,0.194000,0.145200,0.154800,0.127200,0.116000,0.125400,0.103200,0.114400,0.190800,0.978000,0.146800,0.128400,0.240000,0.142200,0.092000,0.090000,0.111400,0.166000,0.061800,0.256000,0.122000,0.119800,0.086400,0.141000,0.226000,0.177200,0.135400,0.067000,0.081400,0.151200,0.131000,0.131400
ENSG00000176692,0.387407,0.362222,0.851852,0.650370,0.445185,0.371852,0.304444,0.429630,0.537778,0.396296,0.371111,0.785185,0.400741,0.478519,0.400000,0.403704,0.619259,0.242222,0.325185,0.417778,0.814815,0.560741,0.339259,0.628889,0.419259,0.434815,0.390370,0.358519,0.268889,0.540000,0.962963,0.529630,0.837037,0.408889,0.417037,0.340741,0.495556,0.443704,0.532593,0.599259,0.512593,0.455556,1.429630,0.038074,1.718518,0.455556,0.384444,0.442963,0.388889,0.544444,0.393333,0.242222,0.662222,0.440741,0.911111,0.343704,0.718518,0.282222,0.520741,0.356296,0.382222,0.393333,0.601481,0.301481,0.405926,0.377778,0.445185,0.711852,0.474074,0.658519,0.630370,0.508148,0.488148,0.524444,0.320000,0.557778,0.197778,0.453333,0.556296,0.275556,0.537037,0.197037,0.504444,0.300000,0.522222,1.281482,0.339259,0.434815,0.465185,0.354815,0.401481,0.345926,0.426667,0.351111,0.471111,0.442963,0.510370,0.289630,0.291852,0.333333,...,0.457037,0.483704,1.029630,0.477037,0.593333,0.422222,0.549630,0.499259,0.286667,0.302963,0.365926,0.245185,0.640000,0.263704,0.408148,0.731852,0.685926,0.762963,0.312593,0.859259,0.209630,0.514815,0.279259,0.320000,0.417778,0.933333,0.503704,0.308889,0.387407,0.518519,0.428148,0.562963,0.368148,0.396296,0.341482,0.405926,0.289630,0.431852,0.320741,0.337037,0.582222,0.447407,0.582963,0.417778,0.181481,0.464444,0.532593,0.353333,0.369630,0.307407,0.367407,0.319259,0.357037,0.291852,0.468148,0.572593,0.308148,0.297778,0.554074,0.721481,0.600741,0.337037,0.499259,0.436296,0.690370,0.374074,0.402222,0.288889,0.437778,0.551111,0.705926,0.438519,0.380000,0.397037,0.424444,0.251111,1.340741,0.539259,0.386667,0.458519,0.243704,0.725185,0.199259,0.080741,0.374815,0.335556,0.372593,0.322963,0.254074,0.437037,0.325185,0.276296,0.279259,0.297037,0.325926,0.367407,0.807407,0.428889,0.344444,0.412593
ENSG00000100811,0.039437,0.016690,0.402817,0.323944,0.024859,0.299296,0.294366,0.282394,0.286620,0.290141,0.299296,0.283803,0.287324,0.316197,0.309155,0.302113,0.282394,0.290845,0.309155,0.318310,0.303521,0.280986,0.277465,0.025352,0.283099,0.283099,0.301408,0.510563,0.296479,0.286620,0.310563,0.287324,0.307746,0.356338,0.310563,0.292958,0.287324,0.295070,0.572535,0.309859,0.596479,0.338732,0.280986,0.351408,0.283803,0.438028,0.514789,0.307746,0.294366,0.295775,0.302817,0.306338,0.316197,0.288732,0.294366,0.306338,0.411972,0.316901,0.303521,0.363380,0.290141,0.286620,0.295775,0.318310,0.285211,0.297887,0.289437,0.292254,0.281690,0.302817,0.488028,0.290845,0.289437,0.282394,0.288732,0.300000,0.294366,0.314789,0.297887,0.296479,0.321127,0.302817,0.032394,0.311268,0.558451,0.295070,0.550704,0.290845,0.283803,0.289437,0.311268,0.273239,0.288028,0.287324,0.016479,0.015634,0.552817,0.293662,0.283803,0.291549,...,0.276056,0.280282,0.285211,0.295775,0.293662,0.283803,0.291549,0.012113,0.278873,0.502817,0.280282,0.301408,0.291549,0.591549,0.322535,0.291549,0.279577,0.292254,0.281690,0.284507,0.501408,0.538732,0.334507,0.300704,0.276761,0.291549,0.278873,0.295070,0.291549,0.010845,0.311268,0.009930,0.291549,0.284507,0.022606,0.008521,0.302113,0.309859,0.290845,0.303521,0.024648,0.031620,0.288732,0.461268,0.279577,0.288028,0.711268,0.293662,0.643662,0.300000,0.326761,0.315493,0.298592,0.528873,0.292958,0.293662,0.297887,0.302817,0.287324,0.285915,0.318310,0.288028,0.557042,0.279577,0.545775,0.278873,0.285211,0.283803,0.287324,0.296479,0.304930,0.547887,0.321127,0.280282,0.300704,0.288028,0.547183,0.450704,0.304225,0.301408,0.278873,0.554225,0.311972,0.309859,0.282394,0.576761,0.328873,0.290141,0.016268,0.288028,0.280986,0.304930,0.299296,0.289437,0.010423,0.283099,0.291549,0.322535,0.291549,0.657042
ENSG00000112333,0.677861,0.096393,0.424751,0.297886,0.250000,0.294776,0.438433,0.139303,0.373134,0.263060,0.442786,0.393035,0.291667,0.262438,0.347015,0.438433,0.235075,0.322761,0.416667,0.289801,0.238806,0.572139,0.269279,0.092662,0.095149,0.236940,0.424129,0.297264,0.286070,0.518035,0.206468,0.493781,0.233831,0.284826,0.526741,0.336443,0.342662,0.276119,0.400498,0.288557,0.439677,0.452736,0.362562,0.000000,0.424129,0.251244,0.671642,0.383706,0.097637,0.277363,0.218284,0.267413,0.883085,0.589552,0.248134,0.294776,0.552861,0.117537,0.315298,0.060261,0.176617,0.371269,0.284826,0.283582,0.380597,0.346393,0.358831,0.102612,0.294776,0.411070,0.268657,0.268657,0.330224,0.039614,0.075871,0.257463,0.424751,0.287935,0.273010,0.250622,0.368781,0.055473,0.353234,0.449005,0.345149,0.430970,0.086443,0.292289,0.049192,0.321517,0.263060,0.404851,0.342662,0.340796,0.250000,0.064677,0.145522,0.125622,0.294154,0.311567,...,0.305970,0.042102,0.331468,0.313433,0.158582,0.320895,0.294776,0.282338,0.329602,0.328358,0.312811,0.061194,0.358209,0.258706,0.287313,0.371891,0.315920,0.387438,0.384328,0.570274,0.062189,0.287935,0.272388,0.305348,0.366294,0.322139,0.311567,0.281716,0.304726,0.287935,0.269901,0.360075,0.275497,0.069030,0.087065,0.062127,0.378109,0.483209,0.305348,0.314055,0.501244,0.479478,0.491915,0.315920,0.148632,0.394279,0.274876,0.470771,0.282960,0.218905,0.610696,0.387438,0.100124,0.471393,0.279229,0.423507,0.031405,0.321517,0.143657,0.435945,0.258085,0.268657,0.271144,0.268657,0.634328,0.317786,0.337065,0.306592,0.366915,0.269901,0.303483,0.245025,0.312811,0.041853,0.269901,0.634328,0.267413,0.447139,0.452736,0.341418,0.470149,0.291045,0.074627,0.063433,0.210821,0.330846,0.282960,0.294776,0.050684,0.394901,0.226368,0.212687,0.296020,0.294776,0.347637,0.329602,0.250622,0.388060,0.297886,0.431592
ENSG00000161940,0.183955,0.179851,0.402985,0.414179,0.159328,0.432836,0.988806,0.194403,0.470149,0.507463,0.582090,0.304851,0.600746,0.787313,0.488806,0.249627,0.317537,0.216791,0.328358,0.325373,0.372761,0.578358,0.347388,0.260821,0.250373,0.297015,0.417910,0.288806,0.192164,0.187687,0.641791,0.832090,0.578358,0.589552,0.309702,0.578358,0.582090,0.768657,0.291045,0.207090,0.421642,0.302239,0.388060,0.131343,0.690298,0.615672,1.022388,0.466418,0.188433,0.306343,0.421642,0.169030,0.421642,0.462687,0.701493,0.552239,0.360821,0.364552,0.597015,0.370522,0.243657,0.514925,0.645522,0.257836,0.485075,1.194030,0.395522,0.548507,0.278358,0.481343,0.305224,0.253358,0.263806,0.276493,0.533582,0.511194,0.214925,0.325746,0.458955,0.410448,0.518657,0.347761,0.349254,0.183955,0.809702,0.652985,0.481343,0.690298,0.340299,0.245522,0.447761,0.455224,0.238060,0.578358,0.417910,0.282090,0.282090,0.269030,0.421642,0.776119,...,0.297015,0.559702,1.033582,0.910448,0.255597,0.247761,0.216045,1.197761,0.260821,0.267910,0.432836,0.199627,0.529851,0.125000,0.320149,0.563433,0.444030,0.391791,0.425373,0.492537,0.191045,0.388060,0.187687,0.921642,0.462687,0.286194,0.244030,0.341418,0.451493,0.342910,0.247388,0.372761,0.406716,0.503731,0.406716,0.282463,0.310448,0.200000,0.318284,0.344030,0.384328,0.354851,0.563433,0.285075,0.263806,0.329478,0.477612,0.254478,0.492537,0.245522,0.462687,0.264552,0.421642,0.514925,0.809702,0.333955,0.791045,0.239179,0.574627,0.339179,1.350746,0.380597,0.514925,0.414179,0.507463,0.295896,0.402985,0.308209,0.369776,0.462687,0.354478,0.320522,0.391791,0.406716,0.440298,0.263433,0.373134,1.085821,0.384328,0.399254,0.150373,0.604478,0.200746,0.286940,1.261194,1.000000,0.388060,0.289552,0.201493,0.466418,0.250746,0.342910,0.548507,0.406716,0.320149,0.310821,0.936567,0.347761,0.522388,0.828358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000102984,0.118713,0.171053,0.324561,0.174561,0.087719,0.157895,0.193567,0.174269,0.193860,0.152047,0.082164,0.150585,0.196784,0.104386,0.247368,0.220175,0.187719,0.309942,0.180117,0.121345,0.260526,0.111696,0.229240,0.147661,0.169006,0.167836,0.160819,0.286550,0.085673,0.181579,0.228070,0.275146,0.215205,0.153509,0.156140,0.144152,0.371345,0.173977,0.224561,0.135673,0.221930,0.231871,0.085673,0.136257,0.167836,0.171053,0.602339,0.235380,0.246491,0.207895,0.191520,0.119298,0.235088,0.236550,0.199415,0.235673,0.236550,0.077193,0.135673,0.183041,0.164327,0.172515,0.195906,0.184795,0.250000,0.701754,0.109064,0.276901,0.117251,0.224854,0.388889,0.217251,0.225146,0.214327,0.292398,0.263743,0.209064,0.170175,0.238012,0.152632,0.126608,0.103509,0.144152,0.135380,0.347953,0.224854,0.189474,0.184211,0.225439,0.153216,0.217251,0.099708,0.150000,0.138596,0.165789,0.280702,0.280702,0.276608,0.076608,0.197368,...,0.125731,0.176901,1.096491,0.272222,0.191520,0.218129,0.130702,0.339181,0.109942,0.128655,0.125146,0.153216,0.345029,0.249708,0.214327,0.190351,0.108187,0.201754,0.085673,0.172807,0.112573,0.151754,0.251462,0.216082,0.250585,0.244737,0.238012,0.179532,0.115789,0.174561,0.202632,0.198830,0.145029,0.118421,0.121053,0.083626,0.179825,0.119006,0.397661,0.204678,0.336257,0.290351,0.261404,0.252339,0.076316,0.174854,0.251754,0.209357,0.248246,0.170175,0.136257,0.165789,0.228655,0.228655,0.169006,0.327485,0.175731,0.104094,0.268421,0.107602,0.160526,0.170175,0.208187,0.287134,0.190936,0.170175,0.199708,0.154678,0.678363,0.234503,0.202047,0.240936,0.107895,0.104094,0.164327,0.165497,0.280994,0.188596,0.155556,0.095614,0.134795,0.165789,0.086842,0.120468,0.391813,0.312865,0.126023,0.132749,0.074561,0.183041,0.117544,0.216667,0.183333,0.152047,0.267544,0.063158,0.232164,0.281287,0.163158,0.155263
ENSG00000160352,0.172273,0.172273,0.184545,0.270909,0.061818,0.249545,0.212727,0.188182,0.382727,0.287727,0.151364,0.161364,0.148182,0.121818,0.139545,0.335455,0.268636,0.181364,0.307273,0.193182,0.323182,0.257273,0.136364,0.171364,0.149091,0.261364,0.225000,0.254545,0.388636,0.261364,0.301364,0.239545,0.399091,0.211364,0.241364,0.155000,0.360909,0.277273,0.247727,0.196364,0.276364,0.348182,0.190000,0.182273,0.190000,0.246818,0.154545,0.317273,0.082727,0.395455,0.393182,0.283636,0.252727,0.236818,0.238636,0.235455,0.308636,0.171364,0.095909,0.325455,0.264091,0.124545,0.176818,0.162273,0.240455,0.246818,0.411364,0.295909,0.192273,0.284545,0.322727,0.190909,0.171364,0.177727,0.313636,0.178636,0.135000,0.195455,0.285909,0.252273,0.193182,0.227727,0.308182,0.251364,0.248182,0.325909,0.293636,0.280000,0.167273,0.190909,0.180455,0.165000,0.185909,0.267273,0.238636,0.261818,0.261818,0.344091,0.194545,0.228636,...,0.334545,0.174091,0.133182,0.247273,0.330455,0.210455,0.320000,0.214545,0.134545,0.245000,0.203636,0.215909,0.302273,0.327273,0.220909,0.285000,0.330455,0.132273,0.297727,0.156818,0.206818,0.410455,0.090909,0.140909,0.183636,0.161818,0.103182,0.315455,0.105455,0.119545,0.155000,0.224545,0.253182,0.282727,0.235000,0.166364,0.196364,0.096364,0.233636,0.274091,0.397727,0.287273,0.315455,0.227273,0.083182,0.215000,0.140909,0.185455,0.253636,0.178182,0.177273,0.312727,0.245909,0.463636,0.414545,0.202273,0.131818,0.249091,0.185000,0.265909,0.193182,0.215909,0.272273,0.251818,0.271364,0.392273,0.236364,0.184091,0.224091,0.193182,0.332727,0.285000,0.203182,0.176818,0.329545,0.264545,0.198182,0.294545,0.196364,0.122273,0.189091,0.211818,0.152273,0.135909,0.194091,0.244091,0.205909,0.164545,0.092273,0.374545,0.325909,0.230909,0.169091,0.314545,0.147727,0.386364,0.179545,0.232727,0.121818,0.116818
ENSG00000152217,0.062857,0.079643,0.557143,1.482143,0.000000,2.642857,0.305714,1.210714,2.303571,0.957143,1.175000,0.903571,1.410714,0.375000,0.689286,0.503571,1.303571,1.692857,4.464286,0.197500,1.075000,0.578571,0.086429,0.297500,0.189286,0.000000,2.885714,0.241429,0.950000,1.110714,0.700000,0.337857,0.839286,0.910714,1.225000,1.989286,1.303571,0.546429,1.614286,0.410714,1.703571,0.960714,0.142143,0.031893,0.667857,1.021428,0.914286,0.589286,0.000000,0.885714,1.853572,0.842857,0.796429,1.710714,0.742857,0.007786,1.732143,0.842857,0.617857,0.240714,1.207143,0.932143,0.685714,0.614286,1.310714,1.428571,1.389286,0.867857,0.560714,1.446429,0.675000,0.807143,0.421429,0.732143,1.746428,0.442857,0.000000,0.957143,1.060714,0.700000,0.603571,0.639286,1.257143,0.000000,1.167857,0.957143,1.389286,0.975000,0.946429,0.578571,1.100000,0.190357,1.060714,1.232143,0.517857,0.000000,0.199643,0.158929,0.400000,0.403571,...,1.296429,0.935714,0.596429,1.178571,1.728572,0.330357,0.792857,1.157143,0.457143,0.710714,1.050000,0.371429,1.100000,1.367857,0.228214,0.252500,1.139286,0.375000,0.351071,0.989286,0.700000,1.596429,0.293214,1.292857,0.632143,0.150714,0.207143,0.346786,1.089286,1.200000,0.210714,0.564286,1.085714,0.742857,0.323571,0.400000,1.842857,0.650000,1.575000,1.017857,0.091071,0.351071,1.678571,1.403571,1.282143,1.025000,0.078929,1.167857,0.796429,1.471429,0.657143,1.153571,0.839286,0.957143,0.950000,1.750000,1.103571,0.113571,3.103571,0.782143,1.128571,0.254643,1.417857,0.286071,0.464286,1.653571,0.246786,0.082500,0.578571,0.266071,1.810714,0.653571,0.907143,0.989286,1.625000,0.682143,1.875000,1.753571,0.189286,0.219286,0.000000,1.085714,0.000000,0.313929,0.235357,0.360714,0.642857,0.000000,0.019464,0.000000,0.000000,0.000000,0.122857,0.950000,0.846429,0.853571,0.256786,0.685714,2.457143,1.567857
ENSG00000255073,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.541237,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.342784,0.000000,0.143557,0.000000,0.000000,0.329897,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.265464,0.466495,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.940722,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.994845,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.822165,0.023995,0.000000,0.000000,0.000000,0.000000,0.896907,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.048711,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.033247,0.000000,0.211340,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.682990,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.053608,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.219588,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [43]:
# Put both matrices on one shared, sorted label axis used for rows AND columns;
# entries absent from either matrix are filled with 0.
# NOTE(review): the shared axis is built from the two row indexes only, so any
# column of `da` whose label is in neither index is dropped — confirm intent.
all_indices = sorted(set(da.index) | set(res.index))
da_aligned = da.reindex(index=all_indices, columns=all_indices, fill_value=0)
res_aligned = res.reindex(index=all_indices, columns=all_indices, fill_value=0)

# Element-wise sum of the motif bias and the PPI bias.
da = da_aligned.add(res_aligned)

In [44]:
da.shape

(19269, 19269)

In [45]:
# Download the (mouse homolog gene id, human Ensembl gene id) pairs from
# BioMart. This rebinds `biomart`, replacing the symbol table fetched earlier.
biomart = getBiomartTable(
    attributes=["mmusculus_homolog_ensembl_gene", "ensembl_gene_id"],
    bypass_attributes=True,
)
# Display the table as the cell output.
biomart

downloading gene names from biomart
['mmusculus_homolog_ensembl_gene', 'ensembl_gene_id']


Unnamed: 0,mmusculus_homolog_ensembl_gene,ensembl_gene_id
0,,ENSG00000210049
1,,ENSG00000211459
2,,ENSG00000210077
3,,ENSG00000210082
4,,ENSG00000209082
...,...,...
77642,,ENSG00000232679
77643,ENSMUSG00000088001,ENSG00000200033
77644,,ENSG00000228437
77645,,ENSG00000229463


In [46]:
# Show only the rows that actually have a mouse homolog.
biomart[biomart.mmusculus_homolog_ensembl_gene.notna()]

Unnamed: 0,mmusculus_homolog_ensembl_gene,ensembl_gene_id
5,ENSMUSG00000064341,ENSG00000198888
9,ENSMUSG00000064345,ENSG00000198763
15,ENSMUSG00000064351,ENSG00000198804
18,ENSMUSG00000064354,ENSG00000198712
20,ENSMUSG00000064356,ENSG00000228253
...,...,...
77627,ENSMUSG00000032913,ENSG00000198799
77639,ENSMUSG00000102439,ENSG00000143631
77640,ENSMUSG00000049133,ENSG00000143520
77641,ENSMUSG00000039384,ENSG00000143507


In [None]:
# mp: human Ensembl gene id -> list of distinct mouse homolog gene ids, in
# order of appearance. This rebinds `mp`, replacing the symbol map built earlier.
# Rows with a missing id are removed with `dropna()`; the previous
# `x is np.nan` test is an identity check and misses NaN objects other than
# the `np.nan` singleton as well as None.
mp = {}
for v, k in biomart.dropna().values:
    homologs = mp.setdefault(k, [])
    if v not in homologs:
        homologs.append(v)

In [53]:
# Mouse version of the bias matrix: keep only the rows/columns whose label has
# an entry in `mp`, then relabel each one with the FIRST id listed for it.
known_labels = list(mp.keys())
da2 = da.loc[da.index.isin(known_labels), da.columns.isin(known_labels)].copy()
da2.index = [mp[label][0] for label in da2.index]
da2.columns = [mp[label][0] for label in da2.columns]

In [66]:
# Several original labels can be relabelled to the same id; keep the first
# occurrence of each label on both axes.
unique_rows = ~da2.index.duplicated(keep="first")
unique_cols = ~da2.columns.duplicated(keep="first")
da2 = da2.loc[unique_rows, unique_cols]

In [71]:
# NOTE(review): `sparse` is not defined by any cell shown in this notebook
# (scipy.sparse is imported under the name `sp`), so this raises NameError on a
# fresh kernel. Presumably it was created in a since-deleted cell — restore
# that cell or remove this one.
sparse

Unnamed: 0,ENSG00000000003,ENSG00000000005,ENSG00000000419,ENSG00000000457,ENSG00000000460,ENSG00000000938,ENSG00000000971,ENSG00000001036,ENSG00000001084,ENSG00000001167,ENSG00000001460,ENSG00000001461,ENSG00000001497,ENSG00000001561,ENSG00000001617,ENSG00000001626,ENSG00000001629,ENSG00000001630,ENSG00000001631,ENSG00000002016,ENSG00000002330,ENSG00000002549,ENSG00000002586,ENSG00000002587,ENSG00000002726,ENSG00000002745,ENSG00000002746,ENSG00000002822,ENSG00000002834,ENSG00000002919,ENSG00000002933,ENSG00000003056,ENSG00000003096,ENSG00000003137,ENSG00000003147,ENSG00000003249,ENSG00000003393,ENSG00000003400,ENSG00000003402,ENSG00000003436,ENSG00000003509,ENSG00000003756,ENSG00000003987,ENSG00000003989,ENSG00000004059,ENSG00000004139,ENSG00000004142,ENSG00000004399,ENSG00000004455,ENSG00000004468,ENSG00000004478,ENSG00000004487,ENSG00000004534,ENSG00000004660,ENSG00000004700,ENSG00000004766,ENSG00000004776,ENSG00000004777,ENSG00000004779,ENSG00000004799,ENSG00000004809,ENSG00000004838,ENSG00000004846,ENSG00000004848,ENSG00000004864,ENSG00000004866,ENSG00000004897,ENSG00000004939,ENSG00000004961,ENSG00000004975,ENSG00000005001,ENSG00000005007,ENSG00000005020,ENSG00000005022,ENSG00000005059,ENSG00000005073,ENSG00000005075,ENSG00000005100,ENSG00000005102,ENSG00000005108,ENSG00000005156,ENSG00000005175,ENSG00000005187,ENSG00000005189,ENSG00000005194,ENSG00000005206,ENSG00000005238,ENSG00000005243,ENSG00000005249,ENSG00000005302,ENSG00000005339,ENSG00000005379,ENSG00000005381,ENSG00000005421,ENSG00000005436,ENSG00000005448,ENSG00000005469,ENSG00000005471,ENSG00000005483,ENSG00000005486,...,ENSMUSG00000045140,ENSMUSG00000047246,ENSMUSG00000018537,ENSMUSG00000038453,ENSMUSG00000070337,ENSMUSG00000069662,ENSMUSG00000048827,ENSMUSG00000022579,ENSMUSG00000108348,ENSMUSG00000022055,ENSMUSG00000069744,ENSMUSG00000038541,ENSMUSG00000073913,ENSMUSG00000078695,ENSMUSG00000010362,ENSMUSG00000020677,ENSMUSG00000062564,ENSMUSG00000060397,ENSMUSG00000044933,ENSMUSG00000096549,ENSMU
SG00000020527,ENSMUSG00000020530,ENSMUSG00000071478,ENSMUSG00000051452,ENSMUSG00000034449,ENSMUSG00000020532,ENSMUSG00000032292,ENSMUSG00000058385,ENSMUSG00000071653,ENSMUSG00000018405,ENSMUSG00000038199,ENSMUSG00000096010,ENSMUSG00000067771,ENSMUSG00000018882,ENSMUSG00000045792,ENSMUSG00000066672,ENSMUSG00000064333,ENSMUSG00000062142,ENSMUSG00000108948,ENSMUSG00000075113,ENSMUSG00000050577,ENSMUSG00000066747,ENSMUSG00000075377,ENSMUSG00000050505,ENSMUSG00000075186,ENSMUSG00000030680,ENSMUSG00000078320,ENSMUSG00000000562,ENSMUSG00000108398,ENSMUSG00000034185,ENSMUSG00000107877,ENSMUSG00000070902,ENSMUSG00000031452,ENSMUSG00000087512,ENSMUSG00000073234,ENSMUSG00000090840,ENSMUSG00000073598,ENSMUSG00000079304,ENSMUSG00000105734,ENSMUSG00000096537,ENSMUSG00000068167,ENSMUSG00000109737,ENSMUSG00000107252,ENSMUSG00000079497,ENSMUSG00000110221,ENSMUSG00000114865,ENSMUSG00000098590,ENSMUSG00000048752,ENSMUSG00000095975,ENSMUSG00000049761,ENSMUSG00000043795,ENSMUSG00000091780,ENSMUSG00000030030,ENSMUSG00000114245,ENSMUSG00000110170,ENSMUSG00000049098,ENSMUSG00000114004,ENSMUSG00000053297,ENSMUSG00000037689,ENSMUSG00000109305,ENSMUSG00000115219,ENSMUSG00000039233,ENSMUSG00000115302,ENSMUSG00000095026,ENSMUSG00000078630,ENSMUSG00000027676,ENSMUSG00000041857,ENSMUSG00000115798,ENSMUSG00000116563,ENSMUSG00000096530,ENSMUSG00000103523,ENSMUSG00000117809,ENSMUSG00000091614,ENSMUSG00000117748,ENSMUSG00000018451,ENSMUSG00000092349,ENSMUSG00000047150,ENSMUSG00000064220,ENSMUSG00000057156,ENSMUSG00000006818
ENSG00000000003,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ENSG00000000005,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ENSG00000000419,0,0,0,0,0,2.0,0,0,0,0,0,0,0,2.0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2.0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ENSG00000000457,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ENSG00000000460,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSMUSG00000121607,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,2.0,0,0,0,0,0,2.0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ENSMUSG00000121608,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ENSMUSG00000121904,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,2.0
ENSMUSG00001074846,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [70]:
# Display the number of distinct row labels in `sparse`.
sparse.index.nunique()

35989

In [68]:
# Outer-join the two frames on their row labels so that every label present in
# either `da` or `da2` is kept.
sparse = da.merge(da2, how="outer", left_index=True, right_index=True)
# Entries that exist in only one of the two inputs come out of the join as NaN;
# set them to 0 in place.
sparse.fillna(value=0, inplace=True)

In [None]:
# Re-type every column as a pandas sparse float column whose fill value is 0.
sparse = sparse.astype(pd.SparseDtype(dtype="float", fill_value=0))

In [None]:

# Convert to scipy sparse.
# `sparse.values` materialises a dense (n_rows x n_cols) array before the CSR
# matrix is built. When every column is a zero-filled pandas sparse column, go
# through the `.sparse` accessor instead so no dense copy is created.
if all(
    isinstance(col_dtype, pd.SparseDtype) and col_dtype.fill_value == 0
    for col_dtype in sparse.dtypes
):
    sparse_matrix = sparse.sparse.to_coo().tocsr()
else:
    # Fallback for a frame that was not converted to a zero-filled sparse dtype.
    sparse_matrix = sp.csr_matrix(sparse.values)

# Save the matrix and indices separately: the .npz file holds only the values,
# the two CSV files hold the row and column labels.
sp.save_npz(LOC + "bias_sparse.npz", sparse_matrix)
pd.Series(sparse.index).to_csv(LOC + "bias_sparse_index.csv")
pd.Series(sparse.columns).to_csv(LOC + "bias_sparse_columns.csv")

AttributeError: 'Series' object has no attribute 'to_parquet'

In [80]:
# Drop the large intermediates from the kernel namespace.
del sparse
del sparse_matrix
del da
del da2

In [None]:
# Row labels: the CSV was written from a pandas Series, so column 0 is the
# positional index and column 1 holds the labels themselves.
index = pd.read_csv(LOC + "bias_sparse_index.csv").iloc[:, 1].values
# Matrix values saved next to the labels.
sparse_matrix = sp.load_npz(LOC + "bias_sparse.npz")

In [9]:
# Release the merged frame if it is still defined. An earlier cleanup cell
# already deletes `sparse`, so an unguarded `del` raises NameError when the
# notebook is run from top to bottom.
try:
    del sparse
except NameError:
    pass

In [7]:
# `ad` is otherwise only imported in the last cell of the notebook; import it
# here so this cell also runs on a fresh kernel.
import anndata as ad

# Placeholder AnnData: a single all-zero observation row and one variable per
# entry of `index`.
bias = ad.AnnData(X=np.zeros((1, len(index))), var=index)

In [10]:
# Store the variable-by-variable matrix under the "GRN" key of `varp`.
bias.varp["GRN"] = sparse_matrix

In [11]:
# `GRNAnnData` is otherwise only imported in the last cell of the notebook;
# import it here so this cell also runs on a fresh kernel.
from grnndata import GRNAnnData

# Wrap the AnnData object in grnndata's GRNAnnData container.
bias = GRNAnnData(bias)

In [23]:
# Name the single `var` column "index" and promote it to the row index of `var`.
bias.var = bias.var.set_axis(["index"], axis=1).set_index("index")

In [24]:
# Persist the bias object as an .h5ad file under LOC.
bias.write(LOC + "bias.h5ad")

In [26]:
# `read_h5ad` is otherwise only imported in the last cell of the notebook;
# import it here so this cell also runs on a fresh kernel.
from grnndata import read_h5ad

# Reload the bias object written to LOC.
bias = read_h5ad(LOC + "bias.h5ad")

In [None]:
# Subset the variables of `bias`. The previous form called `isin()` without its
# required argument (TypeError) and applied the per-variable mask to the
# observation axis instead of the variable axis.
# NOTE(review): presumably the intended selection is the genes listed in
# `genedf`, and `genedf` is indexed by the same identifiers as `bias.var` —
# confirm before relying on this.
bias[:, bias.var.index.isin(genedf.index)]

In [25]:
import anndata as ad
from grnndata import GRNAnnData, read_h5ad
from bengrn import BenGRN