In [79]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import scanpy as sc
from muon import MuData
import muon as mu

### Read Data

In [10]:
# Location of the multimodal .h5mu file that the rest of the notebook works on.
# NOTE(review): this is a machine-specific absolute path; adjust it when running elsewhere.
DataPathway = (
    "/media/data/single_cell/brent10070/side_project/SHARE_seq/"
    "share.h5mu"
)

# Load the file through muon; the result is kept in `mdata` for every later cell.
mdata = mu.read(DataPathway)

In [13]:
# Show the loaded object's summary so its contents can be inspected before splitting.
mdata

### Split Datasets

##### Train : Validation : Test = 70% : 10% : 20%

In [91]:
# Fixed seed so the shuffled split (and everything derived from it below, such as
# the neighbour table) is identical after Restart Kernel -> Run All.
SPLIT_SEED = 0

# Step 1: hold out 20% of all barcodes as the test set.
Train_Validation_barcode, Test_barcode = train_test_split(
    mdata.obs_names, test_size=.20, shuffle=True, random_state=SPLIT_SEED
)

# Step 2: carve the validation set out of the remaining 80%.
# 0.125 of that 80% equals 10% of the full dataset, giving the 70 : 10 : 20 split.
Train_barcode, Validation_barcode = train_test_split(
    Train_Validation_barcode, test_size=.125, shuffle=True, random_state=SPLIT_SEED
)

# Sanity check: the three parts must account for every barcode exactly once in total.
assert len(Train_barcode) + len(Validation_barcode) + len(Test_barcode) == len(mdata.obs_names)

len(Train_barcode), len(Validation_barcode), len(Test_barcode)

(24150, 3450, 6900)

In [93]:
# Subset the full object once per barcode set, in train / validation / test order.
# NOTE(review): indexing presumably yields views of `mdata` rather than copies — confirm
# against the muon documentation before mutating the subsets independently.
Train, Validation, Test = (
    mdata[split_barcodes]
    for split_barcodes in (Train_barcode, Validation_barcode, Test_barcode)
)


### Run PCA on the scRNA-seq Data and Find Each Cell's 30 Nearest Neighbors

In [133]:
# Work on the RNA modality of the training subset; both scanpy calls below
# write their results onto this same object.
rna_train = Train.mod['RNA']

# PCA with scanpy's defaults.
sc.tl.pca(rna_train)

# Neighbour graph built from the first 30 principal components.
# NOTE(review): n_neighbors=31 is presumably chosen so that 30 neighbours remain
# once the cell itself is excluded — confirm against the scanpy documentation.
sc.pp.neighbors(rna_train, n_neighbors=31, n_pcs=30)

rna_train

AnnData object with n_obs × n_vars = 24150 × 22537
    uns: 'pca', 'neighbors'
    obsm: 'X_pca'
    varm: 'PCs'
    layers: 'counts', 'data'
    obsp: 'distances', 'connectivities'

In [234]:
# Build a (cells x 30) table holding, for every training cell, the positional
# indices of its stored neighbours ordered from nearest (Neigh_1) to farthest (Neigh_30).
#
# The sparse distance matrix is read directly instead of being densified:
# `.toarray().argsort(axis=1)` allocates and sorts a full n x n matrix only to
# recover the handful of entries that are actually stored in each row.
N_NEIGHBORS = 30

rna_train = Train.mod['RNA']
knn_distances = rna_train.obsp['distances'].tocsr()

# Every row must store exactly N_NEIGHBORS distances for the reshape below to be
# valid. Failing loudly here is safer than silently reporting arbitrary
# zero-distance cells as "neighbours", which is what a dense argsort would do.
stored_per_cell = np.diff(knn_distances.indptr)
if not (stored_per_cell == N_NEIGHBORS).all():
    raise ValueError(
        "Expected exactly {} stored neighbour distances per cell, found between {} and {}.".format(
            N_NEIGHBORS, stored_per_cell.min(), stored_per_cell.max()
        )
    )

# CSR keeps each row's entries contiguously, so one reshape gives per-cell rows.
neighbor_positions = knn_distances.indices.reshape(-1, N_NEIGHBORS)
neighbor_distances = knn_distances.data.reshape(-1, N_NEIGHBORS)

# Order each cell's neighbours by ascending distance.
nearest_first = np.argsort(neighbor_distances, axis=1, kind="stable")
Distance_index = np.take_along_axis(neighbor_positions, nearest_first, axis=1).astype(np.int64)

col_names = ["Neigh_{}".format(i) for i in range(1, N_NEIGHBORS + 1)]

# Pass the barcodes as the index directly: wrapping them in a list would make
# pandas build a one-level MultiIndex instead of a plain barcode index.
knn_neighbor = pd.DataFrame(Distance_index, columns=col_names, index=rna_train.obs_names)
knn_neighbor

Unnamed: 0,Neigh_1,Neigh_2,Neigh_3,Neigh_4,Neigh_5,Neigh_6,Neigh_7,Neigh_8,Neigh_9,Neigh_10,...,Neigh_21,Neigh_22,Neigh_23,Neigh_24,Neigh_25,Neigh_26,Neigh_27,Neigh_28,Neigh_29,Neigh_30
R1.48.R2.04.R3.79.P1.07,10817,3171,19518,4548,19946,21305,22705,9641,18973,13496,...,689,13031,12300,3399,11863,3657,20890,18843,17670,22230
R1.16.R2.22.R3.86.P1.05,10852,7119,21368,16467,23377,4183,13960,4592,8395,2324,...,7006,19583,10186,18111,12859,5637,13813,3796,17938,19475
R1.51.R2.21.R3.60.P1.05,20637,12449,13879,97,13727,13283,5102,19193,5562,17170,...,13475,18976,14864,22744,20531,7181,3935,3880,11372,3006
R1.96.R2.42.R3.80.P1.05,12884,12220,12263,11084,17864,12576,23457,11207,19086,21392,...,21328,15206,2414,23248,14715,17043,17414,17113,1899,9876
R1.89.R2.02.R3.15.P1.06,3000,1317,10762,14054,16834,1128,18332,21909,20596,23896,...,16908,21318,13895,17209,7470,19742,22072,14419,14924,24081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
R1.57.R2.14.R3.30.P1.05,15622,13478,3268,14476,18124,6230,10450,7941,23974,23651,...,2385,11751,9693,12066,11206,10484,9187,5348,13127,3517
R1.44.R2.90.R3.16.P1.06,9013,2764,13136,15637,3064,20176,2900,20016,21663,11082,...,1447,11062,3204,12370,119,19401,21418,9215,11797,12537
R1.84.R2.45.R3.63.P1.06,21559,8374,18631,22962,3874,18112,1260,19511,6415,21037,...,4355,20705,4514,14649,5811,19916,3913,18395,2573,3712
R1.46.R2.82.R3.65.P1.08,10020,8187,8991,23020,5755,14365,5792,2172,15566,10211,...,908,21003,20458,13205,14683,20908,23652,21326,6313,8347


### Get the Expression Values of a Cell's 30 Neighbors

In [307]:
# Take the first row of the neighbour table, translate its positional indices
# into barcodes, and fetch those cells' values with sc.get.var_df.
rna_train = Train.mod['RNA']
first_cell_neighbor_positions = knn_neighbor.iloc[0]
first_cell_neighbor_barcodes = rna_train.obs_names[first_cell_neighbor_positions].tolist()

sc.get.var_df(rna_train, keys=first_cell_neighbor_barcodes)

Unnamed: 0,R1.39.R2.68.R3.16.P1.07,R1.91.R2.86.R3.51.P1.07,R1.03.R2.03.R3.02.P1.07,R1.34.R2.53.R3.32.P1.06,R1.28.R2.48.R3.02.P1.07,R1.86.R2.41.R3.69.P1.06,R1.48.R2.78.R3.16.P1.08,R1.42.R2.50.R3.87.P1.07,R1.57.R2.55.R3.82.P1.07,R1.90.R2.22.R3.84.P1.07,...,R1.56.R2.76.R3.45.P1.07,R1.25.R2.59.R3.80.P1.06,R1.91.R2.08.R3.47.P1.06,R1.28.R2.11.R3.78.P1.07,R1.22.R2.21.R3.26.P1.07,R1.90.R2.01.R3.39.P1.07,R1.07.R2.93.R3.79.P1.07,R1.74.R2.80.R3.32.P1.06,R1.66.R2.86.R3.22.P1.08,R1.74.R2.63.R3.38.P1.07
0610007P14Rik,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,...,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023,-0.093023
0610009B22Rik,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,...,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560,-0.049560
0610009L18Rik,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,...,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161,-0.035161
0610009O20Rik,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,...,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182,-0.119182
0610010F05Rik,-0.167639,-0.167639,-0.167639,-0.167639,-0.167639,-0.167639,-0.167639,-0.167639,-0.167639,-0.167639,...,-0.167639,-0.167639,-0.167639,5.767242,-0.167639,-0.167639,6.633583,-0.167639,-0.167639,-0.167639
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
mt-Nd3,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,...,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384,-0.005384
mt-Nd4,-0.293290,-0.293290,-0.293290,-0.293290,1.088859,-0.293290,-0.293290,-0.293290,-0.293290,3.163324,...,-0.293290,-0.293290,-0.293290,-0.293290,-0.293290,-0.293290,-0.293290,-0.293290,-0.293290,-0.293290
mt-Nd4l,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,...,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240,-0.007240
mt-Nd5,1.262533,-0.475177,-0.475177,-0.475177,0.357018,-0.475177,-0.475177,-0.475177,0.947750,-0.475177,...,-0.475177,0.723110,-0.475177,-0.475177,0.559728,-0.475177,-0.475177,-0.475177,-0.042628,-0.475177
