# Notes

# Load Modules

In [1]:
import GraphPCA as sg
import scanpy as sc
import anndata as ad
import numpy as np
import pandas as pd
import squidpy as sq
import scipy
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances as pair
from sklearn.metrics import adjusted_rand_score as ari_score
from sklearn.neighbors import kneighbors_graph

In [2]:
import numpy as np
import pandas as pd
import matplotlib as mpl
# Figure-export font settings: font type 42 for both the PDF and PostScript
# backends, and Arial as the sans-serif family.
mpl.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.sans-serif': 'Arial',
})
import matplotlib.pyplot as plt
import seaborn as sns
from pprint import pprint
%matplotlib inline

# Load data

In [2]:
# Expression matrix: rows are genes, columns are cell IDs; the first CSV column
# becomes the row index.
expression_csv = '../../../STdata/xenium_cancer_test/expression_matrix.csv'
raw_counts = pd.read_csv(expression_csv, index_col=0)
raw_counts.head()

Unnamed: 0,7020,7021,7022,7023,7024,7025,7026,7027,7028,7029,...,114310,114311,114312,114314,114315,114316,114317,114318,114319,114320
ABCC11,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
ACTA2,29,18,14,3,1,9,4,2,0,3,...,1,1,2,6,2,5,2,0,1,0
ACTG2,6,3,2,2,0,2,0,1,0,2,...,2,1,2,3,1,1,13,3,1,1
ADAM9,2,2,0,0,0,5,2,2,0,2,...,6,7,0,0,5,3,0,1,0,0
ADGRE5,1,0,0,0,1,0,0,2,0,1,...,0,0,0,0,0,3,1,0,1,0


In [None]:
# Per-cell metadata table (Sample, Cell_Type, x, y), indexed by the first CSV column.
dataset_csv = '../../../STdata/xenium_cancer_test/dataset.csv'
ontrac_input = pd.read_csv(dataset_csv, index_col=0)
# Cast the index labels to strings so they can be matched against string cell IDs.
ontrac_input.index = ontrac_input.index.astype('str')
print(ontrac_input.shape)
ontrac_input.head()

(6912, 4)


Unnamed: 0_level_0,Sample,Cell_Type,x,y
Cell_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7020,sample1_rep1,Perivascular-Like,3094.089441,1693.307495
7021,sample1_rep1,Perivascular-Like,3099.526868,1702.666418
7022,sample1_rep1,Unlabeled,3108.176074,1686.984894
7023,sample1_rep1,Macrophages_1,3108.833496,1707.505566
7024,sample1_rep1,Endothelial,3109.45564,1691.19671


In [21]:
# Build the AnnData object (cells x genes), attach per-cell metadata and
# coordinates, construct the spatial kNN graph and normalise the counts.

# create adata
adata = ad.AnnData(raw_counts.T)

# Every cell in the expression matrix needs exactly one metadata row; otherwise
# the join / coordinate lookup below would silently misalign or change length.
if not ontrac_input.index.is_unique:
    raise ValueError("ontrac_input has duplicated cell IDs in its index")
missing_cells = adata.obs_names.difference(ontrac_input.index)
if len(missing_cells) > 0:
    raise KeyError(
        f"{len(missing_cells)} cells of raw_counts have no row in ontrac_input, "
        f"e.g. {list(missing_cells[:5])}"
    )

adata.obs = adata.obs.join(ontrac_input[['Cell_Type','Sample']])

# Look coordinates up by cell ID so that row i of `location` always belongs to
# adata.obs_names[i], even if the two CSV files list the cells in different orders.
location = ontrac_input.loc[adata.obs_names, ['x','y']].to_numpy()
adata.uns["spatial"] = location

# Constructing the spatial network
n_neighbors = 7
graph = kneighbors_graph(np.asarray(location), int(n_neighbors), metric='euclidean',
                                 metric_params={}, include_self=False)
# Symmetrise: an edge present in only one direction gets weight 0.5, a mutual edge 1.
graph = 0.5 * (graph + graph.T)

adata.uns["adj"] = graph
adata.obsm["spatial"] = location

# Normalization
sc.pp.filter_genes(adata, min_cells=20)
sc.experimental.pp.normalize_pearson_residuals(adata)
sc.pp.scale(adata)

print(adata.X.shape)
print(graph.shape)

(6912, 310)
(6912, 6912)


In [22]:
%%time

# Run GraphPCA on `adata` with the precomputed adjacency stored in adata.uns['adj'].
# Only the first returned value is kept; it is stored as the "GraphPCA" embedding.
Z, _ = sg.Run_GPCA(
    adata,
    network=adata.uns['adj'],
    n_components=50,
    method="knn",
    _lambda=0.6,
    n_neighbors=7,
    save_reconstruction=True,
)
adata.obsm["GraphPCA"] = Z

In [None]:
# Cluster the GraphPCA embedding into 4 groups with k-means.
# KMeans initialisation is random; fixing random_state makes the cluster labels
# (and the exported CSV) reproducible across Restart & Run All.
estimator = KMeans(n_clusters=4, random_state=0)
res = estimator.fit(Z)
lable_pred = res.labels_
# Store the labels per cell as a categorical column so they plot as discrete groups.
adata.obs["GPCA_pred"] = lable_pred
adata.obs["GPCA_pred"] = adata.obs["GPCA_pred"].astype('category')

In [None]:
# Add each cell's GraphPCA cluster label to its metadata row (joined on the index).
sample_df = ontrac_input.join(adata.obs['GPCA_pred'].astype('category'))

# Tick marks on the bottom / left axes, and small font sizes for the figure.
tick_rc = {
    'xtick.bottom': True,
    'ytick.left': True,
}
font_rc = {
    'axes.titlesize': 8,
    'axes.labelsize': 8,
    'xtick.labelsize': 6,
    'ytick.labelsize': 6,
    'legend.fontsize': 6,
}

with sns.axes_style('white', rc=tick_rc), sns.plotting_context('paper', rc=font_rc):
    fig, ax = plt.subplots()
    # Cells at their x/y coordinates, coloured by cluster label.
    sns.scatterplot(data=sample_df, x='x', y='y', hue=sample_df['GPCA_pred'], ax=ax)

In [None]:
# Write the per-cell annotation table (adata.obs) to disk in the working directory.
gpca_obs_path = 'xenium_GraphPCA.csv.gz'
adata.obs.to_csv(gpca_obs_path)