In [2]:
import os.path as path
from pathlib import Path

import numpy as np
import pandas as pd

import anndata as ad
import scanpy as sc
import squidpy as sq

import csv
import gzip
import os
import scipy.io

import cv2

# Absolute path of the current working directory; the resource paths in the
# cells below are built relative to it.
cd = Path.cwd()

# Part 1: Reading the feature-barcode matrix

Here, we read the Visium filtered feature-barcode matrix, which gives the count of each feature (gene expression and antibody capture) for each barcoded spot on the Visium slide.

In [7]:
# Directory holding the filtered feature-barcode matrix files.
matrix_dir_path = os.path.join(cd, "resources", "filtered_feature_bc_matrix")

# Sparse count matrix in Matrix Market format: one row per feature, one column
# per barcode (the next cell labels the columns with `barcodes`).
mat_filtered = scipy.io.mmread(path.join(matrix_dir_path, "matrix.mtx.gz"))

# Read features.tsv.gz once and close the handle deterministically; the three
# per-feature lists below are all taken from the same parsed rows.
features_path = path.join(matrix_dir_path, "features.tsv.gz")
with gzip.open(features_path, mode="rt") as features_file:
    feature_rows = list(csv.reader(features_file, delimiter="\t"))

# list of feature ids, e.g. 'ENSG00000187634'
feature_ids = [row[0] for row in feature_rows]

# list of gene names, e.g. 'SAMD11'
gene_names = [row[1] for row in feature_rows]

# list of feature_types, e.g. 'Gene Expression'
feature_types = [row[2] for row in feature_rows]

# list of barcodes, e.g. 'AAACATACAAAACG-1'
barcodes_path = os.path.join(matrix_dir_path, "barcodes.tsv.gz")
with gzip.open(barcodes_path, mode="rt") as barcodes_file:
    barcodes = [row[0] for row in csv.reader(barcodes_file, delimiter="\t")]


Now, we load the decompressed data into a single combined feature-barcode matrix for easier processing.

In [9]:
# Wrap the sparse count matrix in a sparse-backed DataFrame whose columns are
# labelled with the barcodes.
fbc_matrix = pd.DataFrame.sparse.from_spmatrix(mat_filtered, columns=barcodes)

# Prepend the per-feature annotation columns, in this left-to-right order.
annotation_columns = (
    ("feature_id", feature_ids),
    ("gene", gene_names),
    ("feature_type", feature_types),
)
for position, (label, values) in enumerate(annotation_columns):
    fbc_matrix.insert(loc=position, column=label, value=values)

In [10]:
# Show the combined feature-barcode frame; the rendered output elides the
# middle rows and columns.
fbc_matrix

Unnamed: 0,feature_id,gene,feature_type,AACAATGGAACCACAT-1,AACAATGTGCTCCGAG-1,AACACCAGCCTACTCG-1,AACACCATTCGCATAC-1,AACACCGAATGTCTCA-1,AACACGCAGATAACAA-1,AACACTCGTGAGCTTC-1,...,TGTTCGCTTCTAATCC-1,TGTTCGTACACGGCCA-1,TGTTCGTGGCGTCGTG-1,TGTTGCCAGTCGCCTG-1,TGTTGCCGTTCGACCA-1,TGTTGGCCTGTAGCGG-1,TGTTGGTGCGCACGAG-1,TGTTGGTGCGCTTCGC-1,TGTTGGTGCGGAATCA-1,TGTTGGTGGACTCAGG-1
0,ENSG00000187634,SAMD11,Gene Expression,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,ENSG00000188976,NOC2L,Gene Expression,1,0,0,0,0,0,1,...,0,0,0,0,0,1,1,1,0,0
2,ENSG00000187961,KLHL17,Gene Expression,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,ENSG00000187583,PLEKHN1,Gene Expression,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,ENSG00000187642,PERM1,Gene Expression,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18115,CD4,CD4,Antibody Capture,68679,33584,39114,54248,48609,56826,61884,...,43931,62451,49115,53829,41438,70051,74627,66590,61814,51561
18116,ITGAM,ITGAM,Antibody Capture,6525,37556,31822,6282,6487,11115,26053,...,12896,5484,4798,5626,11187,6949,7982,6373,7497,10556
18117,CD27,CD27,Antibody Capture,22402,15723,19114,25501,23853,29594,23057,...,21310,28071,26988,25196,22841,25997,27928,29879,28185,17410
18118,CCR7,CCR7,Antibody Capture,34870,20814,22500,30801,29853,32579,32568,...,22137,31986,21805,25120,24928,34779,35177,33277,29445,24156


Now that we have the transcriptomic data, we will load the corresponding full-resolution Visium (CytAssist) tissue image and attempt to perform cell segmentation on it.

In [21]:
# Full-resolution tissue image that accompanies the matrix loaded above.
tissue_image_path = path.join(
    cd,
    "resources",
    "CytAssist_FFPE_Protein_Expression_Human_Glioblastoma_tissue_image.tif",
)
img = sq.im.ImageContainer(tissue_image_path)

