In [218]:
import os
import numpy as np
import pandas as pd
import anndata as ad
import scanpy as sc
import tifffile as tiff
import matplotlib.pyplot as plt

import warnings
# Suppress all Python warnings for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and dtype warnings
# raised by pandas / anndata / scanpy in later cells are hidden as well.
warnings.filterwarnings("ignore")

# scanpy session settings: verbosity level 0 and the DPI values handed to
# scanpy's figure defaults (`dpi` and `dpi_save`).
# NOTE(review): presumably level 0 limits scanpy logging to errors only and
# `dpi_save` applies to figures written to disk - confirm against scanpy docs.
sc.settings.verbosity = 0
sc.settings.set_figure_params(dpi=300, dpi_save=500)

In [219]:
# Root folder of the dataset; later cells append '<sample>/...' to it, so the
# trailing slash is required.
data_dir = '../../../Data/Spatial/Transcriptomics/Visium_DLPFC_Maynard2021/'

# Sample IDs, four per group. A sample is addressed as
# sample_group_list[group][slice].
sample_group_list = [
    ['151507', '151508', '151509', '151510'],
    ['151669', '151670', '151671', '151672'],
    ['151673', '151674', '151675', '151676'],
]

# One radius value per sample, laid out exactly like sample_group_list.
# Only sample '151509' deviates from 48.
radius_list = [
    [48.0, 48.0, 49.0, 48.0],
    [48.0, 48.0, 48.0, 48.0],
    [48.0, 48.0, 48.0, 48.0],
]

In [220]:
# Choose which sample to process: group index, then slice index within it.
sample_idx, slice_idx = 2, 3
sample = sample_group_list[sample_idx][slice_idx]
radius = radius_list[sample_idx][slice_idx]

# Load the filtered feature-barcode matrix of the chosen sample.
adata = sc.read_10x_h5(
    data_dir + f'{sample}/{sample}_filtered_feature_bc_matrix.h5'
)
# Gene filtering is currently switched off:
# sc.pp.filter_genes(adata, min_cells=1)

# De-duplicate variable (gene) names in place before they are used as an index.
adata.var_names_make_unique()
adata

AnnData object with n_obs × n_vars = 3460 × 33538
    var: 'gene_ids', 'feature_types', 'genome'

In [221]:
# Load the per-spot position table. The file has no header row, so column
# names are supplied here and the barcode column becomes the index.
# In the previewed rows 'x' moves together with 'image_row' and 'y' with
# 'image_col'.
coords = pd.read_csv(
    data_dir + f'{sample}/spatial/tissue_positions_list.csv',
    header=None,
    names=['barcodes', 'in_tissue', 'x', 'y', 'image_row', 'image_col'],
).set_index('barcodes')
coords.head()

Unnamed: 0_level_0,in_tissue,x,y,image_row,image_col
barcodes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACGCCTGACACGCGCT-1,0,0,0,2624,2673
TACCGATCCAACACTT-1,0,1,1,2745,2741
ATTAAAGCGGACGAGC-1,0,0,2,2625,2810
GATAAGGGACGATTAG-1,0,1,3,2746,2878
GTGCAAATCACCAATA-1,0,0,4,2626,2948


In [222]:
# Replace the per-spot metadata with the coordinate rows, ordered to match
# the spots in adata (label lookup by barcode).
adata.obs = coords.loc[adata.obs_names].copy()

# Coordinates are stored as float arrays with the column-like axis first:
# ('y', 'x') for the grid and ('image_col', 'image_row') for the image.
adata.obsm['spatial'] = adata.obs[['y', 'x']].values.astype(float)
adata.obsm['spatial_img'] = adata.obs[['image_col', 'image_row']].values.astype(float)

# The same radius for every spot, as an (n_obs, 1) column.
adata.obsm['radius'] = np.ones((adata.n_obs, 1)) * radius

In [223]:
# Read the per-spot annotation table; its first column (barcodes) is the index.
Ann_df = pd.read_csv(data_dir + f'{sample}/meta_data.csv', sep=',', index_col=0, header=0)

# Every annotated barcode must be one of the loaded spots.
if not Ann_df.index.isin(adata.obs_names).all():
    raise ValueError("Some rows in the annotation file are not present in the adata.obs_names")

# The lookups below index the annotation by *every* spot barcode, so the
# converse must hold too: each spot needs an annotation row. Without this
# check a spot missing from the file passes the test above and then fails
# inside `.loc` with a much less informative KeyError.
if not adata.obs_names.isin(Ann_df.index).all():
    raise ValueError("Some adata.obs_names are not present in the annotation file")

# Copy the annotation columns onto the spots, ordered by adata.obs_names.
adata.obs['imagerow'] = Ann_df.loc[adata.obs_names, 'imagerow']
adata.obs['imagecol'] = Ann_df.loc[adata.obs_names, 'imagecol']
adata.obs['Manual_Annotation'] = Ann_df.loc[adata.obs_names, 'ManualAnnotation']
adata

AnnData object with n_obs × n_vars = 3460 × 33538
    obs: 'in_tissue', 'x', 'y', 'image_row', 'image_col', 'imagerow', 'imagecol', 'Manual_Annotation'
    var: 'gene_ids', 'feature_types', 'genome'
    obsm: 'spatial', 'spatial_img', 'radius'

In [224]:
# Save the assembled AnnData object as '<sample>_adata.h5ad' inside the
# sample's own folder under data_dir (overwrites an existing file of that name).
adata.write_h5ad(data_dir + f'{sample}/{sample}_adata.h5ad')

In [225]:
# Load '<sample>_adata.h5ad' from the sample folder and rebind `adata` to the
# on-disk copy; the trailing expression displays its summary.
adata = ad.read_h5ad(data_dir + f'{sample}/{sample}_adata.h5ad')
adata

AnnData object with n_obs × n_vars = 3460 × 33538
    obs: 'in_tissue', 'x', 'y', 'image_row', 'image_col', 'imagerow', 'imagecol', 'Manual_Annotation'
    var: 'gene_ids', 'feature_types', 'genome'
    obsm: 'radius', 'spatial', 'spatial_img'

In [226]:
# image = tiff.imread(data_dir + f'{sample}/spatial/full_image.tif')
# image.shape