In [None]:
import scanpy as sc
import anndata as ad
from scipy.sparse import csr_matrix
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

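Replace the path below with the directory where you saved the downloaded Alzheimer's ST data, available from [here](https://www.synapse.org/#!Synapse:syn22153884/wiki/603937). File names are appended directly to this string, so include the trailing path separator (e.g. `'/path/to/data/'`).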

In [None]:
# Directory holding the downloaded files. File names are concatenated onto this
# string as-is, so a non-empty value must end with a path separator.
chen_data_directory = ""

In [None]:
# Metadata table (tab-separated), one row per entry in the file.
metadata_path = chen_data_directory + 'spot_metadata.tsv.gz'
chen_meta = pd.read_csv(metadata_path, sep='\t')

# Raw count table, parsed with pandas' default separator. Reading it is slow,
# but no faster way of loading the spot data is known here.
# The 'newindex' column is renamed to 'Spot' so it can be referred to by the
# same name as the 'Spot' column used from the metadata.
counts_path = chen_data_directory + 'raw_counts.txt.gz'
chen_df = pd.read_csv(counts_path).rename(columns={'newindex': 'Spot'})

In [None]:
# Build the AnnData object from the count table.
# Every column after the first is treated as a variable (gene) column; the
# 'Spot' column supplies the observation names.
gene_columns = chen_df.columns[1:]
count_matrix = csr_matrix(chen_df[gene_columns].to_numpy())

adata_chen = ad.AnnData(
    X=count_matrix,
    obs=pd.DataFrame(index=chen_df['Spot']),
    var=pd.DataFrame(index=gene_columns),
)

In [None]:
# Subset the data to the spots where the metadata is defined.
# `chen_meta` carries the default integer index produced by `read_csv`, and
# anndata treats integer indexers as row positions, so this keeps the first
# len(chen_meta) rows of the count table.
# NOTE(review): this relies on the count table and the metadata listing the
# spots in the same order -- confirm against the downloaded files.
# `.copy()` materialises the view so that `.obs` can be replaced without an
# implicit copy-on-write.
adata_chen = adata_chen[chen_meta.index, :].copy()

# Use the metadata as `.obs`, indexed by spot name.
# `set_index` returns a new frame whose index is the 'Spot' column (and which no
# longer has that column), so `chen_meta` itself is left untouched. Assigning
# `chen_meta` directly and then editing `.obs` in place would modify
# `chen_meta` as well and make this cell fail with a KeyError when re-run.
adata_chen.obs = chen_meta.set_index('Spot')

In [None]:
# Plotting tools such as Squidpy need the coordinates stored in obsm, so copy
# the two coordinate columns of `.obs` there as a plain array.
spot_coordinates = adata_chen.obs[['coord_X', 'coord_Y']]
adata_chen.obsm['spatial'] = spot_coordinates.to_numpy()

In [None]:
# Filter out genes with low expression.
# The threshold is passed as `min_cells`; it could probably be lowered to 1,
# given how Visium works.
min_spots_per_gene = 10
sc.pp.filter_genes(adata_chen, min_cells=min_spots_per_gene)

In [None]:
# We need to deal with the NaN values before log-transforming.
# In a sparse matrix only explicitly stored entries can be NaN, so patch the
# stored values directly. This avoids building a dense copy of the whole
# matrix just to compute a mask.
counts_matrix = adata_chen.X
counts_matrix.data[np.isnan(counts_matrix.data)] = 0.0
# The patched entries are now explicit zeros; drop them from the sparse structure.
counts_matrix.eliminate_zeros()
# Assign back so the cleaned matrix is stored even if `.X` returned a copy.
adata_chen.X = counts_matrix

In [None]:
# Keep an untouched copy of the current matrix in a layer before transforming X.
adata_chen.layers['counts'] = adata_chen.X.copy()

# Normalise each observation to a total of 1e4, then log-transform, both in place.
sc.pp.normalize_total(adata_chen, target_sum=1e4, inplace=True)
sc.pp.log1p(adata_chen)

In [None]:
# Final bit of housekeeping: keep only the observations whose 'AT' value is not missing.
has_at_value = adata_chen.obs['AT'].notnull()
adata_chen = adata_chen[has_at_value]

In [None]:
# Save the processed object as a gzip-compressed .h5ad file in the data directory.
output_path = chen_data_directory + 'chen20_merged.h5ad'
adata_chen.write(output_path, compression='gzip')