### This script generates the metadata files needed by the Seurat workflow for spatial datasets

In [1]:
import json
import os
import re
import xml.etree.ElementTree as ET

import pandas as pd

In [2]:
# Parse the sample's SVG image and keep a handle on its root element
dir_name = './ME11/'
svg_file = ''.join([dir_name, 'ME11.svg'])
svg = ET.parse(svg_file)
root = svg.getroot()

In [5]:
# Prepare a tissue_positions_list.csv table that is compatible with Seurat.
# Every <rect> element directly under the SVG root is treated as one spot.
# The spots are assumed to be listed row by row, GRID_SIZE spots per row
# (TODO confirm against the SVG export).
GRID_SIZE = 50
SPOT_COLUMNS = ['in_tissue', 'array_row', 'array_column',
                'pxl_col_in_fullres', 'pxl_row_in_fullres']
col_reversed = False  # set to True to mirror the column numbering

# These keep the size of the last <rect> seen; spot_width is reused later
# for the scale-factor file. They stay 0 when the SVG contains no <rect>.
spot_width = 0
spot_height = 0

# Collect one record per spot and build the DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0, and it copied the whole frame
# on every iteration.
spot_records = []
for i, g in enumerate(root.findall("{http://www.w3.org/2000/svg}rect")):
    x = float(g.attrib['x'])
    y = float(g.attrib['y'])
    spot_width = float(g.attrib['width'])
    spot_height = float(g.attrib['height'])

    # Rects with CSS class 'st1' are the ones flagged as lying on tissue
    in_tissue = 1 if g.attrib['class'] == 'st1' else 0

    # Zero-based grid position derived from the element order
    array_row, array_column = divmod(i, GRID_SIZE)
    if col_reversed:
        array_column = GRID_SIZE - 1 - array_column

    # Centre of the rectangle in image coordinates
    x_c = x + spot_width / 2
    y_c = y + spot_height / 2

    # NOTE(review): 'pxl_col_in_fullres' is filled from the y centre and
    # 'pxl_row_in_fullres' from the x centre. The labels are kept unchanged
    # because the table is later written without a header, so only the
    # column order matters; confirm the intended axis convention.
    spot_records.append({'in_tissue': in_tissue,
                         'array_row': array_row,
                         'array_column': array_column,
                         'pxl_col_in_fullres': int(round(y_c)),
                         'pxl_row_in_fullres': int(round(x_c))})

spots = pd.DataFrame(spot_records, columns=SPOT_COLUMNS)

# Index each spot by its one-based '<column>x<row>' label
spots.index = ((spots['array_column'] + 1).astype(int).astype(str)
               + 'x'
               + (spots['array_row'] + 1).astype(int).astype(str))

In [7]:
# Load the barcode table: tab-separated text without a header row
barcode_txt = 'spatial_barcodes.txt'
barcodes = pd.read_csv(
    barcode_txt,
    header=None,
    sep='\t',
    lineterminator='\n',
)

In [8]:
# Name the three columns, then index every barcode by its '<x>x<y>' label
barcodes.columns = ['barcode', 'x', 'y']
x_labels = barcodes['x'].astype(str)
y_labels = barcodes['y'].astype(str)
barcodes.index = x_labels + 'x' + y_labels

# Only the barcode itself is needed once the coordinates live in the index
barcodes = barcodes[['barcode']]

In [9]:
# Preview the first rows of the barcode table
barcodes.head()

Unnamed: 0,barcode
1x1,AACGTGATAACGTGAT
1x2,AACGTGATAAACATCG
1x3,AACGTGATATGCCTAA
1x4,AACGTGATAGTGGTCA
1x5,AACGTGATACCACTGT


In [10]:
# Attach the barcodes to the spot table. concat(axis=1) aligns rows on the
# index labels; a label present in only one frame yields a NaN-filled row.
# Dropping any 'barcode' column already present keeps this cell safe to
# re-run, which would otherwise add a second barcode column.
spots = pd.concat([barcodes, spots.drop(columns='barcode', errors='ignore')], axis=1)

In [11]:
# Preview the first rows of the combined barcode/spot table
spots.head()

Unnamed: 0,barcode,in_tissue,array_row,array_column,pxl_col_in_fullres,pxl_row_in_fullres
1x1,AACGTGATAACGTGAT,0,0,0,30,1373
1x2,AACGTGATAAACATCG,0,1,0,52,1373
1x3,AACGTGATATGCCTAA,0,2,0,74,1373
1x4,AACGTGATAGTGGTCA,0,3,0,96,1373
1x5,AACGTGATACCACTGT,0,4,0,118,1373


In [12]:
# Keep only the spots flagged as lying on tissue and report how many remain
on_tissue = spots['in_tissue'] == 1
spots_tissue = spots[on_tissue]
print(spots_tissue.shape)
spots_tissue.head()

(2029, 6)


Unnamed: 0,barcode,in_tissue,array_row,array_column,pxl_col_in_fullres,pxl_row_in_fullres
1x8,AACGTGATCATCAAGT,1,7,0,184,1373
1x9,AACGTGATCGCTGATC,1,8,0,206,1373
1x10,AACGTGATACAAGCTA,1,9,0,228,1373
1x11,AACGTGATCTGTAGCC,1,10,0,250,1373
1x12,AACGTGATAGTACAAG,1,11,0,272,1373


In [13]:
# Write the positions table for every spot, without a header row or index
# column. The output directory is created first because to_csv does not
# create missing parent directories.
spatial_dir = os.path.join(dir_name, 'spatial')
os.makedirs(spatial_dir, exist_ok=True)
spots.to_csv(os.path.join(spatial_dir, 'tissue_positions_list.csv'), index=False, header=False)

In [18]:
# Generate the scalefactors_json.json file.
# The spot width measured from the SVG is used for both diameters, and both
# image scale factors are 1.0.
# NOTE(review): 1.0 presumably means the hires/lowres images are the
# full-resolution image itself; confirm.
scalefactors = {"spot_diameter_fullres": spot_width,
                "tissue_hires_scalef": 1.0,
                "fiducial_diameter_fullres": spot_width,
                "tissue_lowres_scalef": 1.0}

# Create the output directory first; open() does not create missing
# parent directories.
spatial_dir = os.path.join(dir_name, 'spatial')
os.makedirs(spatial_dir, exist_ok=True)

with open(os.path.join(spatial_dir, 'scalefactors_json.json'), 'w') as outfile:
    json.dump(scalefactors, outfile)