## lasso file preprocessing

    This notebook demonstrates how to:
    1. merge different parts of the same slice and save the result in compressed csv format (.gz);
    2. convert bin1 data to bin20, bin50 and bin100 data;
    3. construct an AnnData object suitable for downstream analysis;
    4. visualize raw coordinate images;
    5. perform registration/alignment;
    6. visualize the coordinate image after registration/alignment.

### 1. merge different parts of the same slice and save in compressed csv format(.gz)

In [9]:
import pandas as pd
import stDrosophila as sd
# Merge the two lasso part files of slice S72 into a single table.
file1 = "/media/yao/Elements SE/BGI_Paper/L3_new/L3_a/lasso/lasso_L3_a_bin1_raw/L3_a_S72_1.gem.gz"
file2 = "/media/yao/Elements SE/BGI_Paper/L3_new/L3_a/lasso/lasso_L3_a_bin1_raw/L3_a_S72_2.gem.gz"
data1, data2 = (sd.io.read_lasso(filename=part) for part in (file1, file2))

# Stack the parts row-wise, then order the rows by gene, coordinates and count.
concat_data = pd.concat([data1, data2], axis=0).sort_values(
    by=["geneID", "x", "y", "MIDCounts"]
)

# Write a gzip-compressed, tab-separated file without the row index.
merged_path = "/media/yao/Elements SE/BGI_Paper/L3_new/L3_a/lasso/lasso_L3_a_bin1/L3_a_S72.gem.gz"
concat_data.to_csv(merged_path, sep="\t", index=False, compression="gzip")
print(concat_data)

        geneID      x      y  MIDCounts
35       128up   3491  12626          1
22       128up   3683  12425          1
9        128up   3711  12952          1
24       128up   3720  12940          1
45       128up   3721  12618          1
...        ...    ...    ...        ...
6389360    zye   9988  13194          1
6390106    zye   9989  13465          1
6389609    zye   9993  13563          1
6389969    zye   9993  13566          1
6389425    zye  10003  13134          1

[8420948 rows x 4 columns]


### 2. convert bin1 data to bin20, bin50 and bin100 data (the following takes bin20 as an example)

In [3]:
import os
import stDrosophila as sd
import re

# Folder holding the bin1 lasso files and the bin size to convert them to.
# Set `bin_size` to 50 or 100 to produce the other resolutions; the output
# folder and file names below follow it.
folder = "/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1"
bin_size = 20

# Collect every file below `folder`, sorted by path.
files = sorted(
    os.path.join(root, filename)
    for root, _dirs, filenames in os.walk(folder)
    for filename in filenames
)
print(files)

for file in files:
    # Read the slice number from the "_S<digits>" token of the file name
    # (e.g. "E16-18h_d_S01_bin1.gem" -> "01"). Unlike a fixed negative slice,
    # this keeps working when the extension changes (".gem" vs ".gem.gz").
    slice_match = re.search(r"_S(\d+)", os.path.basename(file))
    if slice_match is None:
        raise ValueError(f"Cannot find a slice id such as '_S01' in file name: {file}")
    file_name = slice_match.group(1)

    bin1_data = sd.io.read_lasso(filename=file)

    print(f"----{file_name} start!")
    # Only the file written via `save` is needed here, so the returned object
    # is not kept.
    sd.pp.bin1tobinx(
        bin1_data,
        bin_size,
        save=f"/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin{bin_size}/E16_18_d_S{file_name}.gem.gz",
    )
    print(f"----{file_name} successfully!")

['/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S01_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S02_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S03_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S04_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S05_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S06_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S07_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S08_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S09_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S10_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S11_bin1.gem', '/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16-18h_d_S12_bin1.gem', '/media/yao/Ele

### 3. construct an AnnData object suitable for downstream analysis (the following takes bin20 as an example)

In [5]:
import os
import stDrosophila as sd

# Input folder with the bin20 lasso files and output folder for the .h5ad files.
folder = "/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin20/raw_lasso"
out_folder = "/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin20/raw_adata"
# Create the output folder up front so the first write does not fail.
os.makedirs(out_folder, exist_ok=True)

# Collect every file below `folder`, sorted by path.
files = sorted(
    os.path.join(root, filename)
    for root, _dirs, filenames in os.walk(folder)
    for filename in filenames
)

for file in files:
    data = sd.io.read_lasso(filename=file)
    file_name = os.path.basename(file)

    # The last 7 characters are dropped to get the slice name and the two
    # characters before them are parsed as the z index, i.e. names are
    # expected to look like "<prefix>NN.gem.gz" -- TODO confirm for new data.
    z = int(file_name[-9:-7])
    adata = sd.io.lasso2adata(data=data, slice=file_name[:-7], z_gap=7, z=z)

    # `.iloc[0]` is explicit positional access; `obs['slice'][0]` depends on
    # the deprecated positional fallback of `Series.__getitem__`.
    slice_name = adata.obs["slice"].iloc[0]
    adata.write_h5ad(os.path.join(out_folder, f"{slice_name}.h5ad"))

... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical
... storing 'slice' as categorical


### 4. visualize raw coordinate images

In [7]:
import os
import anndata as ad
import stDrosophila as sd
# Load every raw AnnData file below `folder` (sorted by path) and plot the
# coordinates of all slices, saving the figure to the given PNG path.
folder = "/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/raw_adata"
files = sorted(
    os.path.join(root, filename)
    for root, _dirs, filenames in os.walk(folder)
    for filename in filenames
)

# `ad.read` is a deprecated alias of `ad.read_h5ad`; call the reader directly.
adata_list = [ad.read_h5ad(file) for file in files]

sd.pl.spatial_plot(
    adata=adata_list,
    cluster_col=None,
    save="/media/yao/Elements SE/BGI_Paper/E16_18_d/E16_18_d_bin1/E16_18_d_bin1_raw_coords.png",
    slice_col="slice",
    spot_size=1,
)

<Figure size 432x288 with 0 Axes>

### 5. registration/alignment (It is recommended to run the following code in a .py file)

In [None]:
# multi-slices Example (slice_alignment_bigBin):
import os
import anndata as ad
import stDrosophila as sd
import torch
## Enter raw anndata data(slices)
folder = "/media/yao/Elements SE/BGI_Paper/L3_new/L3_a/raw_adata/L3_a_bin20"
files = sorted(
    os.path.join(root, filename)
    for root, _dirs, filenames in os.walk(folder)
    for filename in filenames
)
# `ad.read` is a deprecated alias of `ad.read_h5ad`; call the reader directly.
slices = [ad.read_h5ad(file) for file in files]

## Enter raw anndata data(slices_big)
folder_big = "/media/yao/Elements SE/BGI_Paper/L3_new/L3_a/raw_adata/L3_a_bin100"
files_big = sorted(
    os.path.join(root, filename)
    for root, _dirs, filenames in os.walk(folder_big)
    for filename in filenames
)
slices_big = [ad.read_h5ad(file) for file in files_big]

## Alignment
# Both lists are sorted by path, so the bin20 and bin100 slices are passed in
# the same file order. The call requests the first CUDA device explicitly.
align_slices, align_slices_big = sd.tl.slice_alignment_bigBin(
    slices=slices,
    slices_big=slices_big,
    alpha=0.1,
    numItermax=200,
    numItermaxEmd=1000000,
    device=torch.device("cuda:0"),
    verbose=True,
)

## Save the slices and the slices_big after alignment
opath = "/media/yao/Elements SE/BGI_Paper/L3_new/L3_a/align_adata/L3_a_bin20"
opath_big = "/media/yao/Elements SE/BGI_Paper/L3_new/L3_a/align_adata/L3_a_bin100"
for out_dir, aligned in ((opath, align_slices), (opath_big, align_slices_big)):
    # `makedirs` also creates the missing parent folder ("align_adata"),
    # which a plain `os.mkdir` would fail on.
    os.makedirs(out_dir, exist_ok=True)
    for aligned_slice in aligned:
        # One file per slice, named after the value in its `slice` column;
        # `.iloc[0]` avoids the deprecated positional `Series[0]` lookup.
        slice_name = aligned_slice.obs["slice"].iloc[0]
        aligned_slice.write_h5ad(os.path.join(out_dir, f"{slice_name}.h5ad"))


Whether CUDA is currently available: True
Device: NVIDIA GeForce RTX 3060
GPU total memory: 11 GB


 Alignment :   0%|          | 0/57 [00:00<?, ?it/s]

### 6. visualize the coordinate image after registration/alignment

In [2]:
import os
import anndata as ad
import stDrosophila as sd
# Load every aligned AnnData file below `folder` (sorted by path) and plot the
# coordinates of all slices, saving the figure to the given PNG path.
folder = "/media/yao/Elements SE/BGI_Paper/L3_new/L3_a/align_adata/L3_a_bin20"
files = sorted(
    os.path.join(root, filename)
    for root, _dirs, filenames in os.walk(folder)
    for filename in filenames
)

# `ad.read` is a deprecated alias of `ad.read_h5ad`; call the reader directly.
adata_list = [ad.read_h5ad(file) for file in files]

sd.pl.spatial_plot(
    adata=adata_list,
    cluster_col=None,
    save="/media/yao/Elements SE/BGI_Paper/L3_new/L3_a/align_adata/L3_a_bin20_align_coords.png",
    slice_col="slice",
    spot_size=1,
)

<Figure size 432x288 with 0 Axes>