In [1]:
import glob
import os
import sys
from itertools import cycle
from pathlib import Path, PureWindowsPath

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tifffile
import skimage
import pandas as pd
from skimage.filters import threshold_li
from tqdm.notebook import tqdm, trange
from skimage import exposure, io
from joblib import Parallel, delayed
import napari
import anndata as ad
import scanorama
import scanpy as sc

In [2]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell execution, so edits to local source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Locate the metadata folder relative to the notebook's working directory
# (one level up, then data/meta) and load the per-pixel table from it.
meta_dir = Path.cwd().parent / 'data' / 'meta'
path = meta_dir / 'pixels.csv'
df_pixels = pd.read_csv(path)

In [4]:
# Keep only the columns that contain no missing values.
# `axis` is passed by keyword: pandas 2.x made the arguments of
# DataFrame.any keyword-only, so the positional form `.any(0)` raises TypeError.
has_nan_by_column = df_pixels.isna().any(axis=0)
df_pixels = df_pixels.loc[:, ~has_nan_by_column]

In [5]:
# Display the table as the cell output to inspect the columns that remain
# after the NaN-containing columns were dropped.
df_pixels

Unnamed: 0,gapdh,actb,il8,il6,ccl11,col1a1,nanog,sox9,eef2,spp1,runx1,pdl1,ConA,PhA,WGA,Cell Type,FOV,X,Y,Id
0,0.000000,0.000000,0.000000,0.434159,0.000000,0.000000,0.000000,0.000000,0.002801,0.000000,0.001002,0.000000,0.014214,0.032741,0.007044,bm,78,153,664,1
1,0.007665,0.000000,0.259067,0.310300,0.073579,0.005240,0.000000,0.000000,0.000000,0.000000,0.000401,0.006426,0.000000,0.026044,0.014232,bm,78,154,661,1
2,0.006676,0.001078,0.413027,0.017601,0.000000,0.017420,0.000000,0.007618,0.005155,0.009117,0.003106,0.000000,0.023384,0.034230,0.022714,bm,78,155,657,1
3,0.022748,0.002561,0.405625,0.089961,0.000000,0.000000,0.000000,0.002918,0.000000,0.000000,0.000000,0.000000,0.000000,0.026044,0.009919,bm,78,156,663,1
4,0.041292,0.000000,0.345670,0.000000,0.000000,0.006090,0.000000,0.000000,0.001345,0.000000,0.000000,0.000000,0.004585,0.013394,0.010782,bm,78,157,662,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3787119,0.039299,0.006544,0.097046,0.040503,0.000000,0.010634,0.315862,0.005411,0.000000,0.041063,0.004695,0.000000,0.000000,0.077322,0.000000,uc,35,1578,1299,1
3787120,0.050249,0.000000,0.314346,0.000000,0.051908,0.004908,0.499310,0.038859,0.012443,0.013688,0.010531,0.005310,0.036207,0.093154,0.003444,uc,35,1580,1299,1
3787121,0.026159,0.002045,0.124473,0.000000,0.025482,0.006135,0.336552,0.030497,0.000000,0.004026,0.003488,0.000000,0.044828,0.093653,0.000000,uc,35,1581,1294,1
3787122,0.014600,0.013906,0.075949,0.000000,0.070784,0.006544,0.322759,0.023119,0.000000,0.012077,0.008518,0.000000,0.041379,0.089237,0.000000,uc,35,1581,1297,1


# Get intensity columns (drop the metadata columns)

In [6]:
# Keep only the intensity columns by dropping the metadata columns by name.
# A positional slice (`iloc[:, :-5]`) silently selects the wrong columns if the
# column layout changes (e.g. a metadata column removed by the NaN filter);
# dropping by name raises a KeyError instead.
META_COLUMNS = ['Cell Type', 'FOV', 'X', 'Y', 'Id']
df_intensity = df_pixels.drop(columns=META_COLUMNS)

In [7]:
# Wrap the intensity matrix in an AnnData object; each intensity column
# becomes one variable, named after the dataframe column.
adata = sc.AnnData(df_intensity.values)
adata.var_names = list(df_intensity.columns)

# Per-row annotations: obs key -> source column in df_pixels.
# Note that the "Id" column is stored under the obs key "Cell".
obs_sources = {
    "Cell Type": "Cell Type",
    "Cell": "Id",
    "FOV": "FOV",
    "X": "X",
    "Y": "Y",
}
for obs_key, source_column in obs_sources.items():
    adata.obs[obs_key] = df_pixels[source_column].tolist()

  adata = sc.AnnData(df_intensity.values)


In [8]:
# Split the data by field of view (FOV), scale each FOV independently,
# then integrate the FOVs with Scanorama, treating each FOV as one batch.
adatas = []
for batch in adata.obs["FOV"].unique():
    # Slicing an AnnData returns a view. Copy it so that scaling operates on an
    # independent object instead of relying on scanpy's implicit
    # view-to-copy conversion (which also emits a warning).
    adata_subset = adata[adata.obs["FOV"] == batch].copy()
    # max_value=4 clips the scaled values at 4.
    sc.pp.scale(adata_subset, max_value=4)
    adatas.append(adata_subset)

# The return value is not captured, so the integration result is presumably
# stored on the AnnData objects in `adatas` in place
# (NOTE(review): expected under .obsm['X_scanorama'] -- confirm).
# dimred=15 matches the 15 variables reported by Scanorama for this dataset.
scanorama.integrate_scanpy(adatas, sketch=True, dimred=15)

  view_to_actual(adata)


Found 15 genes among all datasets
[[0.     0.9605 0.7112 0.6387 0.5801 0.6331]
 [0.     0.     0.8399 0.754  0.6379 0.6442]
 [0.     0.     0.     0.9404 0.7987 0.7399]
 [0.     0.     0.     0.     0.8344 0.7796]
 [0.     0.     0.     0.     0.     0.869 ]
 [0.     0.     0.     0.     0.     0.    ]]
Processing datasets (0, 1)
Processing datasets (2, 3)
Processing datasets (4, 5)
Processing datasets (1, 2)
Processing datasets (3, 4)
Processing datasets (2, 4)
Processing datasets (3, 5)
Processing datasets (1, 3)
Processing datasets (2, 5)
Processing datasets (0, 2)
Processing datasets (1, 5)
Processing datasets (0, 3)
Processing datasets (1, 4)
Processing datasets (0, 5)
Processing datasets (0, 4)


In [11]:
# Stack the per-FOV AnnData objects back into a single object,
# concatenating along the observation (row) axis.
adatas_cor = ad.concat(adatas, axis=0)

In [12]:
# Save the concatenated AnnData as pixels.h5ad in the data/meta folder
# located one level above the working directory.
meta_dir = Path.cwd().parent / 'data' / 'meta'
path = meta_dir / 'pixels.h5ad'
adatas_cor.write(path)