In [1]:
import os 
import sys
from os import listdir
from os.path import isfile, join
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import trange, tqdm
import skimage
from skimage import io, color, exposure
from skimage.util import img_as_ubyte
from skimage.transform import resize
from sklearn.preprocessing import MinMaxScaler
import matplotlib
from pathlib import Path
import re
import h5py
import cv2 as cv
from scipy import signal
import scipy.ndimage
import seaborn as sns
import pandas as pd
import scanpy as sc
import anndata as ad
import cv2

from matplotlib.pyplot import rc_context
# Scanpy logging level: 3 also prints hints (0=errors, 1=warnings, 2=info, 3=hints, 4=debug),
# which is what produces the "computing PCA ... finished" progress lines further down.
sc.settings.verbosity = 3


In [2]:
# Project root = parent of the directory the notebook kernel was started in.
p_dir = Path.cwd().parents[0].absolute()
# All input data lives under <project root>/data.
data_dir = p_dir.joinpath('data')

In [3]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to the project's .py files are picked up live.
%load_ext autoreload
%autoreload 2

# Directory holding the project's own Python modules.
module_path = str(p_dir / "src")

# Guarded so that re-running this cell does not add duplicate sys.path entries.
if module_path not in sys.path:
    sys.path.append(module_path)

# Must come after the sys.path update above, otherwise `utils` cannot be found.
# NOTE(review): `my_utils` is not referenced in the visible part of the notebook.
import utils as my_utils

# Read images

In [4]:
import umap
# Folder with one `<experiment>.hdf5` file per experiment (read by get_imgs).
h5_data_dir = data_dir / 'h5'

def get_imgs(experiment, name):
    """Load one named dataset and its metadata from an experiment's HDF5 file.

    Parameters
    ----------
    experiment : str
        Stem of the HDF5 file; the file read is ``h5_data_dir / '<experiment>.hdf5'``.
    name : str
        Key of the dataset inside that file.

    Returns
    -------
    tuple
        ``(imgs, labels, fov)``: the full dataset contents read into memory,
        the ``'labels'`` attribute converted to a list, and the raw ``'fov'``
        attribute.
    """
    h5_path = h5_data_dir / f'{experiment}.hdf5'
    with h5py.File(h5_path, 'r') as h5_file:
        dataset = h5_file[name]
        # Everything is materialised inside the `with` block, because the
        # dataset handle becomes unusable once the file is closed.
        imgs = dataset[:]
        attrs = dataset.attrs
        labels = list(attrs['labels'])
        fov = attrs['fov']
    return imgs, labels, fov

def get_imgs_index(imgs, index_list):
    """Sum the selected images along the first axis and contrast-stretch the result.

    Parameters
    ----------
    imgs : array-like
        Image stack indexed as ``imgs[index_list, :, :]``.
    index_list : sequence of int
        Positions along the first axis to combine.

    Returns
    -------
    Whatever ``contrast_stretching`` returns for the summed image.
    """
    # NOTE(review): `contrast_stretching` is neither defined nor imported in the
    # visible notebook; presumably it lives in the project's `utils` module -
    # confirm, otherwise this raises NameError when called.
    selected = imgs[index_list, :, :]
    return contrast_stretching(np.sum(selected, axis=0))

def get_mass(file):
    """Build the list of channel labels from a tab-separated peak table.

    The first line of the file is treated as a header and skipped. For every
    remaining non-blank line, the second tab-separated field is parsed as a
    float and rounded to one decimal place.

    Parameters
    ----------
    file : str or path-like
        Path to the tab-separated peak file.

    Returns
    -------
    list
        ``['total', 'rest']`` followed by one rounded mass per data line.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than two tab-separated fields.
    ValueError
        If the second field of a line is not a valid float.
    """
    labels = ['total', 'rest']
    with open(file) as infile:
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(infile, None)
        for line in infile:
            # Tolerate blank or trailing lines instead of failing on split()[1].
            if not line.strip():
                continue
            mass = float(line.split("\t")[1])
            labels.append(np.round(mass, 1))
    return labels

def normalize(imgs, labels, dataset):
    """Turn an image stack into a normalised pixel-by-channel AnnData object.

    Steps:
      1. Flatten every image along the first axis into a row and transpose,
         giving one row per pixel and one column per channel.
      2. Divide each pixel (row) by its median across channels, with a
         pseudo-count of 1 added to numerator and denominator to avoid
         division by zero.
      3. Min-max scale every channel (column) with ``MinMaxScaler`` defaults.

    Parameters
    ----------
    imgs : numpy.ndarray
        3-D stack indexed as ``(channel, row, column)``.
    labels : sequence
        One label per channel; stored (as strings) in ``adata.var_names``.
    dataset : str
        Identifier written to ``adata.obs['Dataset']`` for every pixel.

    Returns
    -------
    anndata.AnnData
        Observations are pixels, variables are channels.
    """
    # Flatten using both spatial dimensions. The previous shape[1]*shape[1]
    # was only correct for square images.
    data_all = imgs.reshape(-1, imgs.shape[1] * imgs.shape[2]).transpose()

    pseudo_count = 1
    per_pixel_median = np.percentile(data_all, 50, axis=1, keepdims=True)
    data_all_norm = (data_all + pseudo_count) / (per_pixel_median + pseudo_count)
    data_all_norm = MinMaxScaler().fit_transform(data_all_norm)

    # Wrap in an AnnData object, keeping the array's dtype.
    adata = ad.AnnData(data_all_norm, dtype=data_all_norm.dtype)
    adata.var_names = np.array(labels).astype(str)
    adata.obs['Dataset'] = dataset
    return adata

def umap_transform(adata):
    """Compute a 3-component UMAP embedding and rescale it, in place.

    Runs scanpy's PCA, neighbour graph and UMAP on ``adata``, then overwrites
    ``adata.obsm['X_umap']`` with a rescaled copy in which the first component
    spans [0, 100] and the second and third span [-128, 127]. The plotting
    cell later feeds this array to ``color.lab2rgb``.

    Parameters
    ----------
    adata : anndata.AnnData
        Modified in place; nothing is returned.
    """
    sc.tl.pca(adata)
    sc.pp.neighbors(adata)
    sc.tl.umap(adata, n_components=3)

    # Bring all three components to a unit range first ...
    scaled = MinMaxScaler().fit_transform(adata.obsm['X_umap'])

    # ... then stretch each component to its own target range.
    axis_ranges = ((0, 100), (-128, 127), (-128, 127))
    for axis, value_range in enumerate(axis_ranges):
        column = scaled[:, axis][:, None]
        scaled[:, axis] = MinMaxScaler(feature_range=value_range).fit_transform(column)[:, 0]

    adata.obsm['X_umap'] = scaled

In [5]:
# Channel labels parsed from the peak table: 'total', 'rest', then one rounded
# mass per row of the file.
# NOTE(review): `masses` is only referenced by a commented-out line in the
# loading loop; the labels actually used come from the HDF5 attributes.
masses = get_mass(data_dir / 'metadata' / 'peak.TXT')

In [6]:
# Experiment to process; also the stem of the HDF5 file read by get_imgs.
experiment = 'lung'

ts_data_dir = p_dir / 'data' / 'tof-sims' / experiment
images_dir = p_dir / 'figures' / 'surface_plot'

# One dataset per entry in the experiment folder, skipping any entry whose
# name contains 'auto'. os.listdir returns entries in arbitrary order, so the
# list is sorted to keep the processing order (and therefore the row order of
# the concatenated AnnData) reproducible across machines and runs.
datasets = sorted(
    dataset for dataset in listdir(ts_data_dir) if 'auto' not in dataset
)

In [None]:
# Index of the first channel to keep; channels 0..m_start-1 are dropped from
# both the image stack and the label list below.
# NOTE(review): presumably these are non-mass summary channels - confirm.
m_start = 5
# One normalised AnnData per dataset, concatenated after the loop.
adatas = []
for dataset in datasets:
    # Load the image stack, its channel labels and the field-of-view attribute.
    print(dataset)
    imgs, labels, fov = get_imgs(experiment, dataset)
    # NOTE(review): `shape` and `fov` are assigned here but not used again in this cell.
    shape = imgs[0].shape[0]
    fov = int(fov)
    
    # Keep only the channels from m_start onwards, with the matching labels.
    imgs_stack = imgs[m_start:,:,:]
    # Disabled alternatives: resize every channel to (fov, fov), or take the
    # labels from the peak table instead of the HDF5 attributes.
    # imgs_stack = resize(imgs_stack, (imgs_stack.shape[0], fov, fov), anti_aliasing=True)
    # labels = masses[m_start:]
    labels = labels[m_start:]
    
    # Normalise into a pixel-by-channel AnnData tagged with the dataset name.
    adata_subset = normalize(imgs_stack, labels, dataset)
    adatas.append(adata_subset)
    print(adata_subset.X.shape)
    
# Stack the pixels of all datasets into a single AnnData object.
adata = ad.concat(adatas)

B5_1


In [8]:
# Show the summary of the concatenated AnnData (pixels x channels, all datasets).
adata

AnnData object with n_obs × n_vars = 7864320 × 61
    obs: 'Dataset'

In [None]:
# Run PCA, the neighbour graph and a 3-component UMAP on all pooled pixels;
# the rescaled embedding is written to adata.obsm['X_umap'] in place.
umap_transform(adata)

computing PCA
    with n_comps=50
    finished (0:00:50)
computing neighbors
    using 'X_pca' with n_pcs = 50


In [None]:
# Seaborn styling does not change between iterations, so apply it once.
sns.set(style='white')
sns.set_color_codes('deep')

# Render each dataset's rescaled UMAP embedding as an image: the three
# embedding components are interpreted as CIELAB values and converted to RGB.
for dataset in datasets:
    print(dataset)
    adata_subset = adata[adata.obs.Dataset == dataset, :]

    # Pixels are stored flattened; the image is assumed to be square.
    n_pixels = len(adata_subset)
    fov = int(np.sqrt(n_pixels))
    if fov * fov != n_pixels:
        raise ValueError(
            f"Dataset {dataset!r} has {n_pixels} pixels, which is not a square image"
        )

    X_umap = adata_subset.obsm['X_umap']

    data_rgb_img = X_umap.reshape(fov, fov, 3)
    data_rgb_img = color.lab2rgb(data_rgb_img)

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(data_rgb_img)
    ax.set_xticks([])
    ax.set_yticks([])
    # Show inside the loop so each figure appears right after its printed
    # dataset name, then close it to free the memory held by the figure.
    plt.show()
    plt.close(fig)


In [20]:
# for dataset in datasets[-1:]:
#     print(dataset)
#     adata_subset = adata[adata.obs.Dataset == dataset, :]
#     fov = int(np.sqrt(len(adata_subset)))
    
#     X_umap = adata_subset.obsm['X_umap']
#     X_umap = MinMaxScaler().fit_transform(X_umap)
    
#     colors = np.clip(X_umap, 0, 1)

#     # Plot rgb umap
#     fig = plt.figure()
#     ax = fig.add_subplot(111, projection='3d')
#     ax.scatter(xs=X_umap[:,0], ys=X_umap[:,1], zs=X_umap[:,2], s=10,  c=colors, lw=0)
#     ax.set_xlabel('R')
#     ax.set_ylabel('G')
#     ax.set_zlabel('B')
#     plt.show()
    
#     x = np.arange(0, fov)
#     y = np.arange(0,  fov)
#     xv, yv = np.meshgrid(x, y)

#     X = xv.reshape(1, -1)[0]
#     Y = yv.reshape(1, -1)[0]

#     assert len(X) == len(X_umap)

#     img = np.zeros((fov, fov, 3), dtype=np.uint8)

#     image_LAB = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
#     X_umap_lab = image_LAB.reshape(-1, 3)
    
#     # Plot lab umap
#     colors = MinMaxScaler().fit_transform(X_umap_lab)
#     fig = plt.figure()
#     ax = fig.add_subplot(111, projection='3d')
#     ax.scatter(xs=X_umap[:,0], ys=X_umap[:,1], zs=X_umap[:,2], s=10,  c=np.clip(colors,a_min=0, a_max=1), lw=0)
#     ax.set_xlabel('L')
#     ax.set_ylabel('A')
#     ax.set_zlabel('B')
#     plt.show()
    
#     # plot rgb image
#     fig, ax = plt.subplots(figsize=(7,7))
#     ax.imshow(img)
#     ax.axis('off')
    
#     # Plot lab image
#     fig, ax = plt.subplots(figsize=(7,7))
#     ax.imshow(image_LAB)
#     ax.axis('off')
#     break