In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [5]:
import matplotlib.pyplot as plt
import json
import os
import cv2
import time
import numpy as np
from csbdeep.utils import Path, normalize
import pandas as pd
import anndata as ad
from tqdm import tqdm
import pickle
import sys
sys.path.append("/data_nfs/je30bery/ALS_MELC_Data_Analysis/segmentation/")
sys.path.append("/data_nfs/je30bery/ALS_MELC_Data_Analysis/marker_expression/")
sys.path.append("/data/bionets/je30bery/ALS_MELC_Data_Analysis/segmentation/")
sys.path.append("/data/bionets/je30bery/ALS_MELC_Data_Analysis/marker_expression/")
from melc_segmentation import MELC_Segmentation
from initial_analysis import ExpressionAnalyzer
import anndata as ad
import warnings
warnings.filterwarnings("ignore")


# Key into config.json that selects which dataset directory is processed.
data = "melanoma"

# Read the path configuration. The context manager guarantees the file handle
# is closed again (it was previously opened and never closed).
with open("config.json") as f:
    config = json.load(f)
data_path = config[data]
seg_results_path = config["segmentation_results"]

# Folder that will receive the pickled AnnData objects.
os.makedirs(os.path.join(seg_results_path, "anndata_files"), exist_ok=True)

# Original author's notes on the MELC_Segmentation arguments:
#   membrane_marker: str/None
#   radius: multiple of cell radius
seg = MELC_Segmentation(data_path, membrane_markers=None)

# When True, a comorbidity table is loaded further down in this cell.
comorbidity_info = False

# Lookup table from antibody/marker name to gene symbol(s). A value is either a
# single symbol (str) or a list of symbols when one antibody covers several
# genes. In this notebook only the KEYS are consumed: they are passed to
# ExpressionAnalyzer as `markers_of_interest` and used to filter the expression
# columns (after the last "-"-separated token of each column name is stripped).
# NOTE(review): 'EGFR-AF488' and 'Nestin-AF488' still carry what looks like a
#   fluorophore suffix. A column named "Nestin-AF488" is looked up as "Nestin"
#   by the column filter and therefore would not match any key - confirm
#   whether a plain 'Nestin' key is intended.
# NOTE(review): 'CD66A'/'CD66B'/'CD66C'/'CD66E' look like CD aliases rather
#   than official gene symbols (presumably CEACAM1/8/6/5) - verify before the
#   values are used for gene-level lookups.
antibody_gene_symbols = {
    'ADAM10': 'ADAM10',
    'Bcl-2': 'BCL2',
    'CD10': 'MME',
    'CD107a': 'LAMP1',
    'CD13': 'ANPEP',
    'CD138': 'SDC1',
    'CD14': 'CD14',
    'CD1a': 'CD1A',
    'CD2': 'CD2',
    'CD25': 'IL2RA',
    'CD271': 'NGFR',
    'CD3': ['CD3D', 'CD3E', 'CD3G'],
    'CD36': 'CD36',
    'CD4': 'CD4',
    'CD44': 'CD44',
    # CD45 and its RA/RO variants all map to the same symbol.
    'CD45': 'PTPRC',
    'CD45RA': 'PTPRC',
    'CD45RO': 'PTPRC',
    'CD5': 'CD5',
    'CD56': 'NCAM1',
    'CD6': 'CD6',
    'CD63': 'CD63',
    'CD66abce': ['CD66A', 'CD66B', 'CD66C', 'CD66E'],
    'CD7': 'CD7',
    'CD71': 'TFRC',
    'CD8': ['CD8A', 'CD8B'],
    'CD9': 'CD9',
    'CD95': 'FAS',
    'Collagen IV': ['COL4A1', 'COL4A2'],
    'Cytokeratin-14': 'KRT14',
    'EBF-P': 'EBF1',
    'EGFR': 'EGFR',
    'EGFR-AF488': 'EGFR',
    'HLA-ABC': ['HLA-A', 'HLA-B', 'HLA-C'],
    'HLA-DR': ['HLA-DRA', 'HLA-DRB1', 'HLA-DRB3', 'HLA-DRB4', 'HLA-DRB5'],
    'KIP1': 'CDKN1B',
    'Ki67': 'MKI67',
    'L302': 'NCR3LG1',
    'MCSP': 'CSPG4',
    'Melan-A': 'MLANA',
    'Nestin-AF488': 'NES',
    'Notch-1': 'NOTCH1',
    'Notch-3': 'NOTCH3',
    'PPARgamma': 'PPARG',
    # 'PPB' and 'p63' both map to TP63.
    'PPB': 'TP63',
    'RIM3': 'RIMS3',
    'TAP73': 'TP73',
    'Vimentin': 'VIM',
    'p63': 'TP63',
    'phospho-Connexin': 'GJA1'
}

# Ensure the AnnData output folder is present before results are written.
os.makedirs(os.path.join(seg_results_path, "anndata_files"), exist_ok=True)

# Which segmentation mask the expression values refer to ("cell" or "nuclei").
segment = "cell"
# Collects one AnnData object per field of view, keyed by loop position.
result_dict = {}

if comorbidity_info:
    # Semicolon-separated comorbidity table, indexed by patient id.
    comorbidities = pd.read_csv(
        "/data_slow/je30bery/data/ALS/ALS_comorbidities.txt",
        delimiter=";",
    ).set_index("pat_id")

# Compute per-segment marker expression for every marker listed in the
# antibody -> gene-symbol table.
EA = ExpressionAnalyzer(
    data_path=data_path,
    segmentation_results_dir_path=seg_results_path,
    membrane_markers=None,
    markers_of_interest=list(antibody_gene_symbols),
)
EA.run(segment=segment, profile=None)

# Sorted copy of the expression table with missing values replaced by 0.
# Duplicate rows are not removed.
expression_data = EA.expression_data.sort_index().fillna(0)

# Healthy-control statistics are identical for every field of view, so they are
# computed once up front instead of four times per loop iteration.
# NOTE(review): `iloc[:, :-2]` assumes "Sample" and "Group" are the last two
#   columns of EA.expression_data - confirm against ExpressionAnalyzer.
# NOTE(review): these vectors span all columns of EA.expression_data (taken
#   before fillna), not only the markers kept in each AnnData below - confirm
#   that downstream code does not expect them to line up with adata.var.
healthy_expression = EA.expression_data[EA.expression_data["Group"] == "Healthy"].iloc[:, :-2]
control_mean_expression = healthy_expression.mean(axis=0).values
control_std_expression = healthy_expression.std(axis=0).values

# Build one AnnData object per field of view (FOV) and store it in result_dict
# under the FOV's position in seg.fields_of_view.
for i, fov in enumerate(tqdm(seg.fields_of_view)):
    # Skip entries whose name contains "ipynb" (e.g. notebook checkpoint folders).
    if "ipynb" in fov:
        continue

    seg.field_of_view = fov
    nuclei_pickle_path = os.path.join(seg_results_path, f"{fov}_nuclei.pickle")
    if os.path.exists(nuclei_pickle_path):
        # Reuse stored segmentation results instead of re-running the model.
        # SECURITY: pickle.load can execute arbitrary code; only point
        # seg_results_path at files produced by this pipeline.
        with open(nuclei_pickle_path, "rb") as handle:
            where_nuc = pickle.load(handle)
        with open(os.path.join(seg_results_path, f"{fov}_cell.pickle"), "rb") as handle:
            where_cell = pickle.load(handle)
        nuc = np.load(os.path.join(seg_results_path, f"{fov}_nuclei.npy"))
        cell = np.load(os.path.join(seg_results_path, f"{fov}_cells.npy"))
    else:
        nuc, cell, where_nuc, where_cell = seg.run()

    # Pixel coordinates per segment label, ordered by label.
    where_dict = where_nuc if segment == "nuclei" else where_cell
    where_dict = dict(sorted(where_dict.items()))

    # All expression rows of this FOV (looked up once instead of three times).
    fov_expression = expression_data.loc[fov]
    group = np.unique(fov_expression["Group"].astype(str).values)[0]
    pat_id = np.unique(fov_expression["Sample"].astype(str).values)[0]

    # drop() returns a new frame, so the cached expression_data is not modified.
    exp_fov = fov_expression.drop(["Sample", "Group"], axis=1)

    # Strip the last "-"-separated token (the fluorophore suffix, judging by
    # column names such as "CD11a-PE") and keep only known markers.
    marker_names = ["-".join(column.split("-")[:-1]) for column in exp_fov.columns]
    to_drop = [
        column
        for column, marker in zip(exp_fov.columns, marker_names)
        if marker not in antibody_gene_symbols
    ]
    new_columns = [marker for marker in marker_names if marker in antibody_gene_symbols]

    exp_fov = exp_fov.drop(to_drop, axis=1)
    exp_fov.columns = new_columns
    n_cells = exp_fov.shape[0]

    adata = ad.AnnData(exp_fov)
    # NOTE(review): "gene_symbol" currently holds the antibody names, not the
    #   values of antibody_gene_symbols - confirm whether a mapping is intended.
    adata.var = pd.DataFrame(np.array(new_columns), columns=["gene_symbol"])

    adata.obsm["cellLabelInImage"] = np.array([int(label) for label in list(exp_fov.index)])

    adata.varm["antibody"] = pd.DataFrame(exp_fov.columns, columns=["antibody"])
    # NOTE(review): assumes the sorted where_dict keys are in the same order as
    #   the rows of exp_fov - verify against the segmentation output.
    cell_sizes = np.array([len(where_dict[k][0]) for k in where_dict])
    adata.obsm["cellSize"] = cell_sizes
    adata.obsm["Group"] = np.array([group] * len(cell_sizes))

    adata.uns["patient_id"] = pat_id
    adata.obsm["patient_label"] = np.array([pat_id] * len(cell_sizes))

    if comorbidity_info:
        # TODO: comorbidity annotation is disabled. The former implementation
        # (kept below for reference) indexes the table with the undefined name
        # `sample`; decide which identifier matches the "pat_id" index first.
        #
        # for c in comorbidities.columns:
        #     if "ALS" in sample:
        #         adata.obsm[str(c)] = np.array([str(comorbidities.loc[sample, c])] * exp_fov.shape[0])
        #         adata.uns[str(c)] = str(comorbidities.loc[sample, c])
        #     else:
        #         adata.obsm[str(c)] = np.array(["unknown"] * exp_fov.shape[0])
        #         adata.uns[str(c)] = "unknown"
        pass

    adata.obsm["field_of_view"] = np.array([fov] * n_cells)

    if "spatial" not in adata.uns:
        adata.uns["spatial"] = {}  # Create the "spatial" key if it doesn't exist

    # One copy of the control statistics per cell (obsm) plus a single vector
    # in uns; each AnnData gets its own arrays, as before.
    adata.obsm["control_mean_expression"] = np.array([control_mean_expression] * n_cells)
    adata.obsm["control_std_expression"] = np.array([control_std_expression] * n_cells)
    adata.uns["control_mean_expression"] = control_mean_expression.copy()
    adata.uns["control_std_expression"] = control_std_expression.copy()

    adata.uns["cell_coordinates"] = where_dict
    adata.uns["spatial"]["segmentation"] = nuc if segment == "nuclei" else cell

    result_dict[i] = adata

# Write every per-field-of-view AnnData object into one pickle file inside the
# "anndata_files" folder of the segmentation results directory.
output_path = os.path.join(seg_results_path, "anndata_files", f"adata_{segment}.pickle")
with open(output_path, "wb") as out_file:
    pickle.dump(result_dict, out_file, protocol=pickle.HIGHEST_PROTOCOL)

Segmenting:   0%|                                                                | 0/4 [00:00<?, ?it/s]

Found model '2D_versatile_fluo' for 'StarDist2D'.


2023-12-06 15:41:30.224457: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-12-06 15:41:30.224615: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: idea-weneg.aibe.uni-erlangen.de
2023-12-06 15:41:30.224627: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: idea-weneg.aibe.uni-erlangen.de
2023-12-06 15:41:30.224768: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: 545.23.8
2023-12-06 15:41:30.224798: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 545.23.8
2023-12-06 15:41:30.224805: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:309] kernel version seems to match DSO: 545.23.8


Loading network weights from 'weights_best.h5'.
Loading thresholds from 'thresholds.json'.
Using default values: prob_thresh=0.479071, nms_thresh=0.3.



  0%|                                                                         | 0/2812 [00:00<?, ?it/s][A
  0%|                                                                 | 4/2812 [00:00<01:14, 37.51it/s][A
  0%|▏                                                                | 9/2812 [00:00<01:05, 42.77it/s][A
  0%|▎                                                               | 14/2812 [00:00<01:05, 42.76it/s][A
  1%|▍                                                               | 19/2812 [00:00<01:03, 44.07it/s][A
  1%|▌                                                               | 24/2812 [00:00<01:01, 45.16it/s][A
  1%|▋                                                               | 29/2812 [00:00<01:01, 45.10it/s][A
  1%|▊                                                               | 34/2812 [00:00<01:01, 45.23it/s][A
  1%|▉                                                               | 39/2812 [00:00<01:01, 44.78it/s][A
  2%|█                              

DEL CD11a CD11a-PE
DEL CD11c CD11c-PE
DEL CD16 CD16-FITC
DEL CD274 CD274-PE
DEL CD276 CD276-FITC
DEL CD279 CD279-PE
DEL CD29 CD29-FITC
DEL CD38 CD38-PE
DEL CD39 CD39-FITC
DEL CD40 CD40-PE
DEL CD49d CD49d-PE
DEL CD54 CD54-FITC
DEL CD68 CD68-FITC
DEL CD69 CD69-PE
DEL CD81 CD81-FITC
DEL CD90 CD90-FITC
DEL CHD1 CHD1-AF555
DEL E-Cadherin E-Cadherin-FITC
DEL Fibroblast Fibroblast-FITC
DEL Nestin Nestin-AF488
DEL  PBS
DEL PMEL17 PMEL17-PE
DEL  Propidium iodide
DEL S100 S100-FITC
DEL TNFR1 TNFR1-PE
DEL TNFR2 TNFR2-PE


Calculating expression:  25%|███████████                                 | 1/4 [00:13<00:41, 13.81s/it]

DEL CD11a CD11a-PE
DEL CD11c CD11c-PE
DEL CD16 CD16-FITC
DEL CD274 CD274-PE
DEL CD276 CD276-FITC
DEL CD279 CD279-PE
DEL CD29 CD29-FITC
DEL CD38 CD38-PE
DEL CD39 CD39-FITC
DEL CD40 CD40-PE
DEL CD49d CD49d-PE
DEL CD54 CD54-FITC
DEL CD68 CD68-FITC
DEL CD69 CD69-PE
DEL CD81 CD81-FITC
DEL CD90 CD90-FITC
DEL CHD1 CHD1-AF555
DEL E-Cadherin E-Cadherin-FITC
DEL Fibroblast Fibroblast-FITC
DEL Nestin Nestin-AF488
DEL  PBS
DEL PMEL17 PMEL17-PE
DEL  Propidium iodide
DEL S100 S100-FITC
DEL TNFR1 TNFR1-PE
DEL TNFR2 TNFR2-PE


Calculating expression:  50%|██████████████████████                      | 2/4 [00:26<00:26, 13.27s/it]

DEL CD11a CD11a-PE
DEL CD11c CD11c-PE
DEL CD16 CD16-FITC
DEL CD274 CD274-PE
DEL CD276 CD276-FITC
DEL CD279 CD279-PE
DEL CD29 CD29-FITC
DEL CD38 CD38-PE
DEL CD39 CD39-FITC
DEL CD40 CD40-PE
DEL CD49d CD49d-PE
DEL CD54 CD54-FITC
DEL CD68 CD68-FITC
DEL CD69 CD69-PE
DEL CD81 CD81-FITC
DEL CD90 CD90-FITC
DEL CHD1 CHD1-AF555
DEL E-Cadherin E-Cadherin-FITC
DEL Fibroblast Fibroblast-FITC
DEL Nestin Nestin-AF488
DEL  PBS
DEL PMEL17 PMEL17-PE
DEL  Propidium iodide
DEL S100 S100-FITC
DEL TNFR1 TNFR1-PE
DEL TNFR2 TNFR2-PE


Calculating expression:  75%|█████████████████████████████████           | 3/4 [00:39<00:13, 13.23s/it]

DEL CD11a CD11a-PE
DEL CD11c CD11c-PE
DEL CD16 CD16-FITC
DEL CD274 CD274-PE
DEL CD276 CD276-FITC
DEL CD279 CD279-PE
DEL CD29 CD29-FITC
DEL CD38 CD38-PE
DEL CD39 CD39-FITC
DEL CD40 CD40-PE
DEL CD49d CD49d-PE
DEL CD54 CD54-FITC
DEL CD68 CD68-FITC
DEL CD69 CD69-PE
DEL CD81 CD81-FITC
DEL CD90 CD90-FITC
DEL CHD1 CHD1-AF555
DEL E-Cadherin E-Cadherin-FITC
DEL Fibroblast Fibroblast-FITC
DEL Nestin Nestin-AF488
DEL  PBS
DEL PMEL17 PMEL17-PE
DEL  Propidium iodide
DEL S100 S100-FITC
DEL TNFR1 TNFR1-PE
DEL TNFR2 TNFR2-PE


Calculating expression: 100%|████████████████████████████████████████████| 4/4 [00:53<00:00, 13.32s/it]
 25%|█████████████████                                                   | 1/4 [00:00<00:02,  1.25it/s]

del CD11a
del CD11c
del CD16
del CD274
del CD276
del CD279
del CD29
del CD38
del CD39
del CD40
del CD49d
del CD54
del CD68
del CD69
del CD81
del CD90
del CHD1
del E-Cadherin
del Fibroblast
del Nestin
del 
del PMEL17
del S100
del TNFR1
del TNFR2


 50%|██████████████████████████████████                                  | 2/4 [00:01<00:01,  1.33it/s]

del CD11a
del CD11c
del CD16
del CD274
del CD276
del CD279
del CD29
del CD38
del CD39
del CD40
del CD49d
del CD54
del CD68
del CD69
del CD81
del CD90
del CHD1
del E-Cadherin
del Fibroblast
del Nestin
del 
del PMEL17
del S100
del TNFR1
del TNFR2


 75%|███████████████████████████████████████████████████                 | 3/4 [00:02<00:00,  1.32it/s]

del CD11a
del CD11c
del CD16
del CD274
del CD276
del CD279
del CD29
del CD38
del CD39
del CD40
del CD49d
del CD54
del CD68
del CD69
del CD81
del CD90
del CHD1
del E-Cadherin
del Fibroblast
del Nestin
del 
del PMEL17
del S100
del TNFR1
del TNFR2


100%|████████████████████████████████████████████████████████████████████| 4/4 [00:03<00:00,  1.32it/s]

del CD11a
del CD11c
del CD16
del CD274
del CD276
del CD279
del CD29
del CD38
del CD39
del CD40
del CD49d
del CD54
del CD68
del CD69
del CD81
del CD90
del CHD1
del E-Cadherin
del Fibroblast
del Nestin
del 
del PMEL17
del S100
del TNFR1
del TNFR2





In [6]:
# Show the expression matrix (cells x markers) of the AnnData stored under
# key 0, i.e. the first entry of seg.fields_of_view (absent if that entry was skipped).
result_dict[0].X

array([[0.        , 0.01388889, 0.04166667, ..., 0.        , 0.2020202 ,
        0.23611111],
       [0.        , 0.26468455, 0.01232777, ..., 0.09427121, 0.03698332,
        0.02248006],
       [0.        , 0.76396396, 0.05945946, ..., 0.28288288, 0.15315315,
        0.07027027],
       ...,
       [0.        , 0.00947867, 0.01895735, ..., 0.00236967, 0.04739336,
        0.05924171],
       [0.        , 0.        , 0.0504065 , ..., 0.00325203, 0.15284553,
        0.07642276],
       [0.        , 0.01449275, 0.35362319, ..., 0.        , 0.33913043,
        0.52173913]])