In [48]:
import os
import re
import sys
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import skimage
from skimage import io
from sklearn import preprocessing
from tqdm.notebook import tqdm, trange
import anndata as ad
import cv2
import scanorama
from sklearn.model_selection import train_test_split
import seaborn as sns

In [49]:
# Import spatial omics library
import athena as ath
from spatialOmics import SpatialOmics

# import default graph builder parameters
from athena.graph_builder.constants import GRAPH_BUILDER_DEFAULT_PARAMS

In [50]:
# Datasets root: two directory levels above the notebook's working directory.
d_dir = Path.cwd().parents[1].absolute()
data_dir = d_dir.joinpath("09_datasets")

# Project root: the direct parent of the notebook's working directory.
p_dir = Path.cwd().parents[0].absolute()

In [51]:
# Enable IPython's autoreload extension so edits to project modules are picked
# up without restarting the kernel (mode 2: reload modules before each cell).
%load_ext autoreload
%autoreload 2

# Directory holding the project's own importable modules: <p_dir>/src.
module_path = str(p_dir / "src")

# Append only once so re-running this cell does not grow sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [52]:
import graph
import torch
import torch_geometric.utils
import networkx as nx
import lightning.pytorch as pl
import torch.utils.data as data

# Both folders live under <parent of the working directory>/data.
spatial_omics_folder = Path.cwd().parents[0].absolute().joinpath('data', 'spatial_omics_graph')
process_path = Path.cwd().parents[0].absolute().joinpath('data', 'torch_graph_data')

# Create data loader

In [61]:
from torch_geometric.loader import DataLoader
# Seeded generator that drives the shuffling of the training loader, so the
# order of training batches is reproducible from a fresh kernel.
seed = torch.Generator().manual_seed(42)

name = 'NIH_roi'

# Create dataset
# NOTE(review): the positional arguments 7 and 3 are presumably the number of
# node features and of classes — confirm against graph.GraphDataset.
dataset = graph.GraphDataset(process_path / name, process_path / name / 'info.csv', 7, 3)

train_set, val_set, test_set = graph.train_test_val_split(dataset)

# Create Dataloader
# Only the training loader shuffles (using the seeded generator above); the
# validation and test loaders keep a fixed order so inspection is repeatable.
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, generator=seed)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)


In [62]:
# Summarise the dataset: its repr, size, feature count and class count,
# emitted one item per line.
print(
    f'Dataset: {dataset}:',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

Dataset: GraphDataset(442):
Number of graphs: 442
Number of features: 7
Number of classes: 3


In [63]:
# Report the size of each split; the middle value is the test set.
print(f'Train set: {len(train_set)}, test set: {len(test_set)}, val set: {len(val_set)}')

Train set: 213, val set: 176, val set: 53


In [64]:
# Peek at the first batch of the test loader only: the `break` ends the loop
# after a single iteration.
# NOTE: the loop variable `data` rebinds the alias created earlier by
# `import torch.utils.data as data`, and stays bound to this batch after the
# loop (the next cell indexes into it).
for step, data in enumerate(test_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data)
    print()
    break

Step 1:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 1541148], num_nodes=223612, x=[223612, 7], pos=[223612, 2], node_types=[223612], label=[32], train_mask=[223612], test_mask=[223612], batch=[223612], ptr=[33])



In [65]:
# Display the first graph of the batch that the preceding loop left bound to `data`.
data[0]

Data(edge_index=[2, 20194], x=[2896, 7], pos=[2896, 2], node_types=[2896], label=[1], train_mask=[2896], test_mask=[2896], num_nodes=2896)

# Graph learning

In [57]:
from lightning.pytorch.accelerators import find_usable_cuda_devices
import wandb

In [58]:
# Device indices passed to graph.train_node_classifier in the training cell.
# NOTE(review): hard-coded to GPU 0 — presumably needs adjusting on a machine
# without CUDA; confirm how the training helper interprets this list.
AVAIL_GPUS = [0]
BATCH_SIZE = 64 if AVAIL_GPUS else 32
# Path to the folder where the pretrained models are saved
CHECKPOINT_PATH = (Path().cwd().parents[0]).absolute() / 'data' / "saved_models" / f"GNNs_{name}"
CHECKPOINT_PATH.mkdir(parents=True, exist_ok=True)
# os.cpu_count() returns None when the CPU count cannot be determined; fall
# back to 1 so this line never raises. Half of the available cores, rounded down.
NUM_WORKERS = (os.cpu_count() or 1) // 2

# Setting the seed (kept as the last expression so the cell displays the seed)
pl.seed_everything(42)

INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42


42

In [59]:
# Train every architecture with the same hyper-parameters, logging each one
# to its own Weights & Biases run named after the model.
models = ['MLP', 'GCN', 'GraphConv', 'GAT', 'GINConv', 'SAGEConv']

# Per-model results, so the runs can be compared after the loop; `model`,
# `result` and `trainer` still hold the values of the last trained model.
results_by_model = {}

for model_name in models:
    run = wandb.init(project='snowflake_032423_NIH', name=model_name)
    try:
        model, result, trainer = graph.train_node_classifier(
            model_name, train_set, val_set, test_set,
            dataset, CHECKPOINT_PATH, AVAIL_GPUS,
            hidden_channels=16, num_layers=3,
        )
        results_by_model[model_name] = result
    finally:
        # Always close the W&B run, even if training raised, so a failed
        # model does not leave a run open when the cell is executed again.
        run.finish()

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
  rank_zero_warn(
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name        | Type             | Params
-------------------------------------------------
0 | model       | MLPModel         | 451   
1 | loss_module | CrossEntro

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr-Adam,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_acc,▁
train_acc_epoch,▁▄▇▇▇▇██████████████████████████████████
train_acc_step,▁▃▆▆▇▇▇▇█▇▇▇██▇▇██▇▇██▇▇██▇▇██▇▇██▇▇██▇█
train_loss_epoch,█▅▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▇▄▄▃▃▂▃▂▃▂▃▂▃▂▃▂▃▂▃▂▃▂▃▂▃▃▃▂▂▃▂▂▃▂▂▂▂▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁▆▇▇████████████████████████████████████

0,1
epoch,100.0
lr-Adam,0.005
test_acc,0.76284
train_acc_epoch,0.5639
train_acc_step,0.57175
train_loss_epoch,0.8142
train_loss_step,0.77016
trainer/global_step,700.0
val_acc,0.77132


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333330477276, max=1.0…

INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
  rank_zero_warn(
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name        | Type             | Params
-------------------------------------------------
0 | model       | GNNModel         | 515   
1 | loss_module | CrossEntro

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr-Adam,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_acc,▁
train_acc_epoch,▁▆▇█████████████████████████████████████
train_acc_step,▁▄▆▅▇▆▆▆▇▆▆▆▇▇▆▆▇▇▇▆▇▇▇▆▇▇▆▆▇▇▆▆▇▇▇▆▇▇▇█
train_loss_epoch,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▆▃▃▂▃▂▃▂▃▂▃▂▂▂▃▂▂▂▃▂▂▂▃▂▂▂▃▂▂▂▃▂▂▂▃▂▂▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁▄▇▇████████████████████████████████████

0,1
epoch,100.0
lr-Adam,0.005
test_acc,0.82967
train_acc_epoch,0.835
train_acc_step,0.87548
train_loss_epoch,0.40488
train_loss_step,0.33318
trainer/global_step,700.0
val_acc,0.82356


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01691666666495924, max=1.0)…

INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
  rank_zero_warn(
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name        | Type             | Params
-------------------------------------------------
0 | model       | GNNModel         | 931   
1 | loss_module | CrossEntro

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr-Adam,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_acc,▁
train_acc_epoch,▁▆▇▇▇▇██████████████████████████████████
train_acc_step,▁▁▄▃▅▄▅▄▅▅▅▄▆▅▅▄▆▅▅▅▆▅▅▅▆▅▅▅▆▆▅▅▆▆▅▅▆▆▅█
train_loss_epoch,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,▇█▄▅▃▄▃▄▃▄▃▄▂▃▃▄▂▃▃▃▂▃▃▃▂▃▃▃▂▃▃▃▂▃▃▃▂▃▃▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁▅▆▆▇▇▇▇▇▇▇▇▇█▇█████████████████████████

0,1
epoch,100.0
lr-Adam,0.005
test_acc,0.83726
train_acc_epoch,0.83914
train_acc_step,0.87369
train_loss_epoch,0.40654
train_loss_step,0.34765
trainer/global_step,700.0
val_acc,0.82648


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
  rank_zero_warn(
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name        | Type             | Params
-------------------------------------------------
0 | model       | GAT              | 1.7 K 
1 | loss_module | CrossEntro

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr-Adam,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_acc,▁
train_acc_epoch,▁▅▇▇▇███████████████████████████████████
train_acc_step,▁▃▅▅▆▆▆▅▆▆▆▆▇▆▆▆▇▆▆▆▇▇▆▆▇▇▆▆▇▇▆▆▇▇▆▆▇▇▆█
train_loss_epoch,█▅▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▆▃▃▂▃▂▃▂▂▂▃▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁▄▇▇▇▇██████████████████████████████████

0,1
epoch,100.0
lr-Adam,0.005
test_acc,0.83253
train_acc_epoch,0.8363
train_acc_step,0.87449
train_loss_epoch,0.40773
train_loss_step,0.33306
trainer/global_step,700.0
val_acc,0.82282


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333435779, max=1.0)…

INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
  rank_zero_warn(
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name        | Type             | Params
-------------------------------------------------
0 | model       | GIN              | 1.0 K 
1 | loss_module | CrossEntro

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr-Adam,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_acc,▁
train_acc_epoch,▁▆▇▇████████████████████████████████████
train_acc_step,▁▄▆▅▆▆▆▆▇▆▆▆▇▇▇▆▇▇▇▆▇▇▇▆▇▇▆▆▇▇▆▆▇▇▆▆▇▇▆█
train_loss_epoch,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▆▄▄▃▃▂▃▂▃▂▃▂▂▂▃▂▂▂▃▂▂▂▃▂▂▂▃▂▂▂▃▂▂▂▃▂▂▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁▆▇▇▇███████████████████████████████████

0,1
epoch,100.0
lr-Adam,0.005
test_acc,0.83856
train_acc_epoch,0.84309
train_acc_step,0.88024
train_loss_epoch,0.38798
train_loss_step,0.31363
trainer/global_step,700.0
val_acc,0.82908


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333330477276, max=1.0…

INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
  rank_zero_warn(
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name        | Type             | Params
-------------------------------------------------
0 | model       | GNNModel         | 931   
1 | loss_module | CrossEntro

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr-Adam,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_acc,▁
train_acc_epoch,▁▆▇▇████████████████████████████████████
train_acc_step,▁▃▆▅▆▆▆▆▇▆▆▆▇▆▆▆▇▇▆▆▇▇▆▆▇▇▆▆▇▇▆▆▇▇▆▆▇▇▆█
train_loss_epoch,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▆▃▄▃▃▃▃▂▃▂▃▂▃▂▃▂▂▂▃▂▂▂▃▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁▅▇▇▇▇▇▇████████████████████████████████

0,1
epoch,100.0
lr-Adam,0.005
test_acc,0.83227
train_acc_epoch,0.83781
train_acc_step,0.87283
train_loss_epoch,0.39816
train_loss_step,0.33831
trainer/global_step,700.0
val_acc,0.8254


In [60]:
# metrics = pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
# del metrics["step"]
# metrics.set_index("epoch", inplace=True)
# display(metrics.dropna(axis=1, how="all").head())
# sns.relplot(data=metrics, kind="line")