In [None]:
import pandas as pd                     # Data manipulation and analysis (DataFrames)
import matplotlib.pyplot as plt         # Plotting and figure generation
import seaborn as sns                   # Statistical visualization with clean styles
import scanpy as sc                     # Single-cell / spatial omics analysis toolkit
import numpy as np                      # Numerical computing and arrays
from sklearn.decomposition import PCA   # Dimensionality reduction (principal components)
from sklearn.neighbors import NearestNeighbors  # k-NN search / graph construction
import igraph as ig                     # Graph data structures and algorithms
import leidenalg as la                  # Leiden community detection (graph clustering)
from umap.umap_ import fuzzy_simplicial_set  # UMAP fuzzy graph construction (advanced)
import umap                             # UMAP embeddings for DR/visualization
import os                               # OS utilities (paths, files)
import math                             # Math functions
import warnings                         # Control/suppress warnings
warnings.filterwarnings('ignore')       # Hide non-critical warnings for cleaner logs
import gzip                             # Read/write .gz compressed files
import json                             # JSON I/O (configs, metadata)
import pickle                           # Serialize/deserialize Python objects

# import tensorflow as tf               # (Optional) Deep learning backend (disabled)
import sys                              # Access Python path, argv, etc.
sys.path.append('/home/shamini/')       # Add custom code directory to Python path

# --- Duplicates below (already imported above) ---
from sklearn.decomposition import PCA   # (Duplicate) PCA already imported
from sklearn.neighbors import NearestNeighbors  # (Duplicate) already imported
import igraph as ig                     # (Duplicate) already imported
import leidenalg as la                  # (Duplicate) already imported
# from umap.umap_ import fuzzy_simplicial_set  # (Duplicate) already imported
import umap                             # (Duplicate) already imported

from sklearn.model_selection import train_test_split  # Split data into train/test sets

In [None]:
# Categorical colour palette used for plotting (25 hex colours).
# Each comment gives the colour's name (where one was recorded) followed by the
# R,G,B value decoded from the hex code on that same line.
colors_palette = [
    "#ebac23",  # 235,172,35
    "#b80058",  # lipstick 184,0,88
    "#008cf9",  # azure 0,140,249
    "#006e00",  # green 0,110,0
    "#00bbad",  # caribbean 0,187,173
    "#d163e6",  # lavender 209,99,230
    "#b24502",  # brown 178,69,2
    "#ff9287",  # coral 255,146,135
    "#5954d6",  # indigo 89,84,214
    "#00c6f8",  # turquoise 0,198,248
    "#878500",  # olive 135,133,0
    "#00a76c",  # jade 0,167,108
    "#274d52",  # plantation 39,77,82
    "#c7a2a6",  # eunry 199,162,166
    "#818b70",  # battleship 129,139,112
    "#604e3c",  # kabul 96,78,60
    "#8c9fb7",  # balihai 140,159,183
    "#796880",  # rum 121,104,128
    "#56641a",  # fernfrond 86,100,26
    "#c0affb",  # perfume 192,175,251
    "#e6a176",  # apricot 230,161,118
    "#00678a",  # orient 0,103,138
    "#984464",  # vinrouge 152,68,100
    "#5eccab",  # downy 94,204,171
    "#bdbdbd",  # gray 189,189,189
]

In [None]:
### VM - Donnelly
# Root working directory on the Donnelly VM.
working_dir = '/home/mystique27m/ext_gpu_hd/hackathon/'

# Base output directory; every path below is built beneath it.
main_out = f'{working_dir}out/'

### Object directories.
# Each path keeps its trailing slash so a file name can be appended to it directly.

# Input: objects written by script01c.
src_obj_dir = f'{main_out}script01c_output_objects/'

# (Optional, currently disabled) directory for the script01c control-probe objects.
#out_obj_destdir_control = main_out+'script01c_output_objects_control/'

# Output: objects written by this script (script01d).
dst_obj_dir = f'{main_out}script01d_output_objects/'

# Create the output directory (and any missing parents); a no-op if it already exists.
os.makedirs(dst_obj_dir, exist_ok=True)

In [None]:
# Show the names of all entries (files and folders) in the script01c object directory.
# Note: os.listdir() returns the entries in arbitrary order, so positions in this
# listing are not guaranteed to be stable between runs or machines.
os.listdir(path=src_obj_dir)

In [None]:
# Load one of the script01c AnnData objects.
#
# os.listdir() returns entries in arbitrary, filesystem-dependent order and may include
# non-.h5ad entries, so indexing its raw result is not reproducible. Restrict the listing
# to .h5ad files and sort it so that "the second .h5ad file" is well defined.
h5ad_files = sorted(
    entry for entry in os.listdir(src_obj_dir) if entry.endswith('.h5ad')
)
if len(h5ad_files) < 2:
    raise FileNotFoundError(
        f'Expected at least two .h5ad files in {src_obj_dir!r}, found: {h5ad_files}'
    )

# NOTE(review): index 1 is kept from the original cell; confirm that the second file in
# sorted order is the intended input, or replace this with an explicit file name.
adata_path = os.path.join(src_obj_dir, h5ad_files[1])
print(f'Loading AnnData from: {adata_path}')  # record which file was actually used
adata = sc.read_h5ad(adata_path)

# Display the observation (obs) DataFrame of the loaded AnnData object:
# - Rows = cells
# - Columns = cell-level metadata (presumably sample ids, counts, annotations, clusters;
#   verify against the loaded file)
adata.obs


In [None]:
# Scatter plot of the cells at their x/y positions (columns 'x' and 'y' of adata.obs),
# coloured by the 'annotation_level_3' label.
hue_col = 'annotation_level_3'
hue_values = adata.obs[hue_col]

# Size the palette to the number of hue levels instead of passing the full colour list.
# Older seaborn releases reject a list palette whose length differs from the number of
# levels, while newer ones only emit a warning (hidden by this notebook's warnings filter)
# and silently truncate/cycle the list. Doing the same truncate/cycle explicitly yields the
# same colours and behaves consistently across versions.
if hasattr(hue_values, 'cat'):
    # Categorical column: every declared category counts as a level.
    n_levels = len(hue_values.cat.categories)
else:
    # Plain column: one level per distinct non-null value.
    n_levels = hue_values.nunique()
hue_palette = [colors_palette[i % len(colors_palette)] for i in range(n_levels)]

fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    x='x',
    y='y',
    data=adata.obs,
    hue=hue_col,
    palette=hue_palette,
    s=1,  # very small points, suitable for a large number of cells
    ax=ax,
)

# x/y are treated as spatial coordinates, so use the same scale on both axes to avoid
# stretching the layout.
ax.set_aspect('equal')
ax.set(title=f'Cells coloured by {hue_col}', xlabel='x', ylabel='y')

# Place the legend outside the axes so it does not cover the points, and enlarge its
# markers (points drawn with s=1 are otherwise almost invisible in the legend).
handles, labels = ax.get_legend_handles_labels()
if handles:
    ax.legend(
        handles,
        labels,
        title=hue_col,
        loc='upper left',
        bbox_to_anchor=(1.02, 1),
        markerscale=6,
        frameon=False,
    )

In [None]:
# Write the AnnData observation table (adata.obs) to a CSV file in the
# destination directory:
# - one row per entry of adata.obs, with all of its columns
# - output file name: 'gbm_obs_with_annotations_level_3.csv'
adata.obs.to_csv(
    path_or_buf=os.path.join(dst_obj_dir, 'gbm_obs_with_annotations_level_3.csv'),
)