# Visualization for GSE185862 Mouse scRNA-seq Data

This notebook creates UMAP visualizations for the processed mouse hippocampus scRNA-seq data, similar to the human integration plots.

**Plots created:**
1. UMAP colored by CellType1 (without labels)
2. UMAP colored by `group` (falls back to `donor_sex_label` when no `group` column exists; without labels)
3. FeaturePlot with marker genes (without gene names)

In [1]:
# Import required libraries
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import warnings
warnings.filterwarnings('ignore')

# Configure scanpy: logging verbosity 3 and default figure parameters
# (80 dpi, white figure background)
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=80, facecolor='white')

# Confirm the imports worked and record which scanpy version produced the outputs
print(
    "Libraries imported successfully!",
    f"scanpy version: {sc.__version__}",
    sep="\n",
)

Libraries imported successfully!
scanpy version: 1.11.5


In [2]:
# Locations of the processed AnnData input and of the project output folder
output_dir = "/home/joonho345/1_Epilepsy_RNA/scRNA_Animal/02.GSE185862_10X_Deconv"
input_h5ad = "/home/joonho345/1_Epilepsy_RNA/scRNA_Animal/02.GSE185862_10X_Deconv/GSE185862_HIP_processed.h5ad"

# All figures go into a dedicated sub-directory, created on first run
plot_dir = os.path.join(output_dir, "04_01.visualization")
os.makedirs(plot_dir, exist_ok=True)

# Echo the resolved paths so the run is self-documenting
print(
    f"Input AnnData file: {input_h5ad}",
    f"Output plot directory: {plot_dir}",
    sep="\n",
)

Input AnnData file: /home/joonho345/1_Epilepsy_RNA/scRNA_Animal/02.GSE185862_10X_Deconv/GSE185862_HIP_processed.h5ad
Output plot directory: /home/joonho345/1_Epilepsy_RNA/scRNA_Animal/02.GSE185862_10X_Deconv/04_01.visualization


In [3]:
# Read the processed AnnData object from disk and report its dimensions
print("Loading AnnData object...")
adata = ad.read_h5ad(input_h5ad)
print(
    "  ✓ AnnData object loaded\n"
    f"    Shape: {adata.shape} (cells × genes)\n"
    f"    Cells: {adata.n_obs:,}\n"
    f"    Genes: {adata.n_vars:,}"
)

# List the per-cell annotations, and show the CellType1 breakdown when present
obs_columns = list(adata.obs.columns)
print(f"\n  Available obs columns: {obs_columns}")
if 'CellType1' in obs_columns:
    print("\n  CellType1 distribution:")
    print(adata.obs['CellType1'].value_counts())

Loading AnnData object...
  ✓ AnnData object loaded
    Shape: (82535, 31053) (cells × genes)
    Cells: 82,535
    Genes: 31,053

  Available obs columns: ['donor_sex_id', 'donor_sex_label', 'donor_sex_color', 'region_id', 'region_label', 'region_color', 'platform_label', 'cluster_order', 'cluster_label', 'cluster_color', 'subclass_order', 'subclass_label', 'subclass_color', 'neighborhood_id', 'neighborhood_label', 'neighborhood_color', 'class_order', 'class_label', 'class_color', 'exp_component_name', 'external_donor_name_label', 'full_genotype_label', 'facs_population_plan_label', 'injection_roi_label', 'injection_materials_label', 'injection_method_label', 'injection_type_label', 'full_genotype_id', 'full_genotype_color', 'external_donor_name_id', 'external_donor_name_color', 'facs_population_plan_id', 'facs_population_plan_color', 'injection_materials_id', 'injection_materials_color', 'injection_method_id', 'injection_method_color', 'injection_roi_id', 'injection_roi_color', 'inje

In [4]:
# Make sure UMAP coordinates exist in adata.obsm['X_umap'], computing them
# (PCA -> neighbors -> UMAP) only when no embedding is stored yet.
print("Checking for UMAP coordinates...")

if 'X_umap' in adata.obsm:
    print("  ✓ UMAP coordinates already present in adata.obsm['X_umap']")
elif 'umap' in adata.obsm:
    # Embedding stored under a different key: alias it to 'X_umap', the key
    # the verification step below checks for.
    print("  ✓ UMAP coordinates found in adata.obsm['umap']")
    adata.obsm['X_umap'] = adata.obsm['umap']
else:
    print("  Computing UMAP...")
    # First compute PCA if needed
    if 'X_pca' not in adata.obsm:
        print("    Computing PCA first...")
        sc.tl.pca(adata, svd_solver='arpack', n_comps=50)

    # A pre-existing X_pca may hold fewer than 50 components; requesting more
    # PCs than are stored makes sc.pp.neighbors raise ValueError, so cap the
    # request at what is actually available.
    n_pcs = min(50, adata.obsm['X_pca'].shape[1])
    if n_pcs < 50:
        print(f"    Note: adata.obsm['X_pca'] has only {n_pcs} components; using all of them")

    # Compute neighbors and UMAP
    sc.pp.neighbors(adata, n_neighbors=15, n_pcs=n_pcs)
    sc.tl.umap(adata, min_dist=0.5, spread=1.0)
    print("  ✓ UMAP computed")

# Verify UMAP coordinates
if 'X_umap' in adata.obsm:
    umap_coords = adata.obsm['X_umap']
    print(f"\n  UMAP shape: {umap_coords.shape}")
else:
    print("  ⚠ Warning: UMAP coordinates not found!")

Checking for UMAP coordinates...
  Computing UMAP...
    Computing PCA first...
computing PCA
    with n_comps=50
    finished (0:01:17)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:24)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm)
    'umap', UMAP parameters (adata.uns) (0:00:35)
  ✓ UMAP computed

  UMAP shape: (82535, 2)


In [13]:
# FINAL PLOTS
# Counterpart of the human script: 04.Integ_2_Deconv.R (121-133)

# 1. UMAP colored by CellType1 (no title; legend in the right margin)
print("Creating UMAP plot colored by CellType1...")
fig, ax = plt.subplots(figsize=(8, 6))
sc.pl.umap(
    adata,
    color='CellType1',
    title='',
    legend_loc='right margin',
    frameon=False,
    show=False,
    ax=ax,
)
# NOTE(review): with frameon=False scanpy appears to switch the axes off, in
# which case these labels are not rendered — confirm against the saved PNG.
ax.set(xlabel='UMAP 1', ylabel='UMAP 2')
fig.tight_layout()

# Write the figure at print resolution, then release it
output_path = os.path.join(plot_dir, 'umaprpca_celltype1_final.png')
fig.savefig(output_path, dpi=300, bbox_inches='tight')
print(f"  ✓ Saved: {output_path}")
plt.close(fig)

Creating UMAP plot colored by CellType1...
  ✓ Saved: /home/joonho345/1_Epilepsy_RNA/scRNA_Animal/02.GSE185862_10X_Deconv/04_01.visualization/umaprpca_celltype1_final.png


In [14]:
# 2. UMAP colored by group (if available, without labels)
# Pick the grouping column: prefer 'group', otherwise fall back to
# 'donor_sex_label'; None when neither column exists.
group_col = next(
    (name for name in ('group', 'donor_sex_label') if name in adata.obs.columns),
    None,
)
if group_col == 'donor_sex_label':
    print("  Note: Using 'donor_sex_label' as grouping variable (M/F)")

if not group_col:
    # Nothing to colour by: report it and suggest columns whose names look relevant
    hint_keywords = ['group', 'condition', 'sample', 'sex', 'donor']
    candidate_cols = [
        col for col in adata.obs.columns
        if any(keyword in col.lower() for keyword in hint_keywords)
    ]
    print("  ⚠ No suitable grouping column found. Skipping group plot.")
    print("  Available columns that might be used: ", candidate_cols)
else:
    print(f"Creating UMAP plot colored by {group_col}...")
    fig, ax = plt.subplots(figsize=(8, 6))
    sc.pl.umap(
        adata,
        color=group_col,
        title='',
        legend_loc='right margin',
        frameon=False,
        show=False,
        ax=ax,
    )
    # NOTE(review): frameon=False likely hides the axes, so these labels may
    # not appear in the saved image — confirm.
    ax.set(xlabel='UMAP 1', ylabel='UMAP 2')
    fig.tight_layout()
    output_path = os.path.join(plot_dir, 'umaprpca_group_final.png')
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    print(f"  ✓ Saved: {output_path}")
    plt.close(fig)

  Note: Using 'donor_sex_label' as grouping variable (M/F)
Creating UMAP plot colored by donor_sex_label...
  ✓ Saved: /home/joonho345/1_Epilepsy_RNA/scRNA_Animal/02.GSE185862_10X_Deconv/04_01.visualization/umaprpca_group_final.png


In [15]:
# 3. FeaturePlot with marker genes (without gene names)
# The mouse symbols below are the counterparts of the human markers, in the
# same order: SLC17A7, GAD1, GFAP, PTPRC, MOBP, PDGFRA, PECAM1
markers_human = ["SLC17A7", "GAD1", "GFAP", "PTPRC", "MOBP", "PDGFRA", "PECAM1"]
markers_mouse = ["Slc17a7", "Gad1", "Gfap", "Ptprc", "Mobp", "Pdgfra", "Pecam1"]

# Keep only the markers present in the dataset, warning about each missing one
available_markers = []
for marker in markers_mouse:
    if marker in adata.var_names:
        available_markers.append(marker)
    else:
        print(f"  ⚠ Warning: {marker} not found in dataset")

print(f"\nCreating FeaturePlot with {len(available_markers)} markers...")
print(f"  Available markers: {available_markers}")

if available_markers:
    # One panel per marker, laid out in a single row of up to 7 columns
    fig = sc.pl.umap(adata, color=available_markers, ncols=7,
                     show=False, frameon=False, return_fig=True,
                     legend_loc='none', use_raw=False)

    # fig.axes holds the UMAP panels AND the colorbar axes that accompany each
    # continuous panel. Matplotlib labels auto-created colorbar axes
    # '<colorbar>'; skip those so the colorbars are not given 'UMAP 1' /
    # 'UMAP 2' axis labels.
    for ax in fig.axes:
        if ax.get_label() == '<colorbar>':
            continue
        ax.set_title('')
        ax.set_xlabel('UMAP 1')
        ax.set_ylabel('UMAP 2')

    # 21 x 3 inches, i.e. 6300 x 900 px at the 300 dpi used when saving
    # (about 3 inches of width per panel for 7 columns)
    fig.set_size_inches(21, 3)
    fig.tight_layout()
    output_path = os.path.join(plot_dir, 'umaprpca_marker_final.png')
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    print(f"  ✓ Saved: {output_path}")
    plt.close(fig)
else:
    print("  ⚠ No markers available for plotting")


Creating FeaturePlot with 7 markers...
  Available markers: ['Slc17a7', 'Gad1', 'Gfap', 'Ptprc', 'Mobp', 'Pdgfra', 'Pecam1']
  ✓ Saved: /home/joonho345/1_Epilepsy_RNA/scRNA_Animal/02.GSE185862_10X_Deconv/04_01.visualization/umaprpca_marker_final.png


In [None]:
# Final summary: list each expected plot and flag any that is not on disk,
# instead of claiming every plot was generated unconditionally.
print("\n" + "="*80)
print("VISUALIZATION COMPLETE!")
print("="*80)
print(f"Plots saved to: {plot_dir}")
print("\nGenerated plots:")

# Re-derive the grouping column with the same rule as the group plot cell
# ('group' first, then 'donor_sex_label') so this cell does not depend on
# leftover kernel state.
group_col = next(
    (name for name in ('group', 'donor_sex_label') if name in adata.obs.columns),
    None,
)

# (number, file name, description) for every plot the notebook is expected to write
expected_plots = [(1, 'umaprpca_celltype1_final.png', 'UMAP colored by CellType1')]
if group_col:
    expected_plots.append((2, 'umaprpca_group_final.png', f'UMAP colored by {group_col}'))
expected_plots.append((3, 'umaprpca_marker_final.png', 'FeaturePlot with marker genes'))

# Note: a file left over from an earlier run also counts as present.
for number, filename, description in expected_plots:
    if os.path.isfile(os.path.join(plot_dir, filename)):
        print(f"  {number}. {filename} - {description}")
    else:
        print(f"  ⚠ {number}. {filename} - not found (plot was skipped or failed)")
print("="*80)