In [2]:
import sys
import os
from pathlib import Path

# Make the "python" directory that sits beside the current working directory
# importable, so the project modules imported below can be found.
python_dir = str(Path.cwd().parent / "python")
if python_dir not in sys.path:
    sys.path.append(python_dir)

# from nucleus_extractor_manager import (
#     NucleusExtractorManager,
#     NucleusExtractorConfig,
# )

from lineage_tree import read_json_file, classify_node

In [2]:
import importlib
import nucleus_lineage_to_classification

# Re-import the extraction module so on-disk edits (the metadata fixes) are
# picked up without restarting the kernel.
importlib.reload(nucleus_lineage_to_classification)

# Available datasets: short name -> base directory on the cluster filesystem.
datasets = {
    "230212_stack6": "/mnt/ceph/users/lbrown/MouseData/Rebecca/230212_stack6/",
    "220321_stack11": "/mnt/ceph/users/lbrown/MouseData/Eszter1",
    "221016_FUCCI_Nanog_stack_3": "/mnt/ceph/users/lbrown/Labels3DMouse/Abhishek/RebeccaData/221016_FUCCI_Nanog_stack_3/",
}

# Configuration
# NucleusExtractorConfig lives in nucleus_extractor_manager, whose import is
# commented out in the first cell. Instantiating it here therefore raised
# NameError on a fresh kernel and aborted "Restart & Run All". The extraction
# calls in this notebook pass timeframe directly and never read `config`, so
# the two lines stay disabled; re-enable them together with that import if
# the manager-based pipeline is needed again.
# config = NucleusExtractorConfig()
# config.time_window = 1

In [3]:
# Load the lineage graph stored alongside the 230212_stack6 dataset.
forest = read_json_file(
    Path(datasets["230212_stack6"]).joinpath("LineageGraph.json")
)

In [None]:
# Run the metadata-fixed, immediate-save nucleus extraction on 230212_stack6.
#
# timeframe=1 extracts a 3-frame series (t-1, t, t+1) around each timestamp.
# max_samples=None leaves the number of samples per classification uncapped
# (the run log reports "Max samples per classification: unlimited").
results = nucleus_lineage_to_classification.nucleus_extractor(
    forest=forest,
    timeframe=1,
    base_dir=datasets["230212_stack6"],
    output_dir=str(
        Path(
            "/mnt/home/dchhantyal/3d-cnn-classification/data/nuclei_state_dataset",
            "v2",
        )
    ),
    max_samples=None,
)

print("\n🎯 EXTRACTION RESULTS SUMMARY:")
print(
    "✅ Each nucleus should now be saved with correct metadata!",
    "🔧 Missing frames calculation should now be accurate.",
    sep="\n",
)

🚀 Testing METADATA-FIXED immediate-save sliding window nucleus extraction
🔧 Missing frames calculation issue has been resolved!
🚀 Starting nucleus extraction with timestamp-based processing...
Base directory: /mnt/ceph/users/lbrown/MouseData/Rebecca/230212_stack6/
Output directory: /mnt/home/dchhantyal/3d-cnn-classification/data/nuclei_state_dataset/v2
Timeframe: ±1 frames
Max samples per classification: unlimited
📁 Checking available frames in dataset...


📅 EXTRACTION PLAN:
   Total timestamps: 217
   Available frames: 216 (range: 24-239)
   Valid timestamps for extraction: 214
   First valid timestamp: 25
   First five timestamps: [25, 26, 27, 28, 29]
   Timeframe: ±1 frames
   Final frame: 240

📊 CLASSIFICATION DISTRIBUTION:
   • STABLE: 10607 candidates
   • NEW_DAUGHTER: 200 candidates
   • MITOTIC: 100 candidates
   • DEATH: 22 candidates

🔄 STARTING EXTRACTIONS (10929 total nuclei across 214 timestamps)...

🕒 PROCESSING TIMESTAMP 25 (1/214)
   Nuclei to extract: 14
📥 Loading initial window: frames [24, 25, 26]
  ✅ Loaded frame 24
  ✅ Loaded frame 24
  ✅ Loaded frame 25
  ✅ Loaded frame 25
  ✅ Loaded frame 26
  ✅ Loaded frame 26
   📊 Found 15 total nuclei in label volume: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]...
   🎯 Processing 14 valid nuclei with concurrent extraction...
      [Thread 23454896195328] Processing nucleus 10 (STABLE)
      🔍 Extracting nucleus 10 from 3 frames: [24, 25, 26]
      [Thread 23453903591168] Processing nucleus

In [3]:
# Load the lineage graph stored alongside the 220321_stack11 dataset.
forest = read_json_file(
    Path(datasets["220321_stack11"]).joinpath("LineageGraph.json")
)

In [None]:
# Run the metadata-fixed, immediate-save nucleus extraction on 220321_stack11.
# timeframe=1 extracts a 3-frame series (t-1, t, t+1) around each timestamp.
results = nucleus_lineage_to_classification.nucleus_extractor(
    forest=forest,
    timeframe=1,
    base_dir=datasets["220321_stack11"],
    output_dir=str(
        Path(
            "/mnt/home/dchhantyal/3d-cnn-classification/data/nuclei_state_dataset",
            "v2",
        )
    ),
)

print("\n🎯 EXTRACTION RESULTS SUMMARY:")

🚀 Starting nucleus extraction with timestamp-based processing...
Base directory: /mnt/ceph/users/lbrown/MouseData/Eszter1
Output directory: /mnt/home/dchhantyal/3d-cnn-classification/data/nuclei_state_dataset/v2
Timeframe: ±1 frames
Max samples per classification: unlimited
📁 Checking available frames in dataset...
📅 EXTRACTION PLAN:
   Total timestamps: 188
   Available frames: 118 (range: 1-118)
   Valid timestamps for extraction: 116
   First valid timestamp: 2
   First five timestamps: [2, 3, 4, 5, 6]
   Timeframe: ±1 frames
   Final frame: 188

📊 CLASSIFICATION DISTRIBUTION:
   • NEW_DAUGHTER: 82 candidates
   • STABLE: 2992 candidates
   • MITOTIC: 40 candidates

🔄 STARTING EXTRACTIONS (3114 total nuclei across 116 timestamps)...

🕒 PROCESSING TIMESTAMP 2 (1/116)
   Nuclei to extract: 14
📥 Loading initial window: frames [1, 2, 3]
  ✅ Loaded frame 1
  ✅ Loaded frame 2
  ✅ Loaded frame 3
   📊 Found 0 total nuclei in label volume: []
      ❌ Nucleus 9 not found in timestamp 2
      

In [None]:
# Load the lineage graph stored alongside the 221016_FUCCI_Nanog_stack_3 dataset.
forest = read_json_file(
    Path(datasets["221016_FUCCI_Nanog_stack_3"]).joinpath("LineageGraph.json")
)

# Run the metadata-fixed, immediate-save nucleus extraction on that dataset.
# timeframe=1 extracts a 3-frame series (t-1, t, t+1) around each timestamp.
results = nucleus_lineage_to_classification.nucleus_extractor(
    forest=forest,
    timeframe=1,
    base_dir=datasets["221016_FUCCI_Nanog_stack_3"],
    output_dir=str(
        Path(
            "/mnt/home/dchhantyal/3d-cnn-classification/data/nuclei_state_dataset",
            "v2",
        )
    ),
)

print("\n🎯 EXTRACTION RESULTS SUMMARY:")

      💾 Saved to: /mnt/home/dchhantyal/3d-cnn-classification/data/nuclei_state_dataset/v2/stable/Eszter1_frame_007_nucleus_015_count_3
      [Thread 23454454892288] Processing nucleus 12 (STABLE)
      🔍 Extracting nucleus 12 from 3 frames: [6, 7, 8]
      💾 Saved to: /mnt/home/dchhantyal/3d-cnn-classification/data/nuclei_state_dataset/v2/stable/Eszter1_frame_007_nucleus_010_count_4
      [Thread 23454452791040] Processing nucleus 14 (STABLE)
      🔍 Extracting nucleus 14 from 3 frames: [6, 7, 8]
      💾 Saved to: /mnt/home/dchhantyal/3d-cnn-classification/data/nuclei_state_dataset/v2/stable/Eszter1_frame_007_nucleus_001_count_5
      [Thread 23454446487296] Processing nucleus 5 (STABLE)
      🔍 Extracting nucleus 5 from 3 frames: [6, 7, 8]
      💾 Saved to: /mnt/home/dchhantyal/3d-cnn-classification/data/nuclei_state_dataset/v2/stable/Eszter1_frame_007_nucleus_013_count_6
      [Thread 23454450689792] Processing nucleus 11 (STABLE)
      🔍 Extracting nucleus 11 from 3 frames: [6, 7, 8

In [3]:
# Reload the 230212_stack6 lineage graph for the classification checks below.
# The path is spelled out literally, so this cell does not need the `datasets`
# mapping to have been defined.
forest = read_json_file(
    "/mnt/ceph/users/lbrown/MouseData/Rebecca/230212_stack6/"
    "LineageGraph.json"
)

In [4]:
from collections import defaultdict, deque


# Group every node in the forest by its timestamp_ordinal. A defaultdict is
# used so that looking up a timestamp with no nodes yields an empty list.
nodes_by_timestamp = defaultdict(list)
for node in forest.id_to_node.values():
    same_timestamp_nodes = nodes_by_timestamp[node.timestamp_ordinal]
    same_timestamp_nodes.append(node)

In [5]:
# Run the forest's track/lineage discovery ahead of the classify_node loop in
# the following cell.
# NOTE(review): presumably this populates track/lineage data that
# classify_node reads — confirm against lineage_tree.
forest.find_tracks_and_lineages()

In [7]:
# Print the classification of every node at timestamp 66, ordered by node_id.
# NOTE(review): 240 matches the "Final frame: 240" value the extraction log
# reports for 230212_stack6, so it is presumably the final-frame argument;
# the meaning of the trailing 4 is not visible here — TODO confirm both
# against classify_node's signature.
for node in sorted(
    nodes_by_timestamp[66],
    key=lambda candidate: candidate.node_id,
):
    classification = classify_node(node, 240, forest, 4)
    print(f"Node {node.node_id} classified as {classification}")

Node 066_002 classified as stable
Node 066_003 classified as unknown
Node 066_004 classified as stable
Node 066_005 classified as unknown
Node 066_006 classified as stable
Node 066_007 classified as unknown
Node 066_008 classified as unknown
Node 066_009 classified as stable
Node 066_010 classified as unknown
Node 066_011 classified as mitotic
Node 066_012 classified as unknown
Node 066_013 classified as mitotic
Node 066_014 classified as unknown
Node 066_015 classified as mitotic
Node 066_016 classified as new_daughter
Node 066_017 classified as new_daughter
Node 066_018 classified as new_daughter
Node 066_019 classified as new_daughter
Node 066_020 classified as mitotic
