In [1]:
import json
import os
import sys
from pathlib import Path

In [9]:
# Dataset configuration
# Maps a short dataset name to the directory that holds that dataset's
# LineageGraph.json (the file process_dataset reads).
# NOTE(review): these are absolute paths under /mnt/ceph; presumably the
# notebook only runs where that filesystem is available — confirm, and consider
# deriving them from a configurable base directory.
datasets = {
    "230212_stack6": "/mnt/ceph/users/lbrown/MouseData/Rebecca/230212_stack6/",
    "220321_stack11": "/mnt/ceph/users/lbrown/MouseData/Eszter1/",
    "221016_FUCCI_Nanog_stack_3": "/mnt/ceph/users/lbrown/Labels3DMouse/Abhishek/RebeccaData/221016_FUCCI_Nanog_stack_3/",
}

# Which dataset to process: either a key of `datasets`, or the string "all",
# which the processing cell treats as "every entry in `datasets`".
# DATASET_SELECTION = "230212_stack6"  # Change this to switch datasets
DATASET_SELECTION = "221016_FUCCI_Nanog_stack_3"  # Change this to switch datasets


# Extractor settings. In this notebook they are only written verbatim into each
# dataset's *_nucleus_extraction_plan.json file.
# NOTE(review): the extraction cell builds NucleusExtractorConfig() with no
# arguments, so these values do not appear to reach the extractor — confirm.
extractor_config = {
    'time_window': 1,
    'frame_offsets': [-1, 0, 1],
    'crop_padding': 2.0
}

In [10]:
import json
from pathlib import Path
import random

# Process selected dataset(s)
def process_dataset(dataset_name, dataset_path):
    """Classify every nucleus in one dataset's lineage graph.

    Reads ``LineageGraph.json`` from ``dataset_path``. Node names are parsed as
    ``"<frame>_<nucleus id>"`` (both parts converted with ``int``) and each edge's
    ``EndNodes`` pair is read as ``[parent, child]``.

    Each nucleus lands in exactly one category, checked in this order:

    1. ``new_daughters`` - its parent has two or more children.
    2. ``mitotic``       - it has two or more children itself.
    3. ``death``         - it has no children and is not in the last frame.
    4. ``stable``        - everything else.

    Args:
        dataset_name: Label of the dataset. Not used by this function; kept so
            existing callers keep working.
        dataset_path: Directory containing ``LineageGraph.json``.

    Returns:
        ``(nucleus_states, dataset_path)`` where ``nucleus_states`` maps each
        category name to a list of ``(frame, nucleus_id)`` tuples and
        ``dataset_path`` is returned unchanged. A graph with no nodes yields
        four empty lists.

    Raises:
        KeyError: if the JSON object lacks the ``"Nodes"`` or ``"Edges"`` key.
            The message names the file and the keys that were found.
    """
    # Path joining copes with dataset paths that already end in "/".
    lineage_file = Path(dataset_path) / "LineageGraph.json"

    with open(lineage_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Fail with context instead of a bare "KeyError: 'Nodes'" when the file does
    # not follow the expected schema.
    if isinstance(data, dict):
        missing_keys = [key for key in ("Nodes", "Edges") if key not in data]
        if missing_keys:
            raise KeyError(
                f"{lineage_file} is missing required key(s) {missing_keys}; "
                f"top-level keys found: {sorted(data, key=str)}"
            )

    nodes_data = data["Nodes"]
    edges_data = data["Edges"]

    # Create node properties
    nodes = {
        node["Name"]: {"frame": int(node["Name"].split("_")[0])}
        for node in nodes_data
    }

    # Build parent-child relationships
    parent_of = {}
    children_of = {}
    for edge in edges_data:
        parent_name, child_name = edge["EndNodes"][0], edge["EndNodes"][1]
        # If a child appears in several edges, the last edge wins.
        parent_of[child_name] = parent_name
        children_of.setdefault(parent_name, []).append(child_name)

    # Identify nucleus states
    def identify_nucleus_states(nodes, parent_of, children_of):
        """Identify new daughters, mitotic, death, and stable nuclei"""
        nucleus_states = {
            'new_daughters': [],
            'mitotic': [],
            'death': [],
            'stable': []
        }

        # default=None keeps an empty graph from raising; the loop below does
        # not run in that case, so last_frame is never compared.
        last_frame = max((props["frame"] for props in nodes.values()), default=None)

        for nucleus_name in sorted(nodes.keys()):
            frame = nodes[nucleus_name]["frame"]
            nucleus_id = int(nucleus_name.split('_')[1])
            children = children_of.get(nucleus_name, [])
            has_parent = nucleus_name in parent_of

            # Check if this is a new daughter (child of mitotic event)
            if has_parent:
                siblings = children_of.get(parent_of[nucleus_name], [])
                if len(siblings) >= 2:  # Parent had mitotic division
                    nucleus_states['new_daughters'].append((frame, nucleus_id))
                    continue

            # Check if undergoing mitosis (has 2+ children)
            if len(children) >= 2:
                nucleus_states['mitotic'].append((frame, nucleus_id))
                continue

            # Check if dies (no children and not at last frame)
            if len(children) == 0 and frame < last_frame:
                nucleus_states['death'].append((frame, nucleus_id))
                continue

            # Otherwise stable
            nucleus_states['stable'].append((frame, nucleus_id))

        return nucleus_states

    nucleus_states = identify_nucleus_states(nodes, parent_of, children_of)
    return nucleus_states, dataset_path

# Create classification data for all processed datasets
def create_classification_data(nucleus_states):
    """Flatten nucleus states into one-hot label rows.

    Args:
        nucleus_states: Mapping of state name to a list of
            ``(frame, nucleus_id)`` pairs.

    Returns:
        A list of dicts with keys ``frame``, ``nucleus_id``, ``new_daughter``,
        ``mitotic``, ``death`` and ``stable``, ordered by
        ``(frame, nucleus_id)``. A flag is 1 only when the row came from the
        matching state list; rows from any other state name have all flags 0.
    """
    # Output column -> state name that switches it on.
    flag_sources = (
        ('new_daughter', 'new_daughters'),
        ('mitotic', 'mitotic'),
        ('death', 'death'),
        ('stable', 'stable'),
    )

    rows = [
        {
            'frame': frame,
            'nucleus_id': nucleus_id,
            **{column: int(state_name == source) for column, source in flag_sources},
        }
        for state_name, members in nucleus_states.items()
        for frame, nucleus_id in members
    ]

    # Stable sort: rows with equal keys keep their insertion order.
    return sorted(rows, key=lambda row: (row['frame'], row['nucleus_id']))


KeyError: 'Nodes'

In [None]:

# Process dataset(s)

# Upper bound on how many stable nuclei are sampled per dataset, and the seed
# that makes that sample repeatable across kernel restarts.
MAX_STABLE_SAMPLES = 200
STABLE_SAMPLE_SEED = 0

all_results = {}

if DATASET_SELECTION == "all":
    # Best effort: a dataset that fails to load is reported and skipped.
    for dataset_name, dataset_path in datasets.items():
        try:
            nucleus_states, path = process_dataset(dataset_name, dataset_path)
            all_results[dataset_name] = {'states': nucleus_states, 'path': path}
        except Exception as e:
            print(f"Error processing {dataset_name}: {e}")
else:
    if DATASET_SELECTION not in datasets:
        raise KeyError(
            f"Unknown DATASET_SELECTION {DATASET_SELECTION!r}; "
            f"expected 'all' or one of {sorted(datasets)}"
        )
    dataset_path = datasets[DATASET_SELECTION]
    nucleus_states, path = process_dataset(DATASET_SELECTION, dataset_path)
    all_results[DATASET_SELECTION] = {'states': nucleus_states, 'path': path}

# Create classification data and targets
all_classification_data = {}
all_new_daughter_targets = {}
all_stable_targets = {}

for dataset_name, dataset_info in all_results.items():
    nucleus_states = dataset_info['states']

    # Create classification data
    classification_data = create_classification_data(nucleus_states)
    all_classification_data[dataset_name] = classification_data

    # Extract targets for extraction
    all_new_daughter_targets[dataset_name] = nucleus_states['new_daughters']

    stable_entries = [entry for entry in classification_data if entry['stable'] == 1]
    # Limit to MAX_STABLE_SAMPLES stable nuclei. A private, per-dataset seeded
    # generator gives the same sample on every run, whether the dataset is
    # processed alone or under "all", and leaves the global `random` state alone.
    stable_rng = random.Random(f"{STABLE_SAMPLE_SEED}:{dataset_name}")
    stable_entries = stable_rng.sample(
        stable_entries, min(MAX_STABLE_SAMPLES, len(stable_entries))
    )
    all_stable_targets[dataset_name] = [
        (entry["frame"], entry["nucleus_id"]) for entry in stable_entries
    ]

# Save classification data to data/labels directory
# (relative to the kernel's working directory).
output_dir = Path("../../data/labels")
output_dir.mkdir(parents=True, exist_ok=True)

# Save data for each dataset
for dataset_name, classification_data in all_classification_data.items():
    # Save full classification data as a tab-separated table with a header row
    full_output_file = output_dir / f"{dataset_name}_classification_full.txt"
    with open(full_output_file, 'w') as f:
        f.write("frame\tnucleus_id\tnew_daughter\tmitotic\tdeath\tstable\n")
        for entry in classification_data:
            f.write(f"{entry['frame']}\t{entry['nucleus_id']}\t{entry['new_daughter']}\t"
                    f"{entry['mitotic']}\t{entry['death']}\t{entry['stable']}\n")

    # Save extraction plans ((frame, nucleus_id) tuples are serialised as JSON arrays)
    extraction_plan = {
        'dataset': dataset_name,
        'dataset_path': all_results[dataset_name]['path'],
        'extraction_targets': {
            'new_daughters': all_new_daughter_targets[dataset_name],
            'stable': all_stable_targets[dataset_name]
        },
        'extractor_config': extractor_config
    }

    plan_file = output_dir / f"{dataset_name}_nucleus_extraction_plan.json"
    with open(plan_file, 'w') as f:
        json.dump(extraction_plan, f, indent=2)

print(f"✅ Classification data saved for {len(all_results)} dataset(s)")
new_daughter_count = sum(len(targets) for targets in all_new_daughter_targets.values())
stable_count = sum(len(targets) for targets in all_stable_targets.values())
print(f"🎯 Targets: {new_daughter_count} new daughters, {stable_count} stable nuclei")

In [8]:
# Extract nucleus images

# Make the "python" directory that sits next to the kernel's working directory
# importable, so the project-local extractor module can be found.
python_dir = os.path.join(os.path.dirname(os.getcwd()), 'python')
if python_dir not in sys.path:
    sys.path.append(python_dir)

from nucleus_extractor_manager import NucleusExtractorManager, NucleusExtractorConfig
manager_available = True


def _extract_event_group(dataset_name, dataset_path, targets, event_type):
    """Extract and save every target nucleus of one event type.

    Args:
        dataset_name: Dataset label; used as the prefix of the save name.
        dataset_path: Dataset directory handed to NucleusExtractorManager.
        targets: Iterable of ``(frame, nucleus_id)`` pairs.
        event_type: ``"new_daughter"`` or ``"stable"``; stored on each result
            and used as the suffix of the save name.

    Returns:
        ``{"count": <number of saved nuclei>, "output_dir": <last value returned
        by save_extraction_results, or None if nothing was saved>}``.
    """
    # NOTE(review): the config is built with defaults; the notebook's
    # `extractor_config` dict is not passed in — confirm that is intended.
    manager = NucleusExtractorManager(dataset_path, NucleusExtractorConfig())

    saved_count = 0
    saved_dir = None
    failure_count = 0
    first_error = None

    for frame, nucleus_id in targets:
        try:
            result = manager.extract_nucleus_time_series(nucleus_id, frame)
            if not (result and result["extraction_success"]):
                continue

            result["event_type"] = event_type
            result["is_mitotic"] = 0
            result["is_death"] = 0
            result["new_daughter"] = 1 if event_type == "new_daughter" else 0
            result["stable"] = 1 if event_type == "stable" else 0
            # Each nucleus is saved on its own, as before. The return value is
            # presumably the output directory — TODO confirm.
            saved_dir = manager.save_extraction_results(
                [result], f"{dataset_name}_{event_type}"
            )
            saved_count += 1
        except Exception as e:
            # Stay best-effort per nucleus, but remember that something failed
            # instead of discarding the error silently.
            failure_count += 1
            if first_error is None:
                first_error = (frame, nucleus_id, e)

    if failure_count:
        frame, nucleus_id, error = first_error
        print(
            f"  {dataset_name} {event_type}: {failure_count} extraction(s) failed "
            f"(first: frame {frame}, nucleus {nucleus_id}: {error})"
        )

    return {"count": saved_count, "output_dir": saved_dir}


# Maps dataset name -> {"new_daughter": {...}, "stable": {...}}, where each inner
# dict has "count" and "output_dir". Only groups that saved at least one nucleus
# are recorded, so an empty dict still means "nothing was extracted".
extraction_results = {}

for dataset_name in all_results.keys():
    new_daughter_targets = all_new_daughter_targets[dataset_name]
    stable_targets = all_stable_targets[dataset_name]
    dataset_path = all_results[dataset_name]['path']

    print(f"Processing {dataset_name}: {len(new_daughter_targets)} new daughters, {len(stable_targets)} stable")

    dataset_results = {}
    try:
        for event_type, targets in (
            ("new_daughter", new_daughter_targets),
            ("stable", stable_targets),
        ):
            if not targets:
                continue
            group_summary = _extract_event_group(
                dataset_name, dataset_path, targets, event_type
            )
            if group_summary["count"]:
                dataset_results[event_type] = group_summary
    except Exception as e:
        # A failure here (e.g. constructing the manager) skips the rest of
        # this dataset but keeps whatever was already extracted.
        print(f"Error extracting from {dataset_name}: {e}")

    if dataset_results:
        extraction_results[dataset_name] = dataset_results

In [5]:
# Summary
# Reports totals across all datasets, then one line per dataset and category.
if not extraction_results:
    print("No nuclei were extracted")
else:
    # A dataset without a given category contributes 0 to that total.
    total_new_daughters = 0
    total_stable = 0
    for results in extraction_results.values():
        total_new_daughters += results.get("new_daughter", {}).get("count", 0)
        total_stable += results.get("stable", {}).get("count", 0)

    print(
        f"✅ Extraction complete: {total_new_daughters} new daughters, {total_stable} stable nuclei"
    )

    # (result key, label shown in the report), in the order they are printed.
    report_groups = (("new_daughter", "new daughters"), ("stable", "stable"))
    for dataset_name, results in extraction_results.items():
        for group_key, group_label in report_groups:
            if group_key not in results:
                continue
            group = results[group_key]
            print(
                f"  {dataset_name} {group_label}: {group['count']} → {group['output_dir']}"
            )

No nuclei were extracted
