In [4]:
## Count the cells (features) in the segmentation GeoJSON
import json

# Location of the cell-segmentation GeoJSON file to inspect.
file_path = '/rsrch9/home/plm/idso_fa1_pathology/codes/yshokrollahi/vitamin-p-latest/inference_dsp/xenium_cell_results/cell_segmentation.geojson'

# Read the whole file and parse it in one go; the document is held fully in
# memory as `data` afterwards.
with open(file_path) as f:
    data = json.loads(f.read())

# Each entry of the top-level 'features' list is counted as one cell.
num_cells = len(data['features'])
print(f"Number of cells: {num_cells}")

Number of cells: 1140303


In [5]:
#!/usr/bin/env python3
"""
GeoJSON File Splitter
This script splits a large GeoJSON file into 5 equal parts.
The original file remains unchanged.
"""

import json
import os

def split_geojson(file_path=None, output_dir=None, num_parts=5):
    """Split the features of a GeoJSON file into consecutive part files.

    The input file is parsed fully into memory and is only ever opened for
    reading. Its ``features`` list is cut into ``num_parts`` consecutive
    chunks of ``ceil(len(features) / num_parts)`` entries; trailing chunks
    hold whatever remains and can be shorter, or empty when there are fewer
    features than parts. Each chunk is written to
    ``cell_segmentation_part<k>.geojson`` (``k`` starting at 1) inside
    ``output_dir`` together with a copy of every other top-level key of the
    input document.

    Args:
        file_path: Path of the GeoJSON file to split. ``None`` selects the
            project's default ``cell_segmentation.geojson`` location.
        output_dir: Directory that receives the part files; created if it
            does not exist. ``None`` selects the project's default directory.
        num_parts: Number of part files to write (positive integer).

    Returns:
        None. Problems with the arguments or the input file are reported via
        ``print`` and the function returns early without writing anything.
    """
    # Defaults reproduce the original hard-coded locations.
    if file_path is None:
        file_path = '/rsrch9/home/plm/idso_fa1_pathology/codes/yshokrollahi/vitamin-p-latest/inference_dsp/xenium_cell_results/cell_segmentation.geojson'
    if output_dir is None:
        output_dir = '/rsrch9/home/plm/idso_fa1_pathology/codes/yshokrollahi/vitamin-p-latest/inference_dsp/old_xeinum/test'

    if not isinstance(num_parts, int) or num_parts < 1:
        print(f"Error: num_parts must be a positive integer, got {num_parts!r}")
        return

    # Read the original GeoJSON file
    print("Reading GeoJSON file...")
    try:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return
    except (json.JSONDecodeError, UnicodeDecodeError):
        print(f"Error: Invalid JSON format in {file_path}")
        return

    # Valid JSON is not necessarily a feature collection: reject documents
    # without a top-level 'features' list instead of crashing further down.
    features = data.get('features') if isinstance(data, dict) else None
    if not isinstance(features, list):
        print(f"Error: No top-level 'features' list found in {file_path}")
        return

    # Get number of cells
    num_cells = len(features)
    print(f"Number of cells: {num_cells}")

    # Calculate split size (ceiling division)
    split_size = (num_cells + num_parts - 1) // num_parts
    print(f"Split size: approximately {split_size} cells per file")

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    print(f"Output directory: {output_dir}")

    # Top-level members other than 'type'/'features' are copied into every part.
    extra_members = {
        key: value for key, value in data.items()
        if key not in ('type', 'features')
    }

    # Split and save
    print("\nSplitting files...")
    for i in range(num_parts):
        # Clamp both bounds so that parts past the end of the list are empty
        # and the reported count can never become negative.
        start_idx = min(i * split_size, num_cells)
        end_idx = min((i + 1) * split_size, num_cells)

        # Same key order as before: 'type', 'features', then the extras.
        # An object with a 'features' member is a FeatureCollection, which is
        # used as the fallback when the input omits 'type'.
        split_data = {
            'type': data.get('type', 'FeatureCollection'),
            'features': features[start_idx:end_idx],
        }
        split_data.update(extra_members)

        # Save the split file
        part_name = f'cell_segmentation_part{i+1}.geojson'
        output_file = os.path.join(output_dir, part_name)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(split_data, f)

        num_features = end_idx - start_idx
        print(f"  Part {i+1}/{num_parts}: {num_features:,} cells -> {part_name}")

    print("\n✓ Splitting complete!")
    print(f"✓ Original file remains unchanged at: {file_path}")
    print(f"✓ Split files saved in: {output_dir}")

# Entry point: perform the split when this code is executed directly
# (as a script, or as a notebook cell) rather than imported.
if __name__ == '__main__':
    split_geojson()

Reading GeoJSON file...
Number of cells: 1140303
Split size: approximately 228061 cells per file
Output directory: /rsrch9/home/plm/idso_fa1_pathology/codes/yshokrollahi/vitamin-p-latest/inference_dsp/old_xeinum/test

Splitting files...
  Part 1/5: 228,061 cells -> cell_segmentation_part1.geojson
  Part 2/5: 228,061 cells -> cell_segmentation_part2.geojson
  Part 3/5: 228,061 cells -> cell_segmentation_part3.geojson
  Part 4/5: 228,061 cells -> cell_segmentation_part4.geojson
  Part 5/5: 228,059 cells -> cell_segmentation_part5.geojson

✓ Splitting complete!
✓ Original file remains unchanged at: /rsrch9/home/plm/idso_fa1_pathology/codes/yshokrollahi/vitamin-p-latest/inference_dsp/xenium_cell_results/cell_segmentation.geojson
✓ Split files saved in: /rsrch9/home/plm/idso_fa1_pathology/codes/yshokrollahi/vitamin-p-latest/inference_dsp/old_xeinum/test
