conda create -n NWBinspect python=3.9 

conda install ipykernel

pip install nwbinspector

brew install hdf5

In [100]:
from nwbinspector import inspect_all
from pathlib import Path
# from pynwb import NWBHDF5IO
import numpy as np
import h5py
import os

nwb_file_path = Path("path/to/your_file.nwb") # Edit this

In [None]:
nwb_file_path1 = 'nwb_5e5a2ba5.nwb'
nwb_file_path2 = 'nwb_6af2eb48.nwb'

inspection_results1 = list(inspect_all(path=nwb_file_path1))
print(f"Found {len(inspection_results1)} file divisions\n")
inspection_results2 = list(inspect_all(path=nwb_file_path2))
print(f"Found {len(inspection_results2)} file divisions\n")

In [51]:
def print_nwb_structure(file_path, indent=''):
    """
    Print the hierarchical structure of an NWB file with better error handling
    """
    if not os.path.exists(file_path):
        print(f"Error: File {file_path} does not exist")
        return
        
    try:
        with h5py.File(file_path, 'r') as f:
            print(f"\nStructure of {os.path.basename(file_path)}:")
            
            def visit_item(name, obj):
                level = name.count('/')
                indent = '  ' * level
                if isinstance(obj, h5py.Dataset):
                    shape = obj.shape
                    dtype = obj.dtype
                    print(f"{indent}├── {os.path.basename(name)} (Dataset: shape={shape}, dtype={dtype})")
                else:
                    print(f"{indent}├── {os.path.basename(name)}/")
                
            f.visititems(visit_item)
            
    except Exception as e:
        print(f"Error reading file {file_path}: {str(e)}")


In [61]:
print_nwb_structure(nwb_file_path1)


Structure of nwb_5e5a2ba5.nwb:
├── acquisition/
  ├── TwoPhotonSeries/
    ├── data (Dataset: shape=(0, 0, 0), dtype=uint8)
    ├── external_file (Dataset: shape=(1,), dtype=object)
    ├── format (Dataset: shape=(), dtype=object)
    ├── starting_time (Dataset: shape=(), dtype=float64)
├── analysis/
├── file_create_date (Dataset: shape=(1,), dtype=object)
├── general/
  ├── devices/
    ├── Microscope device/
  ├── experiment_description (Dataset: shape=(), dtype=object)
  ├── optophysiology/
    ├── ImagingPlane/
      ├── OpticalChannel/
        ├── description (Dataset: shape=(), dtype=object)
        ├── emission_lambda (Dataset: shape=(), dtype=float64)
      ├── description (Dataset: shape=(), dtype=object)
      ├── excitation_lambda (Dataset: shape=(), dtype=float64)
      ├── imaging_rate (Dataset: shape=(), dtype=float64)
      ├── indicator (Dataset: shape=(), dtype=object)
      ├── location (Dataset: shape=(), dtype=object)
├── identifier (Dataset: shape=(), dtype=object

In [62]:
print_nwb_structure(nwb_file_path2)


Structure of nwb_6af2eb48.nwb:
├── acquisition/
  ├── TwoPhotonSeries/
    ├── data (Dataset: shape=(0, 0, 0), dtype=uint8)
    ├── external_file (Dataset: shape=(1,), dtype=object)
    ├── format (Dataset: shape=(), dtype=object)
    ├── starting_time (Dataset: shape=(), dtype=float64)
├── analysis/
├── file_create_date (Dataset: shape=(1,), dtype=object)
├── general/
  ├── devices/
    ├── Microscope device/
  ├── experiment_description (Dataset: shape=(), dtype=object)
  ├── optophysiology/
    ├── ImagingPlane/
      ├── OpticalChannel/
        ├── description (Dataset: shape=(), dtype=object)
        ├── emission_lambda (Dataset: shape=(), dtype=float64)
      ├── description (Dataset: shape=(), dtype=object)
      ├── excitation_lambda (Dataset: shape=(), dtype=float64)
      ├── imaging_rate (Dataset: shape=(), dtype=float64)
      ├── indicator (Dataset: shape=(), dtype=object)
      ├── location (Dataset: shape=(), dtype=object)
├── identifier (Dataset: shape=(), dtype=object

In [98]:
def compare_analysis_data(file1_path, file2_path):
    """
    Compare only the analysis data between two NWB files, ignoring timestamps and IDs.
    Lists all matched pairs and reports any differences found.
    """
    analysis_prefixes = ['suite2p_roi_', 'cca_', 'pca_', 'tsne_', 'eta_', 'glm_']
    compared_pairs = {}  # Store matched pairs
    
    def is_analysis_path(path):
        return any(prefix in path for prefix in analysis_prefixes)
    
    def get_base_name(path):
        """Extract the analysis type and parent path"""
        path_parts = Path(path).parts
        for part in path_parts:
            for prefix in analysis_prefixes:
                if prefix in part:
                    parent = str(Path(path).parent).split('/')[-1]
                    return f"{parent}/{part.split('_')[0]}"
        return None
    
    def find_matching_analysis_path(path, f2):
        if not is_analysis_path(path):
            return None
            
        parent_path = str(Path(path).parent)
        if parent_path not in f2:
            return None
            
        # Get the prefix that matched
        matching_prefix = next((prefix for prefix in analysis_prefixes if prefix in path), None)
        if not matching_prefix:
            return None
            
        # Find corresponding path with different ID
        for name in f2[parent_path].keys():
            if name.startswith(matching_prefix):
                return f"{parent_path}/{name}"
        return None
    
    def compare_numerical_data(data1, data2, tolerance=1e-10):
        """Compare numerical data with a small tolerance for floating point differences"""
        try:
            if isinstance(data1, np.ndarray) and isinstance(data2, np.ndarray):
                if data1.dtype.kind in ['f', 'i'] and data2.dtype.kind in ['f', 'i']:
                    if data1.shape != data2.shape:
                        return False, f"Shape mismatch: {data1.shape} vs {data2.shape}"
                    
                    # For floating point, use allclose
                    if data1.dtype.kind == 'f' or data2.dtype.kind == 'f':
                        if not np.allclose(data1, data2, rtol=tolerance, atol=tolerance, equal_nan=True):
                            max_diff = np.max(np.abs(data1 - data2))
                            return False, f"Max difference: {max_diff}"
                    # For integers, use exact equality
                    else:
                        if not np.array_equal(data1, data2):
                            diff_indices = np.where(data1 != data2)
                            diff_values = list(zip(data1[diff_indices], data2[diff_indices]))
                            return False, f"Different values at indices {diff_indices}: {diff_values}"
                            
                    return True, "Arrays match within tolerance"
                    
            return True, "Non-numerical data skipped"
            
        except Exception as e:
            return False, f"Error comparing data: {str(e)}"

    try:
        with h5py.File(file1_path, 'r') as f1, h5py.File(file2_path, 'r') as f2:
            print(f"\nComparing analysis data between:")
            print(f"File 1: {Path(file1_path).name}")
            print(f"File 2: {Path(file2_path).name}\n")
            
            differences_found = False
            
            def visit_and_compare(name, obj1):
                nonlocal differences_found
                
                if not is_analysis_path(name):
                    return
                    
                matching_path = find_matching_analysis_path(name, f2)
                if matching_path is None:
                    return
                
                obj2 = f2[matching_path]
                
                # Store the matched pair
                base_name = get_base_name(name)
                if base_name:
                    name_id = Path(name).name
                    matching_id = Path(matching_path).name
                    if base_name not in compared_pairs:
                        compared_pairs[base_name] = {'id1': name_id, 'id2': matching_id, 'has_diff': False}
                
                if isinstance(obj1, h5py.Dataset) and isinstance(obj2, h5py.Dataset):
                    if obj1.dtype.kind in ['f', 'i'] or obj2.dtype.kind in ['f', 'i']:
                        is_equal, message = compare_numerical_data(obj1[()], obj2[()])
                        if not is_equal:
                            differences_found = True
                            if base_name:
                                compared_pairs[base_name]['has_diff'] = True
                                compared_pairs[base_name]['diff_message'] = message
            
            f1.visititems(visit_and_compare)
            
            # Print comparison results
            print("Comparison Results:")
            print("-" * 50)
            for base_name, info in sorted(compared_pairs.items()):
                diff_status = "differs" if info['has_diff'] else "no difference"
                diff_msg = f" ({info['diff_message']})" if info.get('diff_message') else ""
                print(f"{info['id1']} and {info['id2']}: {diff_status}{diff_msg}")
            
            if not differences_found:
                print("\nAll numerical arrays match within tolerance of 1e-10")
                
    except Exception as e:
        print(f"Error comparing files: {str(e)}")

# Run the comparison
# nwb_file_path1 = '/Users/milesd/Desktop/New_params/nwb_5e5a2ba5.nwb'
# nwb_file_path2 = '/Users/milesd/Desktop/Old_params/nwb_6af2eb48.nwb'

# compare_analysis_data(nwb_file_path1, nwb_file_path2)

In [99]:
compare_analysis_data(nwb_file_path1, nwb_file_path2)


Comparing analysis data between:
File 1: nwb_5e5a2ba5.nwb
File 2: nwb_6af2eb48.nwb

Comparison Results:
--------------------------------------------------
suite2p_roi_8pb7br39ix and suite2p_roi_meyjrw2v43: no difference
suite2p_roi_8pb7br39ix and suite2p_roi_meyjrw2v43: no difference
cca_dgzn8fkhx0_coef and cca_wrm4ex8ptr_coef: no difference
eta_z4ftkfgved_mean and eta_k9a9tbqunt_mean: no difference
pca_acm8tlv7ru_components and pca_3zqyxii6qs_components: no difference
suite2p_roi_8pb7br39ix_all_roi_img and suite2p_roi_meyjrw2v43_all_roi_img: no difference
tsne_n4c54ja6it_projectedNd and tsne_gnmg5v4a6k_projectedNd: no difference

All numerical arrays match within tolerance of 1e-10
