# FVCOM Member-Node Mapping Demo
**Author: Jun Sasaki  Coded on 2025-10-16**<br>

This notebook demonstrates how to extract and visualize member-to-node mappings from FVCOM ensemble dye runs.

## Features
- Parse FVCOM namelist files to extract dye source configurations
- Identify which nodes are active in each ensemble member
- Extract node coordinates from FVCOM grid file
- Visualize member-node mappings in tables and maps with mesh overlay

## 1. Setup and Imports

In [None]:
import sys
from pathlib import Path

# Treat the directory two levels above the working directory as the xfvcom
# root and put it first on sys.path so the package imports below resolve.
xfvcom_root = Path.cwd().parents[1]
sys.path.insert(0, f"{xfvcom_root}")

import matplotlib.pyplot as plt
import pandas as pd

from xfvcom.ensemble_analysis.member_info import (
    export_member_mapping,
    extract_member_node_mapping,
    get_member_summary,
    get_node_coordinates,
)

print("✓ Imports successful", f"xfvcom root: {xfvcom_root}", sep="\n")

## 2. Configuration

In [None]:
# Filesystem layout: checkout root, namelist directory, model output directory
tb_fvcom_dir = Path("~/Github/TB-FVCOM").expanduser()
nml_dir, output_dir = (
    tb_fvcom_dir / "goto2023/dye_run",
    tb_fvcom_dir / "goto2023/dye_run/output/2021",
)

# Identifiers of the ensemble case to analyse
basename, year = "tb_w18_r16", 2021
members = [*range(19)]  # Members 0-18

# Echo the configuration so a run is self-describing
print(
    f"TB-FVCOM directory: {tb_fvcom_dir}",
    f"Namelist directory: {nml_dir}",
    f"Output directory: {output_dir}",
    f"Configuration: {basename}, year {year}, members {members}",
    sep="\n",
)

## 3. Extract Member-Node Mapping

Parse all namelist files to extract which nodes are active in each member.

In [None]:
# Build the member-node mapping table (one row per source-member combination)
mapping_df = extract_member_node_mapping(
    nml_dir=nml_dir, basename=basename, year=year, members=members
)

# Report the table size and schema, then preview the first rows
print(
    f"Extracted mapping for {len(mapping_df)} source-member combinations",
    f"\nDataFrame shape: {mapping_df.shape}",
    f"Columns: {list(mapping_df.columns)}",
    sep="\n",
)
mapping_df.head(10)

## 4. Member Summary

Get a summary of active sources for each member.

In [None]:
# Per-member summary derived from the same namelist inputs as the mapping
summary_df = get_member_summary(
    nml_dir=nml_dir, basename=basename, year=year, members=members
)

print("Member Summary:", "=" * 80, sep="\n")
# Lift the column-width cap so long cell values are rendered in full
pd.set_option('display.max_colwidth', None)
summary_df

## 5. Examine Specific Members

Look at detailed configuration for specific members.

In [None]:
# Member 0: All sources (reference)
print("Member 0 (All sources):", "=" * 60, sep="\n")
# Keep only the mapping rows that belong to member 0
member_0 = mapping_df.loc[mapping_df['member'].eq(0)]
print(f"Total sources: {len(member_0)}")
print(f"Total dye release rate: {member_0['strength'].sum():.2f}")
print("\nSources:")
# Final expression: rendered by the notebook as the cell output
member_0.loc[:, ['source_name', 'node_id', 'strength', 'source_type']]

In [None]:
# Member 1: Arakawa group
print("Member 1 (Arakawa group):", "=" * 60, sep="\n")
# Keep only the mapping rows that belong to member 1
member_1 = mapping_df.loc[mapping_df['member'].eq(1)]
print(f"Total sources: {len(member_1)}")
print(f"Total dye release rate: {member_1['strength'].sum():.2f}")
print("\nSources:")
# Final expression: rendered by the notebook as the cell output
member_1.loc[:, ['source_name', 'node_id', 'strength', 'source_type']]

In [None]:
# Member 2: Sumidagawa group
print("Member 2 (Sumidagawa group):", "=" * 60, sep="\n")
# Keep only the mapping rows that belong to member 2
member_2 = mapping_df.loc[mapping_df['member'].eq(2)]
print(f"Total sources: {len(member_2)}")
print(f"Total dye release rate: {member_2['strength'].sum():.2f}")
print("\nSources:")
# Final expression: rendered by the notebook as the cell output
member_2.loc[:, ['source_name', 'node_id', 'strength', 'source_type']]

## 6. Extract Node Coordinates

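Get geographic coordinates for all active nodes from the FVCOM grid file (recommended method). If the grid file is not found, the cell falls back to reading coordinates from a NetCDF output file.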

In [None]:
# Collect the distinct node IDs referenced by any ensemble member.
unique_nodes = mapping_df['node_id'].unique()
print(f"Total unique nodes across all members: {len(unique_nodes)}")
# .tolist() converts to plain Python scalars so the list prints as
# "[1, 2, ...]" instead of "[np.int64(1), ...]" under NumPy >= 2.
print(f"Node IDs: {sorted(unique_nodes.tolist())}")

# Use grid file for coordinate extraction (more reliable than NetCDF output).
# The path is derived from tb_fvcom_dir so it follows the configured checkout
# location instead of repeating it as a second literal.
grid_file = tb_fvcom_dir / "goto2023/input/TokyoBay18_grd.dat"
utm_zone = 54

if grid_file.exists():
    # Extract coordinates from grid file (recommended method)
    coords_df = get_node_coordinates(
        nc_file=None,  # Not used when grid_file is provided
        node_ids=unique_nodes.tolist(),
        grid_file=grid_file,
        utm_zone=utm_zone,
    )

    print(f"\n✓ Extracted coordinates from grid file for {len(coords_df)} nodes")
    print("Coordinate range:")
    print(f"  Longitude: {coords_df['lon'].min():.6f} - {coords_df['lon'].max():.6f}")
    print(f"  Latitude:  {coords_df['lat'].min():.6f} - {coords_df['lat'].max():.6f}")
    # A bare expression inside an `if` body is never rendered by the notebook
    # (only a cell's final top-level expression is), so print the preview.
    print(coords_df.head(10))
else:
    print(f"Warning: Grid file not found: {grid_file}")
    print("Trying to use NetCDF file instead...")

    # Fallback: try NetCDF file. The file name uses the configured `year`
    # rather than a hard-coded 2021 so it stays in sync with the configuration.
    sample_nc = output_dir / "0" / f"{basename}_{year}_0_0001.nc"

    if sample_nc.exists():
        try:
            # Only the extraction call is guarded; reporting happens in `else`.
            coords_df = get_node_coordinates(sample_nc, unique_nodes.tolist())
        except Exception as e:
            # Deliberately best-effort: later cells check whether coords_df
            # exists, so report the failure and carry on.
            print(f"Error extracting coordinates from NetCDF: {e}")
            print("Coordinate extraction failed.")
        else:
            print(f"\n✓ Extracted coordinates from NetCDF for {len(coords_df)} nodes")
            print(coords_df.head(10))
    else:
        print(f"Warning: Sample NetCDF file not found: {sample_nc}")
        print("Skipping coordinate extraction")

## 7. Merge Mapping with Coordinates

Combine member-node mapping with geographic coordinates.

In [None]:
# Attach lon/lat to every mapping row when coordinates were extracted earlier.
if 'coords_df' in locals():
    # Left join on node_id keeps every mapping row, including rows whose node
    # has no entry in coords_df.
    mapping_with_coords = mapping_df.merge(
        coords_df,
        on='node_id',
        how='left',
    )

    print(f"Merged DataFrame shape: {mapping_with_coords.shape}")
    print(f"Columns: {list(mapping_with_coords.columns)}")
    # A bare expression inside an `if` body is never rendered by the notebook
    # (only a cell's final top-level expression is), so print the preview.
    print(mapping_with_coords.head(10))
else:
    print("Skipping merge (coordinates not available)")

## 8. Visualize Node Locations

Plot the locations of dye release nodes on a map.

In [None]:
# Plot the dye release nodes: on the FVCOM mesh over map tiles when the grid
# file is available, otherwise as a plain lon/lat scatter plot.
if 'coords_df' in locals() and not coords_df.empty:
    # grid_file and utm_zone are reused from the coordinate-extraction cell
    # (the cell that creates coords_df also defines them). Re-declaring them
    # here with duplicate literals would let the two cells drift apart.
    if grid_file.exists():
        # Mesh-plotting dependencies are imported only on this branch so the
        # scatter-plot fallback below does not require them.
        from cartopy.io.img_tiles import GoogleTiles
        from xfvcom import (
            FvcomInputLoader,
            FvcomPlotter,
            FvcomPlotConfig,
            FvcomPlotOptions,
            make_node_marker_post,
        )

        print(f"Loading grid file: {grid_file}")

        # Load grid using FvcomInputLoader
        loader = FvcomInputLoader(
            grid_path=grid_file,
            utm_zone=utm_zone,
            add_dummy_time=False,
            add_dummy_siglay=False,
        )

        grid_ds = loader.ds
        grid_obj = loader.grid  # not read again in this cell

        # Create plotter for visualization
        cfg = FvcomPlotConfig()
        plotter = FvcomPlotter(grid_ds, cfg)

        # Get node IDs from coords_df (1-based)
        node_list = coords_df['node_id'].astype(int).tolist()
        print(f"Number of nodes to plot: {len(node_list)}")
        print(f"Node IDs (1-based): {node_list}")

        # Pull the coordinate arrays and node count once instead of on every
        # loop iteration.
        grid_lons = grid_ds.lon.values
        grid_lats = grid_ds.lat.values
        n_grid_nodes = len(grid_ds.lon)

        # Extract coordinates from grid_ds using 0-based indexing
        # This is the correct way as shown in demo_node_checker.ipynb
        node_lons = []
        node_lats = []
        valid_nodes = []

        for node_id_1based in node_list:
            node_idx_0based = node_id_1based - 1  # Convert to 0-based

            if 0 <= node_idx_0based < n_grid_nodes:
                lon = float(grid_lons[node_idx_0based])
                lat = float(grid_lats[node_idx_0based])
                node_lons.append(lon)
                node_lats.append(lat)
                valid_nodes.append(node_id_1based)
                print(f"  Node {node_id_1based}: lon={lon:.6f}, lat={lat:.6f}")
            else:
                print(f"  Warning: Node {node_id_1based} out of range (max={n_grid_nodes})")

        if not valid_nodes:
            print("ERROR: No valid nodes found!")
        else:
            # Calculate coordinate ranges from grid_ds coordinates
            lon_min, lon_max = min(node_lons), max(node_lons)
            lat_min, lat_max = min(node_lats), max(node_lats)
            print("\nCoordinate range from grid_ds:")
            print(f"  Longitude: [{lon_min:.6f}, {lon_max:.6f}]")
            print(f"  Latitude:  [{lat_min:.6f}, {lat_max:.6f}]")

            # Calculate buffer - handle edge case of single point or very
            # close points: below 0.001 degrees (~100 m) of spread use a fixed
            # 0.05 degree (~5 km) margin, otherwise 20% of the spread.
            lon_range = lon_max - lon_min
            lat_range = lat_max - lat_min
            lon_buffer = 0.05 if lon_range < 0.001 else lon_range * 0.2
            lat_buffer = 0.05 if lat_range < 0.001 else lat_range * 0.2

            xlim = (lon_min - lon_buffer, lon_max + lon_buffer)
            ylim = (lat_min - lat_buffer, lat_max + lat_buffer)

            print("\nMap extent with buffer:")
            print(f"  Longitude: [{xlim[0]:.6f}, {xlim[1]:.6f}]")
            print(f"  Latitude:  [{ylim[0]:.6f}, {ylim[1]:.6f}]")

            # Define marker and text styling (matching demo_node_checker.ipynb cell 10)
            mkw = {"marker": "o", "color": "red", "markersize": 5, "zorder": 5}
            tkw = {"fontsize": 10, "color": "yellow", "ha": "left", "va": "top",
                   "zorder": 6, "clip_on": True}

            # Create post-processing function for node markers
            pp_nodes = make_node_marker_post(
                valid_nodes,
                plotter,
                marker_kwargs=mkw,
                text_kwargs=tkw,
                index_base=1,
                respect_bounds=False,  # Show all specified nodes
            )

            # Plot options (matching demo_node_checker.ipynb cell 10)
            opts = FvcomPlotOptions(
                figsize=(12, 10),
                add_tiles=True,
                tile_provider=GoogleTiles(style="satellite"),
                mesh_color="lightgray",
                mesh_linewidth=0.2,
                title=f"Dye Release Node Locations on FVCOM Mesh ({len(valid_nodes)} nodes)",
                xlim=xlim,
                ylim=ylim,
            )

            # Create the plot (da=None: no data array is passed, only the
            # post-processing hook that draws the node markers)
            print("\nCreating plot...")
            ax = plotter.plot_2d(da=None, post_process_func=pp_nodes, opts=opts)

            plt.tight_layout()
            plt.show()

            print(f"\n✓ Plotted {len(valid_nodes)} dye release nodes on FVCOM mesh")
    else:
        print(f"✗ Grid file not found: {grid_file}")
        print("  Cannot create mesh plot. Falling back to simple scatter plot.")

        # Fallback to simple plot using coords_df
        fig, ax = plt.subplots(figsize=(12, 8))
        ax.scatter(
            coords_df['lon'],
            coords_df['lat'],
            c='blue',
            s=100,
            alpha=0.6,
            edgecolors='black',
            linewidth=1,
        )
        # Label every point with its node ID
        for _, row in coords_df.iterrows():
            ax.annotate(
                str(int(row['node_id'])),
                xy=(row['lon'], row['lat']),
                xytext=(5, 5),
                textcoords='offset points',
                fontsize=8,
                bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7),
            )
        ax.set_xlabel('Longitude', fontsize=12)
        ax.set_ylabel('Latitude', fontsize=12)
        ax.set_title('Dye Release Node Locations', fontsize=14)
        ax.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()
else:
    print("Skipping visualization (coordinates not available)")

## 9. Source Type Analysis

Analyze the distribution of rivers vs sewers across members.

In [None]:
# Cross-tabulate: one row per member, one column per source type,
# with 0 where a member has no source of that type
source_type_counts = (
    mapping_df
    .groupby(['member', 'source_type'])
    .size()
    .unstack(fill_value=0)
)

print("Source Type Distribution by Member:", "=" * 60, sep="\n")
source_type_counts

In [None]:
# Grouped bar chart of the per-member source counts, one bar colour per type
fig, ax = plt.subplots(figsize=(12, 6))

source_type_counts.plot.bar(ax=ax, width=0.8, color=['#1f77b4', '#ff7f0e'])

# Axis labelling and legend
ax.set_title('Number of Active Sources by Member and Type', fontsize=14)
ax.set_xlabel('Member', fontsize=12)
ax.set_ylabel('Number of Sources', fontsize=12)
ax.legend(title='Source Type', fontsize=10)
# Horizontal guide lines only
ax.grid(True, axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

## 10. Export Results

Export member-node mappings to various formats for documentation.

In [None]:
# Exports go to an "output" folder next to the working directory
export_dir = Path.cwd().parent / "output"
export_dir.mkdir(exist_ok=True)

print(f"Export directory: {export_dir}")

# Full mapping -> CSV
export_member_mapping(mapping_df, export_dir / "member_node_mapping.csv", format='csv')

# Member summary -> CSV
summary_csv = export_dir / "member_summary.csv"
summary_df.to_csv(summary_csv, index=False)
print(f"Exported to: {summary_csv}")

# Full mapping -> markdown
export_member_mapping(mapping_df, export_dir / "member_node_mapping.md", format='markdown')

# Node coordinates -> CSV, only when an earlier cell produced them
if 'coords_df' in locals():
    coords_csv = export_dir / "node_coordinates.csv"
    coords_df.to_csv(coords_csv, index=False)
    print(f"Exported to: {coords_csv}")

print("\n✓ Export complete")

## 11. Summary

Review the key findings.

In [None]:
# Banner and the configuration that was analysed
print("MEMBER-NODE MAPPING SUMMARY", "=" * 80, "Configuration:", sep="\n")
print(
    f"  Case: {basename}",
    f"  Year: {year}",
    f"  Members analyzed: {len(members)}",
    "",
    "Results:",
    sep="\n",
)

# Overall counts taken from the mapping table
print(f"  Total unique nodes: {len(mapping_df['node_id'].unique())}")
print(f"  Total unique sources: {len(mapping_df['source_name'].unique())}")
print(f"  Source-member combinations: {len(mapping_df)}")
print()

# Distinct source names per source type
print("Source breakdown:")
print(f"  Rivers: {len(mapping_df.loc[mapping_df['source_type'] == 'River', 'source_name'].unique())}")
print(f"  Sewers: {len(mapping_df.loc[mapping_df['source_type'] == 'Sewer', 'source_name'].unique())}")
print()

# One line for each of members 0-5 that appears in the summary table
print("Member configuration patterns:")
for member in range(6):
    member_data = summary_df.loc[summary_df['member'] == member]
    if member_data.empty:
        continue
    n_sources = member_data['n_sources'].iloc[0]
    sources = member_data['source_names'].iloc[0]
    print(f"  Member {member}: {n_sources} sources - {sources}")
print("=" * 80)