# Groundwater Forcing Generator

This notebook demonstrates how to generate groundwater forcing files for FVCOM using the `GroundwaterNetCDFGenerator`.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

from xfvcom.grid import FvcomGrid
from xfvcom.io import GroundwaterNetCDFGenerator

## 1. Constant Values Example

Generate a groundwater forcing file with constant values for all nodes and times:

In [None]:
# The FVCOM mesh is read from an ASCII grid file (.dat format)
grid_file = Path("~/Github/TB-FVCOM/goto2023/input/TokyoBay18_grd.dat").expanduser()
output_file = Path("groundwater_constant.nc")

# Scalar flux / temperature / salinity: one value for all nodes and times
gen = GroundwaterNetCDFGenerator(
    grid_nc=grid_file,  # the parameter is named grid_nc, but a .dat path is accepted
    utm_zone=54,  # UTM zone, required when the grid is a .dat file
    # northern=True,  # default; pass False for the southern hemisphere
    start="2025-01-01T00:00:00Z",
    end="2025-01-07T00:00:00Z",
    dt_seconds=3600,  # one-hour time step
    flux=0.001,  # m³/s
    temperature=15.0,  # °C
    salinity=0.5,  # PSU
)

# Write the forcing file to disk
gen.write(output_file)
print(f"Generated: {output_file}")

## 2. Node-Varying Values Example

Create forcing with different values for each node (but constant in time):

In [None]:
# Load the FVCOM grid file (.dat format) to find out how many nodes it has
grid = FvcomGrid.from_dat(grid_file, utm_zone=54)
n_nodes = len(grid.x)
print(f"Grid has {n_nodes} nodes")

# Seeded generator so that Restart & Run All always writes the same file
rng = np.random.default_rng(42)

# One value per node (1D arrays of length n_nodes), constant in time
flux_by_node = rng.uniform(0.0, 0.01, n_nodes)   # random flux values
temp_by_node = rng.uniform(10.0, 20.0, n_nodes)  # random temperatures

# Create generator
gen = GroundwaterNetCDFGenerator(
    grid_nc=grid_file,
    start="2025-01-01T00:00:00Z",
    end="2025-01-02T00:00:00Z",
    dt_seconds=3600,
    utm_zone=54,
    flux=flux_by_node,
    temperature=temp_by_node,
    salinity=0.0,  # Fresh water
)

output_file = Path("groundwater_nodes.nc")
gen.write(output_file)
print(f"Generated: {output_file}")

## 3. Time-Varying Values Example

Create forcing that varies in both space and time:

In [None]:
# Define time parameters
start = "2025-01-01T00:00:00Z"
end = "2025-01-03T00:00:00Z"
dt_hours = 6  # 6-hourly data

# Build the time axis (both endpoints included) to get the number of steps
times = pd.date_range(start, end, freq=f"{dt_hours}h", inclusive="both")
n_times = len(times)
print(f"Time steps: {n_times}")

# Create time-varying flux (e.g., tidal influence): a sinusoid with a period
# of 4 time steps whose phase advances linearly with the node index.
# Broadcasting the (n_nodes, 1) phase column against the (n_times,) step
# vector produces the (n_nodes, n_times) array without a Python loop.
phase = np.arange(n_nodes) * 2 * np.pi / n_nodes
steps = np.arange(n_times)
flux_data = 0.005 * (1 + np.sin(2 * np.pi * steps / 4 + phase[:, np.newaxis]))

# Create generator
gen = GroundwaterNetCDFGenerator(
    grid_nc=grid_file,
    start=start,
    end=end,
    dt_seconds=dt_hours * 3600,
    utm_zone=54,
    flux=flux_data,
    temperature=15.0,  # Constant temperature
    salinity=0.0,      # Fresh water
)

output_file = Path("groundwater_timevarying.nc")
gen.write(output_file)
print(f"Generated: {output_file}")

## 4. Reading Spatio-Temporal Data from CSV Files

For more realistic scenarios, you'll want to read groundwater data from external files. Here's how to read time-varying data from CSV files:

In [None]:
# First, let's create sample CSV files with spatio-temporal data
# Select a subset of nodes that have groundwater (e.g., near rivers or springs)
selected_nodes = [100, 200, 300, 400, 500]  # Example node indices

# Create time series (local time, 6-hourly)
step_hours = 6
times_local = pd.date_range("2025-01-01", "2025-01-07", freq=f"{step_hours}h", tz="Asia/Tokyo")
n_times = len(times_local)

# Seeded generator so the sample CSV is identical on every run
rng = np.random.default_rng(42)

# The temporal pattern is the same for every node, so compute it once.
hours = times_local.hour.to_numpy()              # local hour of day
elapsed_hours = np.arange(n_times) * step_hours  # hours since the first sample

# Daily cycle (stronger during day) + tidal influence.
# The tidal term uses elapsed time in hours so that 12.42 is a period in hours.
daily_var = 0.3 * np.sin(2 * np.pi * hours / 24 - np.pi / 2)
tidal_var = 0.2 * np.sin(2 * np.pi * elapsed_hours / 12.42)

# Create flux CSV: one column per selected node
flux_df = pd.DataFrame({'datetime': times_local})
for node_idx in selected_nodes:
    # Each node has a different base flux
    base_flux = 0.001 + 0.002 * rng.random()
    flux_df[f'node_{node_idx}'] = base_flux * (1 + daily_var + tidal_var)

# Save flux data
flux_csv = Path("groundwater_flux_timeseries.csv")
flux_df.to_csv(flux_csv, index=False)
print(f"Created flux CSV: {flux_csv}")
print(flux_df.head())

In [None]:
# Create temperature CSV: one column per selected node

# Seeded generator so the sample CSV is identical on every run
rng = np.random.default_rng(43)

# The daily cycle depends only on the local hour, so compute it once
hours = times_local.hour.to_numpy()
daily_var = 0.5 * np.sin(2 * np.pi * hours / 24 - np.pi / 2)

temp_df = pd.DataFrame({'datetime': times_local})
for node_idx in selected_nodes:
    # Base temperature between 12 and 15 °C, plus the daily variation
    base_temp = 12 + 3 * rng.random()
    temp_df[f'node_{node_idx}'] = base_temp + daily_var

temp_csv = Path("groundwater_temperature_timeseries.csv")
temp_df.to_csv(temp_csv, index=False)
print(f"Created temperature CSV: {temp_csv}")

### Read CSV Files and Convert to 2D Arrays

In [None]:
def read_spatiotemporal_csv(csv_file, selected_nodes, total_nodes):
    """
    Read wide-format spatio-temporal data from CSV into a dense 2D array.

    CSV format:
    datetime, node_100, node_200, node_300, ...
    2025-01-01 00:00:00, 0.001, 0.002, 0.0015, ...

    Parameters
    ----------
    csv_file : path or file-like
        Anything accepted by ``pandas.read_csv``.
    selected_nodes : iterable of int
        Node indices to look up; each is read from the column ``node_<idx>``.
        Indices without a matching column are skipped and stay zero.
    total_nodes : int
        Number of rows of the returned array.

    Returns
    -------
    times : pandas.DatetimeIndex
        The parsed ``datetime`` column, in file order.
    data_array : numpy.ndarray
        Array of shape (total_nodes, n_times); rows of nodes without data
        are zero.

    Raises
    ------
    IndexError
        If a node that has a column in the file lies outside
        ``[0, total_nodes)``. Negative indices are rejected explicitly
        because NumPy would otherwise wrap them to a different row.
    """
    df = pd.read_csv(csv_file)
    times = pd.DatetimeIndex(pd.to_datetime(df['datetime']))

    # Start from zeros so that nodes without data carry no value
    data_array = np.zeros((total_nodes, len(times)))

    for node_idx in selected_nodes:
        col_name = f'node_{node_idx}'
        if col_name not in df.columns:
            continue
        if not 0 <= node_idx < total_nodes:
            raise IndexError(
                f"node index {node_idx} is out of range for a grid with "
                f"{total_nodes} nodes"
            )
        data_array[node_idx, :] = df[col_name].to_numpy()

    return times, data_array

# Load both wide-format CSV files into (n_nodes, n_times) arrays
flux_times, flux_array = read_spatiotemporal_csv(flux_csv, selected_nodes, n_nodes)
temp_times, temp_array = read_spatiotemporal_csv(temp_csv, selected_nodes, n_nodes)

print(f"Flux array shape: {flux_array.shape}")
print(f"Temperature array shape: {temp_array.shape}")

# A node counts as active when its flux summed over time is positive
active_count = np.sum(flux_array.sum(axis=1) > 0)
print(f"Active nodes: {active_count} out of {n_nodes}")

### Create FVCOM Forcing from CSV Data

In [None]:
# Convert times to UTC (FVCOM standard). tz_convert requires the parsed
# timestamps to be timezone-aware, i.e. the CSV must store a UTC offset.
utc_format = '%Y-%m-%dT%H:%M:%SZ'
start_utc = flux_times[0].tz_convert('UTC').strftime(utc_format)
end_utc = flux_times[-1].tz_convert('UTC').strftime(utc_format)

# Time step taken from the first two timestamps (assumes uniform spacing)
dt_seconds = int((flux_times[1] - flux_times[0]).total_seconds())

print(f"Time range (UTC): {start_utc} to {end_utc}")
print(f"Time step: {dt_seconds} seconds ({dt_seconds/3600} hours)")

# Create groundwater forcing with CSV data
gen = GroundwaterNetCDFGenerator(
    grid_nc=grid_file,
    start=start_utc,
    end=end_utc,
    dt_seconds=dt_seconds,
    utm_zone=54,
    flux=flux_array,         # 2D array from CSV
    temperature=temp_array,  # 2D array from CSV
    salinity=0.0,            # Fresh groundwater
)

output_file = Path("groundwater_from_csv.nc")
gen.write(output_file)
# "\n" (not "\\n") so that a blank line is printed before the message
print(f"\nGenerated: {output_file}")

### Visualize the Data

## 5. Alternative: Long Format CSV

For larger datasets, you might prefer a long format CSV. Before building it, the next cell plots the wide-format flux and temperature series read above:

In [None]:
# Plot flux (top) and temperature (bottom) for the first 3 active nodes
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6), sharex=True)

for node_idx in selected_nodes[:3]:
    ax1.plot(flux_times, flux_array[node_idx, :], label=f'Node {node_idx}')
    ax2.plot(temp_times, temp_array[node_idx, :], label=f'Node {node_idx}')

# Both panels share the same decoration; only the label and title differ
panels = (
    (ax1, 'Flux (m³/s)', 'Groundwater Flux Time Series'),
    (ax2, 'Temperature (°C)', 'Groundwater Temperature Time Series'),
)
for ax, ylabel, title in panels:
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

# The x axis is shared, so only the bottom panel is labelled
ax2.set_xlabel('Time')

plt.tight_layout()
plt.show()

In [None]:
# Build the long-format table: one record per (time step, selected node),
# so only the selected nodes are stored
long_data = [
    {
        'datetime': stamp,
        'node_id': node_idx,
        'flux': flux_array[node_idx, step],
        'temperature': temp_array[node_idx, step],
    }
    for step, stamp in enumerate(flux_times)
    for node_idx in selected_nodes
]

long_csv = Path("groundwater_long_format.csv")
long_df = pd.DataFrame(long_data)
long_df.to_csv(long_csv, index=False)

print(f"Created long format CSV: {long_csv}")
print(f"Total rows: {len(long_df)}")
print(long_df.head())

In [None]:
def read_long_format_csv(csv_file, total_nodes):
    """
    Read long format CSV and convert to dense 2D arrays.

    CSV format:
    datetime, node_id, flux, temperature
    2025-01-01 00:00:00, 100, 0.001, 15.2
    2025-01-01 00:00:00, 200, 0.002, 14.8

    Parameters
    ----------
    csv_file : path or file-like
        Anything accepted by ``pandas.read_csv``.
    total_nodes : int
        Number of rows of the returned arrays.

    Returns
    -------
    times : pandas.DatetimeIndex
        Sorted unique timestamps found in the file.
    flux_array, temp_array : numpy.ndarray
        Arrays of shape (total_nodes, n_times); (node, time) pairs that do
        not appear in the file are zero.

    Raises
    ------
    IndexError
        If any ``node_id`` lies outside ``[0, total_nodes)``. Negative ids
        are rejected explicitly because NumPy would otherwise wrap them to a
        different row.
    """
    df = pd.read_csv(csv_file)
    df['datetime'] = pd.to_datetime(df['datetime'])

    # Sorted unique timestamps define the time axis of the output
    times = pd.DatetimeIndex(df['datetime'].unique()).sort_values()
    n_times = len(times)

    # If a (datetime, node_id) pair occurs more than once, the last row wins.
    # Dropping the earlier rows makes this explicit, since the result of a
    # NumPy fancy-index assignment with repeated indices is not guaranteed.
    df = df.drop_duplicates(subset=['datetime', 'node_id'], keep='last')

    node_idx = df['node_id'].to_numpy(dtype=int)
    out_of_range = (node_idx < 0) | (node_idx >= total_nodes)
    if out_of_range.any():
        raise IndexError(
            f"node_id {node_idx[out_of_range][0]} is out of range for a grid "
            f"with {total_nodes} nodes"
        )

    # Column position of every row's timestamp on the time axis
    time_idx = times.get_indexer(pd.DatetimeIndex(df['datetime']))

    # Initialize arrays with zeros, then scatter all rows in one assignment
    flux_array = np.zeros((total_nodes, n_times))
    temp_array = np.zeros((total_nodes, n_times))
    flux_array[node_idx, time_idx] = df['flux'].to_numpy()
    temp_array[node_idx, time_idx] = df['temperature'].to_numpy()

    return times, flux_array, temp_array

# Round-trip check: the long-format file must reproduce the wide-format arrays
times_long, flux_long, temp_long = read_long_format_csv(long_csv, n_nodes)
print(f"Read {len(times_long)} time steps from long format CSV")

flux_matches = np.allclose(flux_array, flux_long)
temp_matches = np.allclose(temp_array, temp_long)
print(f"Arrays match: flux={flux_matches}, temp={temp_matches}")

## 6. Using the CLI

You can also use the command-line interface:

In [None]:
# Display the help text of the xfvcom-make-groundwater-nc command-line tool
# (the leading "!" makes IPython run the line as a shell command)
!xfvcom-make-groundwater-nc --help

### CLI Examples:

```bash
# Constant values
xfvcom-make-groundwater-nc grid.nc \
    --start 2025-01-01T00:00Z --end 2025-01-07T00:00Z \
    --flux 0.001 --temperature 15.0 --salinity 0.0

# Using CSV files for node-specific values (currently only reads mean values)
xfvcom-make-groundwater-nc grid.nc \
    --start 2025-01-01 --end 2025-12-31 \
    --start-tz Asia/Tokyo \
    --flux groundwater_flux_by_node.csv \
    --temperature 10.0 --salinity 0.0

# For .dat grid files, specify UTM zone
xfvcom-make-groundwater-nc grid.dat --utm-zone 54 \
    --start 2025-01-01T00:00Z --end 2025-01-02T00:00Z \
    --flux 0.005 --temperature 18.0 --salinity 32.0
```

**Note**: The CLI currently doesn't support full time series from CSV files. Use the Python API (as shown above) for spatio-temporal data.

## 7. Verify Output

Check the generated NetCDF file:

In [None]:
# Inspect the file written by the constant-value example; time values are
# left undecoded (decode_times=False)
with xr.open_dataset("groundwater_constant.nc", decode_times=False) as ds:
    print(ds)
    print("\nVariables:")
    for name, data_array in ds.data_vars.items():
        long_name = data_array.attrs.get('long_name', '')
        print(f"  {name}: {data_array.dims} - {long_name}")

In [None]:
# Verify the spatio-temporal output
with xr.open_dataset("groundwater_from_csv.nc", decode_times=False) as ds:
    flux_da = ds.groundwater_flux

    print("\nSpatio-temporal groundwater forcing:")
    print(f"  Time steps: {ds.sizes['time']}")
    print(f"  Active nodes: {(flux_da.sum(dim='time') > 0).sum().values}")
    # Flux samples are in m³/s, so their plain sum is also in m³/s (it is not
    # a volume; that would require multiplying by the time step)
    print(f"  Sum of flux over all nodes and time steps: {float(flux_da.sum()):.3f} m³/s")

    # Calculate mean temperature over the entries with positive flux.
    # Distinct local names avoid overwriting the flux_data array of section 3.
    flux_values = flux_da.values
    temp_values = ds.groundwater_temp.values
    active_temps = temp_values[flux_values > 0]
    print(f"  Mean temperature (active nodes): {active_temps.mean():.2f} °C")

## Summary

This notebook demonstrated several ways to create groundwater forcing files:

1. **Constant values** - Same value for all nodes and times
2. **Node-varying** - Different values per node, constant in time
3. **Time-varying** - Programmatically generated temporal variations
4. **CSV-based spatio-temporal** - Reading realistic data from external files:
   - Wide format CSV (one column per node)
   - Long format CSV (node_id column)

### Key Points for Spatio-Temporal Data:

- The `GroundwaterNetCDFGenerator` accepts 2D numpy arrays of shape (n_nodes, n_times)
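- CSV files can store time series data with timestamps in a local time zone, as long as each timestamp includes its UTC offset (e.g. `+09:00`) so it can be converted to UTC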
- The generator automatically converts times to UTC for FVCOM
- Only nodes with non-zero flux will affect the model
- Grid files can be either NetCDF (.nc) or ASCII (.dat) format

### Next Steps:

To make CSV input more convenient, the package could be enhanced to:
- Add direct CSV reading support in `GroundwaterNetCDFGenerator`
- Update CLI to accept time series files (like `--flux-ts file.csv`)
- Add interpolation for different time resolutions
- Support additional file formats (HDF5, NetCDF)