# Helmholtz Solver: High-Frequency Production (ω=128)
**Project Goal:** Generate a validated dataset of 1,875 samples for frequencies [32, 64, 128].
**Target Accuracy:** Reflection ratio < 0.001 (0.1%).
**Grid Config:** 501x501 physical, 64-pt PML (629x629 total).

In [None]:
import os
import json
import numpy as np
import scipy.sparse as sp
import matplotlib.pyplot as plt
from scipy.sparse.linalg import factorized

# Logical output location on WAVE6.MIT.EDU. The directory is created up front
# (no error if it already exists) so that later cells can write into it.
SAVE_DIR = "experiments/data/helmholtz_501_v1"
os.makedirs(name=SAVE_DIR, exist_ok=True)

# Echo the destination so the run log records where the data went.
print(f"Environment ready. Data will be saved to: {SAVE_DIR}")

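## The Physics Engine
We implement a discrete coordinate-stretching PML. Inside the absorbing layers the complex stretching factor is $s = 1 + i\,\sigma/\omega$, where $\sigma$ grows quadratically with depth into the layer and reaches $d_0$ at the outer edge. Scaling the Laplacian coefficients by $1/s^2$ damps outgoing waves inside the layer, so that they are absorbed before they can reflect off the numerical boundary.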

In [None]:
def build_helmholtz_system(omega, n_phys=501, n_pml=64, d0=80.0):
    """
    Assemble the sparse 2-D Helmholtz matrix on a square grid padded with PML.

    The operator is the 5-point discretisation of
    ``-(1/sx**2) d2/dx2 - (1/sy**2) d2/dy2 - k**2`` with ``k = omega``.
    The stretching factor is ``s = 1`` on physical nodes and
    ``s = 1 + 1j * sigma / k`` inside the layers, where
    ``sigma = d0 * (depth / n_pml)**2`` and ``depth`` counts nodes into the
    layer (1 next to the physical domain, ``n_pml`` at the outer edge).

    Grid node ``(i, j)`` maps to matrix row ``i + j * n_total``. Every row is
    scaled by its *own* ``1/s**2`` on the diagonal and on all four neighbour
    couplings, so the Laplacian part annihilates constants at every interior
    node. As a consequence the matrix is not symmetric inside the PML.
    Neighbours outside the total grid are simply dropped from the stencil.

    Args:
        omega: Angular frequency; used directly as the wavenumber ``k``.
        n_phys: Number of nodes per side of the physical domain. The grid
            spacing is ``1 / (n_phys - 1)``.
        n_pml: Number of PML nodes added on each of the four sides.
        d0: Peak value of the absorption profile ``sigma``.

    Returns:
        Tuple ``(A, n_total, n_pml, h)``: the ``n_total**2`` square complex
        matrix in CSC format, the total nodes per side
        (``n_phys + 2 * n_pml``), the PML thickness as passed in, and the
        grid spacing.

    Raises:
        ZeroDivisionError: If ``n_phys == 1``, or if ``omega == 0`` while
            ``n_pml > 0`` (the stretching factor divides by ``k``).
    """
    n_total = n_phys + 2 * n_pml
    h = 1.0 / (n_phys - 1)
    k = omega

    # 1. Coordinate stretching profile (s-values), identical along x and y.
    s_vals = np.ones(n_total, dtype=complex)
    if n_pml > 0:
        if k == 0:
            # Fail loudly instead of letting numpy fill the matrix with inf/nan.
            raise ZeroDivisionError("omega must be non-zero when n_pml > 0")
        depth = np.arange(1, n_pml + 1) / n_pml  # 1/n_pml ... 1
        layer = 1.0 + 1j * d0 * depth**2 / k
        s_vals[:n_pml] = layer[::-1]       # left/top: deepest node first
        s_vals[n_pml + n_phys:] = layer    # right/bottom: deepest node last

    # 2. Matrix assembly (vectorised).
    size = n_total**2
    coef = 1.0 / (h**2 * s_vals**2)    # 1 / (h^2 s^2) per 1-D index
    cx = np.tile(coef, n_total)        # x-coefficient of row idx (depends on i)
    cy = np.repeat(coef, n_total)      # y-coefficient of row idx (depends on j)

    diag = 2.0 * cx + 2.0 * cy - k**2

    # Entry m of the +/-1 diagonals couples flat indices m and m + 1. Those are
    # x-neighbours only when they sit in the same grid line, i.e. m is not the
    # last node of a line; otherwise the entry must stay zero (no wrap-around).
    same_line = (np.arange(size - 1) % n_total) != n_total - 1
    upper_x = np.where(same_line, -cx[:-1], 0.0)   # A[m, m + 1]: row m
    lower_x = np.where(same_line, -cx[1:], 0.0)    # A[m + 1, m]: row m + 1

    upper_y = -cy[:size - n_total]     # A[m, m + n_total]: row m
    lower_y = -cy[n_total:]            # A[m + n_total, m]: row m + n_total

    A = sp.diags([diag, upper_x, lower_x, upper_y, lower_y],
                 [0, 1, -1, n_total, -n_total],
                 shape=(size, size), format='csc')
    return A, n_total, n_pml, h

## Verification (The "Verdict")
Before we generate 1,875 samples, we must verify that the reflection ratio is below 0.001 for the hardest case ($\omega=128$).

In [None]:
# Boundary-quality check at a single test frequency: solve for a point source
# at the grid centre and compare the field on the top physical edge with the
# peak field inside the physical domain.
TEST_OMEGA = 128
A, N_TOT, NPML, H = build_helmholtz_system(TEST_OMEGA)
solve = factorized(A)

# Physical nodes per side, derived from what the builder returned so the crop
# below stays correct if the builder's defaults change.
N_PHYS = N_TOT - 2 * NPML

# Source in the center
f = np.zeros((N_TOT, N_TOT), dtype=complex)
f[N_TOT // 2, N_TOT // 2] = 1.0
u_vec = solve(f.flatten())
u = u_vec.reshape(N_TOT, N_TOT)

# Reflection ratio as used in this notebook: mean |u| along the first physical
# row divided by the maximum |u| over the physical domain.
u_phys = u[NPML:NPML + N_PHYS, NPML:NPML + N_PHYS]
peak = np.max(np.abs(u_phys))
edge_leakage = np.mean(np.abs(u_phys[0, :]))  # Top boundary
ratio = edge_leakage / peak

print("--- PHYSICAL VALIDATION ---")
print(f"Frequency: ω={TEST_OMEGA}")
print(f"Reflection Ratio: {ratio:.6f}")

if ratio < 0.001:
    print("✅ VERDICT: SUCCESS. Boundary is quiet. Proceed to Generation.")
else:
    print(f"❌ VERDICT: FAIL. Ratio {ratio:.4f} is still too high.")

## Production Dataset Generation
This cell iterates through all frequencies and saves compressed .npz files along with a manifest.json for reproducibility.

In [None]:
import time

FREQS = [32, 64, 128]
SAMPLES_PER_FREQ = 625

# Single source of truth for the grid/PML configuration: the same values go
# into the manifest, into the matrix builder and into the crop below.
N_PHYS = 501
N_PML = 64
D0 = 80.0
# Seed for the source-position generator; stored in the manifest so the exact
# dataset can be regenerated.
SEED = 0

# Save manifest first
manifest = {
    "n_phys": N_PHYS,
    "n_pml": N_PML,
    "d0": D0,
    "target_reflection": 0.001,
    "frequencies": FREQS,
    "samples_per_freq": SAMPLES_PER_FREQ,
    "seed": SEED,
}
with open(os.path.join(SAVE_DIR, "manifest.json"), "w", encoding="utf-8") as m:
    json.dump(manifest, m, indent=4)

rng = np.random.default_rng(SEED)

for omega in FREQS:
    print(f"\nStarting Production for ω={omega}...")
    A, N_TOT, NPML, _ = build_helmholtz_system(
        omega, n_phys=N_PHYS, n_pml=N_PML, d0=D0
    )
    # Factorise once per frequency; every sample reuses the factorisation.
    solve = factorized(A)

    phys = slice(NPML, NPML + N_PHYS)  # physical window along each axis
    # One right-hand-side buffer per frequency; only the source node changes.
    f_grid = np.zeros((N_TOT, N_TOT), dtype=complex)

    start_time = time.perf_counter()
    for i in range(SAMPLES_PER_FREQ):
        # Random source position in physical domain
        src_row, src_col = rng.integers(NPML, NPML + N_PHYS, size=2)
        f_grid[src_row, src_col] = 1.0

        # Solve and Crop
        u = solve(f_grid.flatten()).reshape(N_TOT, N_TOT)
        u_phys = u[phys, phys]
        f_phys = f_grid[phys, phys]

        # Save
        fn = os.path.join(SAVE_DIR, f"w{omega}_s{i:03d}.npz")
        np.savez_compressed(fn, u_real=u_phys.real, u_imag=u_phys.imag,
                                f_real=f_phys.real, f_imag=f_phys.imag)

        # Clear the source (after saving, since f_phys is a view of f_grid)
        # so the buffer is all-zero again for the next sample.
        f_grid[src_row, src_col] = 0.0

        if (i + 1) % 100 == 0:
            elapsed = time.perf_counter() - start_time
            print(f"  > Saved {i+1}/{SAMPLES_PER_FREQ} (Time: {elapsed:.1f}s)")

print("\n--- DATASET COMPLETE ---")