diff --git a/.codespellrc b/.codespellrc
deleted file mode 100644
index e459c4f2..00000000
--- a/.codespellrc
+++ /dev/null
@@ -1,7 +0,0 @@
-[codespell]
-skip = .git,*.pdf,*.png,*.jpg,*.jpeg,*.gif,*.svg,*.bmp,*.tiff,*.pyc,venv,.venv,.ipynb_checkpoints
-check-filenames = true
-quiet-level = 2
-ignore-words-list = flexcompute,tidy3d,TE,TM,te,tm,FOM,fom,Commun,Thru
-ignore-regex = [a-f0-9]{40}
-builtin = clear,rare,informal
\ No newline at end of file
diff --git a/.github/workflows/lint-notebooks.yml b/.github/workflows/lint-notebooks.yml
index 5889907e..acb2d61f 100644
--- a/.github/workflows/lint-notebooks.yml
+++ b/.github/workflows/lint-notebooks.yml
@@ -42,7 +42,7 @@ jobs:
if: github.event_name == 'pull_request' && steps.changed_notebooks.outputs.any_changed == 'true'
continue-on-error: true
run: |
- uvx python spellcheck.py ${{ steps.changed_notebooks.outputs.all_changed_files }} > spellcheck_output.txt || true
+ uv run spellcheck.py ${{ steps.changed_notebooks.outputs.all_changed_files }} > spellcheck_output.txt || true
- name: Prepare spellcheck comment body
id: prepare_comment
diff --git a/custom_dictionary.json b/custom_dictionary.json
new file mode 100644
index 00000000..157e8400
--- /dev/null
+++ b/custom_dictionary.json
@@ -0,0 +1 @@
+{"'s": 1, "-channel": 1, "-degree": 1, "-dimensional": 1, "-ff": 1, "-fold": 1, "-ify": 1, "-offset": 1, "-order": 1, "-t": 1, "a-coordinate": 1, "abouzahra": 1, "above-defined": 1, "adamoptimizer": 1, "add-drop": 1, "adj": 1, "adjoint-based": 1, "admittancenetwork": 1, "advancedfastfitterparam": 1, "agoutane": 1, "air-bridged": 1, "air-filled": 1, "aligned-corrugation": 1, "alkeskjold": 1, "all-dielectric": 1, "all-optical": 1, "amar": 1, "anisotropicmedium": 1, "antennacharacteristics": 1, "antennametricsdata": 1, "anti-mask": 1, "anti-reflection": 1, "anti-reflective": 1, "anti-resonant": 1, "anti-symmetric": 1, "antireflection": 1, "apodization": 1, "apodized": 1, "area-integrated": 1, "args": 1, "arrow-shaped": 1, "artech": 1, "asi": 1, "audhkhasi": 1, "augustin-jean": 1, "autograd": 1, "autograd's": 1, "autograd-wrapped": 1, "autogradsimulation": 1, "autogrid": 1, "backpropagate": 1, "backscattering": 1, "backward-propagating": 1, "balanis": 1, "bandgap": 1, "bandpass": 1, "bandstructure": 1, "bandstructure-normalized": 1, "batchdata": 1, "bayesian-optimization": 1, "bcs": 1, "beam-steerable": 1, "beamprofile": 1, "behaviour": 1, "behdad": 1, "best-performing": 1, "bezier": 1, "bi-layer": 1, "bi-level": 1, "bilayer": 1, "binarization": 1, "binarizations": 1, "binarize": 1, "binarized": 1, "binarizing": 1, "bio-medical": 1, "biomolecule": 1, "biomolecules": 1, "biosensor": 1, "biosensors": 1, "bistability": 1, "blue-shifted": 1, "blueshift": 1, "blueviolet": 1, "bo": 1, "booske": 1, "bottom-left": 1, "bottom-right": 1, "boundaryspec": 1, "brillouin": 1, "brute-force": 1, "bs": 1, "built-in": 1, "builtin": 1, "builtins": 1, "butterworth": 1, "bwr": 1, "c-band": 1, "cavity-bordering": 1, "cavity-induced": 1, "center-to-center": 1, "centres": 1, "charge-induced": 1, "charge-to-optical": 1, "chebyshev": 1, "chenchen": 1, "cheung": 1, "chip-level": 1, "chip-to-chip": 1, "chiroptical": 1, "chrostowski": 1, "circuit-level": 1, "claddings": 1, "classifyoctant": 1, 
"classmethod": 1, "client-side": 1, "clipoperation": 1, "close-up": 1, "clothoid": 1, "cmos-compatible": 1, "cmrrs": 1, "co-located": 1, "co-optimization": 1, "co-polarization": 1, "coaxiallumpedport": 1, "coeffs": 1, "colocate": 1, "colocated": 1, "colocating": 1, "colocation": 1, "colorbar": 1, "colormap": 1, "colours": 1, "comboboxselected": 1, "compensated-single-cell": 1, "complex-conjugate": 1, "complex-valued": 1, "complexpolyslab": 1, "componentmodeler": 1, "computer-aided": 1, "config": 1, "conformally": 1, "continuoussource": 1, "coolwarm": 1, "coords": 1, "cost-effective": 1, "counter-clockwise": 1, "counter-example": 1, "coupler's": 1, "couplerverify": 1, "courant": 1, "cpu-based": 1, "cross-polarization": 1, "cross-polarized": 1, "cross-section": 1, "cross-sectional": 1, "cross-sections": 1, "cross-talk": 1, "crystal-like": 1, "csi": 1, "csv": 1, "currentintegralaxisaligned": 1, "currentsource": 1, "custom-defined": 1, "customcurrentsource": 1, "customfieldsource": 1, "customizable": 1, "custommedium": 1, "custompoleresidue": 1, "customsource": 1, "customsourcetime": 1, "cutting-edge": 1, "darkblue": 1, "darkred": 1, "data-driven": 1, "dataarray": 1, "dataarrays": 1, "dataframe": 1, "dataset": 1, "dataset's": 1, "datasets": 1, "datastructure": 1, "datatypes": 1, "dbr-based": 1, "de-multiplexer": 1, "deep-subwavelength": 1, "def": 1, "deg": 1, "delocalized": 1, "demultiplexer": 1, "demultiplexing": 1, "density-based": 1, "deotare": 1, "der": 1, "derivative-traced": 1, "designregion": 1, "designspace": 1, "desktop-based": 1, "devsim": 1, "diamond-air": 1, "dict": 1, "dielectric-silicon": 1, "diffractionmonitor": 1, "diffractionmonitors": 1, "directivities": 1, "directivity": 1, "directivitymonitor": 1, "directly-measured": 1, "discretization": 1, "discretize": 1, "discretized": 1, "dispersion-free": 1, "dispersionless": 1, "distanceunstructuredgrid": 1, "div": 1, "dl": 1, "docstring": 1, "double-check": 1, "downsample": 1, "downsampled": 1, 
"downsampling": 1, "dpi": 1, "drift-diffusion": 1, "dropdown": 1, "duty-cycle": 1, "e-field": 1, "e-fields": 1, "e-plane": 1, "early-stop": 1, "easily-usable": 1, "ed-qbic": 1, "eigenmode": 1, "eigenmodes": 1, "eigenvectors": 1, "electro-optic": 1, "electro-optical": 1, "electroabsorption": 1, "electromagnetics": 1, "electrorefraction": 1, "elektronik": 1, "elementwise": 1, "emecoefficientmonitor": 1, "emeexplicitgrid": 1, "emefieldmonitor": 1, "emelengthsweep": 1, "ememodesolvermonitor": 1, "emesimulationdata": 1, "emeuniformgrid": 1, "eps": 1, "epsmon": 1, "erosiondilation": 1, "etc": 1, "ev": 1, "even-symmetric": 1, "ew": 1, "exp": 1, "exploration-exploitation": 1, "ey": 1, "ez-dominant": 1, "ez-polarized": 1, "fabricability": 1, "fabricable": 1, "fabrication-aware": 1, "fabrication-constrained": 1, "fabrication-induced": 1, "fabrication-predicted": 1, "fabry-perot": 1, "false-color": 1, "fano": 1, "far-field": 1, "farfield": 1, "farfieldlocalprojection": 1, "farfieldmon": 1, "farfieldmonitor": 1, "farfieldserverdownsample": 1, "fastdispersionfitter": 1, "fbg-based": 1, "fbg-reflected": 1, "fdtd": 1, "fdtd-creating": 1, "femtoseconds": 1, "fiber-to-chip": 1, "fibres": 1, "fielddata": 1, "fielddataset": 1, "fieldmon": 1, "fieldmonitor": 1, "fieldprofilemon": 1, "fieldprojectionangledata": 1, "fieldprojectionanglemonitor": 1, "fieldprojectioncartesiandata": 1, "fieldprojector": 1, "fieldtimemon": 1, "fieldtimemonitor": 1, "fieldtimemonitors": 1, "figure-of-merit": 1, "fill-factor": 1, "fill-factors": 1, "filled-in": 1, "filterproject": 1, "fine-featured": 1, "fine-tune": 1, "fine-tuned": 1, "fine-tuning": 1, "finite-difference": 1, "finite-size": 1, "finite-sized": 1, "flexcompute": 1, "flexcompute's": 1, "flexcredit": 1, "flexcredits": 1, "fluidspec": 1, "flux-time": 1, "fluxmon": 1, "fluxmonitor": 1, "fluxmonitors": 1, "foms": 1, "forward-propagating": 1, "four-fold": 1, "four-wave": 1, "fourier-transformed": 1, "fps": 1, "free-carrier": 1, "free-form": 1, 
"free-space": 1, "freq": 1, "freqs": 1, "frequency-dependence": 1, "frequency-domain": 1, "fsss": 1, "ftol": 1, "full-wave": 1, "fullyanisotropicmedium": 1, "functionalities": 1, "fwd": 1, "gaas": 1, "gaussian-like": 1, "gaussianbeam": 1, "gaussianpulse": 1, "gaussians": 1, "gcopt": 1, "gdspy": 1, "general-purpose": 1, "generation-recombination": 1, "geometrygroup": 1, "ghz": 1, "gif": 1, "glass-al": 1, "gouraud": 1, "gouy": 1, "gradient-ascent": 1, "gradient-based": 1, "gradient-descent": 1, "gradient-index": 1, "graphene's": 1, "graphene-based": 1, "gratingefficiency": 1, "grayscale": 1, "grcwa's": 1, "gridspec": 1, "gui": 1, "guis": 1, "h-plane": 1, "hagness": 1, "half-circle": 1, "half-space": 1, "half-sphere": 1, "hamiltonians": 1, "hammerstad": 1, "hammerstad-jensen": 1, "hankel": 1, "hardcode": 1, "harmonic-inversion": 1, "hbn": 1, "hdf": 1, "heatchargesimulation": 1, "heatchargesimulationdata": 1, "hermite-gaussian": 1, "hermitian": 1, "hexagon-shaped": 1, "high-density": 1, "high-frequency": 1, "high-gain": 1, "high-index": 1, "high-performance": 1, "high-power": 1, "high-q": 1, "high-quality": 1, "high-speed": 1, "higher-level": 1, "higher-order": 1, "highest-index": 1, "highest-tm-fraction": 1, "hlim": 1, "hochberg": 1, "hollow-core": 1, "horiba": 1, "hx": 1, "hy": 1, "hyper-geometric": 1, "hyperparameter": 1, "hyperparameters": 1, "ie": 1, "ij": 1, "im": 1, "imag": 1, "impedancecalculator": 1, "impedances": 1, "in-built": 1, "in-coupling": 1, "in-place": 1, "in-plane": 1, "inas": 1, "indexperturbation": 1, "infinite-extent": 1, "information-carrying": 1, "insulator-metal-insulator": 1, "int": 1, "inter-chip": 1, "inter-particle": 1, "intermedium": 1, "interp": 1, "interposers": 1, "interscience": 1, "invdes": 1, "inverse-designed": 1, "inversedesign": 1, "inversedesignmulti": 1, "inversedesignresult": 1, "isel": 1, "isocontour": 1, "isothermalsteadychargedcanalysis": 1, "isotropically": 1, "ivanova": 1, "jakobsen": 1, "jax": 1, "jax-compatible": 1, 
"jax-traced": 1, "jaxbox": 1, "jaxcustommedium": 1, "jaxmedium": 1, "jaxpolyslab": 1, "jaxsimulation": 1, "jaxsimulationdata": 1, "jaxstructure": 1, "jaxstructures": 1, "jaxstructurestaticgeometry": 1, "jaxstructurestaticmedium": 1, "jens": 1, "jesper": 1, "jian-ming": 1, "jin": 1, "jkn": 1, "jnp": 1, "joannopoulos": 1, "json": 1, "jupyter": 1, "k-space": 1, "kalz": 1, "kirchhoff's": 1, "kirschning": 1, "klayout": 1, "ko": 1, "koster": 1, "kwarg": 1, "kwargs": 1, "l-band": 1, "l-cavity": 1, "laguerre-gaussian": 1, "large-area": 1, "large-scale": 1, "layerrefinementspec": 1, "lcapy": 1, "lcb": 1, "learning-based": 1, "left-hand": 1, "left-handed": 1, "lensed": 1, "level-set": 1, "lian": 1, "light-focusing": 1, "light-line": 1, "light-matter": 1, "lightcoral": 1, "lightning-fast": 1, "limegreen": 1, "linear-biasing": 1, "linearlumpedelement": 1, "linestyle": 1, "linewidth": 1, "lithography-induced": 1, "ln's": 1, "loadtxt": 1, "lobemeasurer": 1, "logspacing": 1, "long-lived": 1, "long-term": 1, "longest-lifetime": 1, "look-up": 1, "loss-resistant": 1, "lossless": 1, "lossy": 1, "low-contrast": 1, "low-cost": 1, "low-index": 1, "low-index-contrast": 1, "low-level": 1, "low-loss": 1, "low-pass": 1, "low-profile": 1, "lower-level": 1, "lower-most": 1, "lumpedport": 1, "lunebug": 1, "luyen": 1, "mach-zehnder": 1, "magneto-optic": 1, "manufacturability": 1, "manufacturable": 1, "mashanovich": 1, "matplotlib": 1, "mattia": 1, "maximising": 1, "maxiter": 1, "maxwell's": 1, "mccutcheon": 1, "medium-index": 1, "mediummediuminterface": 1, "mesher": 1, "meshoverrideregion": 1, "meshoverridestructure": 1, "meshoverridestructures": 1, "meta-atoms": 1, "metadata": 1, "metagrating": 1, "metagratings": 1, "metal-insulator-metal": 1, "metalens": 1, "metalens-assisted": 1, "metalenses": 1, "metamaterial": 1, "metamaterial-based": 1, "metamaterials": 1, "metaparameters": 1, "metasurface": 1, "metasurfaces": 1, "methodbayopt": 1, "methodgenalg": 1, "methodgrid": 1, "methodmontecarlo": 
1, "methodparticleswarm": 1, "michaels": 1, "michieletto": 1, "micro-meters": 1, "micro-sized": 1, "microcavities": 1, "microcavity": 1, "microfabricated": 1, "microlens": 1, "microporous": 1, "microring": 1, "microstrip": 1, "microstrips": 1, "microstructured": 1, "mid-infrared": 1, "mid-ir": 1, "mie": 1, "millimeter-wave": 1, "minimalistic": 1, "minimum-switch": 1, "misc": 1, "mm": 1, "mmis": 1, "mode-mixing": 1, "modefieldmonitor": 1, "modemon": 1, "modemonitor": 1, "modemonitors": 1, "modesolver": 1, "modesolvermonitor": 1, "modesolvers": 1, "modesource": 1, "modespec": 1, "modulo": 1, "monitordata": 1, "mono-exponentially": 1, "monostatic": 1, "mos": 1, "multi-chip": 1, "multi-design": 1, "multi-dimensional": 1, "multi-freq": 1, "multi-frequency": 1, "multi-functional": 1, "multi-layer": 1, "multi-mode": 1, "multi-objective": 1, "multi-octave": 1, "multi-physical": 1, "multi-physics": 1, "multi-port": 1, "multi-simulation": 1, "multimaterial": 1, "multimode": 1, "multiphysics": 1, "multiphysicsmedium": 1, "multipole": 1, "multipoleexpansion": 1, "mw": 1, "n-dimensional": 1, "n-k": 1, "namespace": 1, "nano-strips": 1, "nanoantenna": 1, "nanoantennas": 1, "nanocavity": 1, "nanodisk": 1, "nanodisks": 1, "nanofabrication": 1, "nanolasing": 1, "nanoparticle": 1, "nanoparticles": 1, "nanophotonic": 1, "nanophotonics": 1, "nanopillars": 1, "nanoresonator": 1, "nanoresonators": 1, "nanorod": 1, "nanorods": 1, "nanoscale": 1, "nanosphere": 1, "nanostrip": 1, "nanostrips": 1, "nanostructure": 1, "nanostructure's": 1, "nanostructured": 1, "nanostructures": 1, "nanowires": 1, "narrow-band": 1, "narrowband": 1, "natively": 1, "ndarray": 1, "near-complete": 1, "near-field": 1, "near-fields": 1, "near-infrared": 1, "near-optimal": 1, "near-to-far": 1, "nearfield": 1, "nedeljkovic": 1, "neumann": 1, "next-generation": 1, "next-order": 1, "nh": 1, "nik": 1, "niobate": 1, "nitrogen-vacancy": 1, "nm": 1, "non-adiabatic": 1, "non-conformal": 1, "non-conservative": 1, 
"non-destructive": 1, "non-differentiable": 1, "non-dispersive": 1, "non-downsampled": 1, "non-etched": 1, "non-experts": 1, "non-exponentially": 1, "non-faid-based": 1, "non-hermitian": 1, "non-linear": 1, "non-negative": 1, "non-optimized": 1, "non-periodic": 1, "non-physical": 1, "non-radiative": 1, "non-taper": 1, "non-touching": 1, "non-translational": 1, "non-uniform": 1, "non-union": 1, "non-unionized": 1, "non-unity": 1, "non-zero": 1, "nondispersive": 1, "nonlinearities": 1, "nonlinearly": 1, "nonzero-size": 1, "normal-incidence": 1, "ns": 1, "numpy": 1, "nxn": 1, "o-band": 1, "o-th": 1, "octante": 1, "odd-symmetric": 1, "off-normal": 1, "off-resonance": 1, "offsetfactor": 1, "ol": 1, "ole": 1, "on-chip": 1, "one-dimensional": 1, "one-half": 1, "one-third": 1, "open-circuited": 1, "open-source": 1, "opls": 1, "optax": 1, "optica": 1, "optimizable": 1, "ostermann": 1, "out-coupling": 1, "out-of-plane": 1, "overfitting": 1, "p-polarized": 1, "palik": 1, "parag": 1, "parallel-strip": 1, "parameterany": 1, "parameterfloat": 1, "parameterint": 1, "parameterization": 1, "parameterize": 1, "parameterized": 1, "parameterizes": 1, "paraview": 1, "parity-time": 1, "partially-etched": 1, "passband": 1, "pdk's": 1, "pec-like": 1, "periodicities": 1, "permittivities": 1, "perturbationmedium": 1, "phase-matching": 1, "phc": 1, "phonons": 1, "photodetector": 1, "photodetectors": 1, "photon-detector": 1, "photonforge": 1, "photonics": 1, "photothermal": 1, "photovoltaics": 1, "phys": 1, "pin-junction": 1, "pixel-by-pixel": 1, "pixelated": 1, "pixellated": 1, "piyg": 1, "pkl": 1, "plane-wave": 1, "planewave": 1, "plasmon": 1, "plasmonic": 1, "plasmonics": 1, "plug-and-play": 1, "pmc-like": 1, "pmls": 1, "pn": 1, "png": 1, "point-wise": 1, "pointdipole": 1, "polaritons": 1, "polarization-rotating": 1, "polarizers": 1, "pole-residue": 1, "poleresidue": 1, "polyslab": 1, "polyslabs": 1, "popsize": 1, "positive-x": 1, "post-correction": 1, "post-process": 1, "post-processes": 
1, "post-processing": 1, "postprocess": 1, "postprocessing": 1, "power-current": 1, "power-voltage": 1, "powercoords": 1, "poynting": 1, "pozar": 1, "pp": 1, "pre": 1, "pre-define": 1, "pre-defined": 1, "pre-determined": 1, "pre-optimized": 1, "pre-print": 1, "pre-processing": 1, "prepregs": 1, "preprocessing": 1, "previously-set": 1, "previously-specified": 1, "printed-circuit": 1, "programmatically": 1, "pseudo-colors": 1, "pseudo-vector": 1, "psrs": 1, "purpose-specific": 1, "py": 1, "pydantic": 1, "pygad": 1, "pyswarms": 1, "python-based": 1, "pytorch": 1, "q-factor": 1, "q-factors": 1, "q-te": 1, "q-tm": 1, "qbic": 1, "qu": 1, "quadrupoles": 1, "quasi-bound": 1, "quasi-digital": 1, "quasi-guided": 1, "quasi-periodic": 1, "quasi-static": 1, "quasi-te": 1, "quasi-tem": 1, "quasi-tm": 1, "quasi-uniform": 1, "qubit": 1, "qubits": 1, "quickstart": 1, "rad": 1, "radio-frequency": 1, "radiuspenalty": 1, "ragani": 1, "ramp-up": 1, "ramping": 1, "rcond": 1, "rdbu": 1, "re-compute": 1, "re-entering": 1, "re-index": 1, "re-interpolate": 1, "re-project": 1, "re-projected": 1, "re-projection": 1, "re-run": 1, "re-set": 1, "re-simulate": 1, "re-simulation": 1, "readonly": 1, "real-time": 1, "real-valued": 1, "rectangulardielectric": 1, "refactored": 1, "reflectarray": 1, "reflectarrays": 1, "reflectiveless": 1, "refractiveindex": 1, "reproj": 1, "resampled": 1, "resimulate": 1, "resonancefinder": 1, "right-hand": 1, "right-handed": 1, "ring-shaped": 1, "rmax": 1, "rmin": 1, "ro": 1, "robustpath": 1, "romil": 1, "rotator-splitter": 1, "round-trip": 1, "roundoff": 1, "royalblue": 1, "runtime": 1, "runtimes": 1, "rxy-": 1, "ryy-": 1, "rz": 1, "s-band": 1, "s-bend": 1, "s-bends": 1, "s-matrix": 1, "s-parameter": 1, "s-parameters": 1, "s-polarized": 1, "s-shape": 1, "sandybrown": 1, "savefig": 1, "scale-invariant": 1, "scattered-field": 1, "scatterplot": 1, "scikit-rf": 1, "scipy": 1, "sel": 1, "self-imaging": 1, "self-intersecting": 1, "self-intersection": 1, 
"self-intersections": 1, "semi-analytical": 1, "semi-circles": 1, "semi-infinite": 1, "semi-lens": 1, "semiconductormedium": 1, "separately-defined": 1, "sequentially-rotated": 1, "server-side": 1, "serverless": 1, "set-up": 1, "seven-element": 1, "several-fold": 1, "shanhui": 1, "shape-optimized": 1, "si-gan-si": 1, "si-resonator-delta-": 1, "side-by-side": 1, "siganc": 1, "silicon-on-insulator": 1, "simulationdata": 1, "simulationoutputs": 1, "simulationparameters": 1, "sine-like": 1, "single-cell": 1, "single-feed": 1, "single-layer": 1, "single-mode": 1, "single-photon": 1, "single-pole": 1, "sio": 1, "skyblue": 1, "small-area": 1, "smartphones": 1, "soi-sbse": 1, "solid-state": 1, "solver-based": 1, "soref": 1, "sourcetimes": 1, "space-time": 1, "spatialdataarray": 1, "spatialfiltering": 1, "spatially-varying": 1, "spatio-temporal": 1, "speed-up": 1, "sphere-cone": 1, "spherercs": 1, "sphp": 1, "sphps": 1, "splitter-rotator": 1, "spot-size": 1, "sqrt": 1, "srgb": 1, "sss": 1, "stack-up": 1, "stackings": 1, "staircase-like": 1, "stand-alone": 1, "star-shaped": 1, "state-of-the-art": 1, "steady-state": 1, "steadyfreecarrierdata": 1, "step-by-step": 1, "stopband": 1, "str": 1, "strip-to-rib": 1, "strip-to-slot": 1, "structs": 1, "structure-setup": 1, "structureboundary": 1, "sub-functions": 1, "sub-optimal": 1, "sub-pixel": 1, "sub-polyslabs": 1, "sub-wavelength": 1, "subpixel": 1, "subpixel-smoothing": 1, "subwavelength": 1, "superstrate": 1, "swg": 1, "swgs": 1, "sx": 1, "symmetry-breaking": 1, "sz": 1, "taflove": 1, "taoufik": 1, "td": 1, "te-like": 1, "te-polarized": 1, "telecom": 1, "terminalcomponentmodeler": 1, "testgc": 1, "thermal-ring": 1, "thermo-optic": 1, "thickness-free": 1, "thin-film": 1, "third-order": 1, "threadpool": 1, "three-dimensional": 1, "thresholding": 1, "thz": 1, "tightly-packed": 1, "time-average": 1, "time-averaged": 1, "time-dependent": 1, "time-domain": 1, "time-efficient": 1, "time-harmonic": 1, "timemonitor": 1, "tkinter": 1, 
"tm-like": 1, "tm-polarized": 1, "tmm": 1, "top-level": 1, "topologydesignregion": 1, "total-field": 1, "touhami": 1, "transformative": 1, "translationally": 1, "trianglemesh": 1, "trimesh": 1, "tristimulus": 1, "tuple": 1, "tuples": 1, "two-dimensional": 1, "two-layer": 1, "two-photon": 1, "txt": 1, "ucb": 1, "ultra-compact": 1, "ultra-slim": 1, "ultra-thin": 1, "ultracompact": 1, "ultrasharp": 1, "un-corrected": 1, "un-etched": 1, "un-normalized": 1, "uncladded": 1, "uncomment": 1, "uncomputed": 1, "und": 1, "unetched": 1, "uni-directional": 1, "uniformcurrentsource": 1, "unitarity": 1, "unnormalized": 1, "unphysical": 1, "untapered": 1, "up-conversion": 1, "url": 1, "use-case": 1, "user-defined": 1, "user-friendly": 1, "user-input": 1, "user-specified": 1, "user-supplied": 1, "util": 1, "v-antenna": 1, "vector-jacobian": 1, "vectorial": 1, "versa": 1, "visualise": 1, "viz": 1, "vlim": 1, "vmax": 1, "vol": 1, "voltage-current": 1, "vortexmetasurface": 1, "voxel": 1, "vtk": 1, "vtkunstructuredgrid": 1, "vtu": 1, "waals": 1, "wafer-scale": 1, "walkthrough": 1, "water-tight": 1, "water-tightness": 1, "waveguide's": 1, "waveguide-cavity": 1, "waveguide-only": 1, "waveguide-to-ring": 1, "waveguiding": 1, "wavelength-dependent": 1, "wavevector": 1, "wavevectors": 1, "web-based": 1, "webapi": 1, "well-approximated": 1, "well-binarized": 1, "well-defined": 1, "well-established": 1, "well-known": 1, "well-matched": 1, "well-suited": 1, "wg": 1, "wga": 1, "whispering-gallery": 1, "wideband": 1, "widely-used": 1, "wireframe": 1, "workflow": 1, "wrap-around": 1, "wvgout": 1, "x-": 1, "x-aligned": 1, "x-axis": 1, "x-coordinate": 1, "x-coordinates": 1, "x-direction": 1, "x-distance": 1, "x-minus": 1, "x-oriented": 1, "x-plus": 1, "x-polarized": 1, "x-position": 1, "x-y": 1, "x-z": 1, "xarray": 1, "xarray's": 1, "xmax": 1, "xmin": 1, "xp": 1, "xr": 1, "xx": 1, "xy": 1, "xy-plane": 1, "xyfieldmon": 1, "xyz": 1, "xz": 1, "y-": 1, "y-axis": 1, "y-branch": 1, "y-coordinate": 1, 
"y-direction": 1, "y-junction": 1, "y-junctions": 1, "y-minus": 1, "y-oriented": 1, "y-plus": 1, "y-polarized": 1, "y-z": 1, "yablonovitch": 1, "yagi-uda": 1, "yaml": 1, "yanik": 1, "yee": 1, "yee-grid": 1, "ymax": 1, "yurui": 1, "yy": 1, "yz": 1, "z-aligned": 1, "z-axis": 1, "z-boundaries": 1, "z-bounds": 1, "z-component": 1, "z-coordinate": 1, "z-direction": 1, "z-directions": 1, "z-normal": 1, "z-planes": 1, "z-size": 1, "zemax": 1, "zero-contour": 1, "zero-level": 1, "zero-mode": 1, "zero-phase": 1, "zero-phonon": 1, "zero-size": 1, "zero-th": 1, "zhang": 1, "zoom-in": 1, "zoomed-in": 1, "zz": 1}
\ No newline at end of file
diff --git a/spellcheck.py b/spellcheck.py
index a2d23cec..9ed88d60 100755
--- a/spellcheck.py
+++ b/spellcheck.py
@@ -1,99 +1,441 @@
-#!/usr/bin/env python3
+#!/usr/bin/env -S uv run --script
+#
+# /// script
+# requires-python = ">=3.12"
+# dependencies = ["nbformat", "pyspellchecker", "google-genai", "pydantic", "tqdm"]
+# ///
import argparse
+import ast
import concurrent.futures
+import io
+import logging
import os
import re
-import subprocess
import sys
+import tokenize
+from collections import Counter
+from enum import Enum
from typing import Optional
+import nbformat
+from google import genai
+from google.genai import types
+from pydantic import BaseModel
+from spellchecker import SpellChecker
+from tqdm import tqdm
+
+CUSTOM_DICT_PATH = "custom_dictionary.json"
+
+
+class Decision(str, Enum):
+ ADD_TO_VOCABULARY = "add_to_vocabulary"
+ IS_A_MISSPELLING = "is_a_misspelling"
+
+
+class SpellcheckDecision(BaseModel):
+ decision: Decision
+ reasoning: str
+ corrected_word: Optional[str] = None
+
+
+def load_custom_words(spell: SpellChecker):
+ """Loads words from a custom dictionary file into the SpellChecker instance."""
+ if os.path.exists(CUSTOM_DICT_PATH):
+ spell.word_frequency.load_dictionary(CUSTOM_DICT_PATH)
+
+
+def add_words_to_custom_dictionary(new_words: set[str]):
+ """Adds words to the custom dictionary file."""
+ temp_spell = SpellChecker(language=None)
+ if os.path.exists(CUSTOM_DICT_PATH):
+ temp_spell.word_frequency.load_dictionary(CUSTOM_DICT_PATH)
+
+ for word in new_words:
+ temp_spell.word_frequency.add(word)
+
+ temp_spell.export(CUSTOM_DICT_PATH, gzipped=False)
+ logging.info(f"Updated {CUSTOM_DICT_PATH} with {len(new_words)} new word(s).")
+
+
+def run_manual_interactive_mode(notebooks: list[str], base_spell: SpellChecker):
+ """Runs the spell checker in interactive mode manually."""
+ for notebook in notebooks:
+ logging.info(f"Checking notebook: {notebook}")
+ texts, all_identifiers = extract_text_from_notebook(notebook)
+ misspelled_lines = find_misspelled_in_notebook_texts(texts, all_identifiers, base_spell)
+
+ if not misspelled_lines:
+ logging.info(f"No spelling errors found in {notebook}.")
+ continue
+
+ unique_misspelled = {} # {lower_word: (original_word, context)}
+ for cell_num, line_num, source_line, words in misspelled_lines:
+ for word in words:
+ if word.lower() not in unique_misspelled:
+ context = f"Found in Cell {cell_num}, Line {line_num}: {source_line.strip()}"
+ unique_misspelled[word.lower()] = (word, context)
+
+ for word_lower, (original_word, context) in sorted(unique_misspelled.items()):
+ print(f"\nMisspelled word: '{original_word}'")
+ print(context)
+ answer = input(f"Add '{original_word}' to dictionary? [Y/n/q] (yes/no/quit) ").lower()
+
+ if answer.strip() == "" or answer == "y":
+ add_words_to_custom_dictionary({word_lower})
+ base_spell.word_frequency.add(word_lower)
+ print(f"Added '{word_lower}' to dictionary for this session.")
+ elif answer == "q":
+ print("Quitting interactive session.")
+ return
+ else: # 'n'
+ print(f"Skipping '{word_lower}'.")
+
+ print("\nManual interactive session finished.")
+
+
+def run_llm_interactive_mode(notebooks: list[str], base_spell: SpellChecker):
+ """Runs the spell checker in interactive mode using an LLM to make decisions."""
+ api_key = os.environ.get("GEMINI_API_KEY")
+ if not api_key:
+ logging.error("GEMINI_API_KEY environment variable not set for interactive LLM mode.")
+ sys.exit(1)
+
+ client = genai.Client(api_key=api_key)
+ model = "gemini-2.5-flash-lite-preview-06-17"
+
+ for notebook in notebooks:
+ logging.info(f"Checking notebook: {notebook}")
+ texts, all_identifiers = extract_text_from_notebook(notebook)
+ misspelled_lines = find_misspelled_in_notebook_texts(texts, all_identifiers, base_spell)
+
+ if not misspelled_lines:
+ logging.info(f"No spelling errors found in {notebook}.")
+ continue
+
+ unique_misspelled = {} # {lower_word: (original_word, context)}
+ for cell_num, line_num, source_line, words in misspelled_lines:
+ for word in words:
+ if word.lower() not in unique_misspelled:
+ context = f"Found in Cell {cell_num}, Line {line_num}: {source_line.strip()}"
+ unique_misspelled[word.lower()] = (word, context)
+
+ logging.info(
+ f"Found {len(unique_misspelled)} potential misspellings in {os.path.basename(notebook)}. Checking with LLM..."
+ )
+ for word_lower, (original_word, context) in tqdm(
+ sorted(unique_misspelled.items()), desc=f"Checking {os.path.basename(notebook)}"
+ ):
+ prompt = f"""You are an expert spell checker. Your task is to analyze a word and determine if it's a misspelling or a valid word that should be added to a custom dictionary.
+The word to check is: "{original_word}"
+It was found in the notebook: "{os.path.basename(notebook)}"
+Here is the line of context: "{context}"
+
+Analyze the word in its context. Consider that it might be a technical term, a variable name, a product name, or a non-English word.
+
+Based on your analysis, decide one of the following:
+1. 'add_to_vocabulary': The word is correct in this context (e.g., technical term, name, code identifier) and should be added to the dictionary.
+2. 'is_a_misspelling': The word is a misspelling.
+
+If you decide it's a misspelling, provide a correction.
+
+Provide your response in JSON format matching this Pydantic schema:
+class Decision(str, Enum):
+ ADD_TO_VOCABULARY = "add_to_vocabulary"
+ IS_A_MISSPELLING = "is_a_misspelling"
+
+class SpellcheckDecision(BaseModel):
+ decision: Decision
+ reasoning: str
+ corrected_word: Optional[str] = None
+"""
+ try:
+ response = client.models.generate_content(
+ model=model,
+ contents=prompt,
+ config=types.GenerateContentConfig(
+ response_mime_type="application/json", response_schema=SpellcheckDecision
+ ),
+ )
+ decision = SpellcheckDecision.model_validate_json(response.text)
+
+ if decision.decision == Decision.ADD_TO_VOCABULARY:
+ add_words_to_custom_dictionary({word_lower})
+ base_spell.word_frequency.add(word_lower)
+ logging.debug(
+ f"LLM added '{word_lower}' to vocabulary. Reasoning: {decision.reasoning}"
+ )
+ elif decision.decision == Decision.IS_A_MISSPELLING:
+ logging.warning(
+ f"Misspelled: '{original_word}' in {notebook}. "
+ f"Suggestion: {decision.corrected_word or 'None'}. "
+ f"Reasoning: {decision.reasoning}"
+ )
+ except Exception as e:
+ logging.error(f"Error checking word '{word_lower}' with LLM: {e}")
+
+ print("\nInteractive LLM session finished.")
+
+
+def extract_identifiers_from_code(source: str) -> set[str]:
+ """Extracts and splits identifiers from a python code string."""
+ identifiers = set()
+ raw_identifiers = set()
+ try:
+ tree = ast.parse(source)
+ for node in ast.walk(tree):
+ if isinstance(node, ast.Name):
+ raw_identifiers.add(node.id)
+ elif isinstance(node, ast.Attribute):
+ raw_identifiers.add(node.attr)
+ elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
+ raw_identifiers.add(node.name)
+ elif isinstance(node, ast.arg):
+ raw_identifiers.add(node.arg)
+ except SyntaxError:
+ pass # ignore code that can't be parsed
+
+ for identifier in raw_identifiers:
+ # Split by snake_case and camelCase
+ words = re.sub(r"_", " ", identifier)
+ words = re.sub(r"([a-z])([A-Z])", r"\1 \2", words)
+ identifiers.update(word.lower() for word in words.split() if word)
+
+ return identifiers
+
+
+def extract_text_from_code(source: str) -> list[tuple[int, str]]:
+ """Extracts strings and comments from a python code string."""
+ text_nodes = []
+ try:
+ tokens = tokenize.generate_tokens(io.StringIO(source).readline)
+ for toknum, tokval, (srow, _), _, _ in tokens:
+ if toknum == tokenize.STRING:
+ try:
+ text_nodes.append((srow, ast.literal_eval(tokval)))
+ except (ValueError, SyntaxError, MemoryError, TypeError):
+ text_nodes.append((srow, tokval))
+ elif toknum == tokenize.COMMENT:
+ text_nodes.append((srow, tokval.lstrip("#").strip()))
+ except (tokenize.TokenError, IndentationError, SyntaxError):
+ pass
+ return text_nodes
+
+
+def extract_text_from_notebook(
+ notebook_path: str,
+) -> tuple[list[tuple[int, int, str, str]], set[str]]:
+ """
+ Extracts markdown text, comments, and strings from a notebook.
+ Also extracts all python identifiers from code cells.
+ Returns a tuple of:
+ - list of tuples (cell_num, line_num, text, source_line).
+ - set of all python identifiers found.
+ """
+ try:
+ notebook = nbformat.read(notebook_path, as_version=4)
+ except Exception as e:
+ logging.warning(f"Could not read or parse notebook '{notebook_path}': {e}")
+ return [], set()
+
+ texts = []
+ all_identifiers = set()
+ for cell_index, cell in enumerate(notebook.cells):
+ cell_num = cell_index + 1
+ source_lines = cell.source.splitlines()
+ if cell.cell_type == "markdown":
+ for line_num, line in enumerate(source_lines):
+ texts.append((cell_num, line_num + 1, line, line))
+ elif cell.cell_type == "code":
+ all_identifiers.update(extract_identifiers_from_code(cell.source))
+ code_texts = extract_text_from_code(cell.source)
+ for line_num, text in code_texts:
+ source_line = source_lines[line_num - 1] if line_num <= len(source_lines) else ""
+ texts.append((cell_num, line_num, text, source_line))
+
+ return texts, all_identifiers
+
+
+def get_all_words_from_notebook(notebook_path: str) -> set[str]:
+ """Extracts all unique words from a notebook."""
+ words = set()
+ texts, identifiers = extract_text_from_notebook(notebook_path)
+ words.update(identifiers)
+
+ for _, _, text, _ in texts:
+ if "/" in text or "\\" in text or "http" in text:
+ continue
+ found_words = re.findall(r"\b[a-zA-Z-']+\b", text)
+ words.update(w.lower() for w in found_words)
+ return words
+
+
+def build_reference_word_set(reference_notebooks: list[str], threshold: int) -> set[str]:
+ """Builds a set of words that appear in at least 'threshold' reference notebooks."""
+ if not reference_notebooks or threshold <= 0:
+ return set()
+
+ logging.info(f"Building reference dictionary from {len(reference_notebooks)} notebooks...")
+
+ word_counts = Counter()
+ with concurrent.futures.ProcessPoolExecutor() as executor:
+ future_to_notebook = {
+ executor.submit(get_all_words_from_notebook, nb): nb for nb in reference_notebooks
+ }
+ for future in concurrent.futures.as_completed(future_to_notebook):
+ try:
+ words_in_notebook = future.result()
+ # Each word is counted once per notebook
+ word_counts.update(words_in_notebook)
+ except Exception as exc:
+ notebook = future_to_notebook[future]
+ logging.warning(f"Could not process reference notebook {notebook}: {exc}")
+
+ reference_words = {word for word, count in word_counts.items() if count >= threshold}
+ logging.info(f"Found {len(reference_words)} words meeting the threshold of {threshold}.")
+ return reference_words
+
+
+def find_misspelled_in_notebook_texts(
+ texts: list[tuple[int, int, str, str]],
+ all_identifiers: set[str],
+ base_spell: SpellChecker,
+) -> list[tuple[int, int, str, list[str]]]:
+ """
+ Finds misspelled words in texts from a notebook.
+ Returns a list of tuples: (cell_num, line_num, source_line, list_of_misspelled_words).
+ """
+ notebook_spell = SpellChecker(language=None)
+ notebook_spell.word_frequency.load_words(base_spell.word_frequency.words())
+ notebook_spell.word_frequency.load_words([word.lower() for word in all_identifiers])
+
+ misspelled_lines_info = []
+
+ for cell_num, line_num, text, source_line in texts:
+ if "/" in text or "\\" in text or "http" in text:
+ continue
+
+ words_in_parens = {w.lower() for w in re.findall(r"\(([a-zA-Z\-']+)\)", text)}
+ words = re.findall(r"\b[a-zA-Z-']+\b", text)
+ words_to_check = [w for w in words if w.lower() not in words_in_parens and not w.isupper()]
+
+ if not words_to_check:
+ continue
+
+ # Check lowercase words, as pyspellchecker is case-insensitive
+ misspelled_lower = notebook_spell.unknown(w.lower() for w in words_to_check)
+
+ if misspelled_lower:
+ # Find original cased words that are misspelled
+ misspelled_original_case = sorted(
+ {w for w in words_to_check if w.lower() in misspelled_lower}
+ )
+ if misspelled_original_case:
+ misspelled_lines_info.append(
+ (cell_num, line_num, source_line, misspelled_original_case)
+ )
+
+ return misspelled_lines_info
+
def get_relative_path(notebook: str) -> str:
"""Get the relative path of the notebook from the current directory."""
return os.path.relpath(notebook, os.getcwd())
-def check_spelling(notebook: str) -> Optional[str]:
+def check_spelling(notebook: str, base_spell: SpellChecker) -> Optional[str]:
"""
- Check spelling in a notebook.
+ Check spelling in a notebook using pyspellchecker.
Returns:
A formatted Markdown string containing spelling errors for the notebook,
- using a code block to show codespell's output, or None if no errors were found.
+ or None if no errors were found.
"""
rel_path = get_relative_path(notebook)
- error_message_block = None
try:
- with open(notebook, encoding="utf-8") as f:
- content = f.read()
-
- # nbstripout to remove outputs
- nbstripout_proc = subprocess.run(
- ["uvx", "nbstripout"],
- input=content,
- capture_output=True,
- text=True,
- check=True,
- )
+ texts, all_identifiers = extract_text_from_notebook(notebook)
+ misspelled_lines = find_misspelled_in_notebook_texts(texts, all_identifiers, base_spell)
- # remove image tags with base64 data
- stripped_content = re.sub(
- r'<img[^>]*>|<img[^>]*/>',
- "",
- nbstripout_proc.stdout,
- flags=re.DOTALL,
- )
+ if not misspelled_lines:
+ return None
- # remove any remaining base64 strings that might appear without proper HTML tags
- stripped_content = re.sub(
- r"data:image/[^;]+;base64,[A-Za-z0-9+/=]+",
- "",
- stripped_content,
- flags=re.DOTALL,
- )
+ error_strings = []
+ for cell_num, line_num, source_line, misspelled_words in misspelled_lines:
+ misspelled_info = ", ".join(f"'{w}'" for w in misspelled_words)
+ error_strings.append(
+ f"Cell {cell_num}, Line {line_num}: {misspelled_info}\n > {source_line.strip()}"
+ )
- codespell_proc = subprocess.run(
- ["uvx", "codespell", "-"],
- input=stripped_content,
- capture_output=True,
- text=True,
- check=False, # codespell exits non-zero on errors, which is expected
- )
+ if error_strings:
+ error_details = "\n".join(error_strings)
+ return f"**{rel_path}**:\n```\n{error_details}\n```"
- # filter codespell's config file lines
- output_lines = []
- for line in codespell_proc.stdout.splitlines():
- if line.strip().startswith("Used config files:") or re.match(
- r"^\s+\d+:\s+\.codespellrc", line
- ):
- continue
- output_lines.append(line.replace("-:", "Line ", 1))
-
- filtered_output = "\n".join(output_lines).strip()
-
- if filtered_output:
- error_message_block = f"**{rel_path}**:\n```\n{filtered_output}\n```"
-
- except FileNotFoundError:
- error_message_block = f"**{rel_path}**: Error - File not found."
- except subprocess.CalledProcessError as e:
- cmd_str = " ".join(e.cmd)
- error_message_block = (
- f"**{rel_path}**: Error running command `{cmd_str}`:\n```\n{e.stderr}\n```"
- )
except Exception as e:
- error_message_block = f"**{rel_path}**: An unexpected error occurred:\n```\n{str(e)}\n```"
+ logging.error(f"An unexpected error processing notebook {rel_path}", exc_info=True)
+ return f"**{rel_path}**: An unexpected error with pyspellchecker:\n```\n{str(e)}\n```"
- return error_message_block
+ return None
def main():
parser = argparse.ArgumentParser(description="Check spelling in Jupyter notebooks")
parser.add_argument("notebooks", nargs="+", help="List of notebook files to check")
+ parser.add_argument(
+ "-i",
+ "--interactive",
+ action="store_true",
+ help="Run in interactive mode to add words to the dictionary.",
+ )
+ parser.add_argument(
+ "--llm",
+ action="store_true",
+ help="Use LLM for decision making in interactive mode. Requires GEMINI_API_KEY.",
+ )
+ parser.add_argument(
+ "--reference-notebooks",
+ nargs="+",
+ default=None,
+ help="Reference notebooks to build a dictionary of common words. If not provided, all other notebooks in the current directory are used.",
+ )
+ parser.add_argument(
+ "--threshold",
+ type=int,
+ default=3,
+ help="Minimum number of occurrences in reference notebooks for a word to be ignored.",
+ )
+ parser.add_argument(
+ "-v", "--verbose", action="store_true", help="Enable verbose logging output."
+ )
args = parser.parse_args()
+ log_level = logging.DEBUG if args.verbose else logging.INFO
+ logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")
+
+ reference_notebooks = args.reference_notebooks
+ if reference_notebooks is None:
+ logging.info(
+ "No reference notebooks provided, using all other notebooks in the current directory as reference."
+ )
+ all_notebooks_in_dir = [f for f in os.listdir(".") if f.endswith(".ipynb")]
+ notebooks_to_check_set = set(args.notebooks)
+ reference_notebooks = [
+ nb for nb in all_notebooks_in_dir if nb not in notebooks_to_check_set
+ ]
+
+ reference_words = build_reference_word_set(reference_notebooks, args.threshold)
+
+ base_spell = SpellChecker()
+ load_custom_words(base_spell)
+ base_spell.word_frequency.load_words(reference_words)
+
+ if args.interactive:
+ if args.llm:
+ run_llm_interactive_mode(args.notebooks, base_spell)
+ else:
+ run_manual_interactive_mode(args.notebooks, base_spell)
+ sys.exit(0)
+
all_errors: list[str] = []
num_files_processed = 0
num_files_with_errors = 0
@@ -102,7 +444,7 @@ def main():
futures = []
with concurrent.futures.ProcessPoolExecutor() as executor:
for notebook in args.notebooks:
- futures.append(executor.submit(check_spelling, notebook))
+ futures.append(executor.submit(check_spelling, notebook, base_spell))
for future in concurrent.futures.as_completed(futures):
num_files_processed += 1
@@ -119,7 +461,7 @@ def main():
else:
num_files_with_errors += 1
except Exception as exc:
- print(f"An unexpected error occurred processing a task: {exc}", file=sys.stderr)
+ logging.error("An unexpected error occurred processing a task", exc_info=True)
num_files_with_processing_errors += 1
all_errors.append(
f"**Unknown File**: An unexpected error occurred during processing:\n```\n{exc}\n```"