cherab · jacklovell · May 28, 2026 · Mar 11, 2026 · Mar 11, 2026 · Mar 21, 2026
diff --git a/cherab/openadas/parse/adf15.py b/cherab/openadas/parse/adf15.py
@@ -17,27 +17,52 @@
 # under the Licence.
 
 import re
+
 import numpy as np
-from cherab.core.atomic import hydrogen, Element
+
+from cherab.core.atomic import Element, hydrogen
 from cherab.core.utility import RecursiveDict
 from cherab.core.utility.conversion import Cm3ToM3, PerCm3ToPerM3
 
+# Compiled regex patterns for ADF15 file parsing
+_ADF_HEADER_MATCH = re.compile(r"^\s*(\d*) {4}/(.*)/?\s*$")
+_PEC_INDEX_HEADER_MATCH_STANDARD = re.compile(r"^C\s*ISEL\s*(?:WAVELENGTH|WVLEN\(A\))\s*TRANSITION\s*TYPE", re.IGNORECASE)
+_PEC_HYDROGEN_TRANSITION_MATCH = re.compile(r"^C\s*([0-9]*)\.\s*([0-9]*\.[0-9]*)\s*N=\s*([0-9]*) - N=\s*([0-9]*)\s*([A-Z]*)", re.IGNORECASE)
+_PEC_FULL_TRANSITION_MATCH = re.compile(r"^[cC]\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)", re.IGNORECASE)
+_CONFIGURATION_HEADER_MATCH = re.compile(r"^C\s*(?:lv\s+)?Configuration\s*\(2S\+1\)L\(w-1/2\)\s*Energy\s*\(cm(?:\*\*|\^)-1\)\s*$", re.IGNORECASE)
+_CONFIGURATION_STRING_MATCH = re.compile(
+    r"^[cC]\s*([0-9]+)\s*"
+    r"((?:[0-9][SPDFG][0-9](?:\s+[0-9][SPDFG][0-9])*)|(?:[0-9A-Z]+))\s*"
+    r"\(([0-9]*\.?[0-9]+)\)"
+    r"\s*([0-9]+)"
+    r"\(\s*([0-9]*\.?[0-9]+)\)",
+    re.IGNORECASE,
+)
+_WAVELENGTH_MATCH = re.compile(r"^\s*[0-9]*\.[0-9]* ?a?\s+[0-9]+\s+[0-9]+.*?/isel *= *[0-9]+\s*$", re.IGNORECASE)  # first line of a data block; trailing whitespace tolerated
+_BLOCK_ID_MATCH = re.compile(r"^\s*[0-9]*\.[0-9]* ?a?\s*([0-9]*)\s*([0-9]*).*/type *= *([a-zA-Z]*).*/isel *= *([0-9]*)\s*$", re.IGNORECASE)  # same "/isel =" spacing rules as _WAVELENGTH_MATCH
 
 _L_LOOKUP = {
-    0: 'S',
-    1: 'P',
-    2: 'D',
-    3: 'F',
-    4: 'G',
-    5: 'H',
-    6: 'I',
-    7: 'K',
-    8: 'L',
-    9: 'M',
-    10: 'N',
-    11: 'O',
-    12: 'Q',
-    13: 'R',
+    0: "S",
+    1: "P",
+    2: "D",
+    3: "F",
+    4: "G",
+    5: "H",
+    6: "I",
+    7: "K",
+    8: "L",
+    9: "M",
+    10: "N",
+    11: "O",
+    12: "Q",
+    13: "R",
+    14: "T",
+    15: "U",
+    16: "V",
+    17: "W",
+    18: "X",
+    19: "Y",
+    20: "Z",
 }
 
 
@@ -52,26 +77,25 @@ def parse_adf15(element, charge, adf_file_path, header_format=None):
     """
 
     if not isinstance(element, Element):
-        raise TypeError('The element must be an Element object.')
+        raise TypeError("The element must be an Element object.")
 
     charge = int(charge)
 
     with open(adf_file_path, "r") as file:
-
-        # for check header line
+        # check the header line
         header = file.readline()
-        if not re.match(r'^\s*(\d*) {4}/(.*)/?\s*$', header):
-            raise ValueError('The specified path does not point to a valid ADF15 file.')
+        if not _ADF_HEADER_MATCH.match(header):
+            raise ValueError("The specified path does not point to a valid ADF15 file.")
 
         # scrape transition information and wavelength
         # use simple electron configuration structure for hydrogen-like ions
-        if header_format == 'hydrogen' or element == hydrogen:
+        if header_format == "hydrogen" or element == hydrogen:
             config = _scrape_metadata_hydrogen(file, element, charge)
-        elif header_format == 'hydrogen-like':
+        elif header_format == "hydrogen-like":
             config = _scrape_metadata_hydrogen_like(file, element, charge)
         elif element.atomic_number - charge == 1:
             config = _scrape_metadata_hydrogen_like(file, element, charge)
-            if not config and 'bnd#' in adf_file_path:
+            if not config and "bnd#" in adf_file_path:
                 # ADF15 files with the "bnd" suffix may have metadata in the "hydrogen" format
                 config = _scrape_metadata_hydrogen(file, element, charge)
         else:
@@ -82,14 +106,14 @@ def parse_adf15(element, charge, adf_file_path, header_format=None):
 
         # process rate data
         rates = RecursiveDict()
-        for cls in ('excitation', 'recombination', 'thermalcx'):
+        for cls in ("excitation", "recombination", "thermalcx"):
             for element, charge_states in config[cls].items():
                 for charge, transitions in charge_states.items():
                     for transition in transitions.keys():
                         block_num = config[cls][element][charge][transition]
                         rates[cls][element][charge][transition] = _extract_rate(file, block_num)
 
-    wavelengths = config['wavelength']
+    wavelengths = config["wavelength"]
     return rates, wavelengths
 
 
@@ -104,29 +128,25 @@ def _scrape_metadata_hydrogen(file, element, charge):
     file.seek(0)
     lines = file.readlines()
 
-    pec_index_header_match = r'^C\s*ISEL\s*WAVELENGTH\s*TRANSITION\s*TYPE'
-    while not re.match(pec_index_header_match, lines[0], re.IGNORECASE):
+    while not _PEC_INDEX_HEADER_MATCH_STANDARD.match(lines[0]):
         lines.pop(0)
     index_lines = lines
-
     for i in range(len(index_lines)):
-
-        pec_hydrogen_transition_match = r'^C\s*([0-9]*)\.\s*([0-9]*\.[0-9]*)\s*N=\s*([0-9]*) - N=\s*([0-9]*)\s*([A-Z]*)'
-        match = re.match(pec_hydrogen_transition_match, index_lines[i], re.IGNORECASE)
+        match = _PEC_HYDROGEN_TRANSITION_MATCH.match(index_lines[i])
         if not match:
             continue
 
         block_num = int(match.groups()[0])
         wavelength = float(match.groups()[1]) / 10  # convert Angstroms to nm
         upper_level = int(match.groups()[2])
         lower_level = int(match.groups()[3])
-        rate_type_adas = match.groups()[4]
-        if rate_type_adas == 'EXCIT':
-            rate_type = 'excitation'
-        elif rate_type_adas == 'RECOM':
-            rate_type = 'recombination'
-        elif rate_type_adas == 'CHEXC':
-            rate_type = 'thermalcx'
+        rate_type_adas = match.groups()[4].upper()
+        if rate_type_adas == "EXCIT":
+            rate_type = "excitation"
+        elif rate_type_adas == "RECOM":
+            rate_type = "recombination"
+        elif rate_type_adas == "CHEXC":
+            rate_type = "thermalcx"
         else:
             raise ValueError("Unrecognised rate type - {}".format(rate_type_adas))
 
@@ -147,29 +167,25 @@ def _scrape_metadata_hydrogen_like(file, element, charge):
     file.seek(0)
     lines = file.readlines()
 
-    pec_index_header_match = r'^C\s*ISEL\s*WAVELENGTH\s*TRANSITION\s*TYPE'
-    while not re.match(pec_index_header_match, lines[0], re.IGNORECASE):
+    while not _PEC_INDEX_HEADER_MATCH_STANDARD.match(lines[0]):
         lines.pop(0)
     index_lines = lines
-
     for i in range(len(index_lines)):
-
-        pec_full_transition_match = r'^C\s*([0-9]*)\.\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)'
-        match = re.match(pec_full_transition_match, index_lines[i], re.IGNORECASE)
+        match = _PEC_FULL_TRANSITION_MATCH.match(index_lines[i])
         if not match:
             continue
 
         block_num = int(match.groups()[0])
         wavelength = float(match.groups()[1]) / 10  # convert Angstroms to nm
         upper_level = int(match.groups()[2])
         lower_level = int(match.groups()[3])
-        rate_type_adas = match.groups()[4]
-        if rate_type_adas == 'EXCIT':
-            rate_type = 'excitation'
-        elif rate_type_adas == 'RECOM':
-            rate_type = 'recombination'
-        elif rate_type_adas == 'CHEXC':
-            rate_type = 'thermalcx'
+        rate_type_adas = match.groups()[4].upper()
+        if rate_type_adas == "EXCIT":
+            rate_type = "excitation"
+        elif rate_type_adas == "RECOM":
+            rate_type = "recombination"
+        elif rate_type_adas == "CHEXC":
+            rate_type = "thermalcx"
         else:
             raise ValueError("Unrecognised rate type - {}".format(rate_type_adas))
 
@@ -193,19 +209,14 @@ def _scrape_metadata_full(file, element, charge):
     configuration_lines = []
     configuration_dict = {}
 
-    configuration_header_match = r'^C\s*Configuration\s*\(2S\+1\)L\(w-1/2\)\s*Energy \(cm\*\*-1\)$'
-    while not re.match(configuration_header_match, lines[0], re.IGNORECASE):
+    while not _CONFIGURATION_HEADER_MATCH.match(lines[0]):
         lines.pop(0)
-    pec_index_header_match = r'^C\s*ISEL\s*WAVELENGTH\s*TRANSITION\s*TYPE'
-    while not re.match(pec_index_header_match, lines[0], re.IGNORECASE):
+    while not _PEC_INDEX_HEADER_MATCH_STANDARD.match(lines[0]):
         configuration_lines.append(lines[0])
         lines.pop(0)
     index_lines = lines
-
     for i in range(len(configuration_lines)):
-
-        configuration_string_match = r"^C\s*([0-9]*)\s*((?:[0-9][SPDFG][0-9]\s)*)\s*\(([0-9]*\.?[0-9]*)\)([0-9]*)\(\s*([0-9]*\.?[0-9]*)\)"
-        match = re.match(configuration_string_match, configuration_lines[i], re.IGNORECASE)
+        match = _CONFIGURATION_STRING_MATCH.match(configuration_lines[i])
         if not match:
             continue
 
@@ -215,13 +226,10 @@ def _scrape_metadata_full(file, element, charge):
         total_orbital_quantum_number = _L_LOOKUP[int(match.groups()[3])]  # L
         total_angular_momentum_quantum_number = match.groups()[4]  # J
 
-        configuration_dict[config_id] = (electron_configuration + " " + spin_multiplicity +
-                                         total_orbital_quantum_number + total_angular_momentum_quantum_number)
+        configuration_dict[config_id] = electron_configuration + " " + spin_multiplicity + total_orbital_quantum_number + total_angular_momentum_quantum_number
 
     for i in range(len(index_lines)):
-
-        pec_full_transition_match = r'^C\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)'
-        match = re.match(pec_full_transition_match, index_lines[i], re.IGNORECASE)
+        match = _PEC_FULL_TRANSITION_MATCH.match(index_lines[i])
         if not match:
             continue
 
@@ -231,13 +239,13 @@ def _scrape_metadata_full(file, element, charge):
         upper_level = configuration_dict[upper_level_id]
         lower_level_id = int(match.groups()[3])
         lower_level = configuration_dict[lower_level_id]
-        rate_type_adas = match.groups()[4]
-        if rate_type_adas == 'EXCIT':
-            rate_type = 'excitation'
-        elif rate_type_adas == 'RECOM':
-            rate_type = 'recombination'
-        elif rate_type_adas == 'CHEXC':
-            rate_type = 'thermalcx'
+        rate_type_adas = match.groups()[4].upper()
+        if rate_type_adas == "EXCIT":
+            rate_type = "excitation"
+        elif rate_type_adas == "RECOM":
+            rate_type = "recombination"
+        elif rate_type_adas == "CHEXC":
+            rate_type = "thermalcx"
         else:
             raise ValueError("Unrecognised rate type - {}".format(rate_type_adas))
 
@@ -255,11 +263,8 @@ def _extract_rate(file, block_num):
     # search from start of file
     file.seek(0)
 
-    wavelength_match = r"^\s*[0-9]*\.[0-9]* ?a? +.*$"
-    block_id_match = r"^\s*[0-9]*\.[0-9]* ?a?\s*([0-9]*)\s*([0-9]*).*/type *= *([a-zA-Z]*).*/isel *= * ([0-9]*)$"
-
-    for block in _group_by_block(file, wavelength_match):
-        match = re.match(block_id_match, block[0], re.IGNORECASE)
+    for block in _group_by_block(file, _WAVELENGTH_MATCH):
+        match = _BLOCK_ID_MATCH.match(block[0])
 
         if not match:
             continue
@@ -311,24 +316,24 @@ def _extract_rate(file, block_num):
             density = PerCm3ToPerM3.to(density)
             rates = Cm3ToM3.to(rates)
 
-            return {'ne': density, 'te': temperature, 'rate': rates}
+            return {"ne": density, "te": temperature, "rate": rates}
 
     # If code gets to here, block wasn't found.
-    raise RuntimeError('Block number {} was not found in the ADF15 file.'.format(block_num))
+    raise RuntimeError("Block number {} was not found in the ADF15 file.".format(block_num))
 
 
-def _group_by_block(source_file, match_string):
+def _group_by_block(source_file, match_pattern):
     """
-    Generator the splits the ADF15 file into blocks.
+    Generator that splits the ADF15 file into blocks.
 
-    Groups lines of file into blocks based on precursor '  6561.9A   24...'
+    Groups lines of file into blocks based on wavelength pattern match.
 
     Note: comment section not filtered out of last block, don't over-read!
     """
 
     buffer = []
     for line in source_file:
-        if re.match(match_string, line, re.IGNORECASE):
+        if match_pattern.match(line):
             if buffer:
                 yield buffer
             buffer = [line]