In [None]:
import pandas as pd
import numpy as np
import re
from scipy.constants import milli, day, year, nano, micro, pico,minute, hour, atto, zepto, giga,mega, tera, yotta, femto, kilo, peta, exa, zetta
from scipy.constants import m_p, m_n, u, kilo, eV, c

isotopes parser

In [25]:
def parse_nubase_line(line):
    # Extract fixed-width fields
    ZZZi = line[4:8].strip()
    AAA = line[0:3].strip()
    raw_element = line[11:16].strip()
    isomer_flag = line[16:17].strip()
    mass_excess = line[18:31].strip() # excess from A*u
    half_life = line[69:78].strip()
    half_life_unit = line[78:80].strip()  # Half-life unit
    spin_parity = line[88:102].strip()
    br_field = line[119:209].strip()

    # Separate mass number and symbol
    match = re.match(r"(\d+)([A-Za-z]+)", raw_element)
    if match:
        mass_number = int(match.group(1))
        symbol = match.group(2)
    else:
        mass_number = None
        symbol = raw_element  # fallback

    formatted_name = f"{symbol}{mass_number}"


    # Compute ZAID
    try:
        Z = int(ZZZi[:3])
        A = int(AAA)
        zaid = Z * 1000 + A
    except ValueError:
        return None  # skip malformed lines
    
    isomer_suffix_map = {
        "m": "1",  # isomer
        "n": "2",  # isomer
        "p": "3",  # excited level
        "q": "4",  # excited level
        "r": "5",  # resonance
        "i": "6",  # IAS
        "j": "7"   # IAS
    }    
    suffix = isomer_suffix_map.get(isomer_flag)
    if suffix:
        zaid = int(f"{zaid}{suffix}")
        formatted_name += isomer_flag



    # Clean mass and half-life
    try:
        mass_excess = float(mass_excess.replace('#', '').replace('*', ''))
    except ValueError:
        mass_excess = 0

   # Parse half-life value
    hl_raw = half_life.lower()
    if hl_raw.startswith("stbl"):
        hl_val = float("inf")
    elif hl_raw.startswith("p-unst"):
        hl_val = 0.0
    else:
        try:
            hl_val = float(hl_raw.replace('#', '').replace('*', ''))
        except ValueError:
            hl_val = None

    # Convert half-life to seconds

   # Convert half-life to seconds
    unit_map = {
        "ms": milli, "us": micro, "ns": nano, "ps": pico,
        "s": 1, "m": minute, "h": hour, "d": day, "y": year,
        "ys": year,  # years    
        # Rare and extended units
        "as": atto,     # attoseconds
        "zs": zepto,     # zeptoseconds
        "fs": femto,     # femtoseconds
        "ky": kilo*year,  # kiloyears
        "my": mega*year,  # megayears
        "gy": giga*year,  # gigayears
        "ty": tera*year,  # terayears
        "py": peta*year,  # petayears
        "ey": exa*year,  # exayears
        "zy": zetta*year,  # zettayears
        "yy": yotta*year   # yottayears

    }
    half_life_unit_clean = half_life_unit.lower()
    unit_factor = unit_map.get(half_life_unit_clean)

    if unit_factor is None and half_life_unit_clean:
        print(f"Unrecognized unit for ZAID {zaid}: '{half_life_unit_clean}'")


    if hl_val is None or unit_factor is None or hl_val == 0.0:
        hl_seconds = None
    else:
        hl_seconds = hl_val * unit_factor

    # Compute decay constant safely
    if hl_seconds is None:
        decay = None
    else:
        decay = hl_seconds / np.log(2)

    abundance = None
    if "IS=" in br_field:
        try:
            abundance_str = br_field.split("IS=")[1].split()[0]
            abundance = float(abundance_str)
        except (IndexError, ValueError):
            abundance = 0
    return {
        "Z": Z,
        "A": A,
        "ZAID": zaid,
        "symbol": formatted_name,
        "mass": A + mass_excess*kilo*eV/c**2/u ,
        "decay": decay,
        "abundance": abundance,
    }

In [26]:
# Load and parse
with open("./nubase_4.mas20.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()

# Skip headers and parse valid lines
data_lines = [line for line in lines if line.strip() and not line.startswith("#")]
parsed = [parse_nubase_line(line) for line in data_lines]
parsed = [row for row in parsed if row is not None]

# Create DataFrame
df = pd.DataFrame(parsed)

In [27]:
df = df.sort_values(by=["Z", "A"]).reset_index(drop=True)

In [28]:
df.to_csv("nubase2020.csv", index=False)

In [30]:
df[df.symbol=="H1"]

Unnamed: 0,Z,A,ZAID,symbol,mass,decay,abundance
1,1,1,1001,H1,1.007825,,99.9855


elements parser

In [47]:
# Load and parse
with open("./nubase_4.mas20.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()
elements = []

# Skip headers and parse valid lines
data_lines = [line for line in lines if line.strip() and not line.startswith("#")]
parsed = [parse_nubase_line(line) for line in data_lines]
parsed = [row for row in parsed if row is not None]

elements = []
seen = set()

for parsed_line in parsed:
    if parsed_line['symbol'] == 'n1':
        continue

    element_symb = re.sub(r'\d+', '', parsed_line['symbol'])

    # Only add if not seen before
    if element_symb not in seen:
        elements.append((element_symb, parsed_line['Z']))
        seen.add(element_symb)

# Create DataFrame
df = pd.DataFrame(elements, columns=['element', 'Z'])

In [49]:
df.to_csv("elements.csv", index=False)

In [54]:
df[df.element=='Cu'].Z.item() == 29

True