# Cutting data from 1 kHz to 5 kHz - Fit - Interpolation

In [8]:
"""
Batch EIS semicircle fitter — Jupyter/Notebook version
=====================================================

This notebook-friendly script traverses a nested directory structure and fits
a simple complex impedance model (Rs + Zarc/Cole–Cole) to the 1–5 kHz portion
of each impedance spectrum. It writes:
  • a summary CSV with model values at fixed frequencies (1–5 kHz, step 500 Hz)
  • a Nyquist plot PNG of the data and the fitted curve

HOW TO USE IN A NOTEBOOK
------------------------
1) Set ROOT_DIR below to your top-level folder (e.g., 'NPG-500mM-H2SO4-C,T').
2) Run this cell. If ROOT_DIR exists, it will process eligible CSVs right away.
   - Only files whose *basename* ends with '_2' (before '.csv') are processed.

Folder layout expected (example):
    root_dir/
      ├── 1mM/
      │   ├── 26C/
      │   │    ├── <csv files...>
      │   │    └── ...
      │   ├── 28C/
      │   └── ...
      ├── 2mM/
      └── ...

Column naming assumptions:
  • Frequency column contains "frequency" (e.g., "Frequency (Hz)").
  • Real part (Z′) column contains "WE.Z'" or "Z'".
  • Imag part is stored as NEGATIVE imaginary (−Z″), column contains "-Z\"" or "imag".
If your exports differ, adjust `identify_columns`.

Outputs per input file (written next to it):
  single_frequency_summary.csv   (model values at the fixed frequencies)
  <basename>_1000-5000_fit.png
"""

# --- SET YOUR ROOT FOLDER HERE ---
# Top-level data folder. The batch run at the bottom of this cell only starts
# when this path is an existing directory; otherwise a hint is printed.
ROOT_DIR = r'/Volumes/01785304894/Uni Stuff/Measurement_Data/2025/June-July/NPG-500mM-H2SO4-C,T'  # <- put your root folder here

# --- Imports ---
import os
from typing import Tuple, Sequence, Optional, Dict, List

import numpy as np
import pandas as pd
from scipy.optimize import least_squares
import matplotlib.pyplot as plt


def identify_columns(df: pd.DataFrame) -> Tuple[str, str, str]:
    """Identify the frequency, real and (negative) imaginary column names.

    Heuristics (case-insensitive; the typographic minus and prime characters
    are normalised to their ASCII forms before matching):
      • Frequency: header contains 'frequency'
      • Real part (Z′): header contains "z'" but no minus sign and no
        double prime (so a Z″ column is never mistaken for Z′)
      • Negative imag (−Z″): header contains '-z' and one of 'imag', a double
        quote, or two consecutive single quotes

    When several headers match the same role, the last one wins.  Roles that
    are still unassigned afterwards are filled, in column order, from the
    columns that have not been assigned to any role yet, so the same column
    is never returned twice.

    Returns
    -------
    freq_col : str
    zreal_col : str
    zimag_col : str  (NOTE: this is the NEGATIVE imaginary column, i.e., −Z″)

    Raises
    ------
    ValueError
        If no frequency column is found, or if there are not enough spare
        columns to fill a missing impedance role.
    """
    def _normalise(header: object) -> str:
        # str() keeps non-string labels (e.g. integer headers) from crashing .lower().
        text = str(header).lower()
        for fancy, plain in (("−", "-"), ("′", "'"), ("″", '"')):
            text = text.replace(fancy, plain)
        return text

    freq_col: Optional[str] = None
    zreal_col: Optional[str] = None
    zimag_col: Optional[str] = None

    for col in df.columns:
        lc = _normalise(col)
        if 'frequency' in lc:
            freq_col = col
        if "z'" in lc and "-" not in lc and 'z"' not in lc and "z''" not in lc:
            zreal_col = col
        if "-z" in lc and ("imag" in lc or '"' in lc or "''" in lc):
            zimag_col = col

    if freq_col is None:
        raise ValueError("Frequency column not found.")

    if zreal_col is None or zimag_col is None:
        # Positional fallback: take the first columns that are not already in
        # use, real part first, then negative imaginary part.
        assigned = [c for c in (freq_col, zreal_col, zimag_col) if c is not None]
        spare = iter([c for c in df.columns if c not in assigned])
        try:
            if zreal_col is None:
                zreal_col = next(spare)
            if zimag_col is None:
                zimag_col = next(spare)
        except StopIteration:
            raise ValueError("Could not identify impedance columns.") from None

    return freq_col, zreal_col, zimag_col


def zarc_impedance(R: float, tau: float, gamma: float, w: np.ndarray) -> np.ndarray:
    """Return the impedance of a Zarc (Cole–Cole) element.

    Evaluates R / (1 + (j ω τ)^γ) for the angular frequencies in ``w``.
    """
    j_omega_tau = 1j * w * tau
    denominator = 1 + j_omega_tau ** gamma
    return R / denominator


def model_Rs_Zarc(params: Sequence[float], w: np.ndarray) -> np.ndarray:
    """Return the impedance of a series resistance plus one Zarc element.

    ``params`` unpacks to (Rs, R, tau, gamma); ``w`` is passed on to
    ``zarc_impedance`` unchanged.
    """
    series_resistance, arc_resistance, time_constant, exponent = params
    arc = zarc_impedance(arc_resistance, time_constant, exponent, w)
    return series_resistance + arc


def residual_Rs_Zarc(params: Sequence[float], w: np.ndarray, Z: np.ndarray) -> np.ndarray:
    """Return model-minus-data residuals, real parts first, then imaginary parts."""
    predicted = model_Rs_Zarc(params, w)
    real_residual = predicted.real - Z.real
    imag_residual = predicted.imag - Z.imag
    return np.hstack((real_residual, imag_residual))


def fit_semicircle(f: np.ndarray, Z: np.ndarray) -> np.ndarray:
    """Fit Rs + Zarc to the provided frequency window.

    Initial guesses:
      Rs0  := min(Re(Z))
      R0   := max(Re(Z)) - min(Re(Z))   (set to 1 if nonpositive)
      tau0 := 1 / ω_peak where ω_peak is at max(-Im(Z))
      γ0   := 0.8
    The guess is then clipped into the bounds below, because
    ``least_squares`` refuses a starting point outside them (this happens,
    for example, when min(Re(Z)) is negative).

    Bounds:
      Rs ≥ 0, R ≥ 0, tau ≥ 1e-9, 0 ≤ γ ≤ 1

    Parameters
    ----------
    f : np.ndarray
        Frequencies in Hz.
    Z : np.ndarray
        Complex impedance at those frequencies.

    Returns
    -------
    np.ndarray
        Fitted parameters ``[Rs, R, tau, gamma]``.

    Raises
    ------
    ValueError
        If ``Z`` is empty.
    """
    if Z.size == 0:
        raise ValueError("Cannot fit an empty impedance window.")

    w = 2 * np.pi * f

    re_min = float(np.min(Z.real))
    Rs0 = re_min
    R0 = float(np.max(Z.real)) - re_min
    if R0 <= 0:
        R0 = 1.0

    idx_peak = int(np.argmax(-Z.imag))
    w_peak = float(w[idx_peak]) if idx_peak < w.size else 1.0
    tau0 = 1.0 / w_peak if w_peak > 0 else 1e-3

    lower = [0, 0, 1e-9, 0.0]
    upper = [np.inf, np.inf, np.inf, 1.0]
    # Keep the starting point feasible; values already inside the bounds are unchanged.
    init = np.clip([Rs0, R0, tau0, 0.8], lower, upper)

    res = least_squares(
        residual_Rs_Zarc, init, bounds=(lower, upper), args=(w, Z), max_nfev=20000
    )
    return res.x  # [Rs, R, tau, gamma]


def process_file(
    filepath: str,
    freq_window: Tuple[float, float] = (1000.0, 5000.0),
    freq_points: Optional[Sequence[float]] = None,
    show_plot: bool = False,
) -> Optional[Dict[str, float]]:
    """Fit one impedance CSV inside ``freq_window`` and save the outputs next to it.

    Steps:
      - read the CSV and identify the frequency / Z′ / (−Z″) columns
      - keep the rows whose frequency lies in ``freq_window`` (inclusive) and
        whose impedance is finite
      - fit Rs + Zarc to those rows
      - evaluate the fitted model at ``freq_points``
      - write the summary CSV and a Nyquist plot PNG

    Parameters
    ----------
    filepath : str
        Path of the input CSV.
    freq_window : (float, float)
        Lower and upper frequency limit in Hz; also used for the PNG file
        name and the plot title.
    freq_points : sequence of float or None
        If None, uses [1000, 1500, ..., 5000].
    show_plot : bool
        If True, display the Nyquist plot inline (in addition to saving it).

    Returns
    -------
    dict with fitted parameters and output paths, or None if the file was
    skipped (columns not identified, or no usable rows in the window).
    """
    if freq_points is None:
        freq_points = [1000.0, 1500.0, 2000.0, 2500.0, 3000.0, 3500.0, 4000.0, 4500.0, 5000.0]

    # Load data
    df = pd.read_csv(filepath)

    # Identify columns
    try:
        freq_col, zreal_col, zimag_col = identify_columns(df)
    except ValueError as e:
        print(f"Skipping {filepath}: {e}")
        return None

    # Build complex impedance (note: input imag column is −Z″)
    f = df[freq_col].astype(float).to_numpy()
    Z_real = df[zreal_col].astype(float).to_numpy()
    Z_neg_imag = df[zimag_col].astype(float).to_numpy()
    Z = Z_real + 1j * (-Z_neg_imag)

    # Select frequency window
    fmin, fmax = freq_window
    in_window = (f >= fmin) & (f <= fmax)
    if not np.any(in_window):
        print(f"No data in the specified frequency window {freq_window} for {filepath}.")
        return None

    # NaN/inf impedance values would make the optimiser abort, so leave them out.
    usable = in_window & np.isfinite(Z)
    dropped = int(in_window.sum() - usable.sum())
    if dropped:
        print(f"Ignoring {dropped} non-finite row(s) in {filepath}.")
    if not np.any(usable):
        print(f"No finite data in the specified frequency window {freq_window} for {filepath}.")
        return None

    f_sub = f[usable]
    Z_sub = Z[usable]

    # Fit
    params = fit_semicircle(f_sub, Z_sub)
    Rs, R, tau, gamma = [float(p) for p in params]

    # Predictions at requested points
    freq_pts = np.array(freq_points, dtype=float)
    w_pts = 2 * np.pi * freq_pts
    Z_pred = model_Rs_Zarc(params, w_pts)
    summary = pd.DataFrame({
        'Frequency (Hz)': freq_pts,
        "Z' (Ω)": Z_pred.real,
        "-Z'' (Ω)": -Z_pred.imag,
        '|Z| (Ω)': np.abs(Z_pred),
        '-Phase (deg)': -np.degrees(np.angle(Z_pred)),
    })

    # Write summary CSV.
    # NOTE: the name is the same for every input, so a second eligible file in
    # the same folder overwrites the summary written for the first one.
    out_dir = os.path.dirname(filepath)
    base = os.path.splitext(os.path.basename(filepath))[0]
    out_csv_name = "single_frequency_summary.csv"
    out_csv_path = os.path.join(out_dir, out_csv_name)
    summary.to_csv(out_csv_path, index=False)

    # Nyquist plot over the fitted region (fit curve drawn in descending frequency)
    w_sub = 2 * np.pi * f_sub
    Z_fit_sub = model_Rs_Zarc(params, w_sub)
    order = np.argsort(f_sub)[::-1]

    # ':g' renders the default window as "1000" / "5000", so the default
    # file name and title are unchanged.
    out_png_name = f"{base}_{fmin:g}-{fmax:g}_fit.png"
    out_png_path = os.path.join(out_dir, out_png_name)

    fig = plt.figure(figsize=(6, 6))
    try:
        plt.plot(Z_sub.real, -Z_sub.imag, 'o', markersize=4, label='Data')
        plt.plot(Z_fit_sub.real[order], -Z_fit_sub.imag[order], '-', label='Fit')
        plt.xlabel("Z' (Ω)")
        plt.ylabel("-Z'' (Ω)")
        plt.title(f'Nyquist plot ({fmin:g}–{fmax:g} Hz)')
        plt.legend()
        plt.tight_layout()
        plt.savefig(out_png_path, dpi=200)
        if show_plot:
            plt.show()
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    print(f"Processed {os.path.basename(filepath)} → {os.path.basename(out_csv_path)}, {os.path.basename(out_png_path)}")

    return {
        "file": filepath,
        "out_csv": out_csv_path,
        "out_png": out_png_path,
        "Rs": Rs,
        "R": R,
        "tau": tau,
        "gamma": gamma,
    }


def scan_directory(root_dir: str, suffix: str = "_2", **process_kwargs) -> List[Dict[str, float]]:
    """Traverse the directory tree and process eligible CSV files.

    Only the layout ``root_dir/<concentration>/<temperature>/<file>.csv`` is
    visited. A file is eligible when its name ends with '.csv'
    (case-insensitive), its basename ends with ``suffix``, and it is not a
    macOS AppleDouble companion file (name starting with '._').

    Parameters
    ----------
    root_dir : str
        Top-level folder containing concentration and temperature subfolders.
    suffix : str
        Only CSVs whose *basename* ends with this suffix are processed.
    **process_kwargs :
        Forwarded to `process_file` (e.g., show_plot=True).

    Returns
    -------
    List of result dicts (one per processed file). A file whose processing
    raises is reported on stdout and left out of the list.
    """
    results: List[Dict[str, float]] = []

    # Walk one level into concentration dirs, then one level into temperature dirs
    for conc in sorted(os.listdir(root_dir)):
        conc_path = os.path.join(root_dir, conc)
        if not os.path.isdir(conc_path):
            continue

        for temp in sorted(os.listdir(conc_path)):
            temp_path = os.path.join(conc_path, temp)
            if not os.path.isdir(temp_path):
                continue

            for fname in sorted(os.listdir(temp_path)):
                # '._*' files are resource-fork metadata, not measurement data.
                if fname.startswith("._"):
                    continue
                if not fname.lower().endswith('.csv'):
                    continue
                base, _ = os.path.splitext(fname)
                if not base.endswith(suffix):
                    continue

                file_path = os.path.join(temp_path, fname)
                # Batch boundary: one bad file must not stop the whole run.
                try:
                    out = process_file(file_path, **process_kwargs)
                except Exception as exc:
                    print(f"Error processing {file_path}: {exc}")
                else:
                    if out is not None:
                        results.append(out)

    if not results:
        print("No files processed. Check your folder path and filename suffix.")
    return results


# --- Start the batch only when ROOT_DIR exists (keeps notebook runs safe) ---
if not os.path.isdir(ROOT_DIR):
    print(f"Set ROOT_DIR to a valid folder; current value not found:\n{ROOT_DIR}")
else:
    # Pass show_plot=True to also display each Nyquist plot inline.
    _results = scan_directory(ROOT_DIR, show_plot=False)


Processed EIS_whole_spectrum_10mM_H2SO4_26C_60kHz_0.1Hz_pH=1.864_2.csv → single_frequency_summary.csv, EIS_whole_spectrum_10mM_H2SO4_26C_60kHz_0.1Hz_pH=1.864_2_1000-5000_fit.png
Processed EIS_whole_spectrum_10mM_H2SO4_28C_60kHz_0.1Hz_pH=1.870_2.csv → single_frequency_summary.csv, EIS_whole_spectrum_10mM_H2SO4_28C_60kHz_0.1Hz_pH=1.870_2_1000-5000_fit.png
Processed EIS_whole_spectrum_10mM_H2SO4_30C_60kHz_0.1Hz_pH=1.873_2.csv → single_frequency_summary.csv, EIS_whole_spectrum_10mM_H2SO4_30C_60kHz_0.1Hz_pH=1.873_2_1000-5000_fit.png
Processed EIS_whole_spectrum_10mM_H2SO4_32C_60kHz_0.1Hz_pH=1.876_2.csv → single_frequency_summary.csv, EIS_whole_spectrum_10mM_H2SO4_32C_60kHz_0.1Hz_pH=1.876_2_1000-5000_fit.png
Processed EIS_whole_spectrum_10mM_H2SO4_34C_60kHz_0.1Hz_pH=1.879_2.csv → single_frequency_summary.csv, EIS_whole_spectrum_10mM_H2SO4_34C_60kHz_0.1Hz_pH=1.879_2_1000-5000_fit.png
Processed EIS_whole_spectrum_10mM_H2SO4_36C_60kHz_0.1Hz_pH=1.882_2.csv → single_frequency_summary.csv, EIS_who

# This block of code takes the single-frequency measurements, averages them, and writes a summary CSV in the same format as the previous code

In [8]:
# --- Single-frequency folder aggregator (robust) ---

import os, re
from typing import Dict, List, Optional, Tuple
import numpy as np
import pandas as pd

def identify_columns(df: pd.DataFrame) -> Tuple[Optional[str], str, str]:
    """
    Return (freq_col or None, zreal_col, neg_zimag_col) by matching typical EIS headers,
    e.g. "WE.Z' (Ω)" and 'WE.-Z" (Ω)'. Tolerates WE. prefix, Ω/°, and unicode minus.

    For each role the first matching column (in column order) is used. A
    double-prime header (Z" or Z'') is never accepted as the real part, and
    the column chosen as real part is never reused as the imaginary part.

    Raises ValueError when the real or the imaginary column cannot be found.
    """
    cols = list(df.columns)

    def norm(s) -> str:
        s = str(s).replace("−", "-")       # unicode minus -> ascii; str() tolerates non-string labels
        s = s.replace("Ω", "ohm").replace("°", "deg")
        return s.lower()

    def is_double_prime(nc: str) -> bool:
        return 'z"' in nc or "z''" in nc

    # Z' (real)
    zreal = None
    for c in cols:
        nc = norm(c)
        single_prime = "z'" in nc and not is_double_prime(nc)
        if single_prime or "z prime" in nc or "zreal" in nc or "z re" in nc or "z_re" in nc:
            zreal = c
            break

    # -Z'' (negative imaginary)
    zim_neg = None
    for c in cols:
        nc = norm(c)
        if "-z\"" in nc or "-z''" in nc or "minus z" in nc:
            zim_neg = c
            break
    # Fallback: if only a Z'' column exists, return that one. This function
    # does not report which pattern matched, so the sign convention of a
    # fallback column has to be checked by the caller.
    if zim_neg is None:
        for c in cols:
            if zreal is not None and c == zreal:
                continue
            nc = norm(c)
            if is_double_prime(nc) or "z_im" in nc or "zim" in nc:
                zim_neg = c
                break

    # Frequency (optional)
    fcol = None
    for c in cols:
        if "freq" in norm(c):
            fcol = c
            break

    if zreal is None or zim_neg is None:
        raise ValueError(f"Could not identify Z columns. Found: {cols}")
    return fcol, zreal, zim_neg

def extract_frequency_from_name(filename: str) -> float:
    """Return the first '<number>Hz' value found in ``filename`` as a float.

    The unit is matched case-insensitively and may be separated from the
    number by whitespace. Raises ValueError when no such token exists.
    """
    if (hit := re.search(r'(\d+(?:\.\d+)?)\s*Hz', filename, flags=re.IGNORECASE)) is not None:
        return float(hit[1])
    raise ValueError(f"Could not parse frequency from filename: {filename}")

def average_single_frequency_csv(csv_path: str) -> Dict[str, float]:
    """Average one single-frequency recording into a summary row.

    Reads the CSV, averages the Z' column and the imaginary column (ignoring
    values that are not numeric), and takes the frequency from the file name.

    Sign handling: the summary always stores −Z''. If the header of the
    imaginary column carries no minus sign in front of the Z (i.e. the column
    holds +Z''), its mean is negated.

    Returns
    -------
    dict with the keys "Frequency (Hz)", "Z' (Ω)", "-Z'' (Ω)", "|Z| (Ω)"
    and "-Phase (deg)".

    Raises
    ------
    ValueError
        If the columns cannot be identified, a column has no numeric value,
        or the file name contains no frequency.
    """
    df = pd.read_csv(csv_path, encoding="utf-8")
    _freq_col, zreal_col, zimag_col = identify_columns(df)

    zr = pd.to_numeric(df[zreal_col], errors='coerce').to_numpy(dtype=float)
    zim = pd.to_numeric(df[zimag_col], errors='coerce').to_numpy(dtype=float)

    # nanmean of an empty/all-NaN column would only yield NaN plus a warning;
    # fail loudly so the caller can skip the file instead of writing NaN rows.
    if np.isnan(zr).all() or np.isnan(zim).all():
        raise ValueError(f"No numeric impedance values in {os.path.basename(csv_path)}")

    # Average (ignore NaNs)
    zr_mean = float(np.nanmean(zr))
    zim_mean = float(np.nanmean(zim))

    # Decide from the header whether the column already holds −Z''.
    header = str(zimag_col).replace("−", "-").lower()
    stored_negated = re.search(r"(?:-|minus)\s*z", header) is not None
    zneg_mean = zim_mean if stored_negated else -zim_mean

    # Mean impedance as a complex number: Z = Z' − j·(−Z'')
    Zbar = zr_mean + 1j * (-zneg_mean)
    mag = float(np.abs(Zbar))
    minus_phase_deg = float(-np.degrees(np.angle(Zbar)))

    freq = extract_frequency_from_name(os.path.basename(csv_path))

    return {
        "Frequency (Hz)": freq,
        "Z' (Ω)": zr_mean,
        "-Z'' (Ω)": zneg_mean,
        "|Z| (Ω)": mag,
        "-Phase (deg)": minus_phase_deg,
    }

def process_single_frequency_folder(folder: str,
                                    outfile_name: str = "single_frequency_summary.csv",
                                    verbose: bool = True) -> Optional[pd.DataFrame]:
    """Average every CSV in ``folder`` and write the rows to ``outfile_name``.

    AppleDouble '._' files and a file named exactly ``outfile_name`` are not
    treated as input. Files whose averaging raises are skipped. The rows are
    sorted by frequency before the summary is written into ``folder``.

    Returns the summary DataFrame, or None when ``folder`` is not a
    directory, contains no candidate CSV, or no file could be averaged.
    """
    def say(message: str) -> None:
        if verbose:
            print(message)

    if not os.path.isdir(folder):
        say(f"[skip] Not a folder: {folder}")
        return None

    candidates: List[str] = []
    for entry in os.listdir(folder):
        is_csv = entry.lower().endswith(".csv")
        if is_csv and not entry.startswith("._") and entry != outfile_name:
            candidates.append(entry)

    if not candidates:
        say(f"[info] No CSVs in: {folder}")
        return None

    averaged: List[Dict[str, float]] = []
    for name in sorted(candidates):
        try:
            record = average_single_frequency_csv(os.path.join(folder, name))
            averaged.append(record)
            say(f"[ok] Averaged {name} @ {record['Frequency (Hz)']:.0f} Hz")
        except Exception as e:
            say(f"[warn] Skipping {name}: {e}")

    if not averaged:
        say(f"[info] No valid rows produced in: {folder}")
        return None

    table = pd.DataFrame(averaged)
    table = table.sort_values("Frequency (Hz)")
    table = table.reset_index(drop=True)
    table.to_csv(os.path.join(folder, outfile_name), index=False)
    say(f"[done] Wrote {outfile_name} with {len(table)} rows in {folder}")
    return table

def process_all_single_frequency(root_dir: str,
                                 conc_whitelist=("14mM","15mM","16mM","17mM","18mM","19mM","20mM"),
                                 single_folder_name: str = "single-frequency",
                                 outfile_name: str = "single_frequency_summary.csv",
                                 verbose: bool = True):
    """Run the folder aggregator for every whitelisted concentration.

    Visits ``root_dir/<conc>/<temp>/<single_folder_name>`` for each
    concentration folder named in ``conc_whitelist`` and each temperature
    folder inside it.

    Returns a dict mapping each visited single-frequency path to the
    DataFrame (or None) returned by ``process_single_frequency_folder``.
    """
    results: Dict[str, Optional[pd.DataFrame]] = {}
    wanted = (name for name in sorted(os.listdir(root_dir)) if name in conc_whitelist)
    for conc in wanted:
        conc_path = os.path.join(root_dir, conc)
        if not os.path.isdir(conc_path):
            continue
        temp_paths = [os.path.join(conc_path, temp) for temp in sorted(os.listdir(conc_path))]
        for temp_path in filter(os.path.isdir, temp_paths):
            sf_path = os.path.join(temp_path, single_folder_name)
            results[sf_path] = process_single_frequency_folder(
                sf_path, outfile_name=outfile_name, verbose=verbose
            )
    return results

# Example: aggregate every whitelisted concentration below ROOT_DIR.
ROOT_DIR = r"/Volumes/01785304894/Uni Stuff/Measurement_Data/2025/June-July/NPG-500mM-H2SO4-whole"
if os.path.isdir(ROOT_DIR):
    all_results = process_all_single_frequency(ROOT_DIR)
else:
    # Volume not mounted / wrong path: print a hint instead of failing inside os.listdir.
    all_results = {}
    print(f"Set ROOT_DIR to a valid folder; current value not found:\n{ROOT_DIR}")


[ok] Averaged EIS_capillary_single_frequency_14mM_H2SO4_26C_1000Hz_pH=1.729.csv @ 1000 Hz
[ok] Averaged EIS_capillary_single_frequency_14mM_H2SO4_26C_1500Hz_pH=1.729.csv @ 1500 Hz
[ok] Averaged EIS_capillary_single_frequency_14mM_H2SO4_26C_2000Hz_pH=1.729.csv @ 2000 Hz
[ok] Averaged EIS_capillary_single_frequency_14mM_H2SO4_26C_2500Hz_pH=1.729.csv @ 2500 Hz
[ok] Averaged EIS_capillary_single_frequency_14mM_H2SO4_26C_3000Hz_pH=1.729.csv @ 3000 Hz
[ok] Averaged EIS_capillary_single_frequency_14mM_H2SO4_26C_3500Hz_pH=1.729.csv @ 3500 Hz
[ok] Averaged EIS_capillary_single_frequency_14mM_H2SO4_26C_4000Hz_pH=1.729.csv @ 4000 Hz
[ok] Averaged EIS_capillary_single_frequency_14mM_H2SO4_26C_4500Hz_pH=1.729.csv @ 4500 Hz
[ok] Averaged EIS_capillary_single_frequency_14mM_H2SO4_26C_5000Hz_pH=1.729.csv @ 5000 Hz
[done] Wrote single_frequency_summary.csv with 9 rows in /Volumes/01785304894/Uni Stuff/Measurement_Data/2025/June-July/NPG-500mM-H2SO4-whole/14mM/26C/single-frequency
[ok] Averaged EIS_capi

# This code takes the file named single_frequency_summary.csv from the single-frequency folder and moves it up one folder, then deletes the single-frequency folder, and finally moves the contents of the whole-spectrum folder up one folder as well

In [9]:
# --- Folder cleanup utility: move 'single_frequency_summary.csv' out of 'single-frequency',
#     then delete 'single-frequency', hoist contents of 'whole-spectrum' ---

from pathlib import Path
import shutil, time, os
from typing import Iterable, Dict, Any

# >>>>> SET YOUR ROOT FOLDER HERE <<<<<
# Folder whose sub-folders are <concentration>/<temperature>/...
ROOT_DIR = Path(r"/Volumes/01785304894/Uni Stuff/Measurement_Data/2025/June-July/NPG-500mM-H2SO4-whole")

# Options
CONC_WHITELIST = None  # e.g., {"14mM","15mM","16mM","17mM","18mM","19mM","20mM"} or None for all
DRY_RUN = False         # False = really move/delete; True = only print the planned actions
CONFLICT = "rename"     # what to do when the destination name exists; one of: "rename", "overwrite", "skip"

# Names to match (case-insensitive; underscores/spaces normalized to hyphens)
# NOTE(review): these two sets are not passed to the tidy_temperature_folders(...)
# call at the end of this cell; that call relies on the function's own defaults.
SINGLE_NAMES = {"single-frequency"}
WHOLE_NAMES  = {"whole-spectrum", "whole-specrtrum"}  # include common typo

SUMMARY_FILENAME = "single_frequency_summary.csv"      # file to extract from 'single-frequency'


# --- helpers (unchanged) ---
def _norm(name: str) -> str:
    s = name.strip().lower().replace("_", "-").replace(" ", "-")
    while "--" in s:
        s = s.replace("--", "-")
    return s

def _find_child_dir(parent: Path, candidates: Iterable[str]) -> Path | None:
    """Return the first sub-directory of ``parent`` whose normalised name is among ``candidates``.

    Both sides are compared after ``_norm``; returns None when nothing matches.
    """
    wanted = set(map(_norm, candidates))
    matches = (
        entry for entry in parent.iterdir()
        if entry.is_dir() and _norm(entry.name) in wanted
    )
    return next(matches, None)

def _unique_target(p: Path) -> Path:
    if not p.exists():
        return p
    stem, suffix = (p.stem, p.suffix)
    i = 1
    while True:
        cand = p.with_name(f"{stem} (copy{'' if i==1 else f' {i}'}){suffix}")
        if not cand.exists():
            return cand
        i += 1

def _move_item(src: Path, dst_dir: Path, conflict: str, dry_run: bool) -> Path | None:
    target = dst_dir / src.name
    if target.exists():
        if conflict == "skip":
            print(f"  [skip exists] {target.name}")
            return None
        elif conflict == "overwrite":
            print(f"  [overwrite]   {target.name}")
            if not dry_run:
                if target.is_dir():
                    shutil.rmtree(target)
                else:
                    target.unlink(missing_ok=True)
        elif conflict == "rename":
            new_target = _unique_target(target)
            print(f"  [rename]      {target.name} -> {new_target.name}")
            target = new_target
        else:
            raise ValueError("CONFLICT must be one of: 'rename', 'overwrite', 'skip'")
    else:
        print(f"  [move]        {src.name} -> {dst_dir.name}/")
    if not dry_run:
        shutil.move(str(src), str(target))
    return target

# --- robust delete that tolerates disappearing entries ---
def safe_rmtree(path: Path, retries: int = 3, pause: float = 0.1) -> None:
    """Delete ``path`` recursively, retrying on OSError.

    A missing path counts as success. After each failed attempt except the
    last, the function sleeps ``pause`` seconds; on the last failure it
    prints the remaining directory entries and re-raises the error.
    """
    attempt = 0
    while attempt < retries:
        attempt += 1
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            return
        except OSError as e:
            if attempt < retries:
                time.sleep(pause)
                continue
            try:
                entries = [p.name for p in path.iterdir()] if path.exists() else []
            except Exception:
                entries = ["<unreadable>"]
            print(f"[error] rmtree failed for: {path}\n         entries: {entries}\n         {e.__class__.__name__}: {e}")
            raise
        else:
            return

def _find_summary_file(folder: Path, expected_name: str) -> Path | None:
    """
    Return the path to the summary CSV inside `folder` if present.
    - Case-insensitive match on filename.
    - Ignores AppleDouble files that start with '._'.
    """
    expected_lower = expected_name.lower()
    for child in folder.iterdir():
        if child.is_file():
            if child.name.startswith("._"):   # ignore resource-fork files
                continue
            if child.name.lower() == expected_lower:
                return child
    return None

# --- main ---
def tidy_temperature_folders(root_dir: Path,
                             conc_whitelist: set[str] | None = None,
                             single_names = frozenset({"single-frequency"}),
                             whole_names  = frozenset({"whole-spectrum", "whole-specrtrum"}),
                             conflict: str = "rename",
                             dry_run: bool = True,
                             summary_filename: str = SUMMARY_FILENAME) -> Dict[str, Any]:
    """Flatten every ``<root>/<conc>/<temp>`` folder.

    For each temperature folder:
      1) move ``summary_filename`` out of the 'single-frequency' sub-folder
         into the temperature folder, then delete 'single-frequency';
      2) move every entry of the 'whole-spectrum' sub-folder into the
         temperature folder, then delete 'whole-spectrum'.

    A sub-folder is only deleted when nothing would be lost by doing so:
    'single-frequency' is kept if the summary was missing or its move was
    skipped, and 'whole-spectrum' is kept if any of its entries was skipped.

    Parameters
    ----------
    root_dir : Path
        Folder containing the concentration folders.
    conc_whitelist : set of str or None
        If given and non-empty, only these concentration folder names are visited.
    single_names, whole_names : iterable of str
        Accepted names of the two sub-folders (compared after normalisation).
    conflict : str
        Policy passed to ``_move_item`` ('rename', 'overwrite' or 'skip').
    dry_run : bool
        When True nothing is moved or deleted; the counters then describe
        the planned actions.
    summary_filename : str
        Name of the summary file to extract.

    Returns
    -------
    dict with the counters 'single_deleted', 'whole_moved', 'whole_deleted',
    'temps_seen' and 'summaries_extracted'.

    Raises
    ------
    FileNotFoundError
        If ``root_dir`` is not a directory.
    """
    if not root_dir.is_dir():
        raise FileNotFoundError(f"Root directory not found: {root_dir}")

    actions = {
        "single_deleted": 0,
        "whole_moved": 0,
        "whole_deleted": 0,
        "temps_seen": 0,
        "summaries_extracted": 0,
    }

    for conc_dir in sorted(p for p in root_dir.iterdir() if p.is_dir()):
        if conc_whitelist and conc_dir.name not in conc_whitelist:
            continue

        for temp_dir in sorted(p for p in conc_dir.iterdir() if p.is_dir()):
            actions["temps_seen"] += 1
            print(f"\n== {conc_dir.name} / {temp_dir.name} ==")

            # 1) Extract summary from 'single-frequency', then delete that folder
            single = _find_child_dir(temp_dir, single_names)
            if single and single.exists():
                extracted = False
                summary_path = _find_summary_file(single, summary_filename)
                if summary_path:
                    print(f"[extract] {summary_path.name} from {single.name}/ -> {temp_dir.name}/")
                    out = _move_item(summary_path, temp_dir, conflict=conflict, dry_run=dry_run)
                    if out is not None:
                        actions["summaries_extracted"] += 1
                        extracted = True
                else:
                    print(f"[info]   {summary_filename} not found in {single.name}/")

                if extracted:
                    print(f"[delete] single-frequency -> {single}")
                    if not dry_run:
                        safe_rmtree(single)
                    actions["single_deleted"] += 1
                else:
                    # Deleting now would discard data that has no summary in the parent folder.
                    print(f"[keep]   {single.name}/ left in place (summary was not extracted)")
            else:
                print("[info]   single-frequency not found")

            # 2) Move contents of 'whole-spectrum' up one level, then delete it
            whole = _find_child_dir(temp_dir, whole_names)
            if whole and whole.exists():
                skipped = 0
                for child in sorted(whole.iterdir()):
                    out = _move_item(child, temp_dir, conflict=conflict, dry_run=dry_run)
                    if out is not None:
                        actions["whole_moved"] += 1
                    else:
                        skipped += 1

                if skipped:
                    # Skipped entries are still inside the folder; removing it would delete them.
                    print(f"[keep]   {whole.name}/ left in place ({skipped} item(s) skipped)")
                else:
                    print(f"[delete] whole-spectrum -> {whole}")
                    if not dry_run:
                        safe_rmtree(whole)
                    actions["whole_deleted"] += 1
            else:
                print("[info]   whole-spectrum not found")

    print("\n--- SUMMARY ---")
    print(f"Temperature folders seen        : {actions['temps_seen']}")
    print(f"Summaries extracted             : {actions['summaries_extracted']}")
    print(f"'single-frequency' deleted      : {actions['single_deleted']}")
    print(f"Items moved from 'whole'        : {actions['whole_moved']}")
    print(f"'whole-spectrum' deleted        : {actions['whole_deleted']}")
    print(f"Dry-run: {dry_run} | Conflict policy: {conflict}")
    return actions


# Call it (prints progress). Start with a dry run or restrict to a concentration to verify.
# The folder-name sets and the summary file name are left at the function's defaults.
summary = tidy_temperature_folders(
    ROOT_DIR,
    conc_whitelist=CONC_WHITELIST,  # e.g., {"20mM"} to test only one concentration
    conflict=CONFLICT,
    dry_run=DRY_RUN
)
# Bare expression: makes the notebook display the returned counters dict.
summary



== 10mM / 26C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 28C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 30C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 32C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 34C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 36C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 38C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 40C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 42C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 44C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 46C ==
[info]   single-frequency not found
[info]   whole-spectrum not found

== 10mM / 48C ==
[info]   singl

{'single_deleted': 91,
 'whole_moved': 182,
 'whole_deleted': 91,
 'temps_seen': 260,
 'summaries_extracted': 91}

# This block of code takes the EIS_whole_spectrum files whose names end in "_3", deletes the other replicates, and finally removes the "_3" suffix from the name of the kept file.


In [16]:
from pathlib import Path
import re, shutil
from typing import Dict, List

# >>>>> SET YOUR ROOT FOLDER HERE <<<<<
# Folder whose sub-folders are <concentration>/<temperature>/<files>
ROOT_DIR = Path(r"/Volumes/01785304894/Uni Stuff/Measurement_Data/2025/June-July/NPG-500mM-H2SO4-whole")

# Options (read as globals by apply_changes)
CONCENTRATIONS = [f"{i}mM" for i in range(1, 14)]  # 1mM..13mM; only these concentration folders are visited
ALLOWED_EXTS   = {".csv"}                          # only touch CSVs
CONFLICT       = "overwrite"                       # used when the un-suffixed name already exists: "overwrite" | "skip" | "rename"
DRY_RUN        = False                              # False = delete/rename for real; True = only print the planned actions

# Stems such as "..._pH=2.61_3" carry a trailing _<digits> replicate number;
# only names starting with "EIS_whole_spectrum" (any letter case) qualify.
def parse_replicate(stem: str):
    """Split ``stem`` into (prefix, replicate number); (None, None) when it does not match."""
    found = re.match(r'(?i)^(?P<prefix>EIS_whole_spectrum.*)_(?P<rep>\d+)$', stem)
    if found is None:
        return None, None
    parts = found.groupdict()
    return parts['prefix'], int(parts['rep'])

def unique_target(path: Path) -> Path:
    """Return ``path`` when unused, else the first unused 'name (copy)', 'name (copy 2)', ... variant."""
    if not path.exists():
        return path
    counter = 1
    proposal = path.with_name(f"{path.stem} (copy){path.suffix}")
    while proposal.exists():
        counter += 1
        proposal = path.with_name(f"{path.stem} (copy {counter}){path.suffix}")
    return proposal

def apply_changes(root: Path, keep_rep: int = 3) -> None:
    """Keep one replicate of every whole-spectrum file and drop its numeric suffix.

    Visits ``root/<conc>/<temp>`` for the concentrations in CONCENTRATIONS.
    Files with an extension in ALLOWED_EXTS whose stem parses as
    ``<prefix>_<replicate>`` are grouped by prefix. In each group the
    replicate ``keep_rep`` is kept and renamed to ``<prefix><ext>``; all
    other replicates are deleted. With DRY_RUN set, the actions are only
    printed.

    Parameters
    ----------
    root : Path
        Folder containing the concentration folders.
    keep_rep : int
        Replicate number to keep (default 3).

    Raises
    ------
    FileNotFoundError
        If ``root`` is not a directory.
    ValueError
        If CONFLICT is not 'overwrite', 'skip' or 'rename'. This is checked
        before any file is touched, so a typo cannot abort the run after
        files have already been deleted.
    """
    if not root.is_dir():
        raise FileNotFoundError(f"Root not found: {root}")
    if CONFLICT not in ("overwrite", "skip", "rename"):
        raise ValueError("CONFLICT must be overwrite|skip|rename")

    def _delete_all(paths: List[Path], indent: str) -> None:
        # Print every deletion; only touch the disk outside a dry run.
        for p in paths:
            print(f"{indent}[delete] {p.name}")
            if not DRY_RUN:
                p.unlink(missing_ok=True)

    # Walk the configured concentration folders
    for conc_dir in sorted(p for p in root.iterdir() if p.is_dir() and p.name in CONCENTRATIONS):
        for temp_dir in sorted(p for p in conc_dir.iterdir() if p.is_dir()):
            print(f"\n== {conc_dir.name} / {temp_dir.name} ==")

            # Group files by base (prefix without _rep)
            groups: Dict[str, Dict[int, Path]] = {}
            for f in temp_dir.iterdir():
                if not f.is_file():
                    continue
                if ALLOWED_EXTS and f.suffix.lower() not in ALLOWED_EXTS:
                    continue
                base, rep = parse_replicate(f.stem)
                if base is None:
                    continue
                groups.setdefault(base, {})[rep] = f

            if not groups:
                print("  [info] No matching replicated files here.")
                continue

            # For each base set of replicates: keep one, delete others, rename kept
            for base, reps in sorted(groups.items()):
                keep = reps.get(keep_rep)
                # Sorted by replicate number so the printed order is reproducible.
                others = [p for r, p in sorted(reps.items()) if r != keep_rep]

                if keep is None:
                    # NOTE(review): with no replicate to keep, every replicate of
                    # this base is deleted and nothing remains. Behaviour kept
                    # as written; confirm this is really intended.
                    print(f"  [warn] No _{keep_rep} file for: {base}. Deleting {len(others)} replicate(s).")
                    _delete_all(others, "    ")
                    continue

                _delete_all(others, "  ")

                # Rename kept replicate → base (drop suffix). The stem of `keep`
                # always ends in "_<keep_rep>", so target never equals keep.
                target = keep.with_name(f"{base}{keep.suffix}")
                if not target.exists():
                    print(f"  [rename]  {keep.name} -> {target.name}")
                    if not DRY_RUN:
                        keep.rename(target)
                elif CONFLICT == "skip":
                    print(f"  [skip]    rename {keep.name} -> {target.name} (target exists)")
                elif CONFLICT == "overwrite":
                    print(f"  [overwrite] {target.name} then rename {keep.name}")
                    if not DRY_RUN:
                        target.unlink(missing_ok=True)
                        keep.rename(target)
                else:  # "rename"
                    new_target = unique_target(target)
                    print(f"  [rename]  {keep.name} -> {new_target.name}")
                    if not DRY_RUN:
                        keep.rename(new_target)

# Run. With DRY_RUN=True (set above) the planned deletions/renames are only
# printed; with DRY_RUN=False they are carried out. Inspect a dry run first.
apply_changes(ROOT_DIR)



== 10mM / 26C ==
  [delete] EIS_whole_spectrum_10mM_H2SO4_26C_60kHz_0.1Hz_pH=1.864_1.csv
  [delete] EIS_whole_spectrum_10mM_H2SO4_26C_60kHz_0.1Hz_pH=1.864_2.csv
  [delete] EIS_whole_spectrum_10mM_H2SO4_26C_60kHz_0.1Hz_pH=1.864_4.csv
  [delete] EIS_whole_spectrum_10mM_H2SO4_26C_60kHz_0.1Hz_pH=1.864_5.csv
  [rename]  EIS_whole_spectrum_10mM_H2SO4_26C_60kHz_0.1Hz_pH=1.864_3.csv -> EIS_whole_spectrum_10mM_H2SO4_26C_60kHz_0.1Hz_pH=1.864.csv

== 10mM / 28C ==
  [delete] EIS_whole_spectrum_10mM_H2SO4_28C_60kHz_0.1Hz_pH=1.870_1.csv
  [delete] EIS_whole_spectrum_10mM_H2SO4_28C_60kHz_0.1Hz_pH=1.870_2.csv
  [delete] EIS_whole_spectrum_10mM_H2SO4_28C_60kHz_0.1Hz_pH=1.870_4.csv
  [delete] EIS_whole_spectrum_10mM_H2SO4_28C_60kHz_0.1Hz_pH=1.870_5.csv
  [rename]  EIS_whole_spectrum_10mM_H2SO4_28C_60kHz_0.1Hz_pH=1.870_3.csv -> EIS_whole_spectrum_10mM_H2SO4_28C_60kHz_0.1Hz_pH=1.870.csv

== 10mM / 30C ==
  [delete] EIS_whole_spectrum_10mM_H2SO4_30C_60kHz_0.1Hz_pH=1.873_1.csv
  [delete] EIS_whole_spectru