# Startup Shear Protocol Validation

**Validates startup shear data: σ(t) at constant γ̇ (stress evolution during startup)**

## Protocol Description

Startup shear experiments apply a constant shear rate and measure the stress response over time.
This reveals transient material behavior including stress overshoot (thixotropic signature).

## Validation Checks

1. **Schema validation**: Required columns present (time, stress)
2. **Finite values**: No NaN or Inf in data arrays
3. **Non-negative time**: t ≥ 0 (a startup test may begin at t = 0)
4. **Monotonic time**: Time strictly increasing
5. **Positive stress**: σ > 0 after the initial ramp (the first 5% of points, at least one, are skipped)
6. **Overshoot detection**: Report the σ_max/σ_ss ratio (informational; this check never fails)

## Standard Plots

- σ(t) vs t
- σ(t) vs γ(t) = γ̇·t

In [None]:
# Configuration
MODE = "FAST"  # "FAST" or "FULL"

# Each tuple is (MAX_FILES, SKIP_HEAVY_PLOTS, SAVE_ARTIFACTS).
# Any MODE other than "FAST" selects the full-run settings.
_FAST_SETTINGS = (2, True, False)
_FULL_SETTINGS = (None, False, True)

MAX_FILES, SKIP_HEAVY_PLOTS, SAVE_ARTIFACTS = (
    _FAST_SETTINGS if MODE == "FAST" else _FULL_SETTINGS
)

print(f"Running in {MODE} mode")

In [None]:
from pathlib import Path
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Add project root to path
# Candidates run from the working directory up to the filesystem root; the
# first one holding a pyproject.toml wins, otherwise the topmost directory
# is used as the fallback.
_start_dir = Path.cwd()
_candidates = [_start_dir, *_start_dir.parents]
project_root = next(
    (folder for folder in _candidates if (folder / "pyproject.toml").exists()),
    _candidates[-1],
)
_root_entry = str(project_root)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

from examples.verification.utils.validation_utils import (
    DatasetValidation,
    ValidationResult,
    check_finite,
    check_monotonic,
    check_positive,
    create_output_directories,
    detect_startup_overshoot,
    get_data_dir,
    plot_startup,
    print_validation_summary,
    write_validation_report,
)

print(f"Project root: {project_root}")

## A) Dataset Inventory

Startup shear data is stored in `PNAS_DigitalRheometerTwin_Dataset.xlsx`, in the sheets whose names contain "StartUp".

In [None]:
# Locate the PNAS workbook under the project data directory.
data_dir = get_data_dir()
pnas_path = data_dir / "ikh" / "PNAS_DigitalRheometerTwin_Dataset.xlsx"

if pnas_path.exists():
    # Only the sheet names are needed here; the context manager closes the
    # workbook handle as soon as they have been read.
    with pd.ExcelFile(pnas_path) as xl:
        startup_sheets = [s for s in xl.sheet_names if "StartUp" in s]
    print(f"Found {len(startup_sheets)} startup shear datasets:")
    for i, sheet in enumerate(startup_sheets, start=1):
        print(f"  {i}. {sheet}")
else:
    print(f"PNAS dataset not found at: {pnas_path}")
    startup_sheets = []

# MAX_FILES is None in FULL mode, in which case every sheet is processed.
if MAX_FILES is not None and startup_sheets:
    startup_sheets = startup_sheets[:MAX_FILES]
    print(f"\nProcessing {len(startup_sheets)} sheets (FAST mode)")

## B) Data Loading

In [None]:
def load_startup_data(
    xlsx_path: Path, sheet_name: str
) -> "tuple[np.ndarray | None, np.ndarray | None, float | None, str]":
    """Load startup shear data from one sheet of the PNAS Excel file.

    The sheet is read without a header. The header row is searched for in the
    first five rows (the first row with a cell containing "time"); the row
    directly below it is treated as a units row and skipped. Rows where either
    time or stress is not a finite number are dropped, and series longer than
    500 points are subsampled to 500 evenly spaced indices.

    Args:
        xlsx_path: Path to the Excel workbook.
        sheet_name: Sheet to read. The shear rate is parsed from the text
            after the first underscore (e.g. "StartUp_0.056" -> 0.056);
            if that fails, 1.0 is used.

    Returns:
        Tuple of (time, stress, gamma_dot, status_message). On failure
        (unreadable sheet, no header row, failed numeric conversion, or no
        finite data rows) the first three items are None and status_message
        describes the problem.
    """
    try:
        df = pd.read_excel(xlsx_path, sheet_name=sheet_name, header=None)
    except Exception as e:
        # Deliberately broad: any reader failure is reported via the status
        # message instead of aborting the caller's loop over sheets.
        return None, None, None, f"Failed to read sheet: {e}"

    # Parse shear rate from sheet name (e.g., "StartUp_0.056" -> 0.056)
    try:
        gamma_dot = float(sheet_name.split("_")[1])
    except (IndexError, ValueError):
        gamma_dot = 1.0  # Default

    # Find header row: first of the leading rows with a cell containing "time"
    # (this also matches "Step time").
    header_row = None
    for i in range(min(5, len(df))):
        row_str = df.iloc[i].astype(str).str.lower()
        if row_str.str.contains("time", regex=False).any():
            header_row = i
            break

    if header_row is None:
        return None, None, None, "Could not find header row"

    # Skip header and units rows
    data_start = header_row + 2

    # Find time and stress columns. When several headers match, the last
    # matching column is the one that is kept.
    headers = df.iloc[header_row].astype(str).str.lower()
    time_col = None
    stress_col = None

    for i, h in enumerate(headers):
        if "time" in h:
            time_col = i
        elif "stress" in h:
            stress_col = i

    if time_col is None or stress_col is None:
        # Default to first two columns
        time_col = 0
        stress_col = 1

    try:
        time = pd.to_numeric(df.iloc[data_start:, time_col], errors="coerce").values
        stress = pd.to_numeric(df.iloc[data_start:, stress_col], errors="coerce").values
    except Exception as e:
        return None, None, None, f"Numeric conversion failed: {e}"

    # Keep only rows where both values are finite (drops NaN and Inf)
    mask = np.isfinite(time) & np.isfinite(stress)
    time = time[mask]
    stress = stress[mask]

    # An empty series is a load failure: reporting "Loaded 0 points" would let
    # callers run min()/max() and the validation checks on zero-length arrays.
    if time.size == 0:
        return None, None, None, "No finite (time, stress) rows found"

    # Subsample if very large
    if len(time) > 500:
        indices = np.linspace(0, len(time) - 1, 500, dtype=int)
        time = time[indices]
        stress = stress[indices]

    return time, stress, gamma_dot, f"Loaded {len(time)} points at γ̇ = {gamma_dot} 1/s"

# Test loading: smoke-test the loader on the first selected sheet
if startup_sheets and pnas_path.exists():
    test_sheet = startup_sheets[0]
    time, stress, gamma_dot, msg = load_startup_data(pnas_path, test_sheet)
    print(f"Test load: {test_sheet}")
    print(f"  {msg}")
    # min()/max() raise ValueError on zero-length arrays, so the ranges are
    # only reported when the loader returned at least one data point.
    if time is not None and len(time) > 0:
        print(f"  t range: [{time.min():.2e}, {time.max():.2e}] s")
        print(f"  σ range: [{stress.min():.2e}, {stress.max():.2e}] Pa")

## C) Validation Pipeline

In [None]:
def validate_startup(xlsx_path: Path, sheet_name: str) -> DatasetValidation:
    """Load one startup shear sheet and record the outcome of every check.

    A failed load yields a validation holding only the failing
    "data_loading" result. Otherwise the finite-value, non-negative time,
    strictly increasing time, positive-stress and overshoot results are
    appended in that order, and the loaded arrays are stored in
    ``derived_quantities``.
    """
    report = DatasetValidation(
        file_path=f"{xlsx_path}::{sheet_name}",
        protocol="startup_shear",
    )
    record = report.results.append

    time, stress, gamma_dot, load_msg = load_startup_data(xlsx_path, sheet_name)

    # Nothing else can be checked without data: report the load failure only.
    if time is None:
        record(
            ValidationResult(
                check_name="data_loading",
                passed=False,
                message=load_msg,
            )
        )
        return report

    load_details = {"n_points": len(time), "gamma_dot": gamma_dot}
    record(
        ValidationResult(
            check_name="data_loading",
            passed=True,
            message=load_msg,
            details=load_details,
        )
    )

    # Finite values in both arrays
    record(check_finite(time, "time"))
    record(check_finite(stress, "stress"))

    # Time may start at exactly zero, hence the non-strict positivity check
    record(check_positive(time, "time", strict=False))

    # Time must be strictly increasing
    record(check_monotonic(time, "time", increasing=True, strict=True))

    # Stress must be positive once the first 5% of points (at least one) are dropped
    ramp_points = max(1, len(stress) // 20)
    record(check_positive(stress[ramp_points:], "stress_after_ramp", strict=True))

    # Overshoot is reported for information only and never fails the dataset
    overshoot_info = detect_startup_overshoot(time, stress)
    overshoot_message = f"Overshoot ratio = {overshoot_info['overshoot_ratio']:.2f}"
    if overshoot_info['has_overshoot']:
        overshoot_message += f" at t = {overshoot_info['t_peak']:.3f} s"
    else:
        overshoot_message += " (no overshoot)"
    record(
        ValidationResult(
            check_name="overshoot_detection",
            passed=True,
            message=overshoot_message,
            details=overshoot_info,
        )
    )

    report.derived_quantities = {
        "time": time,
        "stress": stress,
        "gamma_dot": gamma_dot,
        "overshoot_info": overshoot_info,
    }

    return report

In [None]:
# Run validation on all sheets, echoing each check result as it is produced
validations = []

for sheet_name in startup_sheets:
    print(f"\nValidating: {sheet_name}")
    validations.append(validate_startup(pnas_path, sheet_name))

    # Report the checks of the validation that was just appended
    for outcome in validations[-1].results:
        if outcome.passed:
            verdict = "PASS"
        else:
            verdict = "FAIL"
        print(f"  [{verdict}] {outcome.check_name}: {outcome.message}")

## D) Standard Plots

In [None]:
def _sheet_label(file_path: str) -> str:
    """Return the sheet part of a "<workbook path>::<sheet>" identifier.

    The text after the last "::" is used, so a workbook path that itself
    contains "::" cannot be mistaken for the sheet name. "unknown" is
    returned when no separator is present.
    """
    _, separator, sheet = str(file_path).rpartition("::")
    return sheet if separator else "unknown"


# Only datasets that passed validation and carry loaded arrays are plotted.
plottable = [
    v for v in validations
    if v.passed and "time" in v.derived_quantities
]

if SKIP_HEAVY_PLOTS:
    # FAST mode: a single preview plot of the first eligible dataset.
    plottable = plottable[:1]
else:
    output_paths = create_output_directories("startup_shear")

for v in plottable:
    dq = v.derived_quantities
    sheet_name = _sheet_label(v.file_path)
    plot_kwargs = {"gamma_dot": dq["gamma_dot"], "title": sheet_name}
    if not SKIP_HEAVY_PLOTS:
        # save_path is only passed in the full-plot branch, and is None
        # unless artifacts are being saved.
        plot_kwargs["save_path"] = (
            output_paths["plots"] / f"{sheet_name}_startup.png"
            if SAVE_ARTIFACTS
            else None
        )

    fig = plot_startup(dq["time"], dq["stress"], dq["overshoot_info"], **plot_kwargs)
    plt.show()

## E) Validation Summary

In [None]:
# Pass every collected validation to the project's summary helper
# (the output format is defined in validation_utils, not in this notebook).
print_validation_summary(validations)

## F) Export Artifacts

In [None]:
if not SAVE_ARTIFACTS:
    print("Artifacts not saved (FAST mode). Set MODE='FULL' to save.")
else:
    output_paths = create_output_directories("startup_shear")

    # The JSON report is written next to the plots directory.
    report = {
        "protocol": "startup_shear",
        "mode": MODE,
        "n_files_validated": len(validations),
        "all_passed": all(v.passed for v in validations),
        "validations": validations,
    }
    report_path = output_paths["plots"].parent / "validation_report.json"
    write_validation_report(report, report_path)
    print(f"Validation report saved to: {report_path}")

    # One CSV of (time, stress) per dataset that passed validation.
    for v in validations:
        if not (v.passed and "time" in v.derived_quantities):
            continue
        id_parts = v.file_path.split("::")
        sheet_name = id_parts[1] if len(id_parts) > 1 else "unknown"
        csv_path = output_paths["derived_quantities"] / f"{sheet_name}_derived.csv"
        pd.DataFrame({
            "time": v.derived_quantities["time"],
            "stress": v.derived_quantities["stress"],
        }).to_csv(csv_path, index=False)

    print(f"Derived quantities saved to: {output_paths['derived_quantities']}")