In [74]:
import pandas as pd
import lasio 
import numpy as np

# Parse the BATAI-1 LAS file; the path is relative to the kernel's working directory.
las = lasio.read(r"BATAI-1.las")
# Convert the parsed curves to a DataFrame (the df.head() output below shows DEPT as its index).
df = las.df()

In [75]:
# List the curve mnemonics (column names) available in the loaded log DataFrame.
df.columns

Index(['CALI_UM', 'DT_UM', 'GR_UM', 'NPHI_UM', 'RHOB_UM', 'RT_UM', 'VSHL_UM',
       'PHIA_UM', 'PHIE_UM', 'SWA_UM', 'SWT', 'GROSS_UM', 'NET_UM', 'NTG_UM',
       'PAY_UM'],
      dtype='object')

In [76]:
# Preview the first rows; the index is named DEPT and most curves are NaN at these depths.
df.head()

Unnamed: 0_level_0,CALI_UM,DT_UM,GR_UM,NPHI_UM,RHOB_UM,RT_UM,VSHL_UM,PHIA_UM,PHIE_UM,SWA_UM,SWT,GROSS_UM,NET_UM,NTG_UM,PAY_UM
DEPT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
849.9348,,,5.3588,,,,0.0,,,,,1.0,,,
850.0872,,,7.3132,,,,0.0,,,,,1.0,,,
850.2396,,,8.2444,,,,0.0,,,,,1.0,,,
850.392,,,7.6849,,,,0.0,,,,,1.0,,,
850.5444,,,7.0132,,,,0.0,,,,,1.0,,,


In [77]:
import numpy as np
import pandas as pd
from typing import List, Tuple, Dict, Optional
from scipy.integrate import trapezoid

# ------------------ helpers ------------------
def _prepare_df(df_in: pd.DataFrame, depth_col_name: str = "MD") -> pd.DataFrame:
    """Ensure depth is a numeric column named depth_col_name, sorted ascending."""
    df = df_in.copy()
    # If index contains depth info
    if df.index.name is not None and df.index.name.upper() in ("DEPT", "DEPTH", "MD", "MDEPTH"):
        df = df.reset_index().rename(columns={df.index.name: depth_col_name})
    else:
        # detect common depth column names
        candidates = [c for c in df.columns if c.upper() in ("DEPT", "DEPTH", "MD", "MDEPTH", "DEPT.M")]
        if candidates and depth_col_name not in df.columns:
            df = df.rename(columns={candidates[0]: depth_col_name})
        elif depth_col_name not in df.columns:
            # numeric unnamed index -> reset it into depth_col_name
            if np.issubdtype(df.index.dtype, np.number):
                df = df.reset_index().rename(columns={"index": depth_col_name})
            else:
                raise ValueError("Depth column not found. Provide a DataFrame with a depth column or numeric index.")
    # Make sure depth is numeric and drop rows without depth
    df[depth_col_name] = pd.to_numeric(df[depth_col_name], errors="coerce")
    df = df.dropna(subset=[depth_col_name])
    df = df.sort_values(by=depth_col_name).reset_index(drop=True)
    return df


def _weighted_average_over_depth(z: np.ndarray, y: np.ndarray) -> Optional[float]:
    """Depth-weighted average of y over z (trapz/integral / total length)."""
    mask = ~np.isnan(y)
    if mask.sum() == 0:
        return None
    zv = z[mask]
    yv = y[mask]
    if len(zv) == 1:
        return float(yv[0])
    total_len = float(zv[-1] - zv[0])
    if total_len <= 1e-6:  # avoid division by ~0
        return float(np.mean(yv))
    integral = float(trapezoid(yv, zv))
    return integral / total_len


def _to_indicator(series: pd.Series) -> np.ndarray:
    """
    Convert a series to an indicator array (1.0 where flag is present, 0.0 otherwise).
    Accepts numeric (non-zero -> 1), booleans, and common truthy strings ('y','yes','true','1').
    """
    if series.dtype == bool:
        return series.astype(float).values
    s_str = series.astype(str).str.strip().str.lower()
    true_set = {"1", "y", "yes", "true", "t", "on"}
    false_set = {"0", "n", "no", "false", "f", "off"}
    true_mask = s_str.isin(true_set)
    numeric = pd.to_numeric(series, errors="coerce")
    # Build indicator: true_mask ->1 ; else numeric non-zero ->1 ; else 0
    ind = np.where(true_mask, 1.0, np.where(~np.isnan(numeric) & (numeric != 0), 1.0, 0.0))
    return ind.astype(float)


def _integrate_indicator_over_depth(z: np.ndarray, ind: np.ndarray) -> float:
    """Integrate indicator (1/0) over z to get thickness (trapz)."""
    if len(z) < 2:
        # single sample -> thickness 0.0 (no interval)
        return 0.0
    order = np.argsort(z)
    return float(trapezoid(ind[order], z[order]))


# ------------------ main function ------------------
def generate_zone_summary_from_df(
    df: pd.DataFrame,
    zones: List[Tuple[float, float]],
    kb: float,
    curve_names: Dict[str, str],
    depth_col_name: str = "MD",
    round_decimals: int = 2,
    clip_to_data: bool = True
) -> pd.DataFrame:
    """
    Generate a petrophysical zone summary from a DataFrame.

    For every zone the function reports depth-weighted averages of the shale volume,
    porosity and water-saturation curves, and the thickness flagged by the gross,
    net and pay indicator curves.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing depth (as a column or as the index) and the curves.
    zones : list of (top_md, base_md)
        MD intervals (units must match the depth data and ``kb``). A pair given in
        reverse order is swapped.
    kb : float
        Kelly Bushing value; the TVDSS label is computed as MD - kb.
        NOTE(review): this equals true TVDSS only for a vertical well -- confirm.
    curve_names : dict
        Mapping of logical key -> column name in ``df``. Recognised keys and their
        defaults when a key is omitted:
            {
              "vsh": "Vshl_UM",
              "phie": "PHIE_UM",
              "phia": "PHIA_UM",
              "sw": "SwA_UM",
              "gross": "Gross_UM",
              "net": "Net_UM",
              "pay": "Pay_UM"
            }
        The dict passed in is not modified. A name is matched exactly first and then
        case-insensitively (when exactly one column matches), so the mixed-case
        defaults also find upper-case mnemonics such as "VSHL_UM". Mapping a key to
        None disables that curve.
    depth_col_name : str
        Depth column target name (default "MD"). Commonly-named depth columns or a
        depth index are detected and renamed to it.
    round_decimals : int
        Number of decimals used for the numeric results and the interval labels.
    clip_to_data : bool
        If True, clip requested zones to the data's min/max depth. A zone that does
        not overlap the data at all keeps its requested bounds in the labels.

    Returns
    -------
    pd.DataFrame
        One row per zone with columns: MD(m), TVDSS(m), Avg Shale, Avg PHIE, Avg PHIA,
        Avg SW, Gross(m), NET(m), N-G Ratio, Pay(m). Averages of unavailable curves are
        NaN; thicknesses of unavailable flags are 0.0; N-G Ratio is NaN when gross
        thickness is 0.

    Warns
    -----
    UserWarning
        Once per requested curve that cannot be found in ``df``.
    """
    import warnings  # function-scope import: not provided by the notebook's import cell

    defaults = {
        "vsh": "Vshl_UM",
        "phie": "PHIE_UM",
        "phia": "PHIA_UM",
        "sw": "SwA_UM",
        "gross": "Gross_UM",
        "net": "Net_UM",
        "pay": "Pay_UM"
    }
    # Merge into a fresh dict so the caller's mapping is never mutated.
    mapping = {**defaults, **(curve_names or {})}

    df0 = _prepare_df(df, depth_col_name=depth_col_name)
    md_min, md_max = df0[depth_col_name].min(), df0[depth_col_name].max()

    # Resolve every logical key to an actual column once, before looping over zones.
    columns_by_upper = {}
    for col in df0.columns:
        if isinstance(col, str):
            columns_by_upper.setdefault(col.upper(), []).append(col)

    resolved = {}
    for key in defaults:
        requested = mapping.get(key)
        if requested is None:
            # Explicitly disabled by the caller: nothing to look up, nothing to warn about.
            resolved[key] = None
            continue
        if requested in df0.columns:
            resolved[key] = requested
            continue
        matches = columns_by_upper.get(requested.upper(), []) if isinstance(requested, str) else []
        if len(matches) == 1:
            resolved[key] = matches[0]
        else:
            # Zero matches, or several columns differing only by case: do not guess.
            resolved[key] = None
            warnings.warn(
                f"Curve '{requested}' for key '{key}' not found in DataFrame; "
                "its summary values are reported as NaN (averages) or 0.0 (thickness).",
                stacklevel=2,
            )

    def _zone_average(zone_df: pd.DataFrame, depth: np.ndarray, key: str) -> Optional[float]:
        """Depth-weighted average of the curve mapped to ``key``; None if unavailable."""
        column = resolved[key]
        if column is None:
            return None
        values = pd.to_numeric(zone_df[column], errors="coerce").values
        return _weighted_average_over_depth(depth, values)

    def _zone_thickness(zone_df: pd.DataFrame, depth: np.ndarray, key: str) -> float:
        """Thickness flagged by the indicator curve mapped to ``key``; 0.0 if unavailable."""
        column = resolved[key]
        if column is None:
            return 0.0
        return _integrate_indicator_over_depth(depth, _to_indicator(zone_df[column]))

    def _rounded(value: Optional[float]) -> float:
        """Round an average, mapping a missing value (None) to NaN."""
        return round(value, round_decimals) if value is not None else np.nan

    rows = []
    for top, base in zones:
        top_md, base_md = (top, base) if top <= base else (base, top)
        if clip_to_data:
            clipped_top = max(top_md, md_min)
            clipped_base = min(base_md, md_max)
            # Adopt the clipped bounds only when the zone overlaps the data; otherwise
            # clipping would invert the interval and produce a misleading label.
            if clipped_top <= clipped_base:
                top_md, base_md = clipped_top, clipped_base

        row = {
            "MD(m)": f"{top_md:.{round_decimals}f}-{base_md:.{round_decimals}f}",
            "TVDSS(m)": f"{(top_md - kb):.{round_decimals}f}-{(base_md - kb):.{round_decimals}f}",
        }

        # Select zone rows, both boundaries inclusive.
        in_zone = (df0[depth_col_name] >= top_md) & (df0[depth_col_name] <= base_md)
        df_zone = df0.loc[in_zone]
        if df_zone.shape[0] == 0:
            # No data in zone: averages are undefined, thicknesses are zero.
            row.update({
                "Avg Shale": np.nan, "Avg PHIE": np.nan, "Avg PHIA": np.nan, "Avg SW": np.nan,
                "Gross(m)": 0.0, "NET(m)": 0.0, "N-G Ratio": np.nan, "Pay(m)": 0.0
            })
            rows.append(row)
            continue

        z = df_zone[depth_col_name].values

        gross_t = _zone_thickness(df_zone, z, "gross")
        net_t = _zone_thickness(df_zone, z, "net")
        pay_t = _zone_thickness(df_zone, z, "pay")
        # Net-to-gross is undefined when no gross thickness was flagged.
        ng_ratio = (net_t / gross_t) if gross_t > 0 else np.nan

        row.update({
            "Avg Shale": _rounded(_zone_average(df_zone, z, "vsh")),
            "Avg PHIE": _rounded(_zone_average(df_zone, z, "phie")),
            "Avg PHIA": _rounded(_zone_average(df_zone, z, "phia")),
            "Avg SW": _rounded(_zone_average(df_zone, z, "sw")),
            "Gross(m)": round(gross_t, round_decimals),
            "NET(m)": round(net_t, round_decimals),
            "N-G Ratio": (round(ng_ratio, round_decimals) if not np.isnan(ng_ratio) else np.nan),
            "Pay(m)": round(pay_t, round_decimals)
        })
        rows.append(row)

    summary_df = pd.DataFrame(rows, columns=[
        "MD(m)", "TVDSS(m)", "Avg Shale", "Avg PHIE", "Avg PHIA", "Avg SW",
        "Gross(m)", "NET(m)", "N-G Ratio", "Pay(m)"
    ])
    return summary_df


In [78]:
# Re-display the column names so the curve mapping in the next cell can be checked against them.
df.columns

Index(['CALI_UM', 'DT_UM', 'GR_UM', 'NPHI_UM', 'RHOB_UM', 'RT_UM', 'VSHL_UM',
       'PHIA_UM', 'PHIE_UM', 'SWA_UM', 'SWT', 'GROSS_UM', 'NET_UM', 'NTG_UM',
       'PAY_UM'],
      dtype='object')

In [79]:
# Logical curve key -> column name as it appears in the loaded DataFrame.
curve_map = {
    "vsh": "VSHL_UM", "phie": "PHIE_UM", "phia": "PHIA_UM", "sw": "SWA_UM",
    "gross": "GROSS_UM", "net": "NET_UM", "pay": "PAY_UM",
}

# Zone intervals as (top MD, base MD); each base is the top of the next zone.
zones = [
    (1894, 1995.1),
    (1995.1, 2182.1),
    (2182.1, 2287.1),
]

# Kelly Bushing value that the summary subtracts from MD for its TVDSS column.
kb = 26.2

summary = generate_zone_summary_from_df(
    df=df,
    zones=zones,
    kb=kb,
    curve_names=curve_map,
)
print(summary.to_string(index=False))


          MD(m)        TVDSS(m)  Avg Shale  Avg PHIE  Avg PHIA  Avg SW  Gross(m)  NET(m)  N-G Ratio  Pay(m)
1894.00-1995.10 1867.80-1968.90       0.24      0.22      0.30    0.99    101.04  101.04        1.0    0.30
1995.10-2182.10 1968.90-2155.90       0.38      0.17      0.28    1.00    166.27  166.27        1.0    0.46
2182.10-2286.91 2155.90-2260.71       0.30      0.21      0.30    1.00     73.08   73.08        1.0    0.00


In [None]:
# Optional: uncomment to export the summary table to an Excel workbook.
# summary.to_excel("petrophysical_summary_BATAI-1.xlsx", index=False)