In [1]:
#EXCEL_PATH = r"D:\FILIP\DOKTORSKE STUDIJE\III GODINA\AIC M21 CASOPIS\MATLAB CODE\1.PRIPREMLJENA BAZA PODATAKA\FUNDAMENTAL PERIOD PYTHON.xlsx"

In [2]:
# Great it works!

# Can you now provide, in the same way, updated scripts that run directly in JupyterLab for:
# Script 5 — LASSO Polynomial Regression (sparse closed-form)
# Script 4 — Additive Spline GAM (explicit equation)
# Script 3 — Model Tree (piecewise linear equations per region)
# Script 2 — MARS (py-earth) to get piecewise-linear equations
# Script 1 — Symbolic Regression

In [3]:
# === Script 4 (self-contained): Additive Spline GAM — hand-calculable piecewise formula ===

import os, json, math, warnings, re
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from patsy import dmatrix
from sympy import init_printing
# Configure SymPy's printing to use MathJax-rendered LaTeX.
init_printing(use_latex='mathjax')
# NOTE(review): this silences every warning category for the rest of the session,
# including any numerical warnings raised during the fit — consider narrowing the filter.
warnings.filterwarnings("ignore")
import time
# Wall-clock start; read again at the very end of the cell to report the running time.
start = time.time()
# ---------- CONFIG ----------
# Workbook location. Set the CFST_EXCEL_PATH environment variable to run the notebook
# on another machine; when it is unset, the original local path below is used unchanged.
EXCEL_PATH = os.environ.get(
    "CFST_EXCEL_PATH",
    r"D:\FILIP\DOKTORSKE STUDIJE\IIIII GODINA\8.CSP - NOVA KNJIGA SA VM\MOJE POGLAVLJE\CASE STUDIES\CFST columns Dataset.xlsx",
)
SHEET      = 0                      # sheet passed to pd.read_excel (first sheet)
FEATURES = ["B","t","L","fy","fc"]  # predictor columns, one spline block each
TARGET = "Nexp"                     # response column

TEST_SIZE   = 0.20                  # hold-out fraction for train_test_split
RANDOM_SEED = 42                    # fixes the train/test split

# Spline settings (deterministic)
SPLINE_DEG     = 3            # cubic
N_INTERIOR_K   = 5            # interior knots per feature
KNOT_QRANGE    = (0.05, 0.95) # quantile range for interior knots
INCLUDE_INTERC = False        # keep model intercept separate (β0)

# Ridge regularization: candidate penalties searched by RidgeCV
RIDGE_ALPHAS = np.logspace(-6, 3, 15)

# All generated files go into this folder (created on first run).
OUTDIR = "out_gam_additive_cfst_handcalc"
os.makedirs(OUTDIR, exist_ok=True)

# ---------- LOAD ----------
# Read the sheet and stop immediately if a required column is absent.
df = pd.read_excel(EXCEL_PATH, sheet_name=SHEET)
missing = [name for name in [*FEATURES, TARGET] if name not in df.columns]
if len(missing) > 0:
    raise ValueError(f"Missing columns: {missing}\nPresent: {list(df.columns)}")

# Coerce predictors and response to numbers; cells that cannot be parsed become NaN.
Xdf = df[FEATURES].apply(pd.to_numeric, errors="coerce")
y = pd.to_numeric(df[TARGET], errors="coerce").values

# Keep only rows in which the response and every predictor are finite.
mask = np.isfinite(y) & np.isfinite(Xdf.values).all(axis=1)
Xdf = Xdf.loc[mask].reset_index(drop=True)
y = y[mask]

# Seeded hold-out split.
Xdf_tr, Xdf_te, y_tr, y_te = train_test_split(
    Xdf,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_SEED,
)

# ---------- FIXED KNOTS & BOUNDS (train only) ----------
def interior_knots(x: pd.Series, n_k: int, qrange):
    """Place interior knots at evenly spaced quantiles of ``x``.

    Parameters
    ----------
    x : pd.Series
        Values of one feature.
    n_k : int
        Requested number of interior knots; zero or a negative value yields ``[]``.
    qrange : sequence
        ``qrange[0]`` and ``qrange[1]`` delimit the quantile span. ``n_k + 2`` evenly
        spaced levels are laid over it and the two end levels are discarded.

    Returns
    -------
    list of float
        Sorted, de-duplicated knot positions rounded to 12 decimals. Tied quantiles
        collapse, so fewer than ``n_k`` knots may come back.
    """
    if n_k <= 0:
        return []
    level_grid = np.linspace(qrange[0], qrange[1], n_k + 2)
    inner_levels = level_grid[1:-1]  # drop both end levels, keep interior only
    knot_values = np.quantile(x.values, inner_levels)
    rounded = np.round(knot_values, 12)
    return np.unique(rounded).tolist()

# Per-feature interior knots and [min, max] bounds, both taken from the training split only.
knots_interior = {}
bounds = {}
for feat_name in FEATURES:
    train_col = Xdf_tr[feat_name]
    knots_interior[feat_name] = interior_knots(train_col, N_INTERIOR_K, KNOT_QRANGE)
    bounds[feat_name] = (float(train_col.min()), float(train_col.max()))

def bs_term(name, knots, lb, ub, deg=SPLINE_DEG, include_intercept=INCLUDE_INTERC):
    """Render the formula text of one ``bs(...)`` spline term.

    Parameters
    ----------
    name : str
        Variable name placed as the first argument of ``bs``.
    knots : iterable of float
        Interior knots, each written with 12 significant digits.
    lb, ub : float
        Lower and upper bound, written with 12 significant digits.
    deg : int
        Spline degree (defaults to ``SPLINE_DEG``).
    include_intercept : bool
        Value written for ``include_intercept`` (defaults to ``INCLUDE_INTERC``).

    Returns
    -------
    str
        Text of the form ``bs(name, knots=[...], lower_bound=..., upper_bound=...,
        degree=..., include_intercept=...)``.
    """
    knot_txt = ",".join(format(k, ".12g") for k in knots)
    arguments = [
        f"{name}",
        f"knots=[{knot_txt}]",
        f"lower_bound={lb:.12g}",
        f"upper_bound={ub:.12g}",
        f"degree={deg}",
        f"include_intercept={str(include_intercept)}",
    ]
    return "bs(" + ", ".join(arguments) + ")"

# One bs(...) term per feature, joined with " + " into a single additive formula.
FORMULA = " + ".join(bs_term(f, knots_interior[f], *bounds[f]) for f in FEATURES)

# ---------- DESIGN & FIT ----------
# Both splits are expanded with the same formula text; knots and bounds are written
# into it explicitly, so nothing is re-estimated from the test rows.
# NOTE(review): a patsy formula without "0 +" / "- 1" normally adds its own "Intercept"
# column, on top of fit_intercept=True below — confirm that duplication is intended.
Z_tr_df = dmatrix(FORMULA, Xdf_tr, return_type="dataframe")
Z_te_df = dmatrix(FORMULA, Xdf_te, return_type="dataframe")
col_names = Z_tr_df.columns.tolist()
Z_tr = np.asarray(Z_tr_df)
Z_te = np.asarray(Z_te_df)

# Ridge regression with the penalty chosen by cross-validation over RIDGE_ALPHAS.
ridge = RidgeCV(alphas=RIDGE_ALPHAS, fit_intercept=True).fit(Z_tr, y_tr)

def metrics(y_true, y_pred):
    """Score predictions against observations.

    Returns a dict with keys ``"R2"`` (``r2_score``), ``"MAE"``
    (``mean_absolute_error``) and ``"RMSE"`` (square root of the mean squared
    difference between ``y_true`` and ``y_pred``).
    """
    scores = {}
    scores["R2"] = r2_score(y_true, y_pred)
    scores["MAE"] = mean_absolute_error(y_true, y_pred)
    residual = y_true - y_pred
    scores["RMSE"] = math.sqrt((residual ** 2).mean())
    return scores

# Predictions on both splits, each scored with metrics().
yhat_tr, yhat_te = ridge.predict(Z_tr), ridge.predict(Z_te)
m_train = metrics(y_tr, yhat_tr)
m_test = metrics(y_te, yhat_te)

# Fitted parameters: flattened coefficient vector and scalar intercept.
coef = ridge.coef_.ravel()
intercept = float(ridge.intercept_)

# ---------- GROUP COLUMNS BY FEATURE & BASIS INDEX ----------
def parse_col(col):
    """Split a design-matrix column label into ``(feature, basis_index)``.

    The feature is the text between ``bs(`` and the first comma; the index is the
    integer inside a trailing ``[k]``. A label that does not start with ``bs(``
    gives the feature ``"UNKNOWN"``; a label without a trailing ``[k]`` gives the
    index ``None``.
    """
    # e.g. "bs(NoSt, knots=[...], lower_bound=..., upper_bound=..., degree=3, include_intercept=False)[2]"
    feat_match = re.match(r"bs\(([^,]+),", col)
    idx_match = re.search(r"\[(\d+)\]\s*$", col)

    if feat_match is None:
        feat = "UNKNOWN"
    else:
        feat = feat_match.group(1)

    if idx_match is None:
        idx = None
    else:
        idx = int(idx_match.group(1))

    return feat, idx

# feature -> list of (basis index, coefficient, column label), ordered by basis index
by_feat = {}
for cname, cval in zip(col_names, coef):
    feat_key, basis_idx = parse_col(cname)
    by_feat.setdefault(feat_key, []).append((basis_idx, cval, cname))
for entries in by_feat.values():
    entries.sort(key=lambda tup: tup[0])  # sort by basis index

# ---------- BUILD EXPLICIT PIECEWISE POLYNOMIALS (SymPy) ----------
# This section previously appeared twice (imports, sym_vars and clamped_knot_tuple);
# the copies are merged so nothing is silently shadowed by a later definition.
from sympy import symbols, Piecewise, bspline_basis, S, Tuple, simplify, poly, piecewise_fold
from IPython.display import Math, display

# One real-valued SymPy symbol per feature, keyed by feature name.
sym_vars = {name: symbols(name, real=True) for name in FEATURES}

def clamped_knot_tuple(lb, knots, ub, kdeg):
    """Build a clamped knot vector as a SymPy ``Tuple``.

    Parameters
    ----------
    lb, ub : number
        Lower and upper bound; each is repeated ``kdeg + 1`` times.
    knots : iterable of number
        Interior knots, placed between the repeated bounds in the given order.
    kdeg : int
        Spline degree.

    Returns
    -------
    sympy.Tuple
        ``(lb,)*(kdeg+1) + knots + (ub,)*(kdeg+1)`` with every entry converted
        to a SymPy number, as SymPy's ``bspline_basis`` expects.
    """
    end_reps = kdeg + 1
    seq = [S(lb)] * end_reps + [S(k) for k in knots] + [S(ub)] * end_reps
    return Tuple(*seq)

sym_basis   = {}   # feature -> list of (design-matrix column index, Piecewise-or-Expr)
basis_labels= {}   # feature -> list of column names in order

def _idx_of(col):
    """Return the integer inside the trailing ``[k]`` of a column label (0 if absent)."""
    m = re.search(r"\[(\d+)\]\s*$", col)
    return int(m.group(1)) if m else 0

# patsy's bs(..., include_intercept=False) leaves out the FIRST function of the full
# B-spline basis, so design-matrix column [k] is full-basis function k + 1. Without this
# shift every symbolic B_{j,k} would be paired with the coefficient of its neighbour.
_BASIS_OFFSET = 0 if INCLUDE_INTERC else 1

for f in FEATURES:
    # columns for this feature, sorted by trailing [k]
    f_cols = [c for c in col_names if c.startswith(f"bs({f},")]
    if not f_cols:
        continue
    f_cols.sort(key=_idx_of)

    x   = sym_vars[f]
    lb, ub = bounds[f]
    interior = knots_interior[f]
    T = clamped_knot_tuple(lb, interior, ub, SPLINE_DEG)

    # A clamped knot vector with m interior knots carries m + degree + 1 basis functions;
    # check that the design-matrix columns account for all of them after the shift.
    n_full_basis = len(interior) + SPLINE_DEG + 1
    if len(f_cols) + _BASIS_OFFSET != n_full_basis:
        raise ValueError(
            f"Feature {f!r}: {len(f_cols)} design columns do not match a "
            f"{n_full_basis}-function basis (offset {_BASIS_OFFSET})"
        )

    basis_list = []
    for col in f_cols:
        k_idx = _idx_of(col)
        # Signature is (degree, knots, index, x); the index is shifted to the full basis,
        # while the stored label stays the design-matrix column index k_idx.
        B = bspline_basis(SPLINE_DEG, T, k_idx + _BASIS_OFFSET, x)
        # Ensure canonical piecewise form if applicable
        B = piecewise_fold(B)
        B = simplify(B)
        basis_list.append((k_idx, B))

    sym_basis[f]    = basis_list
    basis_labels[f] = f_cols

# ---------- WRITE FULL HAND-CALCULABLE FORMULA ----------
# 1) Compact display in notebook
display(Math(r"y = \beta_0 + \sum_{j}\sum_{k} c_{j,k}\,B_{j,k}(x_j)\quad\text{(each }B_{j,k}\text{ is piecewise cubic)}"))
# The header names the configured target rather than a fixed label, so it cannot go
# stale when the notebook is pointed at a different dataset.
print(f"\n=== Additive Spline GAM — target: {TARGET} ===")
print(f"degree={SPLINE_DEG} | interior_knots_per_feature={N_INTERIOR_K} | knot_quantile_range={KNOT_QRANGE}")
print("Train:", {k: round(v, 6) for k, v in m_train.items()})
print("Test :", {k: round(v, 6) for k, v in m_test.items()})

# 2) Save detailed piecewise polynomials
def _poly_text(expr, var, name, max_deg=SPLINE_DEG):
    """Render ``expr`` as signed monomials in ``name``, highest power first.

    Powers from ``max_deg`` (or the polynomial's own degree, if larger) down to 0 are
    always listed, so lower-degree pieces show explicit zero coefficients and no term
    is dropped when the spline degree is not 3. Falls back to ``str(expr)`` when the
    expression cannot be turned into a polynomial in ``var``.
    """
    try:
        p = poly(expr, var)
        top = max_deg
        if p.degree() > top:
            top = int(p.degree())
        parts = []
        for power in range(top, -1, -1):
            c = float(p.nth(power))
            if power >= 2:
                parts.append(f"{c:+.12g}*{name}**{power}")
            elif power == 1:
                parts.append(f"{c:+.12g}*{name}")
            else:
                parts.append(f"{c:+.12g}")
        return " ".join(parts)
    except Exception:
        # Best-effort formatting: keep the raw expression rather than abort the report.
        return str(expr)

detail_path = os.path.join(OUTDIR, "hand_calculable_piecewise.txt")
with open(detail_path, "w", encoding="utf-8") as f:
    f.write("Hand-calculable piecewise cubic polynomials for Additive Spline GAM\n")
    f.write(f"Intercept (beta0): {intercept:.12g}\n\n")

    # Coefficient of every design-matrix column, looked up by column label.
    coef_of = dict(zip(col_names, coef))

    for ftr in FEATURES:
        if ftr not in sym_basis:
            continue
        x = sym_vars[ftr]
        lb, ub = bounds[ftr]
        f.write(f"==== Feature: {ftr}  (domain: [{lb:.12g}, {ub:.12g}]) ====\n")
        f.write(f"Interior knots: {', '.join(f'{k:.12g}' for k in knots_interior[ftr]) or '(none)'}\n\n")

        # coefficients c_k aligned to basis order
        ck_list = []
        for col in basis_labels[ftr]:
            m = re.search(r"\[(\d+)\]\s*$", col)
            k_idx = int(m.group(1)) if m else 0
            ck_list.append((k_idx, coef_of[col]))
        ck_list.sort(key=lambda t: t[0])
        f.write("Coefficients c_{k} for basis B_{k}:\n")
        for k_idx, cval in ck_list:
            f.write(f"  k={k_idx}: c={cval:.12g}\n")
        f.write("\n")

        # Each basis as explicit piecewise polynomial with intervals
        for (k_idx, Bexpr) in sym_basis[ftr]:
            f.write(f"B_{ftr},{k_idx}({ftr}) =\n")
            if isinstance(Bexpr, Piecewise):
                default_txt = "0"
                for expr, cond in Bexpr.args:
                    if cond is S.true:
                        # Catch-all branch: reported once on the closing "otherwise"
                        # line instead of as an extra "... if True" row.
                        if expr != 0:
                            default_txt = _poly_text(expr, x, ftr)
                        break
                    f.write(f"  {_poly_text(expr, x, ftr)}   if  {str(cond)}\n")
                f.write(f"  {default_txt}  otherwise\n\n")
            else:
                # Non-piecewise (rare for interior splines), still expand/print
                f.write(f"  {_poly_text(Bexpr, x, ftr)}   on its support; 0 otherwise\n\n")

    # Final y in basis-label form
    f.write("\n==== Final equation (basis-label form) ====\n")
    f.write(f"y = {intercept:.12g}")
    for ftr in FEATURES:
        if ftr not in basis_labels:
            continue
        for col in basis_labels[ftr]:
            m = re.search(r"\[(\d+)\]\s*$", col)
            k_idx = int(m.group(1)) if m else 0
            f.write(f" {coef_of[col]:+.12g}*B_{ftr},{k_idx}({ftr})")
    f.write("\n")

print(f"\nDetailed hand-calculable piecewise polynomials saved to:\n  {os.path.abspath(detail_path)}")


# === INLINE FULL MATH DISPLAY (no truncation) ================================
from IPython.display import Math, display
from sympy import latex as _sym_latex
import re

# Helper: pretty sign+magnitude
def _sgn_term(c, sig=6):
    s = f"{abs(float(c)):.{sig}g}"
    return (" + " if c >= 0 else " - ") + s

# Helper: show a very long LaTeX equation by splitting into blocks (prevents MathJax dropouts)
def _display_in_blocks(prefix, items, block_size=20):
    """Display ``prefix`` followed by ``items`` as one or more Math outputs.

    Parameters
    ----------
    prefix : str
        LaTeX placed in front of the first block (typically the left-hand side).
    items : sequence of str
        LaTeX fragments, concatenated as-is. The callers in this notebook pass terms
        that already begin with their own " + " / " - " sign.
    block_size : int
        Maximum number of items per displayed block; must be at least 1.

    Raises
    ------
    ValueError
        If ``block_size`` is smaller than 1 (a negative value used to display nothing).
    """
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")
    if not items:
        display(Math(prefix + r"\; 0"))
        return
    for i in range(0, len(items), block_size):
        # Continuation blocks get an indent only: each item brings its own sign, so an
        # extra "+" here would render as "+ +" or "+ -".
        lead = prefix if i == 0 else r"\quad"
        display(Math(lead + "".join(items[i:i + block_size])))

# 0) Intercept
display(Math(rf"\beta_0 = {intercept:.6g}"))

# 1) Full additive components per feature (ALL terms)
for feat in FEATURES:
    lhs = rf"f_{{{feat}}}({feat})"
    if feat in basis_labels:
        # Build all terms for this feature, in patsy column order
        terms = []
        for col in basis_labels[feat]:
            cval = float(coef[col_names.index(col)])
            k = int(re.search(r"\[(\d+)\]\s*$", col).group(1))
            terms.append(_sgn_term(cval) + rf"\,B_{{{feat},{k}}}({feat})")

        # Show as multiple Math blocks to avoid overly long single lines
        _display_in_blocks(lhs + r" \;=\;", terms, block_size=20)
    else:
        # No spline columns for this feature: its component is identically zero.
        display(Math(lhs + " = 0"))

# 2) Optional: global equation y = beta0 + sum f_feat(feat), expanded by features
#    (Shown as a header + the f_feat lines already printed)
display(Math(r"y \;=\; \beta_0 \;+\; \sum_j f_j(x_j)"))

# 3) Show EVERY piecewise definition for EVERY basis (can be long!)
for feat in FEATURES:
    feature_bases = sym_basis.get(feat)
    if feature_bases is None:
        continue
    heading = rf"\textbf{{Piecewise bases for }} {feat}:"
    display(Math(heading))
    for k_idx, Bexpr in feature_bases:
        lhs = rf"B_{{{feat},{k_idx}}}({feat}) \;=\; "
        display(Math(lhs + _sym_latex(Bexpr)))

# 4) (Very long) Single global equation with ALL terms
all_terms = []
for feat in FEATURES:
    # Features without spline columns contribute no terms.
    for col in basis_labels.get(feat, ()):
        cval = float(coef[col_names.index(col)])
        k = int(re.search(r"\[(\d+)\]\s*$", col).group(1))
        term = _sgn_term(cval) + rf"\,B_{{{feat},{k}}}({feat})"
        all_terms.append(term)

global_lhs = rf"y \;=\; {intercept:.6g}"
_display_in_blocks(global_lhs, all_terms, block_size=20)

# Elapsed wall-clock time since `start` was recorded at the top of the cell.
end = time.time()
running_time = end - start
print('Running Time: ', running_time, ' seconds')

<IPython.core.display.Math object>


=== Additive Spline GAM — Fundamental Period (TFP) ===
degree=3 | interior_knots_per_feature=5 | knot_quantile_range=(0.05, 0.95)
Train: {'R2': 0.915754, 'MAE': 441.156458, 'RMSE': 660.898191}
Test : {'R2': 0.875135, 'MAE': 542.577366, 'RMSE': 840.227463}

Detailed hand-calculable piecewise polynomials saved to:
  C:\Users\filip\Documents\POGLAVLJE KNJIGE\out_gam_additive_cfst_handcalc\hand_calculable_piecewise.txt


<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

Running Time:  16.53845477104187  seconds


In [4]:
# Tuning guidance

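# Simpler curves: lower the knot count (`--n_knots`; N_INTERIOR_K in this notebook's CONFIG, e.g., 5–6) and raise the ridge penalty (`--ridge_alpha`; here, restrict RIDGE_ALPHAS to larger values, e.g., 1e-2 to 1e-1).

# More flexible curves: increase the knot count (N_INTERIOR_K, e.g., 10–12).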

# If you ever want mild interactions (e.g., allow NoSt × OP smooth), we can extend this to add a few 2D tensor-product spline blocks — just say the word.