# In [2]:
"""
Math Agent: Fit a model, check R², plot residuals, and save CSV
----------------------------------------------------------------
Agent flow:
- Perception: read/parse (x,y) data (and optional user-added points)
- Reasoning: fit candidate models (linear, quadratic) and compare
- Memory: keep the current dataset and results in variables
- Planning: if R² < target → ask for more data; else stop
- Action: print a report, draw a plot, and export a CSV

Requirements:
    pip install openai matplotlib pandas
(You can skip 'openai' if you leave USE_LLM_FOR_PROMPTS = False.)
"""

import math
from typing import Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import pandas as pd

# --------------------------- 1) (Optional) LLM setup & helper ---------------------------
# Set this to True on *your* machine if you want the LLM to phrase the prompt that asks
# the user for more data. In this notebook, we leave it False to avoid API calls.
# Feature switch and model name for the optional LLM-phrased prompts.
USE_LLM_FOR_PROMPTS = False
PRIMARY_MODEL = "gpt-4o-mini"

# Stays False unless the switch is on AND the client below was created.
_openai_available = False
if USE_LLM_FOR_PROMPTS:
    try:
        from openai import OpenAI
        # --- Paste your API key to use the LLM prompt helper ---
        # NOTE(review): consider loading the key from the environment rather
        # than committing it to the file.
        client = OpenAI(api_key="put_your_OpenAI_api_key_here")
    except Exception:
        # Package missing or client construction failed: stay offline.
        _openai_available = False
    else:
        _openai_available = True

def llm_say(system_instruction: str, user_msg: str) -> str:
    """
    Return an LLM-phrased version of user_msg, or user_msg itself when offline.

    Agent note (Action): helper used to word the "please add more data"
    request more nicely when R² is too low.

    The plain user_msg is returned unchanged when USE_LLM_FOR_PROMPTS is off
    or the client could not be set up. If the API call (or reading its reply)
    raises, user_msg is returned with a short note naming the exception.
    """
    llm_enabled = USE_LLM_FOR_PROMPTS and _openai_available
    if not llm_enabled:
        return user_msg  # offline-friendly fallback

    chat = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": user_msg},
    ]
    try:
        resp = client.chat.completions.create(
            model=PRIMARY_MODEL,
            messages=chat,
            temperature=0.3,
        )
        reply = resp.choices[0].message.content
        return reply.strip()
    except Exception as e:
        return user_msg + f"\n(Note: LLM phrasing skipped due to: {type(e).__name__}: {e})"

# --------------------------- 2) Utility functions ---------------------------
def parse_pairs(text: str) -> Tuple[List[float], List[float]]:
    """
    Perception: Read raw text like "7,14.8; 8,16.9; 9,18.2" and turn it into numbers.

    Pairs are separated by ';' and the two numbers of a pair by ','.
    Empty segments (e.g. a trailing ';') are skipped.

    Returns:
        (x_add, y_add): two lists of floats of equal length; both empty when
        the text contains no pairs.

    Raises:
        ValueError: if a pair has no comma, does not consist of exactly two
            numbers, or contains a non-finite value (nan/inf). The message
            always quotes the offending pair.
    """
    x_add: List[float] = []
    y_add: List[float] = []
    for token in text.split(";"):
        token = token.strip()
        if not token:
            continue
        if "," not in token:
            raise ValueError(f"Bad pair (missing comma): {token}")
        xs, ys = token.split(",", 1)
        try:
            x_val = float(xs.strip())
            y_val = float(ys.strip())
        except ValueError:
            # Re-raise with the whole pair so the user can see what to fix
            # (float()'s own message only shows one fragment).
            raise ValueError(f"Bad pair (not two numbers): {token}") from None
        # nan/inf parse as floats but would turn every sum in the fit into nan.
        if not (math.isfinite(x_val) and math.isfinite(y_val)):
            raise ValueError(f"Bad pair (non-finite value): {token}")
        x_add.append(x_val)
        y_add.append(y_val)
    return x_add, y_add

def pretty_eq(model_name: str, coeffs: Tuple[float, ...]) -> str:
    """
    Action: Turn coefficients into a readable equation string.

    Args:
        model_name: "linear" expects two coefficients (b0, b1); any other
            name is treated as quadratic and expects three (a0, a1, a2).
        coeffs: coefficients in increasing power of x.

    Returns:
        The equation with 4 decimals per coefficient. Negative slope or
        curvature terms are written as "- 2.0000·x" rather than "+ -2.0000·x".

    Raises:
        ValueError: if the number of coefficients does not match the model.
    """
    def _term(value: float, suffix: str) -> str:
        # Put the sign in the operator so negatives do not read "+ -v".
        sign = "-" if value < 0 else "+"
        return f" {sign} {abs(value):.4f}{suffix}"

    if model_name == "linear":
        b0, b1 = coeffs
        return f"ŷ = {b0:.4f}" + _term(b1, "·x")

    a0, a1, a2 = coeffs
    return f"ŷ = {a0:.4f}" + _term(a1, "·x") + _term(a2, "·x²")

# --------------------------- 3) OLS model-fitting tools ---------------------------
def fit_linear(x: List[float], y: List[float]) -> Dict[str, object]:
    """
    Tool: Simple Linear Regression (OLS): y ~ b0 + b1*x

    Returns:
        dict with keys
          name  - "linear"
          b     - (b0, b1) intercept and slope
          yhat  - fitted values, one per input point
          resid - residuals y - yhat
          SSE   - sum of squared residuals
          MSE   - SSE / (n - 2), or nan when n == 2
          R2    - 1 - SSE/SST, or nan when all y values are identical

    Raises:
        ValueError: if x and y differ in length, fewer than two points are
            given, or all x values are identical.
    """
    n = len(x)
    if n != len(y):
        raise ValueError("x and y must have the same length")
    if n < 2:
        raise ValueError("Need at least two points for a line")

    mean_x = sum(x) / n
    mean_y = sum(y) / n

    # Deviations from the means drive every sum below.
    dev_x = [xi - mean_x for xi in x]
    dev_y = [yi - mean_y for yi in y]

    cross = sum(dx * dy for dx, dy in zip(dev_x, dev_y))
    spread_x = sum(dx ** 2 for dx in dev_x)
    if spread_x == 0:
        raise ValueError("All x values are identical; cannot fit a line")

    slope = cross / spread_x
    intercept = mean_y - slope * mean_x

    fitted = [intercept + slope * xi for xi in x]
    errors = [yi - fi for yi, fi in zip(y, fitted)]
    sse = sum(err * err for err in errors)

    # With exactly two points there are no residual degrees of freedom.
    if n > 2:
        mse = sse / (n - 2)
    else:
        mse = float("nan")

    sst = sum(dy ** 2 for dy in dev_y)
    if sst > 0:
        r_squared = 1 - sse / sst
    else:
        r_squared = float("nan")

    return {
        "name": "linear",
        "b": (intercept, slope),
        "yhat": fitted,
        "resid": errors,
        "SSE": sse,
        "MSE": mse,
        "R2": r_squared,
    }

def solve_3x3(A: Tuple[Tuple[float, float, float], ...],
              b: Tuple[float, float, float]) -> Tuple[float, float, float]:
    """
    Solve the 3x3 linear system A·v = b with Cramer's rule.
    (Teaching-friendly, avoids external deps.)

    Returns the solution as a 3-tuple.
    Raises ValueError when the determinant of A is exactly zero.
    """
    def det3(M):
        # Cofactor expansion along the first row.
        (aa, bb, cc), (dd, ee, ff), (gg, hh, ii) = M
        return aa*(ee*ii - ff*hh) - bb*(dd*ii - ff*gg) + cc*(dd*hh - ee*gg)

    (a11, a12, a13), (a21, a22, a23), (a31, a32, a33) = A
    coeff = ((a11, a12, a13), (a21, a22, a23), (a31, a32, a33))

    det = det3(coeff)
    if det == 0:
        raise ValueError("Singular matrix (cannot solve quadratic)")

    rhs = (b[0], b[1], b[2])

    def swap_column(k):
        # Copy of the coefficient matrix with column k replaced by rhs.
        return tuple(
            tuple(rhs[r] if col == k else coeff[r][col] for col in range(3))
            for r in range(3)
        )

    return tuple(det3(swap_column(k)) / det for k in range(3))

def fit_quadratic(x: List[float], y: List[float]) -> Dict[str, object]:
    """
    Tool: Quadratic Regression (OLS): y ~ a0 + a1*x + a2*x^2

    Builds the 3x3 normal equations from power sums of x and solves them
    with solve_3x3.

    Returns:
        dict with keys
          name  - "quadratic"
          b     - (a0, a1, a2)
          yhat  - fitted values, one per input point
          resid - residuals y - yhat
          SSE   - sum of squared residuals
          MSE   - SSE / (n - 3), or nan when n == 3
          R2    - 1 - SSE/SST, or nan when all y values are identical

    Raises:
        ValueError: if x and y differ in length, fewer than three points are
            given, fewer than three distinct x values are present, or the
            normal equations are singular.
    """
    n = len(x)
    if n != len(y):
        raise ValueError("x and y must have the same length")
    if n < 3:
        raise ValueError("Need at least three points for a quadratic")
    # A parabola is only determined by >= 3 distinct x positions. Checking
    # this up front is reliable; the solver's exact "det == 0" test can be
    # defeated by floating-point round-off and return meaningless coefficients.
    if len(set(x)) < 3:
        raise ValueError("Need at least three distinct x values for a quadratic")

    # Power sums of x (left-hand side of the normal equations).
    X0 = n
    X1 = sum(x)
    X2 = sum(xi*xi for xi in x)
    X3 = sum(xi**3 for xi in x)
    X4 = sum(xi**4 for xi in x)
    # Moments of y against powers of x (right-hand side).
    Y0 = sum(y)
    Y1 = sum(xi*yi for xi, yi in zip(x, y))
    Y2 = sum((xi*xi)*yi for xi, yi in zip(x, y))

    A = ((X0, X1, X2),
         (X1, X2, X3),
         (X2, X3, X4))
    b = (Y0, Y1, Y2)
    a0, a1, a2 = solve_3x3(A, b)

    yhat = [a0 + a1*xi + a2*(xi*xi) for xi in x]
    y_bar = Y0 / n
    resid = [yi - yh for yi, yh in zip(y, yhat)]
    SSE = sum(e*e for e in resid)
    # Three parameters are estimated, so n == 3 leaves no residual d.o.f.
    MSE = SSE / (n - 3) if n > 3 else float("nan")
    SST = sum((yi - y_bar) ** 2 for yi in y)
    R2 = 1 - SSE / SST if SST > 0 else float("nan")
    return {"name": "quadratic", "b": (a0, a1, a2), "yhat": yhat, "resid": resid,
            "SSE": SSE, "MSE": MSE, "R2": R2}

# --------------------------- 4) Agent controller ---------------------------
def choose_best_model(
    x: List[float],
    y: List[float],
    *,
    min_gain: float = 0.0,
) -> Tuple[Dict[str, object], Optional[Dict[str, object]]]:
    """
    Reasoning: Fit linear and quadratic (if possible) and pick the higher R².

    Args:
        x, y: the data points.
        min_gain: how much the quadratic R² must exceed the linear R² before
            the quadratic is preferred. The default 0.0 keeps the plain
            "higher R² wins" rule. Because the quadratic contains the linear
            model as a special case, its R² is essentially never lower, so a
            small positive value (e.g. 0.01) avoids always picking the more
            complex model.

    Returns:
        (best, other). other is None when the quadratic could not be fitted
        (too few points, degenerate x values, numeric overflow).

    Raises:
        ValueError: propagated from fit_linear when even a line cannot be fit.
    """
    linear = fit_linear(x, y)
    try:
        quad = fit_quadratic(x, y)
    except (ValueError, ArithmeticError):
        # Quadratic is optional: fall back to the line alone. Only expected
        # fitting failures are absorbed; programming errors still surface.
        return linear, None

    # A nan linear R² (all y identical) cannot be compared, so defer to quad.
    if math.isnan(linear["R2"]) or quad["R2"] > linear["R2"] + min_gain:
        return quad, linear
    return linear, quad

def report_model(model: Dict[str, object]) -> None:
    """
    Action: print a short report for one fitted model.

    Prints the model name with its equation, then SSE, MSE and R²
    (six decimals each). Reads the keys name, b, SSE, MSE and R2.
    """
    name = model["name"]
    equation = pretty_eq(name, model["b"])

    print("\n=== Model Report ===")
    print(f"Chosen: {name}   {equation}")
    # (label shown, key in the model dict)
    for label, key in (("SSE", "SSE"), ("MSE", "MSE"), ("R^2", "R2")):
        print(f"{label} = {model[key]:.6f}")

def ask_for_more_data(target_r2: float, current_r2: float) -> str:
    """
    Planning/Action: build the request for more data shown when R² is too low.

    The plain-text request states the current and target R² and the expected
    "x,y; x,y" input format. It is passed through llm_say, which may rephrase
    it with the LLM or return it unchanged.
    """
    parts = [
        f"Our current regression has R^2 = {current_r2:.3f}, below the target {target_r2:.2f}.\n",
        "Please add a few more (x,y) points that better cover the range. Use this format:\n",
        "x,y; x,y; x,y   (numbers only)",
    ]
    plain_request = "".join(parts)
    return llm_say(
        "You write brief, friendly, practical data-collection prompts for students.",
        plain_request,
    )

# --------------------------- 5) Plotting & CSV ---------------------------
def plot_with_model(x: List[float], y: List[float], model: Dict[str, object], out_png: str) -> None:
    """
    Draw a single chart and save it to out_png:
      - data points
      - fitted line/curve
      - vertical residual sticks with labels (e1, e2, ...)

    Args:
        x, y: the data points.
        model: fitted-model dict; reads the keys name, b and yhat. A name
            other than "linear" is drawn as a quadratic curve.
        out_png: destination path of the image (saved at 160 dpi).

    The figure is always closed, even if drawing or saving fails, so a
    failed call does not leave an open figure behind.
    """
    name = model["name"]
    coeffs = model["b"]
    yhat = model["yhat"]

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.scatter(x, y, label="data")

        x_min, x_max = min(x), max(x)
        if name == "linear":
            # Two end points are enough for a straight line.
            b0, b1 = coeffs
            xs = [x_min, x_max]
            ys = [b0 + b1*xi for xi in xs]
        else:
            # Sample the curve on 201 evenly spaced points across the data range.
            a0, a1, a2 = coeffs
            xs = [x_min + t*(x_max - x_min)/200.0 for t in range(201)]
            ys = [a0 + a1*xi + a2*(xi*xi) for xi in xs]
        ax.plot(xs, ys, linewidth=2, label="least squares fit")

        for i, (xi, yi, yhi) in enumerate(zip(x, y, yhat), start=1):
            ax.vlines(xi, yhi, yi)  # residual stick
            mid_y = (yi + yhi) / 2.0
            ax.text(xi + 0.05, mid_y, f"e{i}={yi - yhi:.2f}", fontsize=9, va="center")

        ax.set_title("Least Squares Fit and Residuals")
        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.legend(loc="best")
        # Operate on this figure explicitly rather than pyplot's "current" one.
        fig.tight_layout()
        fig.savefig(out_png, dpi=160)
    finally:
        plt.close(fig)

def save_results_csv(x: List[float], y: List[float], model: Dict[str, object], out_csv: str) -> None:
    """
    Write the per-point fit results to out_csv.

    Columns: i (1-based index), x, y, y_hat, residual, residual_sq, with the
    computed values rounded to 6 decimals. A final summary row carries
    "SSE=..." in the residual column and "R2=..." in the residual_sq column;
    its other cells hold a dash placeholder.

    Reads the keys yhat, resid, SSE and R2 from model.
    """
    fitted = model["yhat"]
    errors = model["resid"]

    per_point = pd.DataFrame({
        "i": list(range(1, len(x)+1)),
        "x": x,
        "y": y,
        "y_hat": [round(value, 6) for value in fitted],
        "residual": [round(err, 6) for err in errors],
        "residual_sq": [round(err*err, 6) for err in errors],
    })

    # Summary row: placeholders first, then the two aggregate statistics.
    footer = {column: ["—"] for column in ("i", "x", "y", "y_hat")}
    footer["residual"] = [f"SSE={round(model['SSE'], 6)}"]
    footer["residual_sq"] = [f"R2={round(model['R2'], 6)}"]

    table = pd.concat([per_point, pd.DataFrame(footer)], ignore_index=True)
    table.to_csv(out_csv, index=False)

# --------------------------- 6) Main flow (demo round) ---------------------------
# Seed data (edit these to try your own)
x = [1, 2, 3, 4, 5, 6]
y = [3.2, 5.1, 6.8, 9.2, 10.1, 13.0]

TARGET_R2 = 0.80

best, other = choose_best_model(x, y)
report_model(best)
if other is not None:
    print(f"(Other model) {other['name']}  R^2={other['R2']:.4f}")

# Planning rule: if R² < target, ask the user for more data points.
# R² is nan when all y values are identical; nan compares False against any
# threshold, so it must be tested explicitly or it would count as "target met".
if math.isnan(best["R2"]) or best["R2"] < TARGET_R2:
    print("\nREQUEST FOR MORE DATA:\n" + ask_for_more_data(TARGET_R2, best["R2"]))
else:
    print(f"\nTarget met: R^2 {best['R2']:.3f} ≥ {TARGET_R2:.2f}")

# Action: produce plot and CSV
from pathlib import Path  # only needed for the output paths below

# Outputs go to ~/Desktop/agentic_AI (created if it does not exist).
OUTPUT_DIR = Path.home() / "Desktop" / "agentic_AI"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

png_path = OUTPUT_DIR / "math_agent_fit.png"
csv_path = OUTPUT_DIR / "math_agent_results.csv"

plot_with_model(x, y, best, png_path)
save_results_csv(x, y, best, csv_path)

print("\nSaved files:")
print("Plot:", png_path)
print("CSV :", csv_path)



# --- Captured output of the cell above ---
# === Model Report ===
# Chosen: quadratic   ŷ = 1.5600 + 1.6721·x + 0.0321·x²
# SSE = 0.656286
# MSE = 0.218762
# R^2 = 0.989694
# (Other model) linear  R^2=0.9891
#
# Target met: R^2 0.990 ≥ 0.80
#
# Saved files:
# Plot: /Users/armenpischdotchian/Desktop/agentic_AI/math_agent_fit.png
# CSV : /Users/armenpischdotchian/Desktop/agentic_AI/math_agent_results.csv
