In [1]:
import math

# ---- Constants exported from the trained pipeline (autogenerated) ----

# Names of the model inputs, in the order the weights in W are applied.
FEATURES = [
    'num__Age',
    'num__Salary',
    'cat__Country_Germany',
    'cat__Country_Spain',
]

# Logistic-regression coefficients, one per entry of FEATURES.
W = [
    -0.11365409190612158,  # num__Age
    -0.2498836760388902,   # num__Salary
    -0.3341415989902063,   # cat__Country_Germany
    -0.1597444172497891,   # cat__Country_Spain
]

# Logistic-regression intercept.
B = 0.1631880094644399

# StandardScaler statistics (mean, standard deviation) for the numeric inputs.
AGE_MEAN, AGE_STD = 39.0, 7.664854858377946
SALARY_MEAN, SALARY_STD = 64847.22222222222, 11430.808691675275


def _sigmoid(x: float) -> float:
    """Return the logistic function ``1 / (1 + e**-x)`` of *x*.

    ``math.exp`` is only ever evaluated on a non-positive argument, so the
    exponential cannot overflow for inputs of large magnitude.
    """
    # e**(-|x|): lies in (0, 1] for any finite x.
    decay = math.exp(-x if x >= 0 else x)
    if x >= 0:
        return 1.0 / (1.0 + decay)
    # Negative input: algebraically the same function, rewritten in terms of e**x.
    return decay / (1.0 + decay)


def predict_purchased_from_line(line: str) -> str:
    """Classify one comma-separated record as "Yes" or "No".

    *line* must hold exactly three comma-separated fields in the order
    country, age, salary, e.g. ``"Spain,27.0,48000.0"``. Whitespace around
    each field is ignored.

    The computation reproduces the exported pipeline by hand:
      1. Age and Salary are standardised with AGE_MEAN/AGE_STD and
         SALARY_MEAN/SALARY_STD.
      2. Country becomes two indicator values (Germany, Spain). France is
         the dropped baseline; any value other than the exact strings
         "Germany" or "Spain" also yields both indicators equal to 0.
      3. The weighted sum with W plus the intercept B is passed through
         the sigmoid, and a probability of at least 0.5 maps to "Yes".

    Raises:
        ValueError: if the line does not split into exactly three fields,
            or if age or salary cannot be parsed as a float.
    """
    fields = [part.strip() for part in line.split(",")]
    country, age_text, salary_text = fields
    age = float(age_text)
    salary = float(salary_text)

    # Entries are laid out in the same order as FEATURES / W.
    features = [
        (age - AGE_MEAN) / AGE_STD,
        (salary - SALARY_MEAN) / SALARY_STD,
        float(country == "Germany"),
        float(country == "Spain"),
    ]

    weighted_total = sum(weight * value for weight, value in zip(W, features))
    probability = _sigmoid(B + weighted_total)

    if probability >= 0.5:
        return "Yes"
    return "No"


# Smoke test: print the prediction for one sample row per country.
for _sample in (
    "Spain,27.0,48000.0",
    "France,44.0,72000.0",
    "Germany,30.0,54000.0",
):
    print(predict_purchased_from_line(_sample))


Yes
No
Yes
