In [14]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd

# ================================================================
# 1) MAKE FAKE CLASSIFICATION DATA (INTO 2 FEATURES AND A LABEL)
# ================================================================
X, y = make_classification(
    n_samples=1000,
    n_features=2,        # 2 features â†’ we will call them Age & Income
    n_informative=2,
    n_redundant=0,
    n_classes=2,         # 0 = No Purchase, 1 = Purchase
    random_state=42
)

# ================================================================
# 2) CONVERT FEATURE 0 â†’ REALISTIC AGE (18â€“65)
# ================================================================
age_min, age_max = 18, 65

x0 = X[:, 0]
x0_norm = (x0 - x0.min()) / (x0.max() - x0.min())  # Normalize 0 â†’ 1

age = x0_norm * (age_max - age_min) + age_min

# ================================================================
# 3) CONVERT FEATURE 1 â†’ REALISTIC INCOME (3000â€“30000)
# ================================================================
inc_min, inc_max = 3000, 30000

x1 = X[:, 1]
x1_norm = (x1 - x1.min()) / (x1.max() - x1.min())

income = x1_norm * (inc_max - inc_min) + inc_min

# ================================================================
# 4) COMBINE BACK INTO NEW X WITH REALISTIC VALUES
# ================================================================
X_real = np.column_stack((age, income))

df = pd.DataFrame({
    "Age": age,
    "Income": income,
    "Purchase": y
})

print("\n### FIRST 5 ROWS OF DATA:")
print(df.head())

# ================================================================
# 5) TRAIN / TEST SPLIT USING REALISTIC FEATURES
# ================================================================
X_train, X_test, y_train, y_test = train_test_split(
    X_real, y, test_size=0.2, random_state=30
)

# ================================================================
# 6) TRAIN LOGISTIC REGRESSION
# ================================================================
model = LogisticRegression(random_state=30)
model.fit(X_train, y_train)

print("\nModel training complete.")

# ================================================================
# 7) PRINT INTERCEPT & COEFFICIENTS (TEACHING EXPLANATION)
# ================================================================

intercept = model.intercept_[0]
coef_age = model.coef_[0][0]
coef_income = model.coef_[0][1]

print("\n========================")
print("ðŸ“Œ MODEL LEARNED VALUES")
print("========================")

print(f"Intercept (baseline): {intercept:.4f}")
print("Meaning: This is the modelâ€™s starting point BEFORE adding age or income.")

print(f"\nCoefficient for AGE: {coef_age:.4f}")
print("Meaning: For every +1 year increase in age, the log-odds of buying change by this amount.")

print(f"\nCoefficient for INCOME: {coef_income:.6f}")
print("Meaning: For every 1 AED increase in income, the chance of buying changes by this small amount.")
print("ðŸ’¡ Higher income usually shifts prediction toward buying.")

print("\nInterpretation:")
print("Prediction = intercept + (coef_age * Age) + (coef_income * Income)")
print("Then logistic function converts this number into a probability between 0 and 1.")

# ================================================================
# 8) TEST AN EXAMPLE PERSON
# ================================================================
example = [[60, 60000]]  # Age 30, income 8000
pred = model.predict(example)
prob = model.predict_proba(example)

print("\n============================")
print("ðŸ“Œ EXAMPLE PREDICTION")
print("============================")
print(f"Person: Age=30, Income=8000")
print(f"Predicted class (0=no buy, 1=buy): {pred[0]}")
print(f"Predicted probabilities [no-buy, buy]: {prob[0]}")



### FIRST 5 ROWS OF DATA:
         Age        Income  Purchase
0  35.030791  15231.706200         1
1  48.508547  20961.859939         1
2  46.804709  20034.295842         1
3  23.278116  23736.513334         1
4  47.875280  20665.025504         1

Model training complete.

ðŸ“Œ MODEL LEARNED VALUES
Intercept (baseline): -9.4647
Meaning: This is the modelâ€™s starting point BEFORE adding age or income.

Coefficient for AGE: -0.0518
Meaning: For every +1 year increase in age, the log-odds of buying change by this amount.

Coefficient for INCOME: 0.000685
Meaning: For every 1 AED increase in income, the chance of buying changes by this small amount.
ðŸ’¡ Higher income usually shifts prediction toward buying.

Interpretation:
Prediction = intercept + (coef_age * Age) + (coef_income * Income)
Then logistic function converts this number into a probability between 0 and 1.

ðŸ“Œ EXAMPLE PREDICTION
Person: Age=30, Income=8000
Predicted class (0=no buy, 1=buy): 1
Predicted probabilities [no-b