<a href="https://colab.research.google.com/github/hbisgin/BigDatav1/blob/main/Lecture20_NN_prep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Predict petal_width (y) from sepal_length (x1) only
# 1) OLS via scikit-learn
# 2) Single linear neuron via plain NumPy gradient descent

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression

# -----------------------------
# Load data
# -----------------------------
iris = load_iris()
x1 = iris.data[:, 0]      # sepal_length
y  = iris.data[:, 3]      # petal_width
n  = x1.shape[0]

# -----------------------------
# (1) OLS with scikit-learn
# -----------------------------
X = x1.reshape(-1, 1)
lin = LinearRegression()
lin.fit(X, y)

b_ols  = float(lin.intercept_)
w1_ols = float(lin.coef_[0])

y_hat_ols = b_ols + w1_ols * x1
mse_ols = np.mean((y_hat_ols - y) ** 2)

print("OLS (sklearn) coefficients:")
print(f"  bias={b_ols:.4f}, w1(sepal_len)={w1_ols:.4f}")
print(f"OLS MSE: {mse_ols:.4f}\n")

# -----------------------------
# (2) Gradient Descent
# Model: y_hat = b + w1*x1
# Loss: MSE
# -----------------------------
rng = np.random.default_rng(42)
b  = float(rng.normal(scale=1e-3)) #random number generation
w1 = float(rng.normal(scale=1e-3))

lr = 1e-3
epochs = 20000

for t in range(epochs):
    # forward
    y_hat = b + w1 * x1
    err = y_hat - y #I will also use this when I calculate gradients/derivatives

    # loss and gradients
    mse = np.mean(err ** 2)
    db  = (2.0 / n) * np.sum(err)
    dw1 = (2.0 / n) * np.sum(err * x1)

    # update
    b  -= lr * db
    w1 -= lr * dw1

    if (t + 1) % 5000 == 0:
        print(f"iter {t+1:5d}: MSE={mse:.4f}")

# final metrics
y_hat = b + w1 * x1
mse_gd = np.mean((y_hat - y) ** 2)

print("\nGradient Descent (plain NumPy) coefficients:")
print(f"  bias={b:.4f}, w1(sepal_len)={w1:.4f}")
print(f"GD MSE: {mse_gd:.4f}")

# Comparison
print("\nDifference (GD - OLS):")
print(f"  bias={b - b_ols:+.4e}, w1={w1 - w1_ols:+.4e}")


OLS (sklearn) coefficients:
  bias=-3.2002, w1(sepal_len)=0.7529
OLS MSE: 0.1910

iter  5000: MSE=0.3310
iter 10000: MSE=0.2867
iter 15000: MSE=0.2564
iter 20000: MSE=0.2357

Gradient Descent (plain NumPy) coefficients:
  bias=-1.6883, w1(sepal_len)=0.4991
GD MSE: 0.2357

Difference (GD - OLS):
  bias=+1.5120e+00, w1=-2.5383e-01


In [5]:
print(w1, b)
print(w1*x1)

0.49909126952753347 -1.68826261227548
[2.54536547 2.44554722 2.34572897 2.29581984 2.49545635 2.69509286
 2.29581984 2.49545635 2.19600159 2.44554722 2.69509286 2.39563809
 2.39563809 2.14609246 2.89472936 2.84482024 2.69509286 2.54536547
 2.84482024 2.54536547 2.69509286 2.54536547 2.29581984 2.54536547
 2.39563809 2.49545635 2.49545635 2.5952746  2.5952746  2.34572897
 2.39563809 2.69509286 2.5952746  2.74500198 2.44554722 2.49545635
 2.74500198 2.44554722 2.19600159 2.54536547 2.49545635 2.24591071
 2.19600159 2.49545635 2.54536547 2.39563809 2.54536547 2.29581984
 2.64518373 2.49545635 3.49363889 3.19418412 3.44372976 2.74500198
 3.24409325 2.84482024 3.144275   2.44554722 3.29400238 2.5952746
 2.49545635 2.94463849 2.99454762 3.04445674 2.79491111 3.34391151
 2.79491111 2.89472936 3.09436587 2.79491111 2.94463849 3.04445674
 3.144275   3.04445674 3.19418412 3.29400238 3.39382063 3.34391151
 2.99454762 2.84482024 2.74500198 2.74500198 2.89472936 2.99454762
 2.69509286 2.99454762 3.

In [8]:
# Predict petal_width (y) from sepal_length (x1) and petal_length (x2)
# 1) Ordinary Least Squares (OLS) via sklearn
# 2) Single linear neuron via plain NumPy gradient descent

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression

# -----------------------------
# Load data
# -----------------------------
iris = load_iris()
X = iris.data[:, [0, 2]]   # x1 = sepal_length, x2 = petal_length
y = iris.data[:, 3]        # petal_width
n = X.shape[0]

# -----------------------------
# (1) OLS with scikit-learn
# -----------------------------
lin = LinearRegression()
lin.fit(X, y)

b_ols  = float(lin.intercept_)
w1_ols = float(lin.coef_[0])
w2_ols = float(lin.coef_[1])

y_hat_ols = b_ols + w1_ols * X[:, 0] + w2_ols * X[:, 1]
mse_ols = np.mean((y_hat_ols - y) ** 2)

print("OLS (sklearn) coefficients:")
print(f"  bias={b_ols:.4f}, w1(sepal_len)={w1_ols:.4f}, w2(petal_len)={w2_ols:.4f}")
print(f"OLS MSE: {mse_ols:.4f}\n")

# -----------------------------
# (2) Single linear neuron via Gradient Descent
# Model: y_hat = b + w1*x1 + w2*x2
# Loss: MSE
# -----------------------------
rng = np.random.default_rng(42)
b, w1, w2 = rng.normal(scale=1e-3, size=3)  # produces 3 small random values centered around zero, with a tiny spread (std dev = 0.001).

lr = 1e-3
epochs = 20000

for t in range(epochs):
    y_hat = b + w1 * X[:, 0] + w2 * X[:, 1]
    err = y_hat - y

    # MSE and gradients
    mse = np.mean(err ** 2)
    db  = (2.0 / n) * np.sum(err)
    dw1 = (2.0 / n) * np.sum(err * X[:, 0])
    dw2 = (2.0 / n) * np.sum(err * X[:, 1])

    # update
    b  -= lr * db
    w1 -= lr * dw1
    w2 -= lr * dw2

    if (t + 1) % 5000 == 0:
        print(f"iter {t+1:5d}: MSE={mse:.4f}")

# final metrics
y_hat = b + w1 * X[:, 0] + w2 * X[:, 1]
mse_gd = np.mean((y_hat - y) ** 2)

print("\nGradient Descent (plain NumPy) coefficients:")
print(f"  bias={b:.4f}, w1(sepal_len)={w1:.4f}, w2(petal_len)={w2:.4f}")
print(f"GD MSE: {mse_gd:.4f}")

# Comparison
print("\nDifference (GD - OLS):")
print(f"  bias={b - b_ols:+.4e}, w1={w1 - w1_ols:+.4e}, w2={w2 - w2_ols:+.4e}")


OLS (sklearn) coefficients:
  bias=-0.0090, w1(sepal_len)=-0.0822, w2(petal_len)=0.4494
OLS MSE: 0.0410

iter  5000: MSE=0.0410
iter 10000: MSE=0.0410
iter 15000: MSE=0.0410
iter 20000: MSE=0.0410

Gradient Descent (plain NumPy) coefficients:
  bias=-0.0402, w1(sepal_len)=-0.0753, w2(petal_len)=0.4468
GD MSE: 0.0410

Difference (GD - OLS):
  bias=-3.1155e-02, w1=+6.9197e-03, w2=-2.5350e-03


# Logistic Regression w/ NNs

Deault LR from sklearn

In [9]:
# Logistic regression on Iris (binary: Virginica vs. not)
# Scenario A: ONE feature (x1)
#   1) sklearn LogisticRegression
#   2) Manual neuron with sigmoid + cross-entropy + explicit gradients (no @)

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# -----------------------------
# Data: x1 only, label y = 1(virginica), 0(other)
# -----------------------------
iris = load_iris()
x1 = iris.data[:, 2]            # petal_length is very separable
y  = (iris.target == 2).astype(float)  # virginica=1

n = x1.shape[0]

# (Optional) standardize x1 for nicer GD convergence
x1_mean, x1_std = x1.mean(), x1.std()
x1s = (x1 - x1_mean) / (x1_std + 1e-12)

# -----------------------------
# (1) scikit-learn
# -----------------------------
X_sklearn = x1s.reshape(-1, 1)
clf = LogisticRegression(solver="lbfgs") #this is just an option if you would like to give
# scikit-learn solves logistic regression as a convex optimization problem using iterative optimization algorithms.

clf.fit(X_sklearn, y)
prob_sklearn = clf.predict_proba(X_sklearn)[:, 1]
pred_sklearn = (prob_sklearn >= 0.5).astype(float)
print("[sklearn-1feat] acc =", accuracy_score(y, pred_sklearn))

# -----------------------------
# (2) Manual logistic neuron
# Model: p = sigmoid(b + w1 * x1s)
# Loss: average binary cross-entropy
# Gradients:
#   db  = (1/n) * sum(p - y)
#   dw1 = (1/n) * sum((p - y) * x1s)
# -----------------------------
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
b  = float(rng.normal(scale=1e-2))
w1 = float(rng.normal(scale=1e-2))

lr = 0.1
epochs = 2000

for t in range(epochs):
    z = b + w1 * x1s
    p = sigmoid(z)
    # cross-entropy (for monitoring)
    eps = 1e-12
    ce = -np.mean(y * np.log(p + eps) + (1 - y) * np.log(1 - p + eps))
    db  = np.mean(p - y)
    dw1 = np.mean((p - y) * x1s)
    # update
    b  -= lr * db
    w1 -= lr * dw1
    # (optional) print every 500 iters
    # if (t+1) % 500 == 0: print(t+1, ce)

p_hat = sigmoid(b + w1 * x1s)
pred_gd = (p_hat >= 0.5).astype(float)
print("[manual-1feat]  acc =", accuracy_score(y, pred_gd))


[sklearn-1feat] acc = 0.9533333333333334
[manual-1feat]  acc = 0.9533333333333334


Let's look at the multi-variable scenario

In [10]:
# Logistic regression on Iris (binary: Virginica vs. not)
# Scenario B: TWO features (x1, x2)
#   1) sklearn LogisticRegression
#   2) Manual neuron with sigmoid + cross-entropy + explicit gradients (no @)

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# -----------------------------
# Data: choose two good features
# -----------------------------
iris = load_iris()
x1 = iris.data[:, 2]            # petal_length
x2 = iris.data[:, 3]            # petal_width
y  = (iris.target == 2).astype(float)  # virginica=1

n = x1.shape[0]

# Standardize features for smoother GD
x1s = (x1 - x1.mean()) / (x1.std() + 1e-12)
x2s = (x2 - x2.mean()) / (x2.std() + 1e-12)

# -----------------------------
# (1) scikit-learn
# -----------------------------
X_sklearn = np.column_stack([x1s, x2s])  # just for sklearn
clf = LogisticRegression(solver="lbfgs")
clf.fit(X_sklearn, y)
prob_sklearn = clf.predict_proba(X_sklearn)[:, 1]
pred_sklearn = (prob_sklearn >= 0.5).astype(float)
print("[sklearn-2feat] acc =", accuracy_score(y, pred_sklearn))

# -----------------------------
# (2) Manual logistic neuron (no @)
# Model: p = sigmoid(b + w1*x1s + w2*x2s)
# Loss: average binary cross-entropy
# Gradients:
#   db  = (1/n) * sum(p - y)
#   dw1 = (1/n) * sum((p - y) * x1s)
#   dw2 = (1/n) * sum((p - y) * x2s)
# -----------------------------
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(1)
b  = float(rng.normal(scale=1e-2))
w1 = float(rng.normal(scale=1e-2))
w2 = float(rng.normal(scale=1e-2))

lr = 0.1
epochs = 2000

for t in range(epochs):
    z = b + w1 * x1s + w2 * x2s
    p = sigmoid(z)
    # cross-entropy (monitor)
    eps = 1e-12
    ce = -np.mean(y * np.log(p + eps) + (1 - y) * np.log(1 - p + eps))
    # gradients (explicit)
    diff = (p - y)
    db  = np.mean(diff)
    dw1 = np.mean(diff * x1s)
    dw2 = np.mean(diff * x2s)
    # update
    b  -= lr * db
    w1 -= lr * dw1
    w2 -= lr * dw2
    # (optional) print every 500 iters
    # if (t+1) % 500 == 0: print(t+1, ce)

p_hat = sigmoid(b + w1 * x1s + w2 * x2s)
pred_gd = (p_hat >= 0.5).astype(float)
print("[manual-2feat]  acc =", accuracy_score(y, pred_gd))

[sklearn-2feat] acc = 0.9533333333333334
[manual-2feat]  acc = 0.9533333333333334
