<img src="images/0925/1.jpg">

<img src="images/0925/2.jpg">

In [None]:
# -*- coding: utf-8 -*-
# Linear Regression from scratch with MSE + manual partial derivatives (gradient descent)

import numpy as np
import matplotlib.pyplot as plt

# 1) Build the data: six points that satisfy y = 2x + 3 exactly
x = np.array(range(6), dtype=float)
y = 3 + 2 * x

# 2) Hand-written MSE and its gradient (partial derivatives)
def mse(a, b, x, y):
    """
    Mean squared error of the line y_hat = a*x + b on the data (x, y).

    MSE(a,b) = (1/n) * sum_i (y_i - (a*x_i + b))^2

    Parameters
    ----------
    a : number
        Slope of the line.
    b : number
        Intercept of the line.
    x, y : array-like of numbers
        Inputs and targets. NumPy arrays, lists and tuples are all accepted.

    Returns
    -------
    numpy.float64
        The mean of the squared residuals.
    """
    # Convert up front: with a plain list `a * x` is not element-wise scaling
    # (it raises TypeError for a float `a` and repeats the list for an int `a`).
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    y_hat = a * x + b
    return np.mean((y - y_hat) ** 2)

def grad(a, b, x, y):
    """
    Partial derivatives of the MSE with respect to the slope and intercept.

    d(MSE)/da = -(2/n) * sum_i x_i * (y_i - y_hat_i)
    d(MSE)/db = -(2/n) * sum_i (y_i - y_hat_i)

    Parameters
    ----------
    a : number
        Current slope.
    b : number
        Current intercept.
    x, y : array-like of numbers
        Inputs and targets. NumPy arrays, lists and tuples are all accepted.

    Returns
    -------
    tuple
        (d_a, d_b), the two partial derivatives evaluated at (a, b).
    """
    # Convert up front: with a plain list `a * x` is not element-wise scaling
    # (it raises TypeError for a float `a` and repeats the list for an int `a`).
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(x)
    y_hat = a * x + b
    residual = y - y_hat
    d_a = (-2.0 / n) * np.sum(x * residual)
    d_b = (-2.0 / n) * np.sum(residual)
    return d_a, d_b

# 3) Fit the line with gradient descent
rng = np.random.default_rng(42)
# Random starting point; the slope is drawn first, then the intercept
a, b = rng.uniform(-5, 5), rng.uniform(-5, 5)

lr, iters = 0.05, 400   # learning rate, number of update steps

# Per-step traces of the loss and of both parameters
history_mse, history_a, history_b = [], [], []

for _ in range(iters):
    # Both parameters move against the gradient taken at the current (a, b)
    d_a, d_b = grad(a, b, x, y)
    a, b = a - lr * d_a, b - lr * d_b
    history_mse += [mse(a, b, x, y)]
    history_a += [a]
    history_b += [b]

print(f"최종 파라미터: a={a:.6f}, b={b:.6f}")
print(f"최종 MSE: {history_mse[-1]:.10f} (정답은 a=2, b=3)")

# 4) Visualize the results
# Evaluate the fitted line on a dense grid reaching 0.5 past the data on each side
xx = np.linspace(x.min() - 0.5, x.max() + 0.5, 200)
yy = a * xx + b

# Figure 1: the data points with the fitted line drawn over them
fig_fit, ax_fit = plt.subplots()
ax_fit.scatter(x, y, label="Data (y=2x+3)", marker="x")
ax_fit.plot(xx, yy, label=f"Fitted line (a={a:.4f}, b={b:.4f})")
ax_fit.set_title("Linear Regression via Gradient Descent")
ax_fit.set_xlabel("x")
ax_fit.set_ylabel("y")
ax_fit.legend()
ax_fit.grid(True)
plt.show()

# Figure 2: the loss recorded after each update step (steps numbered from 1)
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(range(1, iters + 1), history_mse)
ax_loss.set_title(f"MSE over iterations (final MSE={history_mse[-1]:.6g})")
ax_loss.set_xlabel("Iteration")
ax_loss.set_ylabel("MSE")
ax_loss.grid(True)
plt.show()

In [10]:
import numpy as np

# Six sample points lying on the line y = 100x - 20
x = np.array([0, 1, 2, 3, 4, 5], dtype=float)
y = 100 * x - 20

# Starting slope and intercept for the gradient-descent loop below
a, b = 100, 1000
def mse(a, b, x, y):
    """Return the mean squared error of the line a*x + b against the targets y."""
    residual = y - (a * x + b)
    return np.mean(residual * residual)

def grad(a, b, x, y):
    """Return (dMSE/da, dMSE/db) for the line a*x + b on the data (x, y)."""
    err = y - (a * x + b)
    # Both partial derivatives share the factor -2/n
    scale = -2.0 / len(x)
    return scale * np.sum(x * err), scale * np.sum(err)



lr = 0.05     # learning rate
iters = 400   # number of gradient-descent steps

# Traces of the loss and of both parameters, one entry per step
history_mse, history_a, history_b = [], [], []

for j in range(iters):
    d_a, d_b = grad(a, b, x, y)
    # Both parameters are updated from the gradient taken at the old (a, b)
    a, b = a - lr * d_a, b - lr * d_b
    cost = mse(a, b, x, y)
    history_mse += [cost]
    history_a += [a]
    history_b += [b]
    print(a,b,cost)

-155.0 898.0 268336.5
-150.75 869.95 252595.5129166667
-143.38333333333335 843.6425 237889.0125053241
-136.19256944444447 818.1240833333334 224038.76567487177
-129.21373495370372 793.3598173611111 210994.9005094322
-122.4410989197531 769.327269363426 198710.4682838313
-115.86857558416926 746.0048171570216 187141.25369970422
-109.49025225460285 723.3714793373618 176245.61573813538
-103.30039085555737 701.4068944672764 165984.33778133642
-97.29342285478222 680.0913027344382 156320.4864604609
-91.4639442548414 659.40552817469 147219.2787215058
-85.80671073157593 639.3309614209313 138647.95663083115
-80.31663291619749 619.8495429617321 130575.66946968548
-74.98877181678283 600.9437468946082 122973.36269336067
-69.81833437505063 582.5965651593431 115813.67335530814
-64.80066915442332 564.7914922371714 109070.83161981522
-59.93126215549479 547.51251030206 102720.56800875864
-55.20573275513958 530.7440748107277 96740.02604858729
-50.619829765610234 514.4711005184398 91107.68000312628
-46.1694

In [4]:
import numpy as np

# Data: six points on the line y = 100x - 20
x = np.array([0, 1, 2, 3, 4, 5], dtype=float)
y = 100 * x - 20

# Fixed (non-random) initial slope and intercept
a, b = 100, 1000

# Mean squared error
def mse(a, b, x, y):
    """Average of the squared differences between the line a*x + b and y."""
    prediction = b + a * x
    # The sign of the difference is irrelevant once it is squared
    return np.mean((prediction - y) ** 2)

# Gradient (partial derivatives) of the MSE
def grad(a, b, x, y):
    """Return the pair (dMSE/da, dMSE/db) evaluated at the given a and b."""
    n = len(x)
    e = y - (a * x + b)
    factor = -2.0 / n
    # Residuals weighted by x give the slope term; plain residuals the intercept term
    weighted_sum, plain_sum = np.sum(x * e), np.sum(e)
    return factor * weighted_sum, factor * plain_sum

# Gradient-descent training
lr = 0.05
iters = 400

for j in range(iters):
    # Show the parameters going into this step, before they are updated
    print(a,b)
    d_a, d_b = grad(a, b, x, y)
    a, b = a - lr * d_a, b - lr * d_b
    cost = mse(a, b, x, y)
    print(f"iter={j+1}, a={a:.6f}, b={b:.6f}, MSE={cost:.6f}")

# Final summary
print("\n최종 결과:")
print(f"a={a:.6f}, b={b:.6f}, MSE={mse(a,b,x,y):.6f}")

100 1000
iter=1, a=-155.000000, b=898.000000, MSE=268336.500000
-155.0 898.0
iter=2, a=-150.750000, b=869.950000, MSE=252595.512917
-150.75 869.95
iter=3, a=-143.383333, b=843.642500, MSE=237889.012505
-143.38333333333335 843.6425
iter=4, a=-136.192569, b=818.124083, MSE=224038.765675
-136.19256944444447 818.1240833333334
iter=5, a=-129.213735, b=793.359817, MSE=210994.900509
-129.21373495370372 793.3598173611111
iter=6, a=-122.441099, b=769.327269, MSE=198710.468284
-122.4410989197531 769.327269363426
iter=7, a=-115.868576, b=746.004817, MSE=187141.253700
-115.86857558416926 746.0048171570216
iter=8, a=-109.490252, b=723.371479, MSE=176245.615738
-109.49025225460285 723.3714793373618
iter=9, a=-103.300391, b=701.406894, MSE=165984.337781
-103.30039085555737 701.4068944672764
iter=10, a=-97.293423, b=680.091303, MSE=156320.486460
-97.29342285478222 680.0913027344382
iter=11, a=-91.463944, b=659.405528, MSE=147219.278722
-91.4639442548414 659.40552817469
iter=12, a=-85.806711, b=639.330

In [15]:
import numpy as np

# Data: the line y = 100x - 20 with every target shifted by a constant 0.03
x = np.array([0, 1, 2, 3, 4, 5], dtype=float)
y = 100 * x - 20 + 0.03

# Initial slope and intercept
a, b = 1000, 300

# Loss function with the 1/(2n) scaling
def mse(a, b, x, y):
    """
    Return half of the mean squared error of the line a*x + b against y.

    Despite the name, the value is (1/(2n)) * sum_i (y_i - (a*x_i + b))^2,
    i.e. the plain MSE divided by two.
    """
    residual = y - (a * x + b)
    # The mean supplies the 1/n factor; the 0.5 supplies the extra 1/2
    return 0.5 * np.mean(residual ** 2)

# Gradient function
def grad(a, b, x, y):
    """
    Return (d_a, d_b), the partial derivatives of the halved-MSE loss.

    d_a = -(1/n) * sum_i x_i * (y_i - (a*x_i + b))
    d_b = -(1/n) * sum_i (y_i - (a*x_i + b))
    """
    err = y - (a * x + b)
    scale = -1.0 / len(x)
    return scale * np.sum(x * err), scale * np.sum(err)

# Gradient-descent training
lr, iters = 0.05, 1000

for j in range(iters):
    d_a, d_b = grad(a, b, x, y)
    # Step against the gradient; both updates use the gradient taken at the old (a, b)
    a += -lr * d_a
    b += -lr * d_b
    cost = mse(a, b, x, y)
    print(f"iter={j+1}, a={a:.6f}, b={b:.6f}, MSE={cost:.6f}")

# Final summary
print("\n최종 결과:")
print(f"a={a:.6f}, b={b:.6f}, MSE={mse(a,b,x,y):.6f}")

iter=1, a=547.503750, b=171.501500, MSE=1150397.732037
iter=2, a=318.463927, b=105.989956, MSE=295473.622785
iter=3, a=202.589633, b=72.383968, MSE=76188.926661
iter=4, a=144.025138, b=54.942565, MSE=19934.511706
iter=5, a=114.482879, b=45.693795, MSE=5494.740543
iter=6, a=99.636919, b=40.600245, MSE=1780.001680
iter=7, a=92.232050, b=37.617118, MSE=816.366596
iter=8, a=88.593971, b=35.708756, MSE=558.658353
iter=9, a=86.861890, b=34.350571, MSE=482.316997
iter=10, a=86.093452, b=33.276807, MSE=452.793384
iter=11, a=85.811436, b=32.352785, MSE=435.569300
iter=12, a=85.774180, b=31.510216, MSE=421.782899
iter=13, a=85.859320, b=30.714433, MSE=409.152976
iter=14, a=86.004911, b=29.947796, MSE=397.086386
iter=15, a=86.179602, b=29.201293, MSE=385.423175
iter=16, a=86.367540, b=28.470278, MSE=374.114724
iter=17, a=86.560716, b=27.752321, MSE=363.141193
iter=18, a=86.755098, b=27.046116, MSE=352.490340
iter=19, a=86.948663, b=26.350923, MSE=342.152079
iter=20, a=87.140411, b=25.666294, MSE=