In [None]:
!pip install -U scikit-learn

In [None]:
import sklearn
# Print the version of scikit-learn that this kernel actually imported.
print(sklearn.__version__)

In [None]:
from sklearn.datasets import load_diabetes

# Load scikit-learn's bundled diabetes dataset.
diabetes = load_diabetes()
# Bare last expression: the notebook displays the returned object in full.
diabetes

In [None]:
print(diabetes.feature_names) # 10 features: ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
len(diabetes.feature_names)

In [None]:
# Feature matrix and regression target taken from the loaded dataset.
df_X = diabetes.data
df_y = diabetes.target

df_X.shape, df_y.shape # 442 samples, each with 10 features

In [None]:
import numpy as np

# Convert features and target to NumPy arrays for the hand-written model below.
X = np.array(df_X)
y = np.array(df_y)

# Bare last expression: displays both arrays as the cell output.
X, y

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as a test set; random_state=42 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
# Display the shapes of the four resulting arrays.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Draw the initial parameters from a seeded generator so that "Restart & Run All"
# reproduces the same starting point, and therefore the same training run.
rng = np.random.default_rng(42)
W = rng.standard_normal(X_train.shape[1])  # one weight per feature column
b = rng.random()                           # scalar bias drawn from [0, 1)

print(W, b)

def model(X, W, b):
    """Linear model: return ``X @ W + b`` for every row of ``X``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input rows.
    W : array-like of shape (n_features,)
        One weight per feature column.
    b : scalar
        Bias added to every prediction.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        One prediction per input row. The shape is (n_samples,) even when
        there are zero features, in which case every prediction equals ``b``.
    """
    # A single matrix-vector product replaces the Python loop over columns.
    return np.dot(X, W) + b

In [None]:
def MSE(a, b):
    """Mean squared error between ``a`` and ``b``.

    Parameters
    ----------
    a, b : array-like or scalar
        Values to compare. Both are converted with ``np.asarray``, so lists,
        tuples and plain Python numbers are accepted as well as arrays.

    Returns
    -------
    float
        The mean of the element-wise squared differences.
    """
    diff = np.asarray(a) - np.asarray(b)
    return (diff ** 2).mean()  # mean of the squared differences

def loss(X, W, b, y):
    """Return the mean squared error of the model's predictions on ``X`` against ``y``.

    The predictions come from ``model(X, W, b)`` and are scored with ``MSE``.
    """
    return MSE(model(X, W, b), y)

In [None]:
def gradient(X, W, b, y):
    """Gradient of the mean-squared-error loss with respect to ``W`` and ``b``.

    With ``r = model(X, W, b) - y`` and ``N = len(y)`` this returns
    ``(2/N * X.T @ r, 2 * mean(r))``.

    Returns
    -------
    tuple
        ``(dW, db)``: the gradient for the weights and for the bias.
    """
    n_samples = len(y)
    predicted = model(X, W, b)

    scale = 1 / n_samples * 2
    residual = predicted - y  # signed prediction error, shared by both gradients
    grad_W = scale * X.T.dot(residual)
    grad_b = 2 * residual.mean()
    return grad_W, grad_b

In [None]:
LEARNING_RATE = 0.5
N_ITERATIONS = 10000  # number of gradient-descent steps
LOG_EVERY = 100       # print the loss once every this many steps

# NOTE: W and b carry over between runs of this cell, so re-running it continues
# training from the current parameters. Re-run the cell that initialises W and b
# to start over.
losses = []
for i in range(1, N_ITERATIONS + 1):
    dW, db = gradient(X_train, W, b, y_train)
    W -= LEARNING_RATE * dW
    b -= LEARNING_RATE * db
    # Record the training loss after the update so `losses` has one entry per step.
    L = loss(X_train, W, b, y_train)
    losses.append(L)
    if i % LOG_EVERY == 0:
        print('Iteration %d : Loss %0.4f' % (i, L))

In [None]:
import matplotlib.pyplot as plt

# Training-loss curve: one point per gradient-descent step.
plt.plot(losses)
plt.xlabel('Iteration')
plt.ylabel('Training loss (MSE)')
plt.title('Gradient-descent training loss')
plt.show()

In [None]:
# Evaluate the trained parameters on the held-out test split.
prediction = model(X_test, W, b)
mse = loss(X_test, W, b, y_test)
mse  # bare last expression so the test MSE is shown as the cell output

In [None]:
# Actual vs. predicted target, plotted against the first feature column.
plt.scatter(X_test[:, 0], y_test, label='actual')
plt.scatter(X_test[:, 0], prediction, label='predicted')
plt.xlabel(diabetes.feature_names[0])
plt.ylabel('target')
plt.legend()  # needed to tell the two overlaid series apart
plt.show()