In [None]:
# Pin scikit-learn to 1.0.2: this notebook imports load_boston, which newer scikit-learn releases no longer provide.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install scikit-learn==1.0.2

Collecting scikit-learn==1.0.2
  Downloading scikit_learn-1.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.5/26.5 MB[0m [31m32.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 0.24.0 requires scikit-learn>=1.2.2, but you have scikit-learn 1.0.2 which is incompatible.[0m[31m
[0mSuccessfully installed scikit-learn-1.0.2


In [None]:
import numpy as np
import pandas as pd

In [None]:
from sklearn.datasets import load_boston

# Load the Boston housing dataset bundled with scikit-learn.
boston = load_boston()

# Feature matrix as a DataFrame with named columns, plus the target as a 'PRICE' column.
bostonDF = pd.DataFrame(data=boston.data, columns=boston.feature_names)
bostonDF['PRICE'] = boston.target

# Preview the first rows.
bostonDF.head()

In [None]:
# Min-max scale only the RM and LSTAT columns.
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the two columns, then transform the same two columns.
scaler = MinMaxScaler().fit(bostonDF[['RM', 'LSTAT']])
scaled_features = scaler.transform(bostonDF[['RM', 'LSTAT']])
scaled_features

### Stochastic Gradient Descent와 Mini Batch Gradient Descent 구현
* SGD는 전체 데이터에서 **한 건만** 임의로 선택하여 Gradient Descent로 Weight/Bias Update 값을 계산한 뒤 Weight/Bias에 적용
* Mini Batch GD는 전체 데이터에서 **Batch 건수만큼** 데이터를 선택하여 Gradient Descent로 Weight/Bias Update 값을 계산한 뒤 Weight/Bias에 적용

### SGD 기반으로 Weight/Bias update 값 구하기

In [None]:
# Takes the rows selected for one update step (rm_sgd, lstat_sgd, target_sgd):
# a single row for SGD, or several rows for a mini-batch.
def get_update_value_sgd(bias, w1, w2, rm_sgd, lstat_sgd, target_sgd, learning_rate=0.01):
    """Compute the update amounts for bias, w1 and w2 for one gradient step.

    The prediction is ``bias + w1 * rm_sgd + w2 * lstat_sgd`` and the error is
    ``target_sgd - prediction``. Each returned value is the learning rate times
    the gradient of the mean squared error over the supplied rows, so the
    caller applies it by subtraction (``w1 = w1 - w1_update``).

    Parameters
    ----------
    bias, w1, w2 : numpy arrays or scalars
        Current model parameters.
    rm_sgd, lstat_sgd : numpy arrays
        Feature values of the selected rows (``.T`` is taken on them, so they
        must be NumPy arrays or NumPy scalars).
    target_sgd : numpy array
        Target values of the selected rows.
    learning_rate : float, default 0.01
        Step size multiplied into every update.

    Returns
    -------
    tuple
        ``(bias_update, w1_update, w2_update)``.

    Raises
    ------
    ValueError
        If no rows are supplied.
    """
    # Number of rows in this step. Derived from the input instead of being
    # fixed at 1, so a mini-batch is averaged correctly and the bias dot
    # product below has matching shapes.
    N = np.size(target_sgd)
    if N == 0:
        raise ValueError('get_update_value_sgd requires at least one data row')

    # Predicted values for the selected rows.
    predicted_sgd = bias + (w1 * rm_sgd) + (w2 * lstat_sgd)

    # Difference between actual and predicted values.
    diff_sgd = target_sgd - predicted_sgd

    # Vector of ones so the bias update can use the same dot-product form as the weights.
    bias_factors = np.ones((N,))

    # How much each weight and the bias should be changed.
    w1_update = (-2/N) * learning_rate * (np.dot(rm_sgd.T, diff_sgd))
    w2_update = (-2/N) * learning_rate * (np.dot(lstat_sgd.T, diff_sgd))
    bias_update = (-2/N) * learning_rate * (np.dot(bias_factors.T, diff_sgd))

    # Return the amounts by which the bias and weights should be updated.
    return bias_update, w1_update, w2_update

### SGD 수행하기

In [None]:
# Takes the RM/LSTAT feature array and the PRICE target array and applies
# single-sample weight/bias updates iter_epochs times.
def s_gradient_descent(features, target, iter_epochs=1000, verbose=True, learning_rate=0.01, seed=2024):
    """Fit ``bias + w1 * RM + w2 * LSTAT`` with stochastic gradient descent.

    On every iteration one row index is drawn with ``np.random.choice`` and
    the parameters are moved by the values returned from
    ``get_update_value_sgd`` for that single row.

    Parameters
    ----------
    features : numpy array, 2-D
        Column 0 is used as RM and column 1 as LSTAT.
    target : numpy array, 1-D
        Target values; ``target.shape[0]`` is the number of rows sampled from.
    iter_epochs : int, default 1000
        Number of single-row updates to perform (not passes over the data).
    verbose : bool, default True
        When true, prints the iteration number, the current parameters and the
        mean squared error over the whole data set after every update.
    learning_rate : float, default 0.01
        Step size passed to ``get_update_value_sgd``.
    seed : int or None, default 2024
        Seed given to ``np.random.seed`` before sampling starts, which makes
        the sequence of drawn rows repeatable. This reseeds NumPy's global
        random state as a side effect. Pass ``None`` to leave that state as is.

    Returns
    -------
    tuple
        ``(w1, w2, bias)``, each a NumPy array of shape ``(1,)``.
    """
    # w1 and w2 start at 0 and bias at 1, each held as a 1-D array so the
    # NumPy arithmetic below works uniformly.
    w1 = np.zeros((1,))
    w2 = np.zeros((1,))
    bias = np.ones((1,))
    print('최초 w1, w2, bias:', w1, w2, bias)

    # Split the 2-D feature array into its RM and LSTAT columns.
    rm = features[:, 0]
    lstat = features[:, 1]

    # Seed the global NumPy generator so the same rows are drawn on every run.
    if seed is not None:
        np.random.seed(seed)

    # Repeat the single-row update iter_epochs times.
    for i in range(iter_epochs):

        # Draw the index of the one row used for this update.
        sto_index = np.random.choice(target.shape[0], 1)

        rm_sgd = rm[sto_index]
        lstat_sgd = lstat[sto_index]
        target_sgd = target[sto_index]

        # Update amounts computed from the selected row only.
        bias_update, w1_update, w2_update = get_update_value_sgd(bias, w1, w2, rm_sgd, lstat_sgd, target_sgd, learning_rate)

        # Apply the updates.
        w1 = w1 - w1_update
        w2 = w2 - w2_update
        bias = bias - bias_update

        if verbose:
            print('Epoch:', i+1,'/', iter_epochs)

            # The reported loss is computed over the whole training data, not just the sampled row.
            predicted = (w1 * rm) + (w2 * lstat) + bias
            diff = target - predicted
            mse_loss = np.mean(np.square(diff))
            print('w1:', w1, 'w2:', w2, 'bias:', bias, 'loss:', mse_loss)

    return w1, w2, bias

In [None]:
# Train by calling s_gradient_descent with 5000 iterations.
# NOTE: verbose is left at its default, so progress is printed on every iteration.
w1, w2, bias = s_gradient_descent(
    scaled_features,
    bostonDF['PRICE'].values,
    iter_epochs=5000,
)

# Report the final parameters.
print('####### 최종 w1, w2, bias #######')
print(' '.join(map(str, (w1, w2, bias))))

In [None]:
# Build predictions from the learned weights and bias, then store them
# in the DataFrame as the 'PREDICTED_PRICE_SGD' column.
predicted = (
    (w1 * scaled_features[:, 0])
    + (w2 * scaled_features[:, 1])
    + bias
)
bostonDF['PREDICTED_PRICE_SGD'] = predicted

# Show the first ten rows with the new column.
bostonDF.head(10)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PREDICTED_PRICE_SGD
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,29.165125
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,25.804278
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,32.656545
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,32.459818
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,31.654259
5,0.02985,0.0,2.18,0.0,0.458,6.43,58.7,6.0622,3.0,222.0,18.7,394.12,5.21,28.7,28.332553
6,0.08829,12.5,7.87,0.0,0.524,6.012,66.6,5.5605,5.0,311.0,15.2,395.6,12.43,22.9,21.785331
7,0.14455,12.5,7.87,0.0,0.524,6.172,96.1,5.9505,5.0,311.0,15.2,396.9,19.15,27.1,18.29328
8,0.21124,12.5,7.87,0.0,0.524,5.631,100.0,6.0821,5.0,311.0,15.2,386.63,29.93,16.5,8.911594
9,0.17004,12.5,7.87,0.0,0.524,6.004,85.9,6.5921,5.0,311.0,15.2,386.71,17.1,18.9,18.793763
