### Gradient Descent Practice

In [1]:
import numpy as np 
import pandas as pd 

import os 
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="6"

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

#### Boston Residence Price dataset

In [2]:
from sklearn.datasets import load_boston 

boston = load_boston()

In [3]:
bostonDF = pd.DataFrame(boston.data,columns = boston.feature_names)

bostonDF['PRICE'] = boston.target 
print(bostonDF.shape)

bostonDF.head()

(506, 14)


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [4]:
type(bostonDF.columns)

pandas.core.indexes.base.Index

#### Weight와 Bias의 Update 값을 계산하는 함수 

- w1은 RM 피처의 Weight
- w2는 LSTAT 피처의 Weight
- bias 는 그냥 편향
- N은 입력 데이터 건수

In [5]:
#gradient_descent()함수에서 반복적으로 호출되면서 update될 weight / bias 값을 계산
#rm은 RM(방 개수), lstat(하위계층 비율),target은 PRICE임. 전체 array가 다 입력됨.
#반환 값은 weight와 bias가 update되어야 할 값과 MSE 값을 loss로 반환


def get_update_weights_value(bias,w1,w2,rm,lstat,target,learning_rate=0.01):
    #데이터 건수
    N = len(target)

    #예측 값
    predicted = w1*rm+w2*lstat+bias
    #실제값과 예측값의 차이
    diff = target-predicted 

    #bias를 array 기반으로 구하기 위해 설정
    bias_factors = np.ones((N,))

    # weight와 bias를 얼마나 update할 것인지를 계산
    w1_update = -(2/N)*learning_rate*(np.dot(rm.T,diff))
    w2_update = -(2/N)*learning_rate*(np.dot(lstat.T,diff))
    bias_update = -(2/N)*learning_rate*(np.dot(bias_factors.T,diff))


    #Mean Squared Error 값을 계산 
    mse_loss = np.mean(np.square(diff))

    # weight와 bias가 update 되어야 할 값과 MSE 값을 반환
    return bias_update,w1_update,w2_update,mse_loss

#### Gradient Descent를 적용하는 함수 생성

- iter_epoches 수만큼 반복적으로 
- get_update_weights_values()를 호출하여 update될 weight/bias 값을 구한 뒤 weight/Bias를 Update

In [6]:
#RM, LSTAT feature array와 PRICE target array를 입력 받아서 
#Iter_epoch 수만큼 반복 적용

def gradient_descent(features,target,iter_epochs=1000,verbose=True):
    #w1,w2는 numpy array연산을 위해 일단 1차원 array로 변환, 초기값은 0
    #bias도 1차원 array로 변환하되 초기 값은 1로 설정. (이건 곱)

    w1 = np.zeros((1,))
    w2= np.zeros((1,))
    bias = np.zeros((1,))
    print('최초 w1,w2, bias :',w1,w2,bias)

    #learning rate와 RM,LSTAT 피쳐 지정.
    #호출 시 numpy array 형태로 RM과 LSTAT로 된 2차원 array 입력

    #HyperParameter
    learning_rate = 0.01
    rm = features[:,0]
    lstat = features[:,1]


    #iter 수 만큼 반복하면서 weight와 bias update를 수행
    for i in range(iter_epochs):
        #weight/bias update 값 계산
        bias_update,w1_update,w2_update,loss = get_update_weights_value(bias,w1,w2,rm,lstat,target,learning_rate)

        #계산된 결과 만큼 업데이트
        w1 = w1-w1_update 
        w2 = w2-w2_update 
        bias = bias-bias_update 

        if verbose:
            print('Epoch :',i+1,'/',iter_epochs)
            print('w1:',w1,'w2:',w2,'bias:',bias,'loss:',loss)
    return w1,w2,bias 

#### Gradient Descent를 적용
- 신경망은 데이터를 정규화/표준화 작업을 선행해주어야 합니다.
- 이를 위해 사이킷런의 MinMaxScaler를 이용하여 개별 feature의 값을 0-1사이 값으로 변환 후 학습 적용합니다.

In [7]:
from sklearn.preprocessing import MinMaxScaler 

scaler = MinMaxScaler()

scaled_features = scaler.fit_transform(bostonDF[['RM','LSTAT']])

w1,w2,bias = gradient_descent(scaled_features,bostonDF['PRICE'].values,iter_epochs=5000,verbose=True)
print('##### 최종 w1,w2, bias #####')
print(w1,w2,bias)
print('w1 type:',type(w1), 'w2 type:',type(w2))
print('w1 shape:',w1.shape,'w2 shape:',w2.shape)


최초 w1,w2, bias : [0.] [0.] [0.]
Epoch : 1 / 5000
w1: [0.252369] w2: [0.10914761] bias: [0.45065613] loss: 592.1469169960474
Epoch : 2 / 5000
w1: [0.4982605] w2: [0.21458377] bias: [0.8890071] loss: 564.6567515182813
Epoch : 3 / 5000
w1: [0.73785103] w2: [0.31641055] bias: [1.315389] loss: 538.6424811965484
Epoch : 4 / 5000
w1: [0.97131229] w2: [0.41472723] bias: [1.73012873] loss: 514.0245946883915
Epoch : 5 / 5000
w1: [1.1988113] w2: [0.50963037] bias: [2.13354428] loss: 490.7278647125017
Epoch : 6 / 5000
w1: [1.42051052] w2: [0.60121392] bias: [2.52594493] loss: 468.6811172230454
Epoch : 7 / 5000
w1: [1.63656797] w2: [0.68956922] bias: [2.90763152] loss: 447.81701302090454
Epoch : 8 / 5000
w1: [1.84713735] w2: [0.77478516] bias: [3.27889669] loss: 428.07184113172934
Epoch : 9 / 5000
w1: [2.05236818] w2: [0.85694818] bias: [3.64002506] loss: 409.3853233168043
Epoch : 10 / 5000
w1: [2.25240586] w2: [0.93614234] bias: [3.9912935] loss: 391.700429116892
Epoch : 11 / 5000
w1: [2.44739187]

In [8]:
predicted = scaled_features[:,0]*w1+scaled_features[:,1]*w2+bias
print(scaled_features.shape)
print(predicted.shape)

(506, 2)
(506,)


#### 최적의 결과를 DF로 출력

In [9]:
bostonDF['PREDICTED_PRICE']=predicted
bostonDF['PREDICTED_PRICE'].head()

0    28.935533
1    25.483093
2    32.545474
3    32.334142
4    31.516284
Name: PREDICTED_PRICE, dtype: float64

In [10]:
bostonDF.head(5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PREDICTED_PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,28.935533
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,25.483093
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,32.545474
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,32.334142
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,31.516284


### Keras를 이용하여 보스턴 주택 가격 모델 학습 및 예측

- Dense Layer를 이용하여 퍼셉트론을 구현, units는 1로 설정

In [11]:
from tensorflow.keras.layers import Dense 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.optimizers import Adam 

model = Sequential([ 
    #우리는 퍼셉트론을 만들 것이므로 단층, 단 하나의 unit을 설정합니다.
    #input_shape은 2차원이고, 
    #회귀이므로 activation은 사용하지 않습니다.

    #weight와 bias에 대한 초기화는 kernel_initializer와 bias_initializer를 사용합니다.
    Dense(1,input_shape=(2,),activation=None,kernel_initializer='zeros',bias_initializer='ones')
])

#최적화는 Adam, 손실함수는 Mean_Squared_error, 성능 측정 역시 MSE를 사용
model.compile(optimizer=Adam(learning_rate=0.01),loss='mse',metrics=['mse'])
model.fit(scaled_features,bostonDF['PRICE'].values,epochs=1000)

2021-10-22 18:26:53.948225: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-10-22 18:26:53.948266: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-10-22 18:26:55.330518: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-10-22 18:26:55.330609: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2021-10-22 18:26:55.330669: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: lib

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7f383c5ab670>

#### Kears로 학습된 모델을 이용하여 주택 가격 예측 수행

In [12]:
predicted = model.predict(scaled_features)

bostonDF['KERAS_PREDICTED_PRICE']=predicted

bostonDF.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PREDICTED_PRICE,KERAS_PREDICTED_PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,28.935533,28.987225
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,25.483093,25.512527
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,32.545474,32.645485
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,32.334142,32.422882
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,31.516284,31.610098


### Keras로 학습된 모델을 이용하여, 주택 가격 예측 수행

In [13]:
predicted = model.predict(scaled_features)
bostonDF['KERAS_PREDICTED_PRICE']=predicted
bostonDF.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PREDICTED_PRICE,KERAS_PREDICTED_PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,28.935533,28.987225
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,25.483093,25.512527
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,32.545474,32.645485
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,32.334142,32.422882
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,31.516284,31.610098


#### Stochastic Gradient Descent 와 Mini Batch Gradient Descent 구현
- SGD는 전체 데이터에서 단 한건만 임의로 선택하여 Gradient Descent로 Weight/Bias Update한 뒤 Weight/Bias를 적용
- Mini Batch GD는 전체 데이터에서 Batch 건수 만큼 데이터를 선택하여 GD로 Weight/Bias를 계산한 뒤 Weight,Bias를 적용합니다.

In [14]:
import numpy as np
import pandas as pd 
from sklearn.datasets import load_boston 

boston = load_boston() 
bostonDF = pd.DataFrame(boston.data,columns = boston.feature_names)
bostonDF['PRICE']=boston.target 

print(bostonDF.shape)
bostonDF.head()

(506, 14)


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


#### SGD 기반으로 Weight/Bias update 구하기
- 먼저 하강률을 구하는 함수 get_update_weights_value_sgd를 정의

In [15]:
# N = bostonDF['PRICE'].shape[0]
# print(N)
# w1=np.zeros((N,))
# w2 = np.zeros((N,))

# features = bostonDF[['RM','LSTAT']]
# targets = bostonDF[['PRICE']]
count=20

In [16]:
def get_update_weights_value_sgd(bias,w1,w2,rm_sgd,lstat_sgd,target_sgd,learning_rate=0.01):
    #전체 데이터 건수
    N = target_sgd.shape[0]

    #예측 값
    predicated_sgd = w1*rm_sgd+w2*lstat_sgd+bias 

    #실제값과 예측값 사이의 차이
    diff_sgd = target_sgd - predicated_sgd
    bias_factors = np.ones((N,))

    #weight와 bias를 얼마나 update할 것인지를 계산


    w1_update = -(2/N)*learning_rate*np.dot(rm_sgd,diff_sgd)
    w2_update = -(2/N)*learning_rate*np.dot(lstat_sgd,diff_sgd)
    bias_update = -(2/N)*learning_rate*(np.dot(bias_factors,diff_sgd))

    print('-'*count)
    print('rm_sgd의 shape, rm_sgd.T의 shape:',rm_sgd.shape,rm_sgd.T.shape)
    print('lstat_sgd의 shape,lstat_sgd.T의 shape:',lstat_sgd.shape,lstat_sgd.T.shape)
    print('diff_sgd의 shape,diff_sgd.T의 shape:',diff_sgd.shape,diff_sgd.T.shape)
    print('current updated w1,w2,bias :',w1_update,w2_update,bias_update)
    print('-'*count)
    return bias_update,w1_update,w2_update
    

#### SGD 수행하기

#### np.random.choice() : p1만큼 입력된 범위에서 p2개의 원소를 무작위로 골라내는 함수 

In [17]:
def st_gradient_descent(features,target,iter_epochs=1000,verbose=True):
    #w1,w2,bias를 초기화
    np.random.seed = 2021
    w1 = np.zeros((1,))
    w2 = np.zeros((1,))
    bias = np.zeros((1,))

    learning_rate = 0.01 
    rm = features[:,0]
    lstat = features[:,1]

    #iter_epochs 수만큼 반복하면서 weight와 bias update 수행

    for i in range(iter_epochs):
        #iteration마다 stochastic을 수행할 데이터를 한개추출
        #이를 위한 index를 지정하여 임의로 rm,lstat에 각각 지정

        stochastic_index = np.random.choice(target.shape[0],1)
        rm_sgd = rm[stochastic_index]
        lstat_sgd = lstat[stochastic_index]
        target_sgd = target[stochastic_index]

        # SGD 기반으로 weight/bias의 Update를 구함
        bias_update,w1_update,w2_update = get_update_weights_value_sgd(bias,w1,w2,rm_sgd,lstat_sgd,target_sgd,learning_rate)

        # SGD로 구한 w1_update,w2_update로 적용
        w1 = w1-w1_update
        w2 = w2-w2_update 
        bias = bias - bias_update

        if verbose:
            print('Epoch:',i+1,'/',iter_epochs)
            #Loss는 전체 학습 데이터 기반으로 구해야함
            predicted = w1*rm + w2*lstat+bias 
            diff = target-predicted 
            mse_loss = np.mean(np.square(diff))

            print('-'*count)
            print('predicted의 type:',type(predicted))
            print('predicted.shape:',predicted.shape)
            print('diff의 type:',type(diff))
            print('diff.shape:',diff.shape)


            print('w1:',w1,'w2:',w2,'bias:',bias,'loss:',mse_loss)
    return w1,w2,bias

In [18]:
from sklearn.preprocessing import MinMaxScaler 

scaler = MinMaxScaler()
print('###')
scaled_features = scaler.fit_transform(bostonDF[['RM','LSTAT']])

w1,w2,bias = st_gradient_descent(scaled_features,bostonDF['PRICE'].values,iter_epochs=5000,verbose=True)

print('##### 최종 w1,w2,bias #####')
print(type(w1),type(w2),type(bias))
print(w1.shape,w2.shape,bias.shape)
print(w1,w2,bias)

###
--------------------
rm_sgd의 shape, rm_sgd.T의 shape: (1,) (1,)
lstat_sgd의 shape,lstat_sgd.T의 shape: (1,) (1,)
diff_sgd의 shape,diff_sgd.T의 shape: (1,) (1,)
current updated w1,w2,bias : -0.15489404100402376 -0.11441721854304633 -0.312
--------------------
Epoch: 1 / 5000
--------------------
predicted의 type: <class 'numpy.ndarray'>
predicted.shape: (506,)
diff의 type: <class 'numpy.ndarray'>
diff.shape: (506,)
w1: [0.15489404] w2: [0.11441722] bias: [0.312] loss: 573.1115320465797
--------------------
rm_sgd의 shape, rm_sgd.T의 shape: (1,) (1,)
lstat_sgd의 shape,lstat_sgd.T의 shape: (1,) (1,)
diff_sgd의 shape,diff_sgd.T의 shape: (1,) (1,)
current updated w1,w2,bias : -0.2090548254461076 -0.09396224176522991 -0.29079347921194976
--------------------
Epoch: 2 / 5000
--------------------
predicted의 type: <class 'numpy.ndarray'>
predicted.shape: (506,)
diff의 type: <class 'numpy.ndarray'>
diff.shape: (506,)
w1: [0.36394887] w2: [0.20837946] bias: [0.60279348] loss: 554.2558592836488
------------

In [19]:
#최적화된 w1,w2,bias로 최종 predicted Series를 생성
predicted = scaled_features[:,0]*w1+scaled_features[:,1]+bias 
bostonDF['PREDICTED_PRICE_SGD']=predicted 

bostonDF.head(10)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PREDICTED_PRICE_SGD
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,30.577968
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,29.934857
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,33.553832
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,32.603446
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,33.402689
5,0.02985,0.0,2.18,0.0,0.458,6.43,58.7,6.0622,3.0,222.0,18.7,394.12,5.21,28.7,29.870706
6,0.08829,12.5,7.87,0.0,0.524,6.012,66.6,5.5605,5.0,311.0,15.2,395.6,12.43,22.9,28.012773
7,0.14455,12.5,7.87,0.0,0.524,6.172,96.1,5.9505,5.0,311.0,15.2,396.9,19.15,27.1,28.985633
8,0.21124,12.5,7.87,0.0,0.524,5.631,100.0,6.0821,5.0,311.0,15.2,386.63,29.93,16.5,26.620596
9,0.17004,12.5,7.87,0.0,0.524,6.004,85.9,6.5921,5.0,311.0,15.2,386.71,17.1,18.9,28.102264


#### Mini batch GD
 - 함수1 : 실제 하강률을 계산하는 get_update_weights_value_batch
 - 함수2 : 이를 토대로 계산된 update를 수행하는 함수

In [20]:
def get_update_weights_value_batch(bias,w1,w2,rm_batch,lstat_batch,target_batch,learning_rate = 0.01):

    #전체 데이터 건수
    N = target_batch.shape[0]

    #예측 값
    predicted_batch = w1*rm_batch+w2*lstat_batch+bias 

    #실제값과 예측값 사이의 차이 
    print('predicted.shape:',predicted_batch.shape)
    print('target.shape:',target_batch.shape)
    diff_batch = target_batch - predicted_batch

    #bias 연산을 위한 bias_factor 형성
    bias_factors = np.ones((N,))

    print('diff_batch.shape:',diff_batch.shape)
    print('rm_batch.shape,rm_batch.T.shape:',rm_batch.shape,rm_batch.T.shape)
    print('lstat_batch.shape,lstat_batch.T.shape:',lstat_batch.shape,lstat_batch.T.shape)

    w1_update = -(2/N)*learning_rate*(np.dot(rm_batch.T,diff_batch))
    w2_update = -(2/N)*learning_rate*(np.dot(lstat_batch.T,diff_batch))
    bias_update = -(2/N)*learning_rate*(np.dot(bias_factors.T,diff_batch))

    return bias_update,w1_update,w2_update


#### np.random.choice() : 전체 데이터셋에서 p2만큼 무작위롤 뽑아내는 API

In [21]:
batch_indexes = np.random.choice(bostonDF['PRICE'].shape[0],30)
print(batch_indexes)

[268 163 389 325 346 175 423  20 242 219 501 172 217  24 393 453  37 156
 295  81 367 203 124 274 466 242 292 118 121 449]


In [22]:
def batch_random_gradient_descent(features,target,iter_epochs=1000,batch_size=30,verbose=True):
    np.random.seed = 2021
    w1 = np.zeros((1,))
    w2 = np.zeros((1,))
    bias =np.zeros((1,))

    print('최초 w1,w2,bias:',w1,w2,bias)

    #learing_rate와 RM,LSTAT피쳐 지정
    learning_rate = 0.01
    rm = features[:,0]
    lstat = features[:,1]

    #iter_epochs 수만큼 반복하면서 weight와 bias update를 수행
    for i in range(iter_epochs):
        # batch_size 수만큼 데이터를 임의로 선택
        batch_indexes = np.random.choice(target.shape[0],batch_size)

        rm_batch = rm[batch_indexes]
        lstat_batch = lstat[batch_indexes]
        target_batch = target[batch_indexes]
        
        print('-'*30)
        print('rm_batch.shape:',rm_batch.shape)
        print('lstat_batch.shape:',lstat_batch.shape)
        print('target_batch.shape:',target_batch.shape)

        #이를 토대로 하강률 수행
        bias_update,w1_update,w2_update = get_update_weights_value_batch(bias,w1,w2,rm_batch,lstat_batch,target_batch,learning_rate)

        #Batch GD로 구한 weight/bias의 update를 적용합니다.
        w1 = w1-w1_update
        w2 = w2-w2_update 
        bias = bias - bias_update 

        if verbose:
            print('Epoch:',i+1,'/',iter_epochs)
            # Loss는 전체 학습 데이터 기반으로 구해야 함
            predicted = w1*rm+w2*lstat+bias 
            diff = target-predicted
            mse_loss = np.mean(np.square(diff))
            print('w1:',w1,'w2:',w2,'bias:',bias,'loss:',mse_loss)

    return w1,w2,bias 

In [23]:
w1,w2,bias = batch_random_gradient_descent(scaled_features,bostonDF['PRICE'].values,iter_epochs=1000,batch_size=30,verbose=True)
print('##### 최종 w1,w2,bias #####')
print(w1,w2,bias)

최초 w1,w2,bias: [0.] [0.] [0.]
------------------------------
rm_batch.shape: (30,)
lstat_batch.shape: (30,)
target_batch.shape: (30,)
predicted.shape: (30,)
target.shape: (30,)
diff_batch.shape: (30,)
rm_batch.shape,rm_batch.T.shape: (30,) (30,)
lstat_batch.shape,lstat_batch.T.shape: (30,) (30,)
Epoch: 1 / 1000
w1: [0.29178586] w2: [0.10230533] bias: [0.503] loss: 561.470226829439
------------------------------
rm_batch.shape: (30,)
lstat_batch.shape: (30,)
target_batch.shape: (30,)
predicted.shape: (30,)
target.shape: (30,)
diff_batch.shape: (30,)
rm_batch.shape,rm_batch.T.shape: (30,) (30,)
lstat_batch.shape,lstat_batch.T.shape: (30,) (30,)
Epoch: 2 / 1000
w1: [0.55682534] w2: [0.19634049] bias: [0.95393983] loss: 534.6646839070081
------------------------------
rm_batch.shape: (30,)
lstat_batch.shape: (30,)
target_batch.shape: (30,)
predicted.shape: (30,)
target.shape: (30,)
diff_batch.shape: (30,)
rm_batch.shape,rm_batch.T.shape: (30,) (30,)
lstat_batch.shape,lstat_batch.T.shape: (

In [24]:
predicted = scaled_features[:,0]*w1+scaled_features[:,1]*w2+bias
bostonDF['PREDICTED_PRICE_BATCH_RANDOM']=predicted 
bostonDF.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PREDICTED_PRICE_SGD,PREDICTED_PRICE_BATCH_RANDOM
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,30.577968,26.21152
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,29.934857,24.206968
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,33.553832,28.716891
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,32.603446,28.432279
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,33.402689,28.126534


#### Iteration 시에 순차적으로 일정한 batch 크기만큼의 데이터를 전체 학습 데이터에 가져오는 Mini-Batch GD를 수행

In [25]:
def batch_gradient_descent(features,target,iter_epochs=1000,batch_size=30,verbose=True):
    np.random.seed=2021 
    w1 = np.zeros((1,))
    w2 = np.zeros((1,))
    bias = np.zeros((1,))

    print('최초 w1,w2,bias:',w1,w2,bias)

    #learning_rate, RM,LSTAT피처 지정
    #이건 rm,lstat의 전체를 말합니다.
    learning_rate = 0.01 
    rm = features[:,0]
    lstat = features[:,1]

    #iter_epochs만큼 반복하면서 weight와 bias 업데이트
    for i in range(iter_epochs):
        #여기에서 batch_Size만큼 rm,lstat,target에서 각각 가져오면서 수행
        for batch_step in range(0,target.shape[0],batch_size):
            #batch_size만큼 순차적인 데이터를 가져옵니다. 
            print('current batch step:',batch_step)
            rm_batch = rm[batch_step:batch_step+batch_size]
            lstat_batch= lstat[batch_step:batch_step+batch_size]
            target_batch = target[batch_step:batch_step+batch_size]

            #각각 batch_size만큼 똑똑 떼와서 rm,lstat,target을 결정하는 것입니다.
            
            #이를 토대로 각각 경하율을 추정합니다.
            bias_update,w1_update,w2_update = get_update_weights_value_batch(bias, w1, w2, rm_batch, lstat_batch, target_batch, learning_rate)

            #Batch GD로 구한 weight/bias의 update 적용
            w1 = w1-w1_update
            w2 = w2-w2_update
            bias = bias-bias_update 

            if verbose:
                print('Epoch:',i+1,'/',iter_epochs,'batch_step:',batch_step)

                #Loss는 전체 학습 데이터 기반으로 구해야 합니다.
                predicted = w1*rm+w2*lstat+bias 
                diff = target - predicted 
                mse_loss = np.mean(np.square(diff))
                print('w1:',w1,'w2:',w2,'bias:',bias,'loss:',mse_loss)
    return w1,w2,bias

In [26]:
# w1, w2, bias = batch_gradient_descent(scaled_features, bostonDF['PRICE'].values, iter_epochs=5000, batch_size=30, verbose=True)
# print('##### 최종 w1, w2, bias #######')
# print(w1, w2, bias)

### Mini Batch GD를 Keras로 수행
- keras는 기본적으로 Mini Batch GD를 수행합니다.

In [28]:
from tensorflow.keras.layers import Dense 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.optimizers import Adam 

model = Sequential([ 
    # 단 하나의 units 설정. input_shape는 2차원, 회귀이므로 activation은 설정하지 않습니다.
    # weight와 bias 초기화는 kernel_initializer와 bias_initializer를 이용합니다. 
    Dense(1,input_shape=(2,),activation=None,kernel_initializer='zeros',bias_initializer='ones')
])
model.compile(optimizer=Adam(learning_rate=0.01),loss='mse',metrics=['mse'])

# Keras는 반드시 Batch GD를 적용합니다. batch_size가 None이면 기본적으로 32를 할당합니다.
model.fit(scaled_features,bostonDF['PRICE'].values,batch_size=30,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7f383c34f8e0>

In [29]:
predicted = model.predict(scaled_features)
bostonDF['KERAS_PREDICTED_PRICE_BATCH']=predicted 
bostonDF.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PREDICTED_PRICE_SGD,PREDICTED_PRICE_BATCH_RANDOM,KERAS_PREDICTED_PRICE_BATCH
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,30.577968,26.21152,28.969919
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,29.934857,24.206968,25.495583
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,33.553832,28.716891,32.631695
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,32.603446,28.432279,32.407578
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,33.402689,28.126534,31.596489
