In [None]:
# 자동 미분을 사용하여 그레디언트 계산

In [2]:
def f(w1, w2):
    """Evaluate 3*w1**2 + 2*w1*w2.

    Only arithmetic operators are applied to the arguments, so any operands
    supporting `*`, `**` and `+` can be passed in.
    """
    quadratic_term = 3 * w1 ** 2
    cross_term = 2 * w1 * w2
    return quadratic_term + cross_term

# Forward-difference estimate of the partial derivative of f with respect
# to w1, evaluated at (w1, w2) = (5, 3) with step size eps.
w1 = 5
w2 = 3
eps = 1e-6
(f(w1 + eps, w2) - f(w1, w2)) / eps

36.000003007075065

In [3]:
# Forward-difference estimate of the partial derivative of f with respect to w2.
(f(w1, w2 + eps) - f(w1, w2)) / eps

10.000000003174137

In [4]:
import tensorflow as tf

# Rebind w1 and w2 as TensorFlow variables and record the forward pass of f
# on a gradient tape.
w1 = tf.Variable(5.)
w2 = tf.Variable(3.)
with tf.GradientTape() as tape:
    z = f(w1, w2)

# A single gradient() call returns the derivative of z for each listed variable.
gradients = tape.gradient(z, [w1, w2])
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [None]:
# By default the tape is erased automatically right after gradient() is called,
# so a non-persistent tape can be used for only one gradient() call.

In [5]:
# Record a fresh forward pass on a default (non-persistent) tape.
with tf.GradientTape() as tape:
    z = f(w1, w2)

# The first gradient() call on this tape succeeds.
dz_dw1 = tape.gradient(z, w1)
try:
    # A second call on the same non-persistent tape raises RuntimeError;
    # it is caught here so that its message can be displayed.
    dz_dw2 = tape.gradient(z, w2)
except RuntimeError as ex:
    print(ex)

A non-persistent GradientTape can only be used to compute one set of gradients (or jacobians)


In [6]:
# persistent=True allows the same tape to serve several gradient() calls.
with tf.GradientTape(persistent=True) as tape:
    z = f(w1, w2)

dz_dw1 = tape.gradient(z, w1)
dz_dw2 = tape.gradient(z, w2) # works now!
# Drop the reference to the persistent tape once it is no longer needed.
del tape
dz_dw1, dz_dw2

(<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>)

In [7]:
# Same computation with constants instead of variables: the tape is not told
# to watch them, and both gradients come back as None.
c1 = tf.constant(5.)
c2 = tf.constant(3.)
with tf.GradientTape() as tape:
    z = f(c1, c2)

gradients = tape.gradient(z, [c1, c2])
gradients

[None, None]

In [13]:
# Explicitly ask the tape to track the two constants before using them, so
# that gradients with respect to them can be computed.
with tf.GradientTape() as tape:
    tape.watch([c1, c2])
    z = f(c1, c2)

gradients = tape.gradient(z, [c1, c2])
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [None]:
# 2변수 함수

In [14]:
# Record three outputs of f that differ only in the offset added to w2.
with tf.GradientTape() as tape:
    z1 = f(w1, w2 + 2.)
    z2 = f(w1, w2 + 5.)
    z3 = f(w1, w2 + 7.)

# Passing a list of targets yields one gradient per variable; the values shown
# below (136 and 30) equal the sums of the three individual gradients.
tape.gradient([z1, z2, z3], [w1, w2])

[<tf.Tensor: shape=(), dtype=float32, numpy=136.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=30.0>]

In [15]:
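# Preventing gradients from backpropagating through part of a network:
# use the tf.stop_gradient() function.
# During the forward pass it computes its value as usual;
# during backpropagation it passes no gradient through (it behaves like a constant).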

In [None]:
def f(w1, w2):
    """Evaluate 3*w1**2 + 2*w1*w2 with the cross term wrapped in tf.stop_gradient.

    NOTE: this rebinds the name `f` that an earlier cell already defined
    (without tf.stop_gradient); any cell executed after this one uses this
    version.
    """
    quadratic_term = 3 * w1 ** 2
    blocked_cross_term = tf.stop_gradient(2 * w1 * w2)
    return quadratic_term + blocked_cross_term

# Differentiate the stop_gradient version of f with respect to both variables.
with tf.GradientTape() as tape:
    z = f(w1, w2)

# NOTE(review): no output was captured for this cell; presumably only the
# 3 * w1 ** 2 term contributes to the result — confirm by running it.
tape.gradient(z, [w1, w2])

In [16]:
# 사용자 정의 훈련 반복

In [21]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

# One L2 regularizer object (factor 0.05) is shared by the kernels of both
# Dense layers.
l2_reg = keras.regularizers.l2(0.05)
# Regression network: a 30-unit ELU hidden layer followed by a single
# linear output unit.
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="elu", kernel_initializer="he_normal",
                       kernel_regularizer=l2_reg),
    keras.layers.Dense(1, kernel_regularizer=l2_reg)
])

def random_batch(X, y, batch_size=32):
    """Draw a random mini-batch of matching rows from X and y.

    Row indices are drawn with np.random.randint from [0, len(X)), so the
    same row may appear more than once within a batch.

    Args:
        X: indexable array of inputs; len(X) sets the index range.
        y: array of targets, indexed with the same indices as X.
        batch_size: number of indices to draw.

    Returns:
        A tuple (X_batch, y_batch) holding the selected rows.
    """
    n_samples = len(X)
    picks = np.random.randint(n_samples, size=batch_size)
    X_batch = X[picks]
    y_batch = y[picks]
    return X_batch, y_batch

def print_status_bar(iteration, total, loss, metrics=None):
    """Print a one-line progress report that overwrites itself.

    The line starts with a carriage return and has the form
    "iteration/total - name: value - name: value ...", where each value is
    formatted with four decimals.

    Args:
        iteration: current progress count.
        total: count at which the line is terminated with a newline.
        loss: object exposing `.name` and `.result()`; reported first.
        metrics: optional list of further objects with the same interface.
    """
    tracked = [loss] + metrics if metrics else [loss]
    fields = []
    for tracker in tracked:
        fields.append("{}: {:.4f}".format(tracker.name, tracker.result()))
    summary = " - ".join(fields)

    # Stay on the same line until the final count has been reached.
    if iteration < total:
        terminator = ""
    else:
        terminator = "\n"
    print("\r{}/{} - ".format(iteration, total) + summary, end=terminator)

import time
# Dry run of print_status_bar() with synthetic values instead of a real model.
mean_loss = keras.metrics.Mean(name="loss")
mean_square = keras.metrics.Mean(name="mean_square")
for i in range(1, 50 + 1):
    loss = 1 / i
    # Calling a metric object with a value folds it into the running mean.
    mean_loss(loss)
    mean_square(i ** 2)
    print_status_bar(i, 50, mean_loss, [mean_square])
    # Brief pause on every iteration.
    time.sleep(0.05)


50/50 - loss: 0.0900 - mean_square: 858.5000


In [None]:
# 데이터 셋 로드

In [22]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the California housing data; the target is reshaped into a single column.
housing = fetch_california_housing()
# First split: full training set vs. test set. Second split: training vs.
# validation. Both use a fixed random_state.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target.reshape(-1, 1), random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

# Fit the scaler on the training split only, then reuse it unchanged for the
# validation and test splits.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

# Shape of one sample (everything after the first axis).
input_shape = X_train_scaled.shape[1:]
# Alias for the scaled test set.
X_new_scaled = X_test_scaled

In [23]:
n_epochs = 5
batch_size = 32
# Number of whole batches that fit in the training set (integer division).
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
loss_fn = keras.losses.mean_squared_error
# Running mean of the loss; it is created without a name, and the status bar
# output labels it "mean".
mean_loss = keras.metrics.Mean()
# Additional metrics reported next to the loss.
metrics = [keras.metrics.MeanAbsoluteError()]

In [None]:
# 사용자 정의 훈련 반복

In [24]:

# Two nested loops: the outer one iterates over epochs, the inner one over
# the batches within an epoch. Each batch is sampled at random from the
# training set.

for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        # Pass batch_size explicitly so the sampled batch always matches the
        # size used to derive n_steps and the progress counter below
        # (otherwise random_batch silently falls back to its own default).
        X_batch, y_batch = random_batch(X_train_scaled, y_train,
                                        batch_size=batch_size)

        # Inside the tf.GradientTape() block:
        #   - produce the predictions y_pred for the batch X_batch
        #   - compute the loss with loss_fn() (one loss per sample) and
        #     average it over the batch with tf.reduce_mean()
        #   - add the other losses (here, the L2 regularization losses from
        #     model.losses) to the main loss

        with tf.GradientTape() as tape:
            # training=True runs the layers in training mode; this matters
            # as soon as the model contains layers that behave differently
            # during training and inference.
            y_pred = model(X_batch, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model.losses)

        # Compute the gradient of the loss with respect to every trainable
        # variable and let the optimizer apply one gradient-descent step.

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # If weight constraints were attached to the model, they must be
        # applied by hand after the update.

        for variable in model.variables:
            if variable.constraint is not None:
                variable.assign(variable.constraint(variable))

        # Update the mean loss and the metrics (for the current epoch) and
        # print the status bar.

        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)

    # At the end of every epoch, print the status bar once more with the full
    # count so that the line is completed and terminated with a newline, then
    # reset the mean loss and the metrics.

    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    for metric in [mean_loss] + metrics:
        # NOTE(review): newer Keras releases name this method reset_state();
        # confirm against the installed version.
        metric.reset_states()


Epoch 1/5
11610/11610 - mean: 1.7403 - mean_absolute_error: 0.5868
Epoch 2/5
11610/11610 - mean: 0.7008 - mean_absolute_error: 0.5199
Epoch 3/5
11610/11610 - mean: 0.6617 - mean_absolute_error: 0.5187
Epoch 4/5
11610/11610 - mean: 0.6333 - mean_absolute_error: 0.5103
Epoch 5/5
11610/11610 - mean: 0.6400 - mean_absolute_error: 0.5142


In [None]:
# 텐서플로 함수와 그래프

In [25]:
def cube(x):
    """Return x raised to the third power, using the `**` operator of x."""
    cubed = x ** 3
    return cubed

# Called with a plain Python integer, cube() returns a plain Python integer.
cube(2)

8

In [26]:
# The same Python function called with a float tensor returns a tensor.
cube(tf.constant(2.0))

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>