In [165]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Use one fixed seed everywhere so reruns produce the same numbers.
seed = 42
tf.reset_default_graph()  # start from an empty default graph
np.random.seed(seed)      # seed NumPy's global RNG
tf.set_random_seed(seed)  # graph-level seed for TensorFlow random ops


### 그래프 만들기

In [166]:
# Two integer variables and the expression f = x^2 * y + y + 2.
# Nothing is computed here; these lines only add nodes to the graph.
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
x_squared = x * x
f = x_squared * y + y + 2

In [167]:
# Displaying f shows a symbolic Tensor (name, shape, dtype), not a computed value.
f

<tf.Tensor 'add_1:0' shape=() dtype=int32>

In [168]:
# Manual session management: open a session, initialize both variables,
# evaluate f, then close the session.
sess = tf.Session()
try:
    sess.run([x.initializer, y.initializer])
    print(sess.run(f))
finally:
    # Always release the session's resources, even if a run() call fails.
    sess.close()

42


In [169]:
# Inside the with-block the session is the default one, so f.eval() works
# without naming it, and the session is closed automatically on exit.
with tf.Session() as sess:
    initializers = [x.initializer, y.initializer]
    sess.run(initializers)
    print(sess.run(f))
    print(f.eval())

42
42


In [170]:
# A single op that initializes every variable in the graph.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Evaluate f through the session and through the default-session shortcut.
    for value in (sess.run(f), f.eval()):
        print(value)

42
42


### Interactive Session 소개

In [16]:
# An InteractiveSession installs itself as the default session on creation,
# so init.run() and f.eval() work without a with-block.
sess = tf.InteractiveSession()
try:
    init.run()
    result = f.eval()
    print(result)
finally:
    # It must be closed explicitly; do so even if initialization or eval fails.
    sess.close()

42


## 그래프 관리

In [20]:
# After a reset, new nodes are added to the fresh default graph.
tf.reset_default_graph()
x1 = tf.Variable(1)  # initial value 1
default_graph = tf.get_default_graph()
x1.graph is default_graph

True

In [24]:
# Create a separate graph and make it the default only inside the with-block,
# so x2 is added to `graph` rather than to the global default graph.
with tf.Graph().as_default() as graph:
    x2 = tf.Variable(2)

x2.graph is graph

True

In [26]:
# x1 was created outside the with-block, so it is not part of `graph`.
x1.graph is graph

False

In [27]:
# y and z both depend on x, which in turn depends on the constant w.
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3


10
15


In [31]:
with tf.Session() as sess:
    # Each eval() is a separate graph run, so w and x are computed twice.
    for tensor in (y, z):  # prints 10, then 15
        print(tensor.eval())

10
15


In [30]:
with tf.Session() as sess:
    # Fetch y and z in one graph run so the shared nodes w and x are computed once.
    y_eval, z_eval = sess.run(fetches=[y, z])
    print(y_eval, z_eval, sep="\n")

10
15


# Linear Regression

## 실행순서
### 1. Data Load (California Data)
### 2. Data 전처리
### 3. Graph 정의( Linear Regression)
### 4. 실행

In [171]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing

# Use one fixed seed everywhere so the regression run is repeatable.
seed = 42
tf.reset_default_graph()  # start from an empty default graph
np.random.seed(seed)      # seed NumPy's global RNG
tf.set_random_seed(seed)  # graph-level seed for TensorFlow random ops


## 1.Data 로드

In [172]:
# Load the California housing dataset through scikit-learn's fetcher.
housing = fetch_california_housing()

In [173]:
# List the fields available on the loaded dataset object.
housing.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR'])

In [174]:
# Feature matrix (X) and target vector (y).
housing_data, housing_lable = housing.data, housing.target

# m = number of samples (rows), n = number of features (columns).
m, n = housing_data.shape
# A bias column of ones is added to the features in a later step.

## 2. 전처리하기

In [175]:
# Preprocessing is needed: when features live on very different scales,
# gradient descent learns poorly (slowly).
feature_means = housing_data.mean(axis=0)
print("mean: ", feature_means)
print("shape: ", housing_data.shape)

mean:  [ 3.87067100e+00  2.86394864e+01  5.42899974e+00  1.09667515e+00
  1.42547674e+03  3.07065516e+00  3.56318614e+01 -1.19569704e+02]
shape:  (20640, 8)


### 표준화

In [176]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler # sklearn의 StandardScaler 사용
# Standardization: fit per-feature statistics, then rescale each column.
scaler = StandardScaler()
scaler.fit(housing_data)
scaled_housing_data = scaler.transform(housing_data)

In [177]:
# Normalization example (min-max scaling), shown for comparison.
scaler2 = MinMaxScaler()
scaled_housing_data2 = scaler2.fit(housing_data).transform(housing_data)

In [178]:
import pandas as pd
# Wrap the raw features in a DataFrame to inspect column names, dtypes and nulls.
housing_frame = pd.DataFrame(housing_data, columns=housing.feature_names)
housing_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 8 columns):
MedInc        20640 non-null float64
HouseAge      20640 non-null float64
AveRooms      20640 non-null float64
AveBedrms     20640 non-null float64
Population    20640 non-null float64
AveOccup      20640 non-null float64
Latitude      20640 non-null float64
Longitude     20640 non-null float64
dtypes: float64(8)
memory usage: 1.3 MB


In [140]:
# Build the design matrix in this cell so it runs on a fresh kernel:
# prepend a column of ones (the bias term) to the standardized features.
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

# Per-column means: 1 for the bias column, ~0 for every standardized feature.
print("mean(항목간): ", scaled_housing_data_plus_bias.mean(axis=0))
# Mean over all entries of the matrix.
print("mean(전체): ", scaled_housing_data_plus_bias.mean())
print("shape: ",scaled_housing_data_plus_bias.shape)

mean(항목간):  [ 1.00000000e+00  6.60969987e-17  5.50808322e-18  6.60969987e-17
 -1.06030602e-16 -1.10161664e-17  3.44255201e-18 -1.07958431e-15
 -8.52651283e-15]
mean(전체):  0.11111111111111005
shape:  (20640, 9)


### bias 추가

In [164]:
# Prepend a column of ones so the bias is learned as an ordinary weight.
bias_column = np.ones((m, 1))
scaled_housing_data_plus_bias = np.hstack([bias_column, scaled_housing_data])

## 3. Graph 정의

#### 0. epoch, learning_rate 정의
#### 1. X와 y정의  (Placeholder)
#### 2. Hypothesis&Cost 함수 정의
#### 3. Optimizer정의

In [142]:
# Reset the graph before defining the regression model.
tf.reset_default_graph()
# The graph-level seed belongs to the graph that was just discarded, so set it
# again; otherwise the random initialization of the weights differs on every run.
tf.set_random_seed(seed)

In [143]:
# Training hyperparameters.
lr = 0.01        # learning rate (step size of each weight update)
n_epochs = 1000  # number of training iterations over the data

#### 1. X와 y정의

In [144]:
# Inputs fed at run time. Static shapes are declared so a wrongly shaped feed
# fails immediately: a rank-1 y would otherwise broadcast against the (M, 1)
# predictions and silently produce an (M, M) error matrix.
X = tf.placeholder(dtype=tf.float32, shape=(None, n + 1), name="X")  # M x (N+1): features plus bias column
y = tf.placeholder(dtype=tf.float32, shape=(None, 1), name='y')      # M x 1: targets as a column vector

#### 2. Hypothesis&Cost 정의

In [145]:
# Notation: M = number of samples, N = number of features.
# The weight vector has N + 1 rows because the first input column is the bias.
W = tf.Variable(tf.random_uniform([n + 1, 1], minval=-1.0, maxval=1.0), name='Weight')

# Hypothesis: (M x (N+1)) @ ((N+1) x 1) -> M x 1 predictions.
# y must therefore be fed as an M x 1 column for the subtraction below.
y_pred = tf.matmul(X, W, name='predictions')

# Per-sample residual, M x 1.
error = y_pred - y

# Cost function: Mean Squared Error.
mse = tf.reduce_mean(tf.square(error), name='mse')

# Hand-written batch gradient descent.
# ((N+1) x M) @ (M x 1) -> (N+1) x 1: one gradient entry per weight.
X_transposed = tf.transpose(X)
gradients = (2 / m) * tf.matmul(X_transposed, error)

# One update step: W <- W - lr * gradient.
updated_W = W - (lr * gradients)
training_op = tf.assign(W, updated_W)

In [146]:
# The targets come as a 1-D array; turn them into an (M, 1) column vector
# so they line up with the M x 1 predictions.
print(housing_lable.shape)  # 1-D
reshaped_housing_lable = np.reshape(housing_lable, (-1, 1))
print(reshaped_housing_lable.shape)

(20640,)
(20640, 1)


In [150]:
init = tf.global_variables_initializer()

# The full dataset is fed on every step (batch gradient descent), so build
# the feed dictionary once and reuse it.
train_feed = {X: scaled_housing_data_plus_bias, y: reshaped_housing_lable}

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the current cost every 100 epochs, before that epoch's update.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", sess.run(mse, feed_dict=train_feed))

        # Apply one weight update.
        sess.run(training_op, feed_dict=train_feed)

    # Read the trained weights out before the session closes.
    best_weight = sess.run(W)

best_weight

Epoch 0 MSE = 5.7508087
Epoch 100 MSE = 0.7673069
Epoch 200 MSE = 0.64747536
Epoch 300 MSE = 0.6172484
Epoch 400 MSE = 0.59559906
Epoch 500 MSE = 0.5792897
Epoch 600 MSE = 0.5669332
Epoch 700 MSE = 0.5575261
Epoch 800 MSE = 0.5503291
Epoch 900 MSE = 0.54479486


array([[ 2.0685523 ],
       [ 0.9024117 ],
       [ 0.16157164],
       [-0.34849477],
       [ 0.35030842],
       [ 0.01012948],
       [-0.04450061],
       [-0.51938677],
       [-0.49587476]], dtype=float32)