In [37]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers


In [38]:
data = pd.read_csv("kc_house_data.csv")
data.shape

(21613, 21)

In [39]:
# Display data
pd.options.display.max_columns = 25
data.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,3,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,3,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,3,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,5,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,3,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [40]:
# Display data types
data.dtypes

id                 int64
date              object
price            float64
bedrooms           int64
bathrooms        float64
sqft_living        int64
sqft_lot           int64
floors           float64
waterfront         int64
view               int64
condition          int64
grade              int64
sqft_above         int64
sqft_basement      int64
yr_built           int64
yr_renovated       int64
zipcode            int64
lat              float64
long             float64
sqft_living15      int64
sqft_lot15         int64
dtype: object

In [41]:
# Data preprocess (pdf.72)
data['year'] = pd.to_numeric(data['date'].str.slice(0, 4))
data['month'] = pd.to_numeric(data['date'].str.slice(4, 6))
data['day'] = pd.to_numeric(data['date'].str.slice(6, 8))

data.drop(['id'], axis="columns", inplace=True)
data.drop(['date'], axis="columns", inplace=True)
data.head()


Unnamed: 0,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,year,month,day
0,221900.0,3,1.0,1180,5650,1.0,0,0,3,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650,2014,10,13
1,538000.0,3,2.25,2570,7242,2.0,0,0,3,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639,2014,12,9
2,180000.0,2,1.0,770,10000,1.0,0,0,3,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062,2015,2,25
3,604000.0,4,3.0,1960,5000,1.0,0,0,5,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000,2014,12,9
4,510000.0,3,2.0,1680,8080,1.0,0,0,3,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503,2015,2,18


In [42]:
# Split dataset to 3 parts (pdf.73)
data_num = data.shape[0]
# 打散資料 : permutation() 返回打亂後的array
indexes = np.random.permutation(data_num)

train_indexes = indexes[:int(data_num * 0.6)]
val_indexes = indexes[int(data_num * 0.6):int(data_num * 0.8)]
test_indexes = indexes[int(data_num * 0.8):]

train_data = data.loc[train_indexes]
val_data = data.loc[train_indexes]
test_data = data.loc[train_indexes]


In [43]:
# for test 
locs = [1, 3, 5]
data_locs = data.loc[locs]
print(data_locs)

       price  bedrooms  bathrooms  sqft_living  sqft_lot  floors  waterfront  \
1   538000.0         3       2.25         2570      7242     2.0           0   
3   604000.0         4       3.00         1960      5000     1.0           0   
5  1225000.0         4       4.50         5420    101930     1.0           0   

   view  condition  grade  sqft_above  sqft_basement  yr_built  yr_renovated  \
1     0          3      7        2170            400      1951          1991   
3     0          5      7        1050            910      1965             0   
5     0          3     11        3890           1530      2001             0   

   zipcode      lat     long  sqft_living15  sqft_lot15  year  month  day  
1    98125  47.7210 -122.319           1690        7639  2014     12    9  
3    98136  47.5208 -122.393           1360        5000  2014     12    9  
5    98053  47.6561 -122.005           4760      101930  2014      5   12  


In [44]:
# Normalization (pdf.73)
"""
Z-score :
    Xnorm = (X - mean) / std
"""
train_validation_data = pd.concat([train_data, val_data])
mean = train_validation_data.mean()
std = train_validation_data.std()
train_data = (train_data - mean) / std
val_data = (val_data - mean) /std


In [45]:
# Establish Numpy array type's training sets(
#print(train_data)

x_train = np.array(train_data.drop('price', axis='columns')) # drop "price" column
y_train = np.array(train_data['price'])
x_val = np.array(val_data.drop('price', axis='columns'))
y_val = np.array(val_data['price'])

#print(y_train)

In [46]:
x_train.shape

(12967, 21)

In [47]:
# Establish training network model (pdf.74)
model = keras.Sequential(name=' model-1')

model.add(layers.Dense(64, activation='relu', input_shape=(21,)))
# parameter 1408 = (21 * 1 + 1) * 64
model.add(layers.Dense(64, activation='relu'))
# parameter 4160 = (64 * 1 + 1) * 64
model.add(layers.Dense(1))
# parameter 65 = (64 * 1 + 1) * 1

model.summary()

Model: " model-1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 64)                1408      
_________________________________________________________________
dense_10 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 65        
Total params: 5,633
Trainable params: 5,633
Non-trainable params: 0
_________________________________________________________________


In [48]:
# Set optimizers, losses fun, metrics fun (pdf.74)
model.compile(keras.optimizers.Adam(0.001),
              loss=keras.losses.MeanSquaredError(),
              metrics=[keras.metrics.MeanAbsoluteError()])

In [49]:
#--- For test ---#
os.getcwd() # 取得當前路徑


'/home/earvin/workspaces/GithubProjects/tensorflow/scripts/BOOK_EasyStudyTF2'

In [50]:
# Set model dir
model_dir = 'lab2-logs/models/'
os.makedirs(model_dir)


FileExistsError: [Errno 17] File exists: 'lab2-logs/models/'

In [55]:
# Set Callback fun (pdf.75)
"""
log_dir = os.path.join('lab2-logs', 'model-1')
model_cbk = keras.callbacks.TensorBoard(log_dir=log_dir)
model_mckp = keras.callbacks.ModelCheckpoint(model_dir + 'Best-model-1.h5',
                                             monitor='val_mean_absolute_error',
                                             save_best_only=True,
                                             mode='min')
"""
# TensorBoard回調函數會幫忙紀錄訓練資訊，並存成TensorBoard的紀錄檔
log_dir = os.path.join('lab2-logs', 'model-1')
model_cbk = keras.callbacks.TensorBoard(log_dir=log_dir)
# ModelCheckpoint回調函數幫忙儲存網路模型，可以設定只儲存最好的模型，「monitor」表示被監測的數據，「mode」min則代表監測數據越小越好。
model_mckp = keras.callbacks.ModelCheckpoint(model_dir + '/Best-model-1.h5', 
                                        monitor='val_mean_absolute_error', 
                                        save_best_only=True, 
                                        mode='min')

2023-10-29 20:07:45.107260: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2023-10-29 20:07:45.107368: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2023-10-29 20:07:45.107450: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [53]:
print(x_val)

[[ 0.70961401 -0.80329065 -0.28375078 ...  1.42862261 -1.4590533
   0.16198959]
 [ 0.70961401  2.15699825  2.23739404 ... -0.69994793 -0.18001435
   0.16198959]
 [-0.40208888 -0.14544867 -0.15105895 ... -0.69994793  0.45950513
   1.20471832]
 ...
 [-0.40208888  0.18347232  0.01480584 ... -0.69994793  1.0990246
  -0.64902163]
 [-1.51379177 -0.47436966 -0.32798139 ...  1.42862261 -1.4590533
   0.39370709]
 [-0.40208888  0.5123933  -0.41644261 ... -0.69994793  0.77926486
   1.20471832]]


In [57]:
# Training network model (pdf.75)
"""
history = model.fit(x_train, y_train,
                    batch_size=64,
                    epochs=300,
                    validation_data=(x_val, y_val),
                    callbacks=[model_cbk, model_mckp])
"""
history = model.fit(x_train, y_train,  # 傳入訓練數據
               batch_size=64,  # 批次大小設為64
               epochs=300,  # 整個dataset訓練300遍
               callbacks=[model_cbk, model_mckp])  # Tensorboard回調函數紀錄訓練過程，ModelCheckpoint回調函數儲存最好的模型
                    

Epoch 1/300


ValueError: in user code:

    /home/earvin/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /home/earvin/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/earvin/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/earvin/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/earvin/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/earvin/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /home/earvin/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:754 train_step
        y_pred = self(x, training=True)
    /home/earvin/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1006 __call__
        with ops.name_scope_v2(name_scope):
    /home/earvin/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:6650 __enter__
        scope_name = scope.__enter__()
    /home/earvin/miniconda3/envs/tf/lib/python3.8/contextlib.py:113 __enter__
        return next(self.gen)
    /home/earvin/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:4246 name_scope
        raise ValueError("'%s' is not a valid scope name" % name)

    ValueError: ' model-1/' is not a valid scope name


In [None]:
# History record
history.history.keys()

In [None]:
# Draw pic
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(loc='upper right')


In [None]:
# Draw metrics index
plt.plot(history.history['mean_absolute_error'], label='train')
plt.plot(history.history['val_mean_absolute_error'], label='validation')
plt.ylabel('metrics')
pltxlabel('epochs')
plt.legend(loc='upper right')