In [1]:
from sklearn.datasets import fetch_california_housing

In [2]:
# Load the California housing dataset as a Bunch holding NumPy arrays
# (both keyword values below are the library defaults, written out explicitly).
data = fetch_california_housing(return_X_y=False, as_frame=False)

In [12]:
# Print the dataset's bundled description text (print keeps its line breaks readable).
print(data["DESCR"])

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [None]:
# Pull the feature matrix and the target vector out of the loaded dataset.
x, y = data.data, data.target

# Show both shapes as the cell output.
(x.shape, y.shape)

In [4]:
import pandas as pd

In [10]:
# Preview the first feature row with its column names attached.
(
    pd.DataFrame(data=x, columns=data.feature_names)
    .head(n=1)
)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23


In [11]:
# Preview the first target value under the dataset's own target column name.
(
    pd.DataFrame(data=y, columns=data.target_names)
    .head(n=1)
)

Unnamed: 0,MedHouseVal
0,4.526


## 조건
1. Sequential API로 작성하세요.
2. 전처리를 해주세요. (필수: train / test 구분!)
    - hint: 데이터프레임화하여 전처리가 필요한지 확인하면 좋습니다.
3. 모델링을 하세요.
    1. 인풋 - 아웃풋 구조로 만드세요.
    2. 히든 레이어를 2개 이상 추가하세요. (자율)
    3. 컴파일 과정에 어떤 것이 들어가야 하는지 생각해주세요.
4. 학습
    1. Validation set을 만들어주세요. 분할의 정도는 자율.
    2. 7번(epoch) 이상 성능이 개선되지 않으면 학습을 멈춰주세요.
    3. 학습 횟수(epochs) 지정은 500

In [None]:
# Inspect dtypes and non-null counts to decide whether any cleaning is needed.
(
    pd.DataFrame(data=x, columns=data.feature_names)
    .info()
)

In [16]:
from sklearn.model_selection import train_test_split

In [17]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the split repeatable.
train_x, test_x, train_y, test_y = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2022,
)

In [18]:
# Shapes of the four arrays produced by the split, in train/test feature-target order.
tuple(arr.shape for arr in (train_x, train_y, test_x, test_y))

((16512, 8), (16512,), (4128, 8), (4128,))

In [20]:
## Min-Max scaling
from sklearn.preprocessing import MinMaxScaler

In [21]:
# Scaler that maps each feature to [0, 1] (the default range, written out explicitly).
mm_scaler = MinMaxScaler(feature_range=(0, 1))

In [22]:
# Learn the per-feature min/max from the training rows only, then apply the
# same mapping to both splits so no test-set statistics reach the scaler.
# NOTE: train_x / test_x are overwritten with their scaled versions here.
mm_scaler.fit(train_x)
train_x = mm_scaler.transform(train_x)
test_x = mm_scaler.transform(test_x)

In [26]:
# Summary statistics of the scaled training features (min should be 0 and max 1 per column).
(
    pd.DataFrame(data=train_x, columns=data.feature_names)
    .describe()
)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
count,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0
mean,0.233177,0.542557,0.034783,0.022586,0.039944,0.003828,0.328671,0.476481
std,0.131694,0.24702,0.017689,0.01352,0.032176,0.008026,0.22699,0.199337
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.142655,0.333333,0.027271,0.019922,0.021974,0.002907,0.148188,0.253984
50%,0.209766,0.54902,0.033322,0.021192,0.032582,0.003551,0.182303,0.583665
75%,0.293577,0.705882,0.039569,0.02271,0.048376,0.004324,0.551173,0.631474
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [27]:
# 모델링을 하세요
# 인풋 - 아웃풋 구조로 만드세요.
# 히든 레이어를 최소 2개 이상 추가하세요. (자율)
# 컴파일 과정에 어떤 것이 들어가야 하는지 생각해주세요.
import tensorflow as tf
from tensorflow import keras

In [28]:
# Confirm the training array shapes before wiring up the network's input layer.
(train_x.shape, train_y.shape)

((16512, 8), (16512,))

In [33]:
## Sequential API
# 1. Reset Keras global state so re-running this cell starts from a clean slate,
#    then fix TensorFlow's global seed so the weight initialisation is repeatable
#    (same seed value as the train/test split above).
keras.backend.clear_session()
tf.random.set_seed(2022)

# 2. Create the empty Sequential container
model = keras.models.Sequential()

# 3. Stack the layers: input -> two hidden ReLU layers -> one linear output unit.
#    The input width is read from the training data instead of being hard-coded.
model.add( keras.layers.Input(shape=(train_x.shape[1],)) )
model.add( keras.layers.Dense(64, activation='relu') )
model.add( keras.layers.Dense(64, activation='relu') )
model.add( keras.layers.Dense(1) )

# 4. Compile: mean squared error loss for the regression target, Adam optimizer
model.compile(loss='mse', optimizer='adam')

# Summary of layers and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                576       
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 4,801
Trainable params: 4,801
Non-trainable params: 0
_________________________________________________________________


In [30]:
from tensorflow.keras.callbacks import EarlyStopping

In [34]:
# Early-stopping callback: watch the validation loss and stop once it has gone
# 7 epochs without improving.
es = EarlyStopping(
    monitor='val_loss',         # quantity being watched
    patience=7,                 # epochs without improvement tolerated before stopping
    min_delta=0,                # changes not exceeding this value do not count as improvement
    restore_best_weights=True,  # hand back the weights from the best epoch
    verbose=1,                  # report when training is stopped early
)

In [35]:
# Training
# - hold out 15% of the training rows as the validation set
# - stop early via the `es` callback (7 epochs without improvement)
# - train for at most 500 epochs
model.fit(
    x=train_x,
    y=train_y,
    epochs=500,
    validation_split=0.15,
    callbacks=[es],
    verbose=1,
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 54: early stopping


<keras.callbacks.History at 0x7f2d04c010d0>