In [1]:
from sklearn.datasets import fetch_california_housing

import pandas as pd
import numpy as np

In [2]:
# Fetch the California housing dataset through scikit-learn; later cells read
# its .data, .target, .feature_names, .target_names and .DESCR attributes.
data = fetch_california_housing()

In [3]:
# Split the dataset bundle into the feature matrix (X) and the target values (y).
X, y = data.data, data.target

In [4]:
# Wrap the raw feature array in a DataFrame labelled with the dataset's feature names.
X = pd.DataFrame(X, columns=data.feature_names)
# Preview the first five rows.
X.head(5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [5]:
# Turn the target array into a single-column DataFrame named after the dataset's target.
y = pd.DataFrame(y, columns=data.target_names)
# Preview the first five rows.
y.head(5)

Unnamed: 0,MedHouseVal
0,4.526
1,3.585
2,3.521
3,3.413
4,3.422


In [6]:
# Print the description text that ships with the dataset.
print(data.DESCR)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=2022,
    test_size=0.2,
)
# Display the shapes of the four resulting pieces.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((16512, 8), (4128, 8), (16512, 1), (4128, 1))

In [9]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scaler with default settings; it is fitted on the training split in a later cell.
scaler = MinMaxScaler()

In [10]:
# Learn the per-feature min/max from the training split only, then apply the
# same transformation to both splits so no test information leaks into the fit.
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

In [11]:
# Put the scaled arrays back into DataFrames that carry the original feature names.
X_train_s, X_test_s = (
    pd.DataFrame(scaled, columns=X.columns)
    for scaled in (X_train_s, X_test_s)
)

In [12]:
# Summary statistics of the scaled training features (sanity check: every
# column should have min 0 and max 1 after min-max scaling).
X_train_s.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
count,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0
mean,0.233177,0.542557,0.034783,0.022586,0.039944,0.003828,0.328671,0.476481
std,0.131694,0.24702,0.017689,0.01352,0.032176,0.008026,0.22699,0.199337
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.142655,0.333333,0.027271,0.019922,0.021974,0.002907,0.148188,0.253984
50%,0.209766,0.54902,0.033322,0.021192,0.032582,0.003551,0.182303,0.583665
75%,0.293577,0.705882,0.039569,0.02271,0.048376,0.004324,0.551173,0.631474
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [13]:
import keras
from keras.layers import Input, Flatten, Dense
from keras.models import *

In [14]:
# Reset Keras' global state before building a new model.
keras.backend.clear_session()

# Regression network: three 128-unit ReLU hidden layers and one linear output.
model = Sequential()

# Take the input width from the scaled training features instead of
# hard-coding 8, the same way the wine models size their inputs.
model.add(Input(shape=X_train_s.shape[1:]))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(1))

# Mean squared error loss, Adam optimizer.
model.compile(loss=keras.losses.mean_squared_error, optimizer='adam')

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               1152      
_________________________________________________________________
dense_1 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 129       
Total params: 34,305
Trainable params: 34,305
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Early stopping on validation loss: give up after 7 epochs without
# improvement and restore the best weights seen.
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=7,
    min_delta=0,
    restore_best_weights=True,
    verbose=1,
)

In [16]:
# Train for at most 500 epochs; 15% of the training rows are held out to
# compute the validation loss, and the `es` callback may end training early.
model.fit(
    X_train_s,
    y_train,
    validation_split=0.15,
    epochs=500,
    callbacks=[es],
    verbose=1,
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 00075: early stopping


<tensorflow.python.keras.callbacks.History at 0x184013b7af0>

## Wine

In [17]:
from sklearn.datasets import load_wine

# Load the wine dataset. Note: this rebinds `data`, which held the housing
# dataset in the cells above.
data = load_wine()

In [18]:
# Feature matrix and class labels of the wine dataset.
X, y = data.data, data.target

In [19]:
# Keep 15% of the samples aside for testing, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=2022,
    test_size=0.15,
)
# Display the shapes of the four resulting pieces.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((151, 13), (27, 13), (151,), (27,))

In [20]:
# Take the class count from the dataset's class names rather than from the
# labels that happen to fall in the training split, so the one-hot width is
# identical for both splits and matches `data.target_names`.
cat_n = len(data.target_names)

# Encode only raw 1-D integer labels. Without this guard, re-running the cell
# would one-hot encode the already-encoded labels a second time.
if np.ndim(y_train) == 1:
    y_train = keras.utils.to_categorical(y_train, cat_n)
    y_test = keras.utils.to_categorical(y_test, cat_n)

In [21]:
# Labelled view of the full wine feature matrix, for inspection.
X = pd.DataFrame(X, columns=data.feature_names)
# Preview the first five rows.
X.head(5)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [24]:
# Label the one-hot training targets with the dataset's class names.
y_train = pd.DataFrame(y_train, columns=data.target_names)
# Preview the first five rows.
y_train.head(5)

Unnamed: 0,class_0,class_1,class_2
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,0.0,1.0,0.0
4,0.0,0.0,1.0


In [25]:
# Print the description text that ships with the dataset.
print(data.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

In [27]:
# Refit the existing `scaler` object on the wine training split (this replaces
# whatever it learned earlier), then scale the test split with the same fit.
X_train_s, X_test_s = scaler.fit_transform(X_train), scaler.transform(X_test)

In [28]:
# Early stopping on validation loss: give up after 5 epochs without
# improvement and restore the best weights seen.
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=0,
    verbose=1,
    restore_best_weights=True,
)

In [39]:
# Reset Keras' global state before building a new model.
keras.backend.clear_session()

# Baseline classifier with no hidden layer: the scaled features feed a
# 3-unit softmax output directly.
il = Input(shape=(X_train_s.shape[1],))
ol = Dense(units=3, activation='softmax')(il)

reg_model = Model(inputs=il, outputs=ol)

# NOTE(review): the name `reg_model` and the MSE metric look carried over from
# a regression setup; this model is a classifier, so accuracy may be the more
# informative metric - confirm intent before changing.
reg_model.compile(
    optimizer='adam',
    loss=keras.losses.categorical_crossentropy,
    metrics=['mse'],
)

reg_model.summary()


Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 13)]              0         
_________________________________________________________________
dense (Dense)                (None, 3)                 42        
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


In [40]:
# Train the baseline for at most 100 epochs, validating on 10% of the
# training samples; the `es` callback may end training early.
reg_model.fit(
    X_train_s,
    y_train,
    validation_split=0.1,
    epochs=100,
    verbose=1,
    callbacks=[es],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x187104ead30>

In [41]:
# Score the baseline on the held-out test split; the result is [loss, mse].
reg_model.evaluate(x=X_test_s, y=y_test)



[0.8556076288223267, 0.16945450007915497]

In [36]:
# Reset Keras' global state before building a new model.
keras.backend.clear_session()

# Deeper classifier: two ReLU hidden layers (512 then 256 units) in front of
# the 3-unit softmax output.
il = Input(shape=(X_train_s.shape[1],))
h1 = Dense(units=512, activation='relu')(il)
h2 = Dense(units=256, activation='relu')(h1)
ol = Dense(units=3, activation='softmax')(h2)

model = Model(inputs=il, outputs=ol)

# Categorical cross-entropy loss with Adam; MSE is tracked as an extra metric.
model.compile(
    optimizer='adam',
    loss=keras.losses.categorical_crossentropy,
    metrics=['mse'],
)

model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 13)]              0         
_________________________________________________________________
dense (Dense)                (None, 512)               7168      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 771       
Total params: 139,267
Trainable params: 139,267
Non-trainable params: 0
_________________________________________________________________


In [37]:
# Train the deeper classifier for at most 100 epochs, validating on 10% of
# the training samples; the `es` callback may end training early.
model.fit(
    X_train_s,
    y_train,
    validation_split=0.1,
    epochs=100,
    verbose=1,
    callbacks=[es],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
1/5 [=====>........................] - ETA: 0s - loss: 0.0054 - mse: 1.2826e-04Restoring model weights from the end of the best epoch.
Epoch 00045: early stopping


<tensorflow.python.keras.callbacks.History at 0x18710114820>

In [38]:
# Score the deeper classifier on the held-out test split; the result is [loss, mse].
model.evaluate(x=X_test_s, y=y_test)



[0.046873196959495544, 0.008374566212296486]

In [None]:
# Instructor's e-mail (Kim Geon-yeong): drkail6916.work@gmail.com

In [42]:
# Names of the ten base measurements, in their original order.
feature_list = [
    'radius', 'texture', 'perimeter', 'area', 'smoothness',
    'compactness', 'concavity', 'concave points', 'symmetry',
    'fractal dimension',
]

In [44]:
# Show a new list with 'sex' appended; `feature_list` itself is left unchanged.
[*feature_list, 'sex']

['radius',
 'texture',
 'perimeter',
 'area',
 'smoothness',
 'compactness',
 'concavity',
 'concave points',
 'symmetry',
 'fractal dimension',
 'sex']