### 레모네이드 판매 예측해보기

In [1]:
# 라이브러리 사용
import pandas as pd
import tensorflow as tf

In [2]:
# Load the lemonade dataset (temperature and sales columns) and display it.
df = pd.read_csv(filepath_or_buffer='lemonade.csv')
df

Unnamed: 0,온도,판매량
0,20,40
1,21,42
2,22,44
3,23,46
4,24,48
5,25,50


In [3]:
# Split the independent variable (temperature) from the dependent variable (sales).
# Selecting with a one-element list keeps each result two-dimensional: (n_rows, 1).
features = df.loc[:, ['온도']]
label = df.loc[:, ['판매량']]
print(features.shape, label.shape)

(6, 1) (6, 1)


In [4]:
# Build the model: a single Dense unit on a single input.

# The input shape is (1,) because there is exactly one independent variable.
X = tf.keras.layers.Input(shape=(1,))
# The number of output units follows the number of dependent variables (one here).
y = tf.keras.layers.Dense(units=1)(X)
model = tf.keras.models.Model(inputs=X, outputs=y)
# Mean squared error as the loss; no optimizer is passed, so Keras uses its default.
model.compile(loss='mse')

# epochs
- 전체 데이터를 몇 번 반복하여 학습할 것인지를 결정해주는 숫자

# fit 한 결과 해석

Epoch 반복횟수,    시간이 얼마나 걸리는지,  loss : 얼마나 정답에 가까이 맞추고 있는지를 평가하는 지표


- loss : (예측 - 정답)을 제곱한 값의 평균(MSE, 평균제곱오차)을 구하면 loss가 된다.
- 예측이 정답을 다 맞춘다면 loss는 0이 되고 0에 가까워지면 잘 학습이 되고 있다고 볼 수 있다.

In [5]:
# Train the model: 10000 passes over the data, with progress output suppressed.

model.fit(x=features, y=label, epochs=10000, verbose=0)

<keras.callbacks.History at 0x1c07fc8a9a0>

In [6]:
# Predict sales for the same temperatures the model was trained on.
model.predict(x=features)



array([[39.99621 ],
       [41.993153],
       [43.990097],
       [45.98704 ],
       [47.98399 ],
       [49.980934]], dtype=float32)

In [7]:
# Predict sales for an unseen temperature of 15.
# The value is wrapped in a one-row DataFrame that mirrors the training input, so
# the (1, 1) shape is explicit. A bare nested Python list is ambiguous to Keras
# (lists are how multi-input models receive data) and newer Keras releases
# refuse it as an unrecognized data type.
model.predict(pd.DataFrame({'온도': [15]}))



array([[30.011477]], dtype=float32)

# 보스턴 집값 예측

- CRIM : 범죄율
- CHAS : 강변 여부 (찰스강에 인접하면 1, 아니면 0)
- RM : 평균 방 수
- AGE : 노후주택비율
- TAX : 재산세 세율
- PTRATIO : 학생/교사 비율
- LSTAT : 하위계층비율
- MEDV : 집값(중앙값) ---> 평균 대신 중앙값을 사용한다. 평균은 이상치에 크게 영향을 받는데, 집값은 비싼 집과 싼 집의 차이가 매우 크기 때문이다.

In [8]:
# Load the Boston housing dataset and report its (rows, columns) shape.
df1 = pd.read_csv(filepath_or_buffer='boston.csv')
df1.shape

(506, 14)

In [9]:
# Preview the first five rows of the Boston data.
df1.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [10]:
# List the column names so the feature/target split can be written out explicitly.
df1.columns

Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
       'ptratio', 'b', 'lstat', 'medv'],
      dtype='object')

In [11]:
# Split into independent variables (13 columns) and the dependent variable (medv).

features = df1.loc[:, [
    'crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age',
    'dis', 'rad', 'tax', 'ptratio', 'b', 'lstat',
]]
label = df1.loc[:, ['medv']]
print(features.shape, label.shape)

(506, 13) (506, 1)


In [12]:
# Define the model structure: 13 inputs feeding a single Dense output unit.

# There are 13 independent variables, hence an input shape of (13,).
X = tf.keras.layers.Input(shape=(13,))
y = tf.keras.layers.Dense(units=1)(X)
model1 = tf.keras.models.Model(inputs=X, outputs=y)
model1.compile(loss='mse')

In [13]:
# Train for 1000 epochs with progress output suppressed.
model1.fit(x=features, y=label, epochs=1000, verbose=0)

<keras.callbacks.History at 0x1c07ff9a250>

In [14]:
# Predict the target for the first five rows.
model1.predict(x=features.iloc[:5])



array([[29.295807],
       [24.772493],
       [30.321192],
       [29.649424],
       [29.007292]], dtype=float32)

In [15]:
# True values for the same five rows; the predictions above are noticeably off,
# which shows the model has not trained well.
label.iloc[:5]

Unnamed: 0,medv
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


In [16]:
model1.get_weights() # Inspect the learned weight for each input variable, followed by the bias.

[array([[-0.08732795],
        [ 0.07372212],
        [-0.05190886],
        [ 3.1757472 ],
        [ 2.0862777 ],
        [ 3.848848  ],
        [ 0.02060073],
        [-0.82146204],
        [ 0.15052989],
        [-0.00929194],
        [ 0.02025608],
        [ 0.01616707],
        [-0.59504235]], dtype=float32),
 array([2.5138524], dtype=float32)]

## 아이리스 품종 분류하기

In [17]:
# Load the iris dataset.
# The four measurement columns are used to predict the species column,
# so the dependent variable is categorical.
df2 = pd.read_csv(filepath_or_buffer='iris.csv')
df2

Unnamed: 0,꽃잎길이,꽃잎폭,꽃받침길이,꽃받침폭,품종
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [18]:
# One-hot encode the categorical species column into one indicator column per class.
# dtype=int pins the indicators to 0/1 integers. Without it the dtype depends on
# the installed pandas version (booleans since pandas 2.0), which would change
# both the displayed table and the label dtype handed to Keras.
df2 = pd.get_dummies(df2, dtype=int)
df2

Unnamed: 0,꽃잎길이,꽃잎폭,꽃받침길이,꽃받침폭,품종_setosa,품종_versicolor,품종_virginica
0,5.1,3.5,1.4,0.2,1,0,0
1,4.9,3.0,1.4,0.2,1,0,0
2,4.7,3.2,1.3,0.2,1,0,0
3,4.6,3.1,1.5,0.2,1,0,0
4,5.0,3.6,1.4,0.2,1,0,0
...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,0,0,1
146,6.3,2.5,5.0,1.9,0,0,1
147,6.5,3.0,5.2,2.0,0,0,1
148,6.2,3.4,5.4,2.3,0,0,1


In [19]:
# List the column names after one-hot encoding (four features plus three class indicators).
df2.columns

Index(['꽃잎길이', '꽃잎폭', '꽃받침길이', '꽃받침폭', '품종_setosa', '품종_versicolor',
       '품종_virginica'],
      dtype='object')

In [20]:
# Four measurement columns are the inputs; the three one-hot columns are the target.
features = df2.loc[:, ['꽃잎길이', '꽃잎폭', '꽃받침길이', '꽃받침폭']]
label = df2.loc[:, ['품종_setosa', '품종_versicolor', '품종_virginica']]

In [21]:
# Softmax classifier: 4 input features -> 3 class probabilities.
X = tf.keras.layers.Input(shape = [4])
# softmax turns the three outputs into per-class probabilities that sum to 1.
y = tf.keras.layers.Dense(3, activation = 'softmax')(X)
model2 = tf.keras.models.Model(X, y)
# metrics is given as a list, as documented for Model.compile; a bare string
# is not accepted by newer Keras releases.
model2.compile(loss = 'categorical_crossentropy',
               metrics = ['accuracy'])

In [22]:
# Train for 1000 epochs with progress output suppressed.
model2.fit(x=features, y=label, epochs=1000, verbose=0)

<keras.callbacks.History at 0x1c002048f40>

In [23]:
# Class probabilities for the first five rows; the model puts the highest
# probability on the first class for each of them.
model2.predict(x=features.iloc[:5])



array([[9.9902904e-01, 9.7099086e-04, 1.8195550e-08],
       [9.9633068e-01, 3.6691357e-03, 1.8668383e-07],
       [9.9829620e-01, 1.7036798e-03, 7.2937539e-08],
       [9.9569952e-01, 4.3001645e-03, 3.7489298e-07],
       [9.9922347e-01, 7.7655865e-04, 1.4818999e-08]], dtype=float32)

In [24]:
# Print the true one-hot labels for the same five rows.
print(label.iloc[:5])

   품종_setosa  품종_versicolor  품종_virginica
0          1              0             0
1          1              0             0
2          1              0             0
3          1              0             0
4          1              0             0


In [25]:
# Inspect the learned parameters: a 4x3 weight matrix followed by 3 biases.
model2.get_weights()

[array([[ 0.972075  ,  0.66650385, -0.8902466 ],
        [ 2.8078158 ,  0.26584637, -1.1091416 ],
        [-4.0822024 , -0.86363965,  1.2263564 ],
        [-4.2782254 , -1.354225  ,  1.7581416 ]], dtype=float32),
 array([ 2.0633066,  0.4916026, -1.1898735], dtype=float32)]

## Hidden Layer

### 보스턴 집값 예측(hidden layer add)

In [26]:
# Re-select the Boston inputs and target, since `features`/`label` were
# rebound to the iris data in the cells above.
features = df1.loc[:, [
    'crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age',
    'dis', 'rad', 'tax', 'ptratio', 'b', 'lstat',
]]
label = df1.loc[:, ['medv']]

In [27]:
# Regression model with one hidden layer.
# 13 independent variables -> input shape (13,).
X = tf.keras.layers.Input(shape=(13,))
# Hidden layer: 10 units with the swish activation.
H = tf.keras.layers.Dense(units=10, activation='swish')(X)
# Single output unit for the single target.
y = tf.keras.layers.Dense(units=1)(H)
model3 = tf.keras.models.Model(inputs=X, outputs=y)
model3.compile(loss='mse')

In [28]:
# Print the layer-by-layer structure and parameter counts of the model.
model3.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 13)]              0         
                                                                 
 dense_3 (Dense)             (None, 10)                140       
                                                                 
 dense_4 (Dense)             (None, 1)                 11        
                                                                 
Total params: 151
Trainable params: 151
Non-trainable params: 0
_________________________________________________________________


In [29]:
# Train for 1000 epochs with progress output suppressed.
model3.fit(x=features, y=label, epochs=1000, verbose=0)

<keras.callbacks.History at 0x1c000fd9220>

In [30]:
# Predict the target for the first five rows.
model3.predict(x=features.head(5))



array([[28.339647],
       [25.147776],
       [30.05922 ],
       [30.914242],
       [30.901667]], dtype=float32)

In [31]:
# True values for the same five rows, for comparison with the predictions.
label.head(5)

Unnamed: 0,medv
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


### 아이리스 품종 예측(hidden layer add)

In [32]:
# Re-select the iris inputs and one-hot target, since `features`/`label`
# were rebound to the Boston data in the previous section.
features = df2.loc[:, ['꽃잎길이', '꽃잎폭', '꽃받침길이', '꽃받침폭']]
label = df2.loc[:, ['품종_setosa', '품종_versicolor', '품종_virginica']]

In [33]:
# Iris classifier with three hidden layers of 8 swish units each.
X = tf.keras.layers.Input(shape = [4])
H = tf.keras.layers.Dense(8, activation = 'swish')(X)
H = tf.keras.layers.Dense(8, activation = 'swish')(H)
H = tf.keras.layers.Dense(8, activation = 'swish')(H) # three hidden layers in total
y = tf.keras.layers.Dense(3, activation = 'softmax')(H)
model4 = tf.keras.models.Model(X, y)
# metrics is given as a list, as documented for Model.compile; a bare string
# is not accepted by newer Keras releases.
model4.compile(loss = 'categorical_crossentropy',
               metrics = ['accuracy'])

# NOTE: the string literal below is a study note, not executed model code.
# It spells out each Dense layer and its activation as separate steps, with
# optional BatchNormalization lines shown in parentheses. Because it is the
# last expression in the cell, the notebook echoes it as the cell output.
"""
X = tf.keras.layers.Input(shape = [4])   위의 코드와 동일한 코드이다.
H = tf.keras.layers.Dense(8)(X)
(H = tf.keras.layers.BatchNormalization()(H)) --> 배치정규화를 해주는 코드 추가 (위와 동일하지않음 / 따로 추가)
H = tf.keras.layers.Activation('swish')(H)

H = tf.keras.layers.Dense(8)(H)
(H = tf.keras.layers.BatchNormalization()(H))
H = tf.keras.layers.Activation('swish')(H)

H = tf.keras.layers.Dense(8)(H)
(H = tf.keras.layers.BatchNormalization()(H))
H = tf.keras.layers.Activation('swish')(H)

y = tf.keras.layers.Dense(3, activation = 'softmax')(H)  
model4 = tf.keras.models.Model(X, y)
model4.compile(loss = 'categorical_crossentropy',
              metrics = 'accuracy')
"""

"\nX = tf.keras.layers.Input(shape = [4])   위의 코드와 동일한 코드이다.\nH = tf.keras.layers.Dense(8)(X)\n(H = tf.keras.layers.BatchNormalization()(H)) --> 배치정규화를 해주는 코드 추가 (위와 동일하지않음 / 따로 추가)\nH = tf.keras.layers.Activation('swish')(H)\n\nH = tf.keras.layers.Dense(8)(H)\n(H = tf.keras.layers.BatchNormalization()(H))\nH = tf.keras.layers.Activation('swish')(H)\n\nH = tf.keras.layers.Dense(8)(H)\n(H = tf.keras.layers.BatchNormalization()(H))\nH = tf.keras.layers.Activation('swish')(H)\n\ny = tf.keras.layers.Dense(3, activation = 'softmax')(H)  \nmodel4 = tf.keras.models.Model(X, y)\nmodel4.compile(loss = 'categorical_crossentropy',\n              metrics = 'accuracy')\n"

In [34]:
# Train for 1000 epochs with progress output suppressed.
model4.fit(x=features, y=label, epochs=1000, verbose=0)

<keras.callbacks.History at 0x1c00322b280>

In [35]:
# Class probabilities for the first five rows.
model4.predict(x=features.iloc[:5])



array([[9.9999678e-01, 3.9966059e-07, 2.8797417e-06],
       [9.9999428e-01, 1.5759942e-06, 4.2214342e-06],
       [9.9999106e-01, 1.8846332e-06, 6.9808057e-06],
       [9.9998689e-01, 6.3121897e-06, 6.8290892e-06],
       [9.9999595e-01, 5.2683708e-07, 3.5511773e-06]], dtype=float32)

In [36]:
# True one-hot labels for the same five rows.
label.iloc[:5]

Unnamed: 0,품종_setosa,품종_versicolor,품종_virginica
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


# 배치 정규화 사용시 성능이 더 올라간다. --> WHY? 공부하기

In [37]:
# Iris classifier with batch normalization.
# Each of the three hidden stages is Dense(8) -> BatchNormalization -> swish,
# so normalization is applied to the Dense output before the activation.
# This rebinds `model4`, replacing the model defined without batch normalization.
X = tf.keras.layers.Input(shape = [4])

H = X
for _ in range(3):
    H = tf.keras.layers.Dense(8)(H)
    H = tf.keras.layers.BatchNormalization()(H)
    H = tf.keras.layers.Activation('swish')(H)

y = tf.keras.layers.Dense(3, activation = 'softmax')(H)
model4 = tf.keras.models.Model(X, y)
# metrics is given as a list, as documented for Model.compile; a bare string
# is not accepted by newer Keras releases.
model4.compile(loss = 'categorical_crossentropy',
               metrics = ['accuracy'])

In [38]:
# Train the batch-normalized model for 1000 epochs with progress output suppressed.
model4.fit(x=features, y=label, epochs=1000, verbose=0)

<keras.callbacks.History at 0x1c0054d1fa0>

In [39]:
# Class probabilities for the first five rows; the winning class is very close to 1.
model4.predict(x=features.iloc[:5])



array([[9.9999976e-01, 1.5667855e-07, 1.7465860e-07],
       [9.9999905e-01, 4.8926381e-07, 5.1331455e-07],
       [9.9999988e-01, 4.7911392e-08, 8.9671929e-08],
       [9.9999809e-01, 9.5523990e-07, 9.7296811e-07],
       [9.9999976e-01, 1.1950048e-07, 1.3268382e-07]], dtype=float32)

In [40]:
# True one-hot labels for the same five rows.
label.iloc[:5]

Unnamed: 0,품종_setosa,품종_versicolor,품종_virginica
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0
