# Iris Classification (분류)

# Machine learning

### 1. Logistic Regression : 독립변수와 종속변수의 선형 관계성에 기반

In [1]:
import numpy as np
import pandas as pd  
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this hides all warning categories, including any deprecation
# or convergence notices the libraries used below may raise.
warnings.filterwarnings('ignore')

In [2]:
from sklearn.datasets import load_iris
# Load the iris dataset that ships with scikit-learn.
iris = load_iris()       

In [3]:
# The loaded object exposes data, target, target_names, feature_names and DESCR.
data, label = iris.data, iris.target
columns = iris.feature_names

# Wrap the feature matrix in a DataFrame so the columns carry readable names,
# then preview the first rows.
df = pd.DataFrame(data=data, columns=columns)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


#### 데이터가 준비되었으니 훈련용 데이터와 테스트용 데이터로 나눔

In [4]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing. The split is shuffled, stratified on
# the labels, and seeded so it can be reproduced.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    label,
    test_size=0.2,
    shuffle=True,
    stratify=label,
    random_state=2019,
)

#### 모델 객체를 생성

In [5]:
from sklearn.linear_model import LogisticRegression
# Logistic-regression classifier with default hyperparameters, except that
# verbose=1 is passed to request progress output.
lr = LogisticRegression(verbose = 1)

#### 모델 학습

In [6]:
# Fit the logistic-regression model on the training split.
lr.fit(x_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


LogisticRegression(verbose=1)

#### model save를 하려면 joblib

In [8]:
import os
import joblib

# The dump below writes into the model/ folder; create it first so the cell
# also works in a fresh checkout where the folder does not exist yet.
os.makedirs('model', exist_ok=True)

# Persist the fitted logistic-regression model. Loading this file later gives
# back the trained estimator without re-fitting.
joblib.dump(lr, 'model/iris_lr.pkl')

['model/iris_lr.pkl']

### 2. Support Vector Machine : 판별경계에 가장 가까운 벡터(서포트 벡터)까지의 거리(마진)를 최대화하는 경계를 찾는 방법

In [9]:
from sklearn.svm import SVC

#### 모델 객체를 생성

In [10]:
# Support-vector classifier with default hyperparameters.
svc = SVC()

#### 모델 학습

In [11]:
# Fit the support-vector classifier on the training split.
svc.fit(x_train, y_train)

SVC()

#### model save

In [17]:
# Save the fitted support-vector classifier to disk with joblib.
joblib.dump(svc, 'model/iris_svm.pkl')

['model/iris_svm.pkl']

### 3. Decision Tree : 데이터에 있는 규칙을 학습을 통해 자동으로 찾아내 트리 기반의 분류 규칙을 만드는 것

In [13]:
from sklearn.tree import DecisionTreeClassifier

In [14]:
# Decision-tree classifier with default hyperparameters.
dtc = DecisionTreeClassifier()

In [15]:
# Fit the decision tree on the training split.
dtc.fit(x_train, y_train)

DecisionTreeClassifier()

In [18]:
# Save the fitted decision tree to disk with joblib.
joblib.dump(dtc, 'model/iris_dt.pkl')

['model/iris_dt.pkl']

## 저장된 모델로 정확도 산출

#### dump로 저장하고, load로 불러와서 반환합니다.

In [19]:
# Read the three persisted classifiers back from disk, in the order
# logistic regression, SVM, decision tree.
model_lr, model_svm, model_dt = map(
    joblib.load,
    ('model/iris_lr.pkl', 'model/iris_svm.pkl', 'model/iris_dt.pkl'),
)

In [20]:
# Predict the held-out test split with each reloaded classifier.
y_pred_lr, y_pred_svm, y_pred_dt = (
    clf.predict(x_test) for clf in (model_lr, model_svm, model_dt)
)

In [22]:
from sklearn.metrics import accuracy_score

In [23]:
# Score each classifier's predictions against the true test labels and print
# the three accuracies side by side (LR, SVM, decision tree).
acc_lr, acc_svm, acc_dt = (
    accuracy_score(y_test, predicted)
    for predicted in (y_pred_lr, y_pred_svm, y_pred_dt)
)
print(acc_lr, acc_svm, acc_dt)

0.9666666666666667 0.9666666666666667 0.9666666666666667


## 한 개의 테스트 값 예측하기

In [27]:
# Show the last rows of the feature table to pick a sample for a single prediction.
df.tail()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3
149,5.9,3.0,5.1,1.8


In [29]:
# Feature values of the row with index 145, written directly as a
# one-row, four-column batch so predict() receives a 2-D input.
test_data = np.array([[6.7, 3.0, 5.2, 2.3]])

In [30]:
# Predict the class of the single sample with the logistic-regression model.
model_lr.predict(test_data)
# The predicted value is 2 (setosa = 0, versicolor = 1, virginica = 2)

array([2])

In [32]:
# predict() returns one label per input row; take the only element so each
# result can be used directly as an index into the species-name list.
index_lr, index_svm, index_dt = (
    clf.predict(test_data)[0] for clf in (model_lr, model_svm, model_dt)
)

In [33]:
# Display names for the class labels, ordered so that position 0, 1, 2 matches
# setosa, versicolor, virginica.
sp_names = ['Setosa', 'Versicolor', 'Virginica']

In [34]:
# Translate each model's predicted label into its species name and print the
# three names on one line (LR, SVM, decision tree).
print(*(sp_names[idx] for idx in (index_lr, index_svm, index_dt)))

Virginica Virginica Virginica


#### server에서는 model.~과 sp_names만 필요

# Deep learning model

In [41]:
# Deep-learning model definition
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
# NOTE(review): `keras` above is tensorflow.keras, while Sequential and Dense
# come from the standalone `keras` package. Confirm both resolve to the same
# implementation in this environment.

# Fully connected classifier: 4 input features -> 12 -> 8 -> 3 class
# probabilities (softmax). Only the first layer declares input_shape; the
# second layer's input is the 12-unit output of the first, so it must not
# claim a (4,) input.
model = Sequential([
    Dense(12, input_shape=(4,), activation='relu'),
    Dense(8, activation='relu'),
    Dense(3, activation='softmax'),
])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 12)                60        
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 104       
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 27        
Total params: 191
Trainable params: 191
Non-trainable params: 0
_________________________________________________________________


In [42]:
# Compile the model: Adam optimizer, categorical cross-entropy loss for the
# one-hot targets, and accuracy tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [43]:
from keras.callbacks import ModelCheckpoint

# Callback that watches the validation loss and writes the model to
# model/iris_deep.hdf5, keeping only the best one seen so far.
checkpointer = ModelCheckpoint(
    filepath='model/iris_deep.hdf5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

In [44]:
# One-hot encode the integer training labels.
Y_encoded = keras.utils.to_categorical(y_train)
# Peek at three encoded rows (positions 0, 50 and 100).
tuple(Y_encoded[row] for row in (0, 50, 100))

(array([1., 0., 0.], dtype=float32),
 array([0., 0., 1.], dtype=float32),
 array([0., 0., 1.], dtype=float32))

In [47]:
# Train the model: 100 epochs in batches of 30, with 20% of the training data
# set aside for validation and the checkpoint callback saving improvements.
model.fit(
    x_train,
    Y_encoded,
    batch_size=30,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpointer],
    verbose=2,
)

Train on 96 samples, validate on 24 samples
Epoch 1/100
 - 0s - loss: 1.4428 - accuracy: 0.3438 - val_loss: 1.5453 - val_accuracy: 0.2917

Epoch 00001: val_loss improved from inf to 1.54532, saving model to model/iris_deep.hdf5
Epoch 2/100
 - 0s - loss: 1.3691 - accuracy: 0.3438 - val_loss: 1.4621 - val_accuracy: 0.2917

Epoch 00002: val_loss improved from 1.54532 to 1.46208, saving model to model/iris_deep.hdf5
Epoch 3/100
 - 0s - loss: 1.3059 - accuracy: 0.3438 - val_loss: 1.3811 - val_accuracy: 0.2917

Epoch 00003: val_loss improved from 1.46208 to 1.38107, saving model to model/iris_deep.hdf5
Epoch 4/100
 - 0s - loss: 1.2449 - accuracy: 0.3438 - val_loss: 1.3013 - val_accuracy: 0.2917

Epoch 00004: val_loss improved from 1.38107 to 1.30129, saving model to model/iris_deep.hdf5
Epoch 5/100
 - 0s - loss: 1.1864 - accuracy: 0.3438 - val_loss: 1.2439 - val_accuracy: 0.2917

Epoch 00005: val_loss improved from 1.30129 to 1.24394, saving model to model/iris_deep.hdf5
Epoch 6/100
 - 0s - 


Epoch 00044: val_loss improved from 0.86736 to 0.85719, saving model to model/iris_deep.hdf5
Epoch 45/100
 - 0s - loss: 0.8318 - accuracy: 0.6875 - val_loss: 0.8473 - val_accuracy: 0.5833

Epoch 00045: val_loss improved from 0.85719 to 0.84734, saving model to model/iris_deep.hdf5
Epoch 46/100
 - 0s - loss: 0.8228 - accuracy: 0.6875 - val_loss: 0.8380 - val_accuracy: 0.5833

Epoch 00046: val_loss improved from 0.84734 to 0.83797, saving model to model/iris_deep.hdf5
Epoch 47/100
 - 0s - loss: 0.8141 - accuracy: 0.6979 - val_loss: 0.8278 - val_accuracy: 0.5833

Epoch 00047: val_loss improved from 0.83797 to 0.82784, saving model to model/iris_deep.hdf5
Epoch 48/100
 - 0s - loss: 0.8046 - accuracy: 0.7292 - val_loss: 0.8169 - val_accuracy: 0.7083

Epoch 00048: val_loss improved from 0.82784 to 0.81690, saving model to model/iris_deep.hdf5
Epoch 49/100
 - 0s - loss: 0.7953 - accuracy: 0.7500 - val_loss: 0.8059 - val_accuracy: 0.8750

Epoch 00049: val_loss improved from 0.81690 to 0.80589

 - 0s - loss: 0.4633 - accuracy: 0.9375 - val_loss: 0.4526 - val_accuracy: 0.8750

Epoch 00088: val_loss improved from 0.46144 to 0.45259, saving model to model/iris_deep.hdf5
Epoch 89/100
 - 0s - loss: 0.4540 - accuracy: 0.9479 - val_loss: 0.4439 - val_accuracy: 0.9167

Epoch 00089: val_loss improved from 0.45259 to 0.44394, saving model to model/iris_deep.hdf5
Epoch 90/100
 - 0s - loss: 0.4429 - accuracy: 0.9583 - val_loss: 0.4345 - val_accuracy: 0.9167

Epoch 00090: val_loss improved from 0.44394 to 0.43450, saving model to model/iris_deep.hdf5
Epoch 91/100
 - 0s - loss: 0.4322 - accuracy: 0.9688 - val_loss: 0.4255 - val_accuracy: 0.9583

Epoch 00091: val_loss improved from 0.43450 to 0.42552, saving model to model/iris_deep.hdf5
Epoch 92/100
 - 0s - loss: 0.4219 - accuracy: 0.9583 - val_loss: 0.4195 - val_accuracy: 0.9583

Epoch 00092: val_loss improved from 0.42552 to 0.41952, saving model to model/iris_deep.hdf5
Epoch 93/100
 - 0s - loss: 0.4128 - accuracy: 0.9583 - val_loss: 0.4

<keras.callbacks.callbacks.History at 0x1e4e4b59f08>

In [49]:
from keras.models import load_model
# Reload the model from the file path the ModelCheckpoint callback was configured to write.
model_deep = load_model('model/iris_deep.hdf5')

In [50]:
# Evaluate the checkpointed model that was reloaded from disk (model_deep), not
# the in-memory `model`, so the reported accuracy belongs to the same weights
# that are used for the single-sample predictions.
# The model was compiled with one metric, so evaluate() yields [loss, accuracy];
# index 1 is the accuracy.
model_deep.evaluate(x_test, keras.utils.to_categorical(y_test))[1]



0.9666666388511658

In [55]:
# Softmax output for the single sample: one row with a score per class.
model_deep.predict(test_data)

array([[0.00613038, 0.2522221 , 0.7416475 ]], dtype=float32)

In [52]:
# Sequential.predict_classes() has been removed from recent TensorFlow/Keras
# releases. Take the arg-max over the class axis of predict() instead, then
# pick the label of the only row.
np.argmax(model_deep.predict(test_data), axis=-1)[0]

2

In [54]:
# Index of the highest class score in the single prediction row.
model_deep.predict(test_data)[0].argmax()

2