# iris 분류

In [31]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [32]:
from sklearn.datasets import load_iris
iris = load_iris()

In [33]:
# data, target, target_names, feature_names, DESCR
data = iris.data
label = iris.target
columns = iris.feature_names
df = pd.DataFrame(data, columns=columns)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [34]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = \
    train_test_split(data, label, test_size=0.2, 
                     shuffle=True, stratify=label, random_state=2019)

- 모델생성

In [35]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(verbose=1)

In [36]:
lr.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


LogisticRegression(verbose=1)

- 학습한 결과 저장

In [37]:
import joblib
joblib.dump(lr,'model/iris_lr.pkl')

['model/iris_lr.pkl']

- support Vector Machine

In [38]:
# 모델 객체 생성
from sklearn.svm import SVC
svc = SVC()
#svc.fit(X_train, y_train)
#y_pred_svc = svc.predict(X_test)
#accuracy_score(y_test, y_pred)

In [39]:
# 학습
svc.fit(X_train, y_train)

SVC()

In [40]:
import joblib
joblib.dump(lr,'model/iris_svm.pkl')

['model/iris_svm.pkl']

### Decision Tree

In [41]:
# 객체 모델 생성  / dtc 소문자만 가능
dtc = DecisionTreeClassifier()

In [42]:
# 학습
dtc.fit(X_train, y_train)

DecisionTreeClassifier()

In [43]:
import joblib
joblib.dump(lr,'model/iris_dt.pkl')

['model/iris_dt.pkl']

### 저장된 모델 정확도 산출

In [45]:
model_lr = joblib.load('model/iris_lr.pkl')
model_svm = joblib.load('model/iris_svm.pkl')
model_dt = joblib.load('model/iris_dt.pkl')

In [47]:
y_pred_lr = model_lr.predict(x_test)
y_pred_svm = model_svm.predict(x_test)
y_pred_dt = model_dt.predict(x_test)

In [48]:
from sklearn.metrics import accuracy_score
acc_lr = accuracy_score(y_test, y_pred_lr)
acc_svm = accuracy_score(y_test, y_pred_svm)
acc_dt = accuracy_score(y_test, y_pred_dt)
print(acc_lr, acc_svm, acc_dt)

0.9666666666666667 0.9666666666666667 0.9666666666666667


### 한개의 테스트 값 예측

In [49]:
df.tail()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3
149,5.9,3.0,5.1,1.8


In [50]:
test_data = np.array([6.7, 3.0, 5.2 , 2.3]).reshape(1,4)   # 2차원을 1차원으로 reshape

In [51]:
index_lr = model_lr.predict(test_data)[0]
index_svm = model_svm.predict(test_data)[0]
index_dt = model_dt.predict(test_data)[0]

In [52]:
sp_names = ['Setosa', 'Versicolor', 'Virginica']
print(sp_names[index_lr], sp_names[index_svm], sp_names[index_dt])

Virginica Virginica Virginica


## Deep learnin model

In [53]:
# 딥러닝 모델 설정
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    Dense(12, input_shape=(4,), activation='relu'),
    Dense(8, input_shape=(4,), activation='relu'),
    Dense(3, activation='softmax')
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 12)                60        
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 104       
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 27        
Total params: 191
Trainable params: 191
Non-trainable params: 0
_________________________________________________________________


Using TensorFlow backend.


In [54]:
# 모델 컴파일 
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [55]:
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath="model/iris_deep.hdf5", 
                               monitor='val_loss', 
                               verbose=1, save_best_only=True)

In [56]:
# One hot encoding
Y_encoded = keras.utils.to_categorical(y_train)
Y_encoded[0], Y_encoded[50], Y_encoded[100]

(array([1., 0., 0.], dtype=float32),
 array([0., 0., 1.], dtype=float32),
 array([0., 0., 1.], dtype=float32))

In [57]:
# 모델 학습
model.fit(x_train, Y_encoded, epochs=100, batch_size=30, verbose=2,
          validation_split=0.2, callbacks=[checkpointer])

Train on 96 samples, validate on 24 samples
Epoch 1/100
 - 0s - loss: 2.9410 - accuracy: 0.0000e+00 - val_loss: 2.9917 - val_accuracy: 0.0000e+00

Epoch 00001: val_loss improved from inf to 2.99172, saving model to model/iris_deep.hdf5
Epoch 2/100
 - 0s - loss: 2.7963 - accuracy: 0.0000e+00 - val_loss: 2.8303 - val_accuracy: 0.0000e+00

Epoch 00002: val_loss improved from 2.99172 to 2.83032, saving model to model/iris_deep.hdf5
Epoch 3/100
 - 0s - loss: 2.6637 - accuracy: 0.0000e+00 - val_loss: 2.6779 - val_accuracy: 0.0000e+00

Epoch 00003: val_loss improved from 2.83032 to 2.67791, saving model to model/iris_deep.hdf5
Epoch 4/100
 - 0s - loss: 2.5403 - accuracy: 0.0000e+00 - val_loss: 2.5439 - val_accuracy: 0.0000e+00

Epoch 00004: val_loss improved from 2.67791 to 2.54385, saving model to model/iris_deep.hdf5
Epoch 5/100
 - 0s - loss: 2.4284 - accuracy: 0.0000e+00 - val_loss: 2.4260 - val_accuracy: 0.0000e+00

Epoch 00005: val_loss improved from 2.54385 to 2.42604, saving model to m

 - 0s - loss: 0.9365 - accuracy: 0.7500 - val_loss: 0.9142 - val_accuracy: 0.8333

Epoch 00043: val_loss improved from 0.92848 to 0.91425, saving model to model/iris_deep.hdf5
Epoch 44/100
 - 0s - loss: 0.9210 - accuracy: 0.7812 - val_loss: 0.9000 - val_accuracy: 0.8333

Epoch 00044: val_loss improved from 0.91425 to 0.89999, saving model to model/iris_deep.hdf5
Epoch 45/100
 - 0s - loss: 0.9066 - accuracy: 0.8021 - val_loss: 0.8856 - val_accuracy: 0.8333

Epoch 00045: val_loss improved from 0.89999 to 0.88559, saving model to model/iris_deep.hdf5
Epoch 46/100
 - 0s - loss: 0.8930 - accuracy: 0.8125 - val_loss: 0.8709 - val_accuracy: 0.8333

Epoch 00046: val_loss improved from 0.88559 to 0.87090, saving model to model/iris_deep.hdf5
Epoch 47/100
 - 0s - loss: 0.8802 - accuracy: 0.8021 - val_loss: 0.8540 - val_accuracy: 0.8333

Epoch 00047: val_loss improved from 0.87090 to 0.85401, saving model to model/iris_deep.hdf5
Epoch 48/100
 - 0s - loss: 0.8685 - accuracy: 0.7500 - val_loss: 0.8


Epoch 00086: val_loss improved from 0.54130 to 0.53719, saving model to model/iris_deep.hdf5
Epoch 87/100
 - 0s - loss: 0.5511 - accuracy: 0.8750 - val_loss: 0.5328 - val_accuracy: 0.8333

Epoch 00087: val_loss improved from 0.53719 to 0.53279, saving model to model/iris_deep.hdf5
Epoch 88/100
 - 0s - loss: 0.5463 - accuracy: 0.8750 - val_loss: 0.5296 - val_accuracy: 0.8333

Epoch 00088: val_loss improved from 0.53279 to 0.52961, saving model to model/iris_deep.hdf5
Epoch 89/100
 - 0s - loss: 0.5417 - accuracy: 0.8958 - val_loss: 0.5270 - val_accuracy: 0.9167

Epoch 00089: val_loss improved from 0.52961 to 0.52696, saving model to model/iris_deep.hdf5
Epoch 90/100
 - 0s - loss: 0.5381 - accuracy: 0.8958 - val_loss: 0.5265 - val_accuracy: 0.9167

Epoch 00090: val_loss improved from 0.52696 to 0.52647, saving model to model/iris_deep.hdf5
Epoch 91/100
 - 0s - loss: 0.5331 - accuracy: 0.9062 - val_loss: 0.5260 - val_accuracy: 1.0000

Epoch 00091: val_loss improved from 0.52647 to 0.52597

<keras.callbacks.callbacks.History at 0x2c71201ec08>

In [58]:
from keras.models import load_model
model_deep = load_model("model/iris_deep.hdf5")

In [59]:
model.evaluate(x_test, keras.utils.to_categorical(y_test))[1]



0.9333333373069763

In [60]:
model_deep.predict_classes(test_data)[0]

2

In [61]:
model_deep.predict(test_data)

array([[0.0258715 , 0.37937486, 0.5947537 ]], dtype=float32)

In [62]:
np.argmax(model_deep.predict(test_data)[0])

2

In [29]:
#iris_deep = model_deep.predict_classes(test_data)[0]

In [30]:
#print(sp_names[iris_deep])