In [0]:
from sklearn import datasets
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import LabelBinarizer
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [0]:
encoder = LabelBinarizer()
seed = 42
 
iris = datasets.load_iris()
iris_data_df = pd.DataFrame(data=iris.data, columns=iris.feature_names,
                       dtype=np.float32)
target = encoder.fit_transform(iris.target)
iris_target_df = pd.DataFrame(data=target, columns=iris.target_names) 

In [0]:
X_train,X_test,y_train,y_test = train_test_split(iris_data_df,
                                                 iris_target_df,
                                                 test_size=0.25,
                                                 random_state=seed)

In [0]:
scaler = MinMaxScaler(feature_range=(0,1))
 
X_train = pd.DataFrame(scaler.fit_transform(X_train),
                               columns=X_train.columns,
                               index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test),
                           columns=X_test.columns,
                           index=X_test.index)

#NN with 3 layers 

In [0]:
def model():
    """build the Keras model callback"""
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation='tanh', name='layer_1'))
    model.add(Dense(10, activation='tanh', name='layer_2'))
    model.add(Dense(10, activation='tanh', name='layer_3'))
    model.add(Dense(3, activation='softmax', name='output_layer'))
     
    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=['accuracy'])
    return model

In [0]:
estimator = KerasClassifier(
        build_fn=model,
        epochs=200, batch_size=20,
        verbose=2)

In [90]:
kfold = KFold(n_splits=5, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X_train, y_train, cv=kfold)
print("Model Performance: mean: %.2f%% std: (%.2f%%)" % (results.mean()*100, results.std()*100))


Epoch 1/200
 - 1s - loss: 1.2123 - acc: 0.3596
Epoch 2/200
 - 0s - loss: 1.1889 - acc: 0.4382
Epoch 3/200
 - 0s - loss: 1.1672 - acc: 0.4382
Epoch 4/200
 - 0s - loss: 1.1474 - acc: 0.3371
Epoch 5/200
 - 0s - loss: 1.1269 - acc: 0.3371
Epoch 6/200
 - 0s - loss: 1.1076 - acc: 0.3483
Epoch 7/200
 - 0s - loss: 1.0877 - acc: 0.3596
Epoch 8/200
 - 0s - loss: 1.0695 - acc: 0.3933
Epoch 9/200
 - 0s - loss: 1.0499 - acc: 0.4382
Epoch 10/200
 - 0s - loss: 1.0301 - acc: 0.5169
Epoch 11/200
 - 0s - loss: 1.0105 - acc: 0.6404
Epoch 12/200
 - 0s - loss: 0.9894 - acc: 0.6742
Epoch 13/200
 - 0s - loss: 0.9679 - acc: 0.6854
Epoch 14/200
 - 0s - loss: 0.9470 - acc: 0.6854
Epoch 15/200
 - 0s - loss: 0.9237 - acc: 0.6854
Epoch 16/200
 - 0s - loss: 0.9018 - acc: 0.6854
Epoch 17/200
 - 0s - loss: 0.8772 - acc: 0.6854
Epoch 18/200
 - 0s - loss: 0.8535 - acc: 0.6854
Epoch 19/200
 - 0s - loss: 0.8299 - acc: 0.6854
Epoch 20/200
 - 0s - loss: 0.8055 - acc: 0.6854
Epoch 21/200
 - 0s - loss: 0.7820 - acc: 0.6854
E

In [91]:
model = model()
model.fit(
       X_train,
       y_train,
       epochs=200,
       shuffle=True, # shuffle data randomly.
       #NNs perform best on randomly shuffled data
       verbose=2 # this will tell keras to print more detailed info
       # during trainnig to know what is going on
       )

Epoch 1/200
 - 1s - loss: 1.0690 - acc: 0.3482
Epoch 2/200
 - 0s - loss: 1.0422 - acc: 0.5089
Epoch 3/200
 - 0s - loss: 1.0236 - acc: 0.6339
Epoch 4/200
 - 0s - loss: 1.0058 - acc: 0.6607
Epoch 5/200
 - 0s - loss: 0.9901 - acc: 0.6607
Epoch 6/200
 - 0s - loss: 0.9762 - acc: 0.6607
Epoch 7/200
 - 0s - loss: 0.9633 - acc: 0.6786
Epoch 8/200
 - 0s - loss: 0.9496 - acc: 0.6964
Epoch 9/200
 - 0s - loss: 0.9354 - acc: 0.6964
Epoch 10/200
 - 0s - loss: 0.9203 - acc: 0.7054
Epoch 11/200
 - 0s - loss: 0.9061 - acc: 0.6964
Epoch 12/200
 - 0s - loss: 0.8891 - acc: 0.6964
Epoch 13/200
 - 0s - loss: 0.8731 - acc: 0.7054
Epoch 14/200
 - 0s - loss: 0.8572 - acc: 0.7054
Epoch 15/200
 - 0s - loss: 0.8400 - acc: 0.7054
Epoch 16/200
 - 0s - loss: 0.8228 - acc: 0.7143
Epoch 17/200
 - 0s - loss: 0.8065 - acc: 0.6964
Epoch 18/200
 - 0s - loss: 0.7886 - acc: 0.6964
Epoch 19/200
 - 0s - loss: 0.7712 - acc: 0.7054
Epoch 20/200
 - 0s - loss: 0.7544 - acc: 0.7232
Epoch 21/200
 - 0s - loss: 0.7373 - acc: 0.7054
E

<keras.callbacks.History at 0x7f5ab24797b8>

In [92]:
#run the test dataset
test_error_rate = model.evaluate(X_test, y_test, verbose=0)
print(
      "{} : {:.2f}%".format(model.metrics_names[1],
              test_error_rate[1]*100))
print(
      "{} : {:.2f}%".format(model.metrics_names[0],
              test_error_rate[0]*100))

acc : 97.37%
loss : 7.06%


In [0]:
predictions = model.predict(X_test)

In [104]:
predictions[:2]

array([[7.1593747e-03, 9.7984147e-01, 1.2999118e-02],
       [9.8688090e-01, 1.3084175e-02, 3.4955126e-05]], dtype=float32)

In [0]:
predictions.round()
#predictions = [round(i) for i in predictions]

In [105]:
from sklearn.metrics import recall_score
print(recall_score(y_test, predictions.round(),average='macro'))

0.9696969696969697


In [35]:
predicted_targets = model.predict_classes(X_test)
true_targets = encoder.inverse_transform(y_test.values)
 
def performance_tracker(actual, expected):
    flowers = {0:'setosa', 1:'versicolor', 2:'virginica'}
    print("Flowers in test set: Setosa={} Versicolor={} Virginica={}".format(
            y_test.setosa.sum(), y_test.versicolor.sum(),
            y_test.virginica.sum()))
    for act,exp in zip(actual, expected):
        if act != exp:
            print("ERROR: {} predicted as {}".format(flowers[exp],
                  flowers[act]))
             
performance_tracker(predicted_targets, true_targets)

Flowers in test set: Setosa=15 Versicolor=11 Virginica=12
ERROR: versicolor predicted as virginica


#NN with one layer

In [0]:
def model1():
    """build the Keras model callback"""
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation='tanh', name='layer_1'))
    model.add(Dense(3, activation='softmax', name='output_layer'))
     
    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=['accuracy'])
    return model

In [0]:
estimator = KerasClassifier(
        build_fn=model1,
        epochs=200, batch_size=20,
        verbose=2)

In [117]:
kfold = KFold(n_splits=5, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X_train, y_train, cv=kfold)
print("Model Performance: mean: %.2f%% std: (%.2f%%)" % (results.mean()*100, results.std()*100))

Epoch 1/200
 - 2s - loss: 1.2920 - acc: 0.0562
Epoch 2/200
 - 0s - loss: 1.2748 - acc: 0.0449
Epoch 3/200
 - 0s - loss: 1.2600 - acc: 0.0337
Epoch 4/200
 - 0s - loss: 1.2440 - acc: 0.0337
Epoch 5/200
 - 0s - loss: 1.2301 - acc: 0.0225
Epoch 6/200
 - 0s - loss: 1.2162 - acc: 0.0112
Epoch 7/200
 - 0s - loss: 1.2038 - acc: 0.0112
Epoch 8/200
 - 0s - loss: 1.1913 - acc: 0.0112
Epoch 9/200
 - 0s - loss: 1.1802 - acc: 0.0000e+00
Epoch 10/200
 - 0s - loss: 1.1700 - acc: 0.0000e+00
Epoch 11/200
 - 0s - loss: 1.1592 - acc: 0.0000e+00
Epoch 12/200
 - 0s - loss: 1.1493 - acc: 0.0112
Epoch 13/200
 - 0s - loss: 1.1407 - acc: 0.0112
Epoch 14/200
 - 0s - loss: 1.1310 - acc: 0.0225
Epoch 15/200
 - 0s - loss: 1.1225 - acc: 0.1011
Epoch 16/200
 - 0s - loss: 1.1138 - acc: 0.2022
Epoch 17/200
 - 0s - loss: 1.1055 - acc: 0.2360
Epoch 18/200
 - 0s - loss: 1.0975 - acc: 0.2697
Epoch 19/200
 - 0s - loss: 1.0894 - acc: 0.3258
Epoch 20/200
 - 0s - loss: 1.0817 - acc: 0.3483
Epoch 21/200
 - 0s - loss: 1.0741 - a

In [118]:
model1 = model1()
model1.fit(
       X_train,
       y_train,
       epochs=200,
       shuffle=True, # shuffle data randomly.
       #NNs perform best on randomly shuffled data
       verbose=2 # this will tell keras to print more detailed info
       # during trainnig to know what is going on
       )

Epoch 1/200
 - 2s - loss: 1.7002 - acc: 0.3482
Epoch 2/200
 - 0s - loss: 1.6736 - acc: 0.3482
Epoch 3/200
 - 0s - loss: 1.6506 - acc: 0.3482
Epoch 4/200
 - 0s - loss: 1.6265 - acc: 0.3482
Epoch 5/200
 - 0s - loss: 1.6043 - acc: 0.3482
Epoch 6/200
 - 0s - loss: 1.5806 - acc: 0.3482
Epoch 7/200
 - 0s - loss: 1.5597 - acc: 0.3482
Epoch 8/200
 - 0s - loss: 1.5373 - acc: 0.3482
Epoch 9/200
 - 0s - loss: 1.5175 - acc: 0.3482
Epoch 10/200
 - 0s - loss: 1.4976 - acc: 0.3482
Epoch 11/200
 - 0s - loss: 1.4773 - acc: 0.3482
Epoch 12/200
 - 0s - loss: 1.4580 - acc: 0.3482
Epoch 13/200
 - 0s - loss: 1.4395 - acc: 0.3482
Epoch 14/200
 - 0s - loss: 1.4220 - acc: 0.3482
Epoch 15/200
 - 0s - loss: 1.4055 - acc: 0.3482
Epoch 16/200
 - 0s - loss: 1.3892 - acc: 0.3482
Epoch 17/200
 - 0s - loss: 1.3743 - acc: 0.3482
Epoch 18/200
 - 0s - loss: 1.3579 - acc: 0.3482
Epoch 19/200
 - 0s - loss: 1.3433 - acc: 0.3393
Epoch 20/200
 - 0s - loss: 1.3296 - acc: 0.3393
Epoch 21/200
 - 0s - loss: 1.3166 - acc: 0.3393
E

<keras.callbacks.History at 0x7f5ab1736fd0>

In [119]:
#run the test dataset
test_error_rate = model1.evaluate(X_test, y_test, verbose=0)
print(
      "{} : {:.2f}%".format(model1.metrics_names[1],
              test_error_rate[1]*100))
print(
      "{} : {:.2f}%".format(model1.metrics_names[0],
              test_error_rate[0]*100))

acc : 94.74%
loss : 45.01%


In [0]:
predictions = model1.predict(X_test)

In [121]:
print(recall_score(y_test, predictions.round(),average='macro'))

0.6666666666666666


#we will use logistic regression

In [0]:
x_train, x_test, y_train, y_test = train_test_split(iris_data_df, iris.target, test_size=0.2, random_state=42)

In [123]:
from sklearn.linear_model import LogisticRegression
#Train the model
model = LogisticRegression()
model.fit(x_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [124]:
#Test the model
from sklearn.metrics import accuracy_score
predictions = model.predict(x_test)
print( accuracy_score(y_test, predictions))

1.0


In [125]:

print(recall_score(y_test, predictions,average='macro'))

1.0


In [46]:
iris_data_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [73]:
predictions[:10]

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1])

In [75]:
y_test[:10]

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1])