In [14]:
# NN: https://machinelearningmastery.com/multi-class-classification-tutorial-keras-deep-learning-library/

# Setup and imports
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

# Shuffle the rows (frac=1 keeps all of them); lower `frac` below 1 to crop the data down for quick testing.
from numpy.random import RandomState
# Fixed seed so the row shuffle below is identical on every Restart & Run All.
# (This only controls the pandas shuffle, not any other source of randomness.)
SEED = 42
rng = RandomState(SEED)

# frac=1 keeps every row and only randomises their order. Both calls draw from
# the same generator, so the order of these two loads affects the result.
train = pd.read_csv("../../data/fetal_health_train.csv").sample(frac=1, random_state=rng)
print(train.shape)
test = pd.read_csv("../../data/fetal_health_test.csv").sample(frac=1, random_state=rng)
print(test.shape)

# Number of input features: every column except the single result column.
num_attributes = train.shape[1] - 1

(1701, 22)
(425, 22)


In [15]:
# Separate the train/test frames into inputs (X) and the 'fetal_health' output (y)
X_train, y_train = train.drop(columns='fetal_health'), train['fetal_health']
X_test, y_test = test.drop(columns='fetal_health'), test['fetal_health']

# Normalizing the data -- currently DISABLED.
# Everything between the triple quotes is a bare string literal, not executed
# code: no scaler is created and X_train / X_test are left unscaled. Because the
# string is the last expression in the cell, the notebook echoes it back as the
# cell's output.
# NOTE(review): if this is re-enabled, the last line should call
# sc.transform(X_test) instead of sc.fit_transform(X_test), so the test set is
# scaled with the statistics learned from the training set rather than its own.
'''
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)
'''

'\nfrom sklearn.preprocessing import StandardScaler\nsc = StandardScaler()\nX_train = sc.fit_transform(X_train)\nX_test = sc.fit_transform(X_test)\n'

In [16]:
# encode class values as one-hot vectors
def encode(data, encoder=None):
    """One-hot encode a 1-D collection of class labels.

    Parameters
    ----------
    data : array-like of shape (n_samples,)
        Raw class labels.
    encoder : LabelEncoder, optional
        An already-fitted encoder. When supplied, labels are mapped with it, so
        several splits encoded with the same encoder share one label -> column
        mapping. When omitted, a new encoder is fitted on ``data`` itself
        (the original behaviour).

    Returns
    -------
    numpy.ndarray
        One-hot matrix with one row per sample and one column per class known
        to the encoder.

    Raises
    ------
    ValueError
        Raised by ``LabelEncoder.transform`` when ``data`` contains a label the
        supplied encoder was not fitted on.
    """
    if encoder is None:
        # Backward-compatible path: learn the label set from `data` itself.
        encoder = LabelEncoder()
        encoder.fit(data)
    # convert labels to integer class indices
    encoded = encoder.transform(data)
    # convert integers to dummy variables (i.e. one hot encoded); the width is
    # pinned to the encoder's class count so a split that happens to miss the
    # highest class still gets one column per known class.
    dummy = np_utils.to_categorical(encoded, num_classes=len(encoder.classes_))
    return dummy

# Learn the label -> index mapping once, from the training labels, and reuse it
# for the test labels so both targets have the same columns in the same order.
# NOTE: this cell overwrites y_train / y_test with their one-hot form; re-run
# the X/y split cell before re-running this one.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
y_train = encode(y_train, label_encoder)
y_test = encode(y_test, label_encoder)

In [17]:
# define baseline model
def baseline_model():
    """Build and compile the baseline classifier network.

    The network is a Sequential stack of two Dense layers: a hidden layer of
    8 ReLU units that takes ``num_attributes`` inputs (read from the
    module-level variable), followed by a 3-unit softmax output layer. It is
    compiled with categorical cross-entropy loss, the Adam optimizer and the
    accuracy metric.

    Returns
    -------
    The compiled Sequential model.
    """
    hidden = Dense(8, input_dim=num_attributes, activation='relu')
    output = Dense(3, activation='softmax')
    network = Sequential([hidden, output])
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return network

In [18]:
# training the model
# Training settings shared by the direct fit below and the scikit-learn wrapper.
fit_params = dict(epochs=10, batch_size=5, verbose=1)

# Train the network that the later cells use for prediction. Without this step
# `model` would still carry its untrained initial weights when predict() runs.
model = baseline_model()
history = model.fit(X_train, y_train, **fit_params)

# The wrapper must be given the builder *function*, not an already-built model:
# it calls build_fn itself to create a fresh network each time it is fitted.
# It is left unfitted here, ready for scikit-learn utilities such as
# cross_val_score.
estimator = KerasClassifier(build_fn=baseline_model, **fit_params)

In [26]:
# predict
import numpy as np

# Per-class scores from the softmax output layer, one row per row of X_test.
# Kept as raw scores in `y_pred` so later cells can still use them.
y_pred = model.predict(X_test)

# One-hot vector of the highest-scoring class for every row. Rounding the
# scores instead would yield an all-zero row whenever no class exceeds 0.5
# (e.g. [0.4, 0.35, 0.25]), i.e. no predicted class at all.
round_preds = np.eye(y_pred.shape[1])[np.argmax(y_pred, axis=1)]
print(round_preds)

[[0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 ...
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [27]:
# get some metrics
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score 

print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
print('Accuracy ' + str(accuracy_score(y_test, y_pred)))

# plot predictions vs expected
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

ValueError: Classification metrics can't handle a mix of multilabel-indicator and continuous-multioutput targets