Reference Code:
https://machinelearningmastery.com/multi-class-classification-tutorial-keras-deep-learning-library/

In [29]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras import backend as K
from keras.layers import Dense, Dropout, Activation

from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn import datasets

In [2]:
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)
iris = datasets.load_iris()

In [3]:
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
dataframe = pd.read_csv(url,header=None)
dataset = dataframe.values
X = dataset[:,0:4].astype(float)
Y = dataset[:,4]

from sklearn.datasets import load_iris
iris  = load_iris()
df = pd.DataFrame(iris.data)
target = pd.DataFrame(iris.target)

In [4]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

In [20]:
# define baseline model
def baseline_model_logistic():
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(16, input_dim=8, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(3, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# define baseline model
def baseline_model_svm():
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(16, input_dim=8, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(3, activation='softmax'))
    # Compile model
    # model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.compile(loss='categorical_hinge', optimizer='adam', metrics=['accuracy'])
    return model
estimator_b1 = KerasClassifier(build_fn=baseline_model_logistic, epochs=200, batch_size=5, verbose=0)
estimator_b2 = KerasClassifier(build_fn=baseline_model_svm, epochs=200, batch_size=5, verbose=0)
kfold_b1 = KFold(n_splits=10, shuffle=True, random_state=seed)
results_b1 = cross_val_score(estimator_b1, X, dummy_y, cv=kfold_b1)
print("Baseline_logistic: %.2f%% (%.2f%%)" % (results_b1.mean()*100, results_b1.std()*100))
kfold_b2 = KFold(n_splits=10, shuffle=True, random_state=seed)
results_b2 = cross_val_score(estimator_b2, X, dummy_y, cv=kfold_b2)
print("Baseline_svm: %.2f%% (%.2f%%)" % (results_b2.mean()*100, results_b2.std()*100))

Baseline_logistic: 96.00% (5.33%)
Baseline_svm: 90.00% (16.67%)


Add a Mallows penalty - to coerc ordinality in the predicted classification probabilities

In [31]:
# WIP
# custom_loss = loss + penalty 
# penalty is Mallow's loss. d_ij = C|i-j|, where i and j are two labels
# loss can be any loss suitable for classification such as cross_entropy

def logistic_with_mallows(y_true, y_pred, alpha=100):
    loss = K.categorical_crossentropy(y_true, y_pred)
    penalty = K.mean(K.square( K.cumsum(y_pred - y_true)), axis=-1)
    return loss + (alpha*penalty)

def ordinal_model_logistic():
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(16, input_dim=8, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(3, activation='softmax'))
    # Compile model
    model.compile(loss=lambda y_true,y_pred: logistic_with_mallows(y_true,y_pred,1), optimizer='adam', metrics=['accuracy'])
    return model
estimator_e1 = KerasClassifier(build_fn=ordinal_model_logistic, epochs=200, batch_size=10, verbose=0)
kfold_e1 = KFold(n_splits=10, shuffle=True, random_state=seed)
results_e1 = cross_val_score(estimator_e1, X, dummy_y, cv=kfold_e1)
print("Baseline_ord: %.2f%% (%.2f%%)" % (results_e1.mean()*100, results_e1.std()*100))

Baseline_ord: 96.00% (6.80%)


In [22]:
C = 1.0  # SVM regularization parameter
from sklearn import svm
# SVC with linear kernel
svc = svm.SVC(kernel='linear', C=C)
results_a1 = cross_val_score(svc, X, Y, cv=kfold_b1)
print("Baseline_svm: %.2f%% (%.2f%%)" % (results_a1.mean()*100, results_a1.std()*100))
lin_svc = svm.LinearSVC(C=C)
results_a1 = cross_val_score(lin_svc, X, Y, cv=kfold_b1)
print("Baseline_Lin_svm: %.2f%% (%.2f%%)" % (results_a1.mean()*100, results_a1.std()*100))
rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C)
results_a1 = cross_val_score(rbf_svc, X, Y, cv=kfold_b1)
print("Baseline_rbf_svm: %.2f%% (%.2f%%)" % (results_a1.mean()*100, results_a1.std()*100))
poly_svc = svm.SVC(kernel='poly', degree=3, C=C)
results_a1 = cross_val_score(poly_svc, X, Y, cv=kfold_b1)
print("Baseline_poly_svm: %.2f%% (%.2f%%)" % (results_a1.mean()*100, results_a1.std()*100))

Baseline_svm: 97.33% (3.27%)
Baseline_Lin_svm: 93.33% (8.94%)
Baseline_rbf_svm: 97.33% (4.42%)
Baseline_poly_svm: 94.00% (6.96%)


In [23]:
C = 1.0  # SVM regularization parameter
from sklearn import svm
# SVC with linear kernel
svc = svm.SVC(kernel='linear', C=C).fit(X, Y)
# LinearSVC (linear kernel)
lin_svc = svm.LinearSVC(C=C).fit(X, Y)
# SVC with RBF kernel
rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X, Y)
# SVC with polynomial (degree 3) kernel
poly_svc = svm.SVC(kernel='poly', degree=3, C=C).fit(X, Y)
print("linear SVC: " + str(svc.score(X, Y)))
print("rbf SVC: " + str(rbf_svc.score(X, Y)))
print("poly SVC: " + str(poly_svc.score(X, Y)))

linear SVC: 0.993333333333
rbf SVC: 0.98
poly SVC: 0.98


In [40]:
from sklearn.datasets import load_iris
iris  = load_iris()
df = pd.DataFrame(iris.data)
target = pd.DataFrame(iris.target)

# standardize features (rescale to -1 to 1)
df=(df-df.min())/(df.max()-df.min())
df = (2*df-1)
print(df.describe())

                0           1           2           3
count  150.000000  150.000000  150.000000  150.000000
mean    -0.142593   -0.121667   -0.064859   -0.084444
std      0.460037    0.361329    0.598109    0.635967
min     -1.000000   -1.000000   -1.000000   -1.000000
25%     -0.555556   -0.333333   -0.796610   -0.833333
50%     -0.166667   -0.166667    0.135593    0.000000
75%      0.166667    0.083333    0.389831    0.416667
max      1.000000    1.000000    1.000000    1.000000
