In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import plot_confusion_matrix, confusion_matrix, accuracy_score
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegressionCV

In [2]:
df = pd.read_csv('lab4.csv')
df.shape

(10868, 261)

In [353]:
y = df['Class']
y.shape

(10868,)

In [354]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)
# convert integers to dummy variables (i.e. one hot encoded)
y = np_utils.to_categorical(encoded_Y)
y

array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [355]:
X = df.drop(['ID', 'Class', 'Unnamed: 0'], axis='columns')
Y = df['Class']
X.shape

(10868, 258)

In [356]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42, stratify=y)

### Applying Keras Model

In [3]:
# define the keras model
model = Sequential()
model.add(Dense(350, input_dim=258, activation='relu'))
model.add(Dense(250, activation='relu'))
model.add(Dense(150, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(9, activation='softmax'))

In [4]:
# compile the keras model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [370]:
# fit the keras model on the dataset
model.fit(X_train, y_train, epochs=150, batch_size=250, verbose=0, validation_split=0.2)

<tensorflow.python.keras.callbacks.History at 0x7f8ac6902350>

In [371]:
# evaluate the keras model on train set
_, accuracy = model.evaluate(X_train, y_train)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 96.48


In [372]:
# evaluate the keras model on test set
_, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 94.76


In [373]:
predictions = model.predict_classes(X)
cm = confusion_matrix(y_true=Y, y_pred=predictions)
cm

array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0],
       [1512,    2,    0,    1,    0,    5,    9,    9,    3,    0],
       [  21, 2409,    1,    1,    0,    6,    1,   27,   12,    0],
       [   0,    0, 2941,    0,    1,    0,    0,    0,    0,    0],
       [  14,    4,    9,  427,    0,   18,    2,    0,    1,    0],
       [   5,    0,    0,    0,   25,    6,    1,    5,    0,    0],
       [  23,    5,    4,   28,    2,  654,    2,   26,    7,    0],
       [   4,    0,   10,    0,    0,    0,  383,    1,    0,    0],
       [  45,    6,    9,   11,   12,   15,    9, 1114,    7,    0],
       [  13,    7,    0,    0,    0,    1,    1,    8,  983,    0]])

In [374]:
pred = model.predict(X_test)
pred.argmax(axis=-1)

array([2, 2, 7, ..., 0, 1, 1])

In [375]:
X_train_dt, X_test_dt, y_train_dt, y_test_dt = train_test_split(X, Y, test_size=0.20, random_state=42, stratify=Y)

### Decision Tree Classifier

In [376]:
dtree_model = DecisionTreeClassifier().fit(X_train_dt, y_train_dt)

In [377]:
dtree_model.score(X_train_dt,y_train_dt)

1.0

In [378]:
dtree_predictions = dtree_model.predict(X_test_dt) 
dtree_predictions

array([2, 8, 7, ..., 8, 1, 6])

In [379]:
dtree_model.score(X_test_dt,y_test_dt)

0.9599816007359706

In [384]:
cm = confusion_matrix(y_test_dt, dtree_predictions)
cm

array([[293,   3,   0,   1,   1,   4,   0,   4,   2],
       [  6, 475,   0,   3,   0,   4,   2,   4,   2],
       [  0,   0, 588,   0,   0,   0,   0,   0,   0],
       [  0,   1,   0,  89,   0,   4,   0,   1,   0],
       [  1,   1,   0,   0,   5,   1,   0,   0,   0],
       [  3,   1,   0,   1,   2, 136,   0,   7,   0],
       [  2,   0,   0,   0,   0,   1,  76,   0,   1],
       [  2,   3,   0,   1,   0,   2,   1, 235,   2],
       [  4,   2,   2,   1,   0,   3,   0,   1, 190]])