In [3]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [1]:
import pandas as pd
import joblib

# Read the CSV and Perform Basic Data Cleaning

In [2]:
# Load the prepared MBTI dataset and preview its first rows.
mbti_df = pd.read_csv(filepath_or_buffer="Resources/mbti_final.csv")
mbti_df.head(n=5)

Unnamed: 0,type,posts,description,i_e,n_s,f_t,j_p,sentiment_score,words_per_comment,squared_total_words,...,adjectives,adjective_count,verbs,verb_count,determiners,determiner_count,interjections,interjection_count,prepositions,preposition_count
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...,Introvert Intuition Feeling Judging,I,N,F,J,0.0471,11.12,22.24,...,"['intj', 'life-changing', 'most', 'last', 'nex...",51,"['top', 'has', 'been', 'posted', 'committing',...",90,"['the', 'the', 'the', 'a', 'the', 'every', 'th...",52,[],0,"['in', 'On', 'for', 'of', 'on', 'before', 'in'...",78
1,ENTP,'I'm finding the lack of me in these posts ver...,Extrovert Intuition Thinking Perceiving,E,N,T,P,0.388976,23.4,46.8,...,"['same', 'missionary', 'new', 'theory.Hello', ...",96,"[""'m"", 'finding', 'be', 'boring', ""'s"", 'are',...",257,"['the', 'these', 'the', 'an', 'all', 'the', 't...",90,[],0,"['of', 'in', 'if', 'in', 'For', 'in', 'Than', ...",136
2,INTP,'Good one _____ https://www.youtube.com/wat...,Introvert Intuition Thinking Perceiving,I,N,T,P,0.620244,16.72,33.44,...,"['positive', 'best', 'amazing', 'more', 'So-ca...",82,"['say', 'know', ""'s"", 'being', 'be', ""'s"", 'be...",166,"['that', 'an', 'a', 'any', 'All', 'the', 'that...",52,"['yes', 'No', 'Oh', 'Yessss', 'Oh']",5,"['that', 'If', 'than', 'in', 'in', 'at', 'for'...",91
3,INTJ,"'Dear INTP, I enjoyed our conversation the o...",Introvert Intuition Thinking Judging,I,N,T,J,0.807546,21.28,42.56,...,"[""'Dear"", 'other', 'social', 'arbitrary', 'oth...",93,"['enjoyed', 'gabbing', 'being', 'created', 'hu...",233,"['the', 'the', 'the', 'the', 'every', 'no', 'A...",94,[],0,"['about', 'of', 'of', 'in', 'on', 'like', 'in'...",124
4,ENTJ,'You're fired.|||That's another silly misconce...,Extrovert Intuition Thinking Judging,E,N,T,J,0.861824,19.34,38.68,...,"['silly', 'super-duper-long-ass', 'permanent',...",87,"[""'re"", ""'s"", 'approaching', 'is', 'is', 'goin...",229,"['another', 'the', 'a', 'the', 'that', 'that',...",84,"['Oh', 'Yes']",2,"['That', 'with', 'on', 'on', 'about', 'If', 'f...",84


# Select features (columns)

In [3]:
# List every column so the numeric feature columns can be picked out below.
mbti_df.columns

Index(['type', 'posts', 'description', 'i_e', 'n_s', 'f_t', 'j_p',
       'sentiment_score', 'words_per_comment', 'squared_total_words',
       'word_count_variance_per_comment', 'interrobangs_per_comment',
       'Tagged Posts PosTag', 'nouns', 'noun_count', 'adjectives',
       'adjective_count', 'verbs', 'verb_count', 'determiners',
       'determiner_count', 'interjections', 'interjection_count',
       'prepositions', 'preposition_count'],
      dtype='object')

In [4]:
# Pick the numeric feature columns; these are the X values for every model below.
selected_features = mbti_df[
    [
        'sentiment_score',
        'words_per_comment',
        'squared_total_words',
        'word_count_variance_per_comment',
        'interrobangs_per_comment',
        'noun_count',
        'adjective_count',
        'verb_count',
        'determiner_count',
        'interjection_count',
        'preposition_count',
    ]
]

# Create a Train Test Split

Use `type` (the full four-letter MBTI type) for the y values

In [5]:
# Target for the first model: the full four-letter MBTI type (16 classes).
target = mbti_df["type"]
# Display names for the 16 classes, listed alphabetically so that position i
# lines up with encoded label i (LabelEncoder assigns codes in sorted order).
target_names = ["ENFJ", "ENFP", "ENTJ", "ENTP", "ESFJ", "ESFP", "ESTJ", "ESTP",
                "INFJ", "INFP", "INTJ", "INTP", "ISFJ", "ISFP", "ISTJ", "ISTP"]

In [6]:
from sklearn.model_selection import train_test_split
# Hold out a test split (default split size); random_state=42 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=42)

In [7]:
# Preview the first rows of the training features.
X_train.head()

Unnamed: 0,sentiment_score,words_per_comment,squared_total_words,word_count_variance_per_comment,interrobangs_per_comment,noun_count,adjective_count,verb_count,determiner_count,interjection_count,preposition_count
2706,571.406128,28.22,56.44,127.84,1.14,310,123,305,99,0,124
2521,534.330686,20.92,41.84,186.37,0.24,296,80,202,81,0,115
4192,891.721612,25.9,51.8,113.7856,0.74,213,113,318,68,1,132
6296,1363.673841,30.04,60.08,110.109954,0.16,291,92,348,107,3,167
3399,717.557672,28.98,57.96,131.2784,0.6,267,100,334,126,7,152


# Pre-processing

Scale the data using the `StandardScaler`, then label-encode and one-hot encode the target

In [8]:
# Standardise the features. The scaler is fit on the training split only and
# then applied to both splits.
from sklearn.preprocessing import StandardScaler

X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
# Map the string labels to integer codes; the mapping is learned from the
# training labels and reused for the test labels.
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [10]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer labels for use with the categorical cross-entropy loss.
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [11]:
# Number of input features (width of the scaled training matrix).
X_train_scaled.shape[1]

11

In [12]:
# Number of classes (width of the one-hot label matrix).
y_train_categorical.shape[1]

16

# Train the Model



In [13]:
from keras.models import Sequential
from keras.layers import Dense

# Two hidden ReLU layers followed by a softmax over the classes.
# The input and output widths are read from the prepared arrays instead of
# being hard-coded, so the cell keeps working if the features or classes change.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=y_train_categorical.shape[1], activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train on the scaled features and one-hot labels.
model.fit(X_train_scaled, y_train_categorical, epochs=100, shuffle=True, verbose=2)

Using TensorFlow backend.


Epoch 1/100
 - 1s - loss: 2.3386 - accuracy: 0.1990
Epoch 2/100
 - 0s - loss: 2.2467 - accuracy: 0.2272
Epoch 3/100
 - 0s - loss: 2.2293 - accuracy: 0.2264
Epoch 4/100
 - 0s - loss: 2.2154 - accuracy: 0.2344
Epoch 5/100
 - 0s - loss: 2.2086 - accuracy: 0.2355
Epoch 6/100
 - 0s - loss: 2.2001 - accuracy: 0.2384
Epoch 7/100
 - 0s - loss: 2.1948 - accuracy: 0.2369
Epoch 8/100
 - 0s - loss: 2.1871 - accuracy: 0.2409
Epoch 9/100
 - 1s - loss: 2.1819 - accuracy: 0.2458
Epoch 10/100
 - 1s - loss: 2.1793 - accuracy: 0.2456
Epoch 11/100
 - 1s - loss: 2.1720 - accuracy: 0.2472
Epoch 12/100
 - 1s - loss: 2.1695 - accuracy: 0.2464
Epoch 13/100
 - 1s - loss: 2.1622 - accuracy: 0.2461
Epoch 14/100
 - 1s - loss: 2.1579 - accuracy: 0.2479
Epoch 15/100
 - 1s - loss: 2.1545 - accuracy: 0.2544
Epoch 16/100
 - 1s - loss: 2.1477 - accuracy: 0.2551
Epoch 17/100
 - 1s - loss: 2.1409 - accuracy: 0.2596
Epoch 18/100
 - 1s - loss: 2.1353 - accuracy: 0.2608
Epoch 19/100
 - 1s - loss: 2.1301 - accuracy: 0.2639
Ep

<keras.callbacks.callbacks.History at 0x13034fdd8>

In [14]:
# Score the trained network on the held-out test split.
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Loss: 2.6908737343541484, Accuracy: 0.17242969572544098


# Re-test for I vs. E

In [15]:
# Binary target: the Introvert/Extrovert letter of the type.
target = mbti_df["i_e"]
# LabelEncoder assigns codes in sorted order ('E' before 'I'), so Extrovert is class 0.
target_names = ["Extrovert","Introvert"]

X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=42)

# Fit the scaler on the training split only, then apply it to both splits.
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Same architecture as the 16-class model; layer widths follow the data.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=y_train_categorical.shape[1], activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.fit(X_train_scaled, y_train_categorical, epochs=100, shuffle=True, verbose=2)

model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
print(f"I vs. E - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
 - 1s - loss: 0.5530 - accuracy: 0.7605
Epoch 2/100
 - 1s - loss: 0.5375 - accuracy: 0.7638
Epoch 3/100
 - 1s - loss: 0.5345 - accuracy: 0.7659
Epoch 4/100
 - 1s - loss: 0.5326 - accuracy: 0.7667
Epoch 5/100
 - 1s - loss: 0.5304 - accuracy: 0.7664
Epoch 6/100
 - 1s - loss: 0.5283 - accuracy: 0.7664
Epoch 7/100
 - 1s - loss: 0.5265 - accuracy: 0.7674
Epoch 8/100
 - 1s - loss: 0.5250 - accuracy: 0.7693
Epoch 9/100
 - 1s - loss: 0.5247 - accuracy: 0.7673
Epoch 10/100
 - 1s - loss: 0.5235 - accuracy: 0.7682
Epoch 11/100
 - 1s - loss: 0.5199 - accuracy: 0.7681
Epoch 12/100
 - 1s - loss: 0.5205 - accuracy: 0.7673
Epoch 13/100
 - 1s - loss: 0.5174 - accuracy: 0.7682
Epoch 14/100
 - 1s - loss: 0.5142 - accuracy: 0.7714
Epoch 15/100
 - 1s - loss: 0.5150 - accuracy: 0.7684
Epoch 16/100
 - 1s - loss: 0.5117 - accuracy: 0.7701
Epoch 17/100
 - 1s - loss: 0.5095 - accuracy: 0.7687
Epoch 18/100
 - 1s - loss: 0.5052 - accuracy: 0.7717
Epoch 19/100
 - 1s - loss: 0.5037 - accuracy: 0.7728
Ep

# Re-test for N vs. S

In [16]:
# Binary target: the Intuition/Sensing letter of the type.
target = mbti_df["n_s"]
target_names = ["Intuition","Sensing"]

X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=42)

# Fit the scaler on the training split only, then apply it to both splits.
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Same architecture as the 16-class model; layer widths follow the data.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=y_train_categorical.shape[1], activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.fit(X_train_scaled, y_train_categorical, epochs=100, shuffle=True, verbose=2)

model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
print(f"N vs. S - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
 - 1s - loss: 0.4296 - accuracy: 0.8515
Epoch 2/100
 - 1s - loss: 0.4027 - accuracy: 0.8632
Epoch 3/100
 - 1s - loss: 0.3987 - accuracy: 0.8632
Epoch 4/100
 - 1s - loss: 0.3956 - accuracy: 0.8629
Epoch 5/100
 - 1s - loss: 0.3932 - accuracy: 0.8630
Epoch 6/100
 - 1s - loss: 0.3911 - accuracy: 0.8635
Epoch 7/100
 - 1s - loss: 0.3916 - accuracy: 0.8630
Epoch 8/100
 - 1s - loss: 0.3885 - accuracy: 0.8634
Epoch 9/100
 - 1s - loss: 0.3889 - accuracy: 0.8624
Epoch 10/100
 - 1s - loss: 0.3872 - accuracy: 0.8630
Epoch 11/100
 - 1s - loss: 0.3854 - accuracy: 0.8634
Epoch 12/100
 - 1s - loss: 0.3844 - accuracy: 0.8637
Epoch 13/100
 - 1s - loss: 0.3830 - accuracy: 0.8632
Epoch 14/100
 - 1s - loss: 0.3818 - accuracy: 0.8635
Epoch 15/100
 - 1s - loss: 0.3808 - accuracy: 0.8640
Epoch 16/100
 - 1s - loss: 0.3782 - accuracy: 0.8630
Epoch 17/100
 - 1s - loss: 0.3775 - accuracy: 0.8637
Epoch 18/100
 - 1s - loss: 0.3724 - accuracy: 0.8638
Epoch 19/100
 - 1s - loss: 0.3738 - accuracy: 0.8641
Ep

# Re-test for F vs. T

In [17]:
# Binary target: the Feeling/Thinking letter of the type.
target = mbti_df["f_t"]
target_names = ["Feeling","Thinking"]

X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=42)

# Fit the scaler on the training split only, then apply it to both splits.
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Same architecture as the 16-class model; layer widths follow the data.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=y_train_categorical.shape[1], activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.fit(X_train_scaled, y_train_categorical, epochs=100, shuffle=True, verbose=2)

model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
# The label names the axis actually modelled here (the f_t column).
print(f"F vs. T - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
 - 1s - loss: 0.6678 - accuracy: 0.5870
Epoch 2/100
 - 1s - loss: 0.6561 - accuracy: 0.6059
Epoch 3/100
 - 1s - loss: 0.6516 - accuracy: 0.6125
Epoch 4/100
 - 1s - loss: 0.6494 - accuracy: 0.6153
Epoch 5/100
 - 1s - loss: 0.6458 - accuracy: 0.6190
Epoch 6/100
 - 1s - loss: 0.6454 - accuracy: 0.6176
Epoch 7/100
 - 1s - loss: 0.6415 - accuracy: 0.6227
Epoch 8/100
 - 1s - loss: 0.6405 - accuracy: 0.6283
Epoch 9/100
 - 1s - loss: 0.6377 - accuracy: 0.6293
Epoch 10/100
 - 1s - loss: 0.6368 - accuracy: 0.6279
Epoch 11/100
 - 1s - loss: 0.6337 - accuracy: 0.6334
Epoch 12/100
 - 1s - loss: 0.6340 - accuracy: 0.6323
Epoch 13/100
 - 1s - loss: 0.6292 - accuracy: 0.6377
Epoch 14/100
 - 1s - loss: 0.6308 - accuracy: 0.6388
Epoch 15/100
 - 1s - loss: 0.6254 - accuracy: 0.6429
Epoch 16/100
 - 1s - loss: 0.6237 - accuracy: 0.6474
Epoch 17/100
 - 1s - loss: 0.6220 - accuracy: 0.6491
Epoch 18/100
 - 1s - loss: 0.6200 - accuracy: 0.6491
Epoch 19/100
 - 1s - loss: 0.6176 - accuracy: 0.6531
Ep

# Re-test for J vs. P

In [18]:
# Binary target: the Judging/Perceiving letter of the type.
target = mbti_df["j_p"]
target_names = ["Judging","Perceiving"]

X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=42)

# Fit the scaler on the training split only, then apply it to both splits.
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Same architecture as the 16-class model; layer widths follow the data.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=y_train_categorical.shape[1], activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.fit(X_train_scaled, y_train_categorical, epochs=100, shuffle=True, verbose=2)

model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
print(f"J vs. P - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
 - 1s - loss: 0.6784 - accuracy: 0.5876
Epoch 2/100
 - 1s - loss: 0.6702 - accuracy: 0.5974
Epoch 3/100
 - 1s - loss: 0.6691 - accuracy: 0.5979
Epoch 4/100
 - 1s - loss: 0.6673 - accuracy: 0.6010
Epoch 5/100
 - 1s - loss: 0.6655 - accuracy: 0.6042
Epoch 6/100
 - 1s - loss: 0.6631 - accuracy: 0.6007
Epoch 7/100
 - 1s - loss: 0.6643 - accuracy: 0.6039
Epoch 8/100
 - 1s - loss: 0.6620 - accuracy: 0.6033
Epoch 9/100
 - 1s - loss: 0.6614 - accuracy: 0.6034
Epoch 10/100
 - 1s - loss: 0.6586 - accuracy: 0.6082
Epoch 11/100
 - 1s - loss: 0.6579 - accuracy: 0.6097
Epoch 12/100
 - 1s - loss: 0.6561 - accuracy: 0.6102
Epoch 13/100
 - 1s - loss: 0.6539 - accuracy: 0.6153
Epoch 14/100
 - 1s - loss: 0.6536 - accuracy: 0.6142
Epoch 15/100
 - 1s - loss: 0.6498 - accuracy: 0.6239
Epoch 16/100
 - 1s - loss: 0.6491 - accuracy: 0.6222
Epoch 17/100
 - 1s - loss: 0.6453 - accuracy: 0.6259
Epoch 18/100
 - 1s - loss: 0.6436 - accuracy: 0.6267
Epoch 19/100
 - 1s - loss: 0.6429 - accuracy: 0.6299
Ep

# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

In [32]:
# Tune Batch Size and Number of Epochs
import numpy as np
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier


def create_model(init_mode='uniform',
                 input_dim=X_train_scaled.shape[1],
                 n_classes=y_train_categorical.shape[1]):
    """Build and compile the network evaluated by the grid search.

    Parameters
    ----------
    init_mode : str
        Accepted for backward compatibility; the layers do not use it.
    input_dim : int
        Number of input features. Defaults to the width of ``X_train_scaled``
        at the time this cell is run.
    n_classes : int
        Number of softmax outputs. Defaults to the width of
        ``y_train_categorical`` at the time this cell is run.

    Returns
    -------
    A compiled ``Sequential`` model.
    """
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation='relu', input_dim=input_dim))
    model.add(Dense(units=100, activation='relu'))
    model.add(Dense(units=n_classes, activation='softmax'))
    # compile model
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


seed = 7
np.random.seed(seed)
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
# batch_size and epochs are supplied per candidate by the grid below, so the
# candidate lists are not passed to the wrapper as constructor defaults.
model_CV = KerasClassifier(build_fn=create_model, verbose=2)
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(model_CV, param_grid, n_jobs=-1, cv=3)

In [34]:
# Run the batch-size / epochs search and report the score of every candidate.
# The search object built in the previous cell is named `grid`.
grid_result = grid.fit(X_train_scaled, y_train_categorical)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/50
 - 1s - loss: 0.7456 - accuracy: 0.6468
Epoch 2/50
 - 1s - loss: 0.6633 - accuracy: 0.7019
Epoch 3/50
 - 1s - loss: 0.6343 - accuracy: 0.7130
Epoch 4/50
 - 1s - loss: 0.6172 - accuracy: 0.7187
Epoch 5/50
 - 1s - loss: 0.6046 - accuracy: 0.7271
Epoch 6/50
 - 1s - loss: 0.5880 - accuracy: 0.7305
Epoch 7/50
 - 1s - loss: 0.5813 - accuracy: 0.7406
Epoch 8/50
 - 1s - loss: 0.5756 - accuracy: 0.7391
Epoch 9/50
 - 1s - loss: 0.5654 - accuracy: 0.7471
Epoch 10/50
 - 1s - loss: 0.5607 - accuracy: 0.7477
Epoch 11/50
 - 1s - loss: 0.5573 - accuracy: 0.7465
Epoch 12/50
 - 1s - loss: 0.5505 - accuracy: 0.7465
Epoch 13/50
 - 1s - loss: 0.5412 - accuracy: 0.7475
Epoch 14/50
 - 1s - loss: 0.5381 - accuracy: 0.7580
Epoch 15/50
 - 1s - loss: 0.5306 - accuracy: 0.7572
Epoch 16/50
 - 1s - loss: 0.5264 - accuracy: 0.7644
Epoch 17/50
 - 1s - loss: 0.5259 - accuracy: 0.7656
Epoch 18/50
 - 1s - loss: 0.5150 - accuracy: 0.7660
Epoch 19/50
 - 1s - loss: 0.5115 - accuracy: 0.7704
Epoch 20/50
 - 1s - l

In [39]:
# Tune the Training Optimization Algorithm
def create_model1(optimizer='adam',
                  input_dim=X_train_scaled.shape[1],
                  n_classes=y_train_categorical.shape[1]):
    """Build and compile the network using the given optimizer.

    Parameters
    ----------
    optimizer : str
        Optimizer name passed to ``model.compile``.
    input_dim : int
        Number of input features. Defaults to the width of ``X_train_scaled``
        at the time this cell is run.
    n_classes : int
        Number of softmax outputs. Defaults to the width of
        ``y_train_categorical`` at the time this cell is run.

    Returns
    -------
    A compiled ``Sequential`` model.
    """
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation='relu', input_dim=input_dim))
    model.add(Dense(units=100, activation='relu'))
    model.add(Dense(units=n_classes, activation='softmax'))
    # compile model
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


# Search over optimizers with epochs and batch size held fixed.
model_CV1 = KerasClassifier(build_fn=create_model1, epochs=50, batch_size=20, verbose=2)
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid1 = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model_CV1, param_grid=param_grid1, n_jobs=-1, cv=3)
grid_result1 = grid.fit(X_train_scaled, y_train_categorical)

print("Best: %f using %s" % (grid_result1.best_score_, grid_result1.best_params_))
means = grid_result1.cv_results_['mean_test_score']
stds = grid_result1.cv_results_['std_test_score']
params = grid_result1.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/50
 - 1s - loss: 0.7094 - accuracy: 0.6769
Epoch 2/50
 - 1s - loss: 0.6565 - accuracy: 0.7038
Epoch 3/50
 - 1s - loss: 0.6383 - accuracy: 0.7149
Epoch 4/50
 - 1s - loss: 0.6261 - accuracy: 0.7236
Epoch 5/50
 - 1s - loss: 0.6183 - accuracy: 0.7219
Epoch 6/50
 - 1s - loss: 0.6102 - accuracy: 0.7280
Epoch 7/50
 - 1s - loss: 0.6048 - accuracy: 0.7349
Epoch 8/50
 - 1s - loss: 0.5988 - accuracy: 0.7345
Epoch 9/50
 - 1s - loss: 0.5947 - accuracy: 0.7364
Epoch 10/50
 - 1s - loss: 0.5904 - accuracy: 0.7393
Epoch 11/50
 - 1s - loss: 0.5873 - accuracy: 0.7400
Epoch 12/50
 - 1s - loss: 0.5833 - accuracy: 0.7376
Epoch 13/50
 - 1s - loss: 0.5802 - accuracy: 0.7406
Epoch 14/50
 - 1s - loss: 0.5776 - accuracy: 0.7402
Epoch 15/50
 - 1s - loss: 0.5752 - accuracy: 0.7421
Epoch 16/50
 - 1s - loss: 0.5720 - accuracy: 0.7459
Epoch 17/50
 - 1s - loss: 0.5701 - accuracy: 0.7454
Epoch 18/50
 - 1s - loss: 0.5686 - accuracy: 0.7461
Epoch 19/50
 - 1s - loss: 0.5654 - accuracy: 0.7479
Epoch 20/50
 - 1s - l

In [40]:
# Tune the Neuron Activation Function
def create_model2(activation='relu',
                  input_dim=X_train_scaled.shape[1],
                  n_classes=y_train_categorical.shape[1]):
    """Build and compile the network using the given hidden-layer activation.

    Parameters
    ----------
    activation : str
        Activation used by both hidden layers; the output layer stays softmax.
    input_dim : int
        Number of input features. Defaults to the width of ``X_train_scaled``
        at the time this cell is run.
    n_classes : int
        Number of softmax outputs. Defaults to the width of
        ``y_train_categorical`` at the time this cell is run.

    Returns
    -------
    A compiled ``Sequential`` model.
    """
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation=activation, input_dim=input_dim))
    model.add(Dense(units=100, activation=activation))
    model.add(Dense(units=n_classes, activation='softmax'))
    # compile model
    model.compile(
        optimizer='Adagrad',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


# Search over hidden-layer activations with epochs and batch size held fixed.
model_CV2 = KerasClassifier(build_fn=create_model2, epochs=50, batch_size=20, verbose=2)
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid2 = dict(activation=activation)
grid = GridSearchCV(estimator=model_CV2, param_grid=param_grid2, n_jobs=-1, cv=3)

grid_result2 = grid.fit(X_train_scaled, y_train_categorical)

print("Best: %f using %s" % (grid_result2.best_score_, grid_result2.best_params_))
means = grid_result2.cv_results_['mean_test_score']
stds = grid_result2.cv_results_['std_test_score']
params = grid_result2.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/50
 - 1s - loss: 0.7080 - accuracy: 0.6790
Epoch 2/50
 - 1s - loss: 0.6498 - accuracy: 0.7118
Epoch 3/50
 - 1s - loss: 0.6316 - accuracy: 0.7173
Epoch 4/50
 - 1s - loss: 0.6200 - accuracy: 0.7263
Epoch 5/50
 - 1s - loss: 0.6113 - accuracy: 0.7301
Epoch 6/50
 - 1s - loss: 0.6033 - accuracy: 0.7330
Epoch 7/50
 - 1s - loss: 0.5977 - accuracy: 0.7358
Epoch 8/50
 - 1s - loss: 0.5926 - accuracy: 0.7345
Epoch 9/50
 - 1s - loss: 0.5872 - accuracy: 0.7383
Epoch 10/50
 - 1s - loss: 0.5842 - accuracy: 0.7385
Epoch 11/50
 - 1s - loss: 0.5804 - accuracy: 0.7461
Epoch 12/50
 - 1s - loss: 0.5775 - accuracy: 0.7419
Epoch 13/50
 - 1s - loss: 0.5736 - accuracy: 0.7423
Epoch 14/50
 - 1s - loss: 0.5713 - accuracy: 0.7429
Epoch 15/50
 - 1s - loss: 0.5690 - accuracy: 0.7482
Epoch 16/50
 - 1s - loss: 0.5661 - accuracy: 0.7479
Epoch 17/50
 - 1s - loss: 0.5636 - accuracy: 0.7467
Epoch 18/50
 - 1s - loss: 0.5616 - accuracy: 0.7480
Epoch 19/50
 - 1s - loss: 0.5602 - accuracy: 0.7492
Epoch 20/50
 - 1s - l

In [64]:
# Predict the test split with the most recently fitted grid search.
predictions = grid.predict(X_test_scaled)
# One-hot the predicted classes. num_classes pins the width to that of
# y_test_categorical, so the two matrices match even if the highest class
# is never predicted.
predictions = to_categorical(predictions, num_classes=y_test_categorical.shape[1])

from sklearn.metrics import classification_report
print(classification_report(y_test_categorical, predictions, target_names=target_names))

                precision    recall  f1-score   support

     CONFIRMED       0.48      0.42      0.45       411
FALSE POSITIVE       0.67      0.72      0.69       484
     CANDIDATE       0.84      0.85      0.85       853

     micro avg       0.71      0.71      0.71      1748
     macro avg       0.66      0.66      0.66      1748
  weighted avg       0.71      0.71      0.71      1748
   samples avg       0.71      0.71      0.71      1748



# Save the Model

In [71]:
# Save the model to disk with joblib (be sure to turn this in to BCS).
# NOTE(review): `model` here is the network trained in the "J vs. P" cell, not
# the grid-searched estimator - confirm this is the model meant to be saved.
# If joblib fails to import, try running the install command in terminal/git-bash.
filename = 'Neural.sav'
joblib.dump(model, filename)

['Neural.sav']