In [57]:
import pandas as pd
import joblib
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.models import load_model

# Read the CSV and Perform Basic Data Cleaning

In [2]:
mbti_df = pd.read_csv("Resources/mbti_final.csv")
mbti_df.head()

Unnamed: 0,type,posts,description,i_e,n_s,f_t,j_p,sentiment_score,words_per_comment,squared_total_words,...,adjectives,adjective_count,verbs,verb_count,determiners,determiner_count,interjections,interjection_count,prepositions,preposition_count
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...,Introvert Intuition Feeling Judging,I,N,F,J,0.0471,11.12,22.24,...,"['intj', 'life-changing', 'most', 'last', 'nex...",51,"['top', 'has', 'been', 'posted', 'committing',...",90,"['the', 'the', 'the', 'a', 'the', 'every', 'th...",52,[],0,"['in', 'On', 'for', 'of', 'on', 'before', 'in'...",78
1,ENTP,'I'm finding the lack of me in these posts ver...,Extrovert Intuition Thinking Perceiving,E,N,T,P,0.388976,23.4,46.8,...,"['same', 'missionary', 'new', 'theory.Hello', ...",96,"[""'m"", 'finding', 'be', 'boring', ""'s"", 'are',...",257,"['the', 'these', 'the', 'an', 'all', 'the', 't...",90,[],0,"['of', 'in', 'if', 'in', 'For', 'in', 'Than', ...",136
2,INTP,'Good one _____ https://www.youtube.com/wat...,Introvert Intuition Thinking Perceiving,I,N,T,P,0.620244,16.72,33.44,...,"['positive', 'best', 'amazing', 'more', 'So-ca...",82,"['say', 'know', ""'s"", 'being', 'be', ""'s"", 'be...",166,"['that', 'an', 'a', 'any', 'All', 'the', 'that...",52,"['yes', 'No', 'Oh', 'Yessss', 'Oh']",5,"['that', 'If', 'than', 'in', 'in', 'at', 'for'...",91
3,INTJ,"'Dear INTP, I enjoyed our conversation the o...",Introvert Intuition Thinking Judging,I,N,T,J,0.807546,21.28,42.56,...,"[""'Dear"", 'other', 'social', 'arbitrary', 'oth...",93,"['enjoyed', 'gabbing', 'being', 'created', 'hu...",233,"['the', 'the', 'the', 'the', 'every', 'no', 'A...",94,[],0,"['about', 'of', 'of', 'in', 'on', 'like', 'in'...",124
4,ENTJ,'You're fired.|||That's another silly misconce...,Extrovert Intuition Thinking Judging,E,N,T,J,0.861824,19.34,38.68,...,"['silly', 'super-duper-long-ass', 'permanent',...",87,"[""'re"", ""'s"", 'approaching', 'is', 'is', 'goin...",229,"['another', 'the', 'a', 'the', 'that', 'that',...",84,"['Oh', 'Yes']",2,"['That', 'with', 'on', 'on', 'about', 'If', 'f...",84


# Select features (columns)

In [5]:
mbti_df.columns

Index(['type', 'posts', 'description', 'i_e', 'n_s', 'f_t', 'j_p',
       'sentiment_score', 'words_per_comment', 'squared_total_words',
       'word_count_variance_per_comment', 'interrobangs_per_comment',
       'Tagged Posts PosTag', 'nouns', 'noun_count', 'adjectives',
       'adjective_count', 'verbs', 'verb_count', 'determiners',
       'determiner_count', 'interjections', 'interjection_count',
       'prepositions', 'preposition_count'],
      dtype='object')

In [6]:
# Set features. This will also be used as your x values.
selected_features = mbti_df[['sentiment_score', 'words_per_comment', 'squared_total_words',
       'word_count_variance_per_comment', 'interrobangs_per_comment','noun_count', 
       'adjective_count', 'verb_count', 
       'determiner_count', 'interjection_count','preposition_count']]

# Create a Train Test Split

Use `koi_disposition` for the y values

In [7]:
target = mbti_df["type"]
target_names = ["INFJ","INFP","INTJ",'INTP',"ISFJ","ISFP","ISTJ",'ISTP',"ENFJ","ENFP","ENTJ",'ENTP',"ESFJ","ESFP","ESTJ",'ESTP']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=42)

In [9]:
X_train.head()

Unnamed: 0,sentiment_score,words_per_comment,squared_total_words,word_count_variance_per_comment,interrobangs_per_comment,noun_count,adjective_count,verb_count,determiner_count,interjection_count,preposition_count
2706,571.406128,28.22,56.44,127.84,1.14,310,123,305,99,0,124
2521,534.330686,20.92,41.84,186.37,0.24,296,80,202,81,0,115
4192,891.721612,25.9,51.8,113.7856,0.74,213,113,318,68,1,132
6296,1363.673841,30.04,60.08,110.109954,0.16,291,92,348,107,3,167
3399,717.557672,28.98,57.96,131.2784,0.6,267,100,334,126,7,152


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [10]:
# Scale your data
from sklearn.preprocessing import StandardScaler

X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [12]:
from tensorflow.keras.utils import to_categorical

y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [11]:
X_train_scaled.shape[1]

11

In [12]:
y_train_categorical.shape[1]

16

# Train the Model



In [13]:
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=11))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=16, activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.fit(X_train_scaled, y_train_categorical, epochs=100, shuffle=True, verbose=2)

Using TensorFlow backend.


Epoch 1/100
 - 1s - loss: 2.3388 - accuracy: 0.2026
Epoch 2/100
 - 0s - loss: 2.2526 - accuracy: 0.2250
Epoch 3/100
 - 0s - loss: 2.2290 - accuracy: 0.2313
Epoch 4/100
 - 0s - loss: 2.2199 - accuracy: 0.2332
Epoch 5/100
 - 0s - loss: 2.2078 - accuracy: 0.2398
Epoch 6/100
 - 0s - loss: 2.2010 - accuracy: 0.2369
Epoch 7/100
 - 0s - loss: 2.1948 - accuracy: 0.2416
Epoch 8/100
 - 0s - loss: 2.1866 - accuracy: 0.2415
Epoch 9/100
 - 0s - loss: 2.1820 - accuracy: 0.2510
Epoch 10/100
 - 0s - loss: 2.1772 - accuracy: 0.2475
Epoch 11/100
 - 0s - loss: 2.1738 - accuracy: 0.2445
Epoch 12/100
 - 1s - loss: 2.1658 - accuracy: 0.2505
Epoch 13/100
 - 1s - loss: 2.1580 - accuracy: 0.2571
Epoch 14/100
 - 1s - loss: 2.1551 - accuracy: 0.2550
Epoch 15/100
 - 1s - loss: 2.1500 - accuracy: 0.2599
Epoch 16/100
 - 1s - loss: 2.1446 - accuracy: 0.2605
Epoch 17/100
 - 1s - loss: 2.1379 - accuracy: 0.2610
Epoch 18/100
 - 1s - loss: 2.1342 - accuracy: 0.2593
Epoch 19/100
 - 1s - loss: 2.1251 - accuracy: 0.2638
Ep

<keras.callbacks.callbacks.History at 0x135846cf8>

In [14]:
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Loss: 2.6405672850065818, Accuracy: 0.1834947019815445


# Re-test for I vs. E

In [14]:
from keras.models import Sequential
from keras.layers import Dense

target = mbti_df["i_e"]
target_names = ["Introvert","Extrovert"]

X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=42)
X_train.head()

X_scaler = StandardScaler().fit(X_train)
X_train_scaled1 = X_scaler.transform(X_train)
X_test_scaled1 = X_scaler.transform(X_test)

label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical1 = to_categorical(encoded_y_train)
y_test_categorical1 = to_categorical(encoded_y_test)

model1 = Sequential()
model1.add(Dense(units=100, activation='relu', input_dim=11))
model1.add(Dense(units=100, activation='relu'))
model1.add(Dense(units=2, activation='softmax'))

model1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model1.fit(X_train_scaled1, y_train_categorical1, epochs=100, shuffle=True, verbose=2)

model_loss, model_accuracy = model1.evaluate(X_test_scaled1, y_test_categorical1, verbose=2)
print(f"I vs. E - Loss: {model_loss}, Accuracy: {model_accuracy}")

Using TensorFlow backend.


Epoch 1/100
 - 1s - loss: 0.5498 - accuracy: 0.7628
Epoch 2/100
 - 1s - loss: 0.5390 - accuracy: 0.7650
Epoch 3/100
 - 1s - loss: 0.5346 - accuracy: 0.7658
Epoch 4/100
 - 1s - loss: 0.5313 - accuracy: 0.7671
Epoch 5/100
 - 1s - loss: 0.5289 - accuracy: 0.7665
Epoch 6/100
 - 1s - loss: 0.5292 - accuracy: 0.7673
Epoch 7/100
 - 1s - loss: 0.5262 - accuracy: 0.7665
Epoch 8/100
 - 1s - loss: 0.5270 - accuracy: 0.7674
Epoch 9/100
 - 1s - loss: 0.5246 - accuracy: 0.7678
Epoch 10/100
 - 1s - loss: 0.5232 - accuracy: 0.7673
Epoch 11/100
 - 1s - loss: 0.5213 - accuracy: 0.7679
Epoch 12/100
 - 1s - loss: 0.5214 - accuracy: 0.7678
Epoch 13/100
 - 1s - loss: 0.5173 - accuracy: 0.7704
Epoch 14/100
 - 1s - loss: 0.5167 - accuracy: 0.7691
Epoch 15/100
 - 1s - loss: 0.5138 - accuracy: 0.7702
Epoch 16/100
 - 1s - loss: 0.5116 - accuracy: 0.7698
Epoch 17/100
 - 1s - loss: 0.5116 - accuracy: 0.7731
Epoch 18/100
 - 1s - loss: 0.5074 - accuracy: 0.7734
Epoch 19/100
 - 1s - loss: 0.5065 - accuracy: 0.7733
Ep

# Hypertune

In [21]:
# Tune Batch Size and Number of Epochs
def create_model(init_mode='uniform'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation='relu', input_dim=11))
    model.add(Dense(units=100, activation='relu'))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
seed = 7
np.random.seed(seed)
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
model_CV1 = KerasClassifier(build_fn=create_model,verbose=2)
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid1 = GridSearchCV(model_CV1, param_grid, n_jobs=-1, cv=3)

In [22]:
grid_result1=grid1.fit(X_train_scaled1, y_train_categorical1)
print("Best: %f using %s" % (grid_result1.best_score_, grid_result1.best_params_))
means = grid_result1.cv_results_['mean_test_score']
stds = grid_result1.cv_results_['std_test_score']
params = grid_result1.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 1s - loss: 0.5547 - accuracy: 0.7611
Epoch 2/10
 - 0s - loss: 0.5387 - accuracy: 0.7664
Epoch 3/10
 - 0s - loss: 0.5350 - accuracy: 0.7661
Epoch 4/10
 - 0s - loss: 0.5328 - accuracy: 0.7671
Epoch 5/10
 - 0s - loss: 0.5331 - accuracy: 0.7648
Epoch 6/10
 - 0s - loss: 0.5289 - accuracy: 0.7665
Epoch 7/10
 - 0s - loss: 0.5303 - accuracy: 0.7659
Epoch 8/10
 - 0s - loss: 0.5258 - accuracy: 0.7668
Epoch 9/10
 - 0s - loss: 0.5266 - accuracy: 0.7664
Epoch 10/10
 - 0s - loss: 0.5249 - accuracy: 0.7679
Best: 0.763449 using {'batch_size': 100, 'epochs': 10}
0.759453 (0.004906) with: {'batch_size': 10, 'epochs': 10}
0.708116 (0.010800) with: {'batch_size': 10, 'epochs': 50}
0.673840 (0.011924) with: {'batch_size': 10, 'epochs': 100}
0.761605 (0.007682) with: {'batch_size': 20, 'epochs': 10}
0.726253 (0.004098) with: {'batch_size': 20, 'epochs': 50}
0.670765 (0.012880) with: {'batch_size': 20, 'epochs': 100}
0.761758 (0.006669) with: {'batch_size': 40, 'epochs': 10}
0.725484 (0.012136)

In [23]:
# Tune the Training Optimization Algorithm
def create_model1(optimizer='adam'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation='relu', input_dim=11))
    model.add(Dense(units=100, activation='relu'))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
model_CV2 = KerasClassifier(build_fn=create_model1, epochs=10, batch_size=100, verbose=2)
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid1 = dict(optimizer=optimizer)
grid2 = GridSearchCV(estimator=model_CV2, param_grid=param_grid1, n_jobs=-1, cv=3)
grid_result2 = grid2.fit(X_train_scaled1, y_train_categorical1)

print("Best: %f using %s" % (grid_result2.best_score_, grid_result2.best_params_))
means = grid_result2.cv_results_['mean_test_score']
stds = grid_result2.cv_results_['std_test_score']
params = grid_result2.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 0s - loss: 0.6193 - accuracy: 0.6835
Epoch 2/10
 - 0s - loss: 0.5499 - accuracy: 0.7654
Epoch 3/10
 - 0s - loss: 0.5445 - accuracy: 0.7654
Epoch 4/10
 - 0s - loss: 0.5417 - accuracy: 0.7654
Epoch 5/10
 - 0s - loss: 0.5395 - accuracy: 0.7654
Epoch 6/10
 - 0s - loss: 0.5380 - accuracy: 0.7656
Epoch 7/10
 - 0s - loss: 0.5368 - accuracy: 0.7659
Epoch 8/10
 - 0s - loss: 0.5359 - accuracy: 0.7661
Epoch 9/10
 - 0s - loss: 0.5351 - accuracy: 0.7661
Epoch 10/10
 - 0s - loss: 0.5346 - accuracy: 0.7664
Best: 0.765447 using {'optimizer': 'SGD'}
0.765447 (0.004649) with: {'optimizer': 'SGD'}
0.762220 (0.008338) with: {'optimizer': 'RMSprop'}
0.764371 (0.006943) with: {'optimizer': 'Adagrad'}
0.763142 (0.005585) with: {'optimizer': 'Adadelta'}
0.761605 (0.008575) with: {'optimizer': 'Adam'}
0.764064 (0.006306) with: {'optimizer': 'Adamax'}
0.762527 (0.004110) with: {'optimizer': 'Nadam'}


In [24]:
# Tune the Neuron Activation Function
def create_model2(activation='relu'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation=activation, input_dim=11))
    model.add(Dense(units=100, activation=activation))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer='SGD',
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
model_CV3 = KerasClassifier(build_fn=create_model2, epochs=10, batch_size=100, verbose=2)
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid2 = dict(activation=activation)
grid3 = GridSearchCV(estimator=model_CV3, param_grid=param_grid2, n_jobs=-1, cv=3)

grid_result3 = grid3.fit(X_train_scaled1, y_train_categorical1)

print("Best: %f using %s" % (grid_result3.best_score_, grid_result3.best_params_))
means = grid_result3.cv_results_['mean_test_score']
stds = grid_result3.cv_results_['std_test_score']
params = grid_result3.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 0s - loss: 0.6310 - accuracy: 0.6846
Epoch 2/10
 - 0s - loss: 0.5627 - accuracy: 0.7642
Epoch 3/10
 - 0s - loss: 0.5463 - accuracy: 0.7650
Epoch 4/10
 - 0s - loss: 0.5408 - accuracy: 0.7653
Epoch 5/10
 - 0s - loss: 0.5385 - accuracy: 0.7658
Epoch 6/10
 - 0s - loss: 0.5374 - accuracy: 0.7661
Epoch 7/10
 - 0s - loss: 0.5366 - accuracy: 0.7662
Epoch 8/10
 - 0s - loss: 0.5362 - accuracy: 0.7658
Epoch 9/10
 - 0s - loss: 0.5359 - accuracy: 0.7662
Epoch 10/10
 - 0s - loss: 0.5358 - accuracy: 0.7664
Best: 0.766523 using {'activation': 'softsign'}
0.765447 (0.004785) with: {'activation': 'softmax'}
0.765447 (0.004649) with: {'activation': 'softplus'}
0.766523 (0.004031) with: {'activation': 'softsign'}
0.765140 (0.004815) with: {'activation': 'relu'}
0.766370 (0.004949) with: {'activation': 'tanh'}
0.765447 (0.004785) with: {'activation': 'sigmoid'}
0.765447 (0.004785) with: {'activation': 'hard_sigmoid'}
0.765601 (0.005441) with: {'activation': 'linear'}


In [25]:
predictions = grid3.predict(X_test_scaled1)
predictions=to_categorical(predictions)

from sklearn.metrics import classification_report
print(classification_report(y_test_categorical1, predictions, target_names=target_names))

              precision    recall  f1-score   support

   Introvert       0.73      0.02      0.03       473
   Extrovert       0.78      1.00      0.88      1696

   micro avg       0.78      0.78      0.78      2169
   macro avg       0.76      0.51      0.46      2169
weighted avg       0.77      0.78      0.69      2169
 samples avg       0.78      0.78      0.78      2169



In [54]:
model1 = Sequential()
model1.add(Dense(units=100, activation='softsign', input_dim=11))
model1.add(Dense(units=100, activation='softsign'))
model1.add(Dense(units=2, activation='softmax'))

model1.compile(
    optimizer='SGD',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model1.fit(X_train_scaled1, y_train_categorical1, batch_size=100, epochs=10, shuffle=True, verbose=2)

model_loss, model_accuracy = model1.evaluate(X_test_scaled1, y_test_categorical1, verbose=2)
print(f"I vs. E - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/10
 - 1s - loss: 0.6212 - accuracy: 0.7153
Epoch 2/10
 - 0s - loss: 0.5597 - accuracy: 0.7631
Epoch 3/10
 - 0s - loss: 0.5448 - accuracy: 0.7662
Epoch 4/10
 - 0s - loss: 0.5395 - accuracy: 0.7659
Epoch 5/10
 - 0s - loss: 0.5377 - accuracy: 0.7659
Epoch 6/10
 - 0s - loss: 0.5371 - accuracy: 0.7662
Epoch 7/10
 - 0s - loss: 0.5368 - accuracy: 0.7662
Epoch 8/10
 - 0s - loss: 0.5366 - accuracy: 0.7661
Epoch 9/10
 - 0s - loss: 0.5364 - accuracy: 0.7661
Epoch 10/10
 - 0s - loss: 0.5363 - accuracy: 0.7659
I vs. E - Loss: 0.5181894626360216, Accuracy: 0.7828492522239685


In [60]:
model1.save('Models/NN_ie.h5')

# Re-test for N vs. S

In [67]:
target = mbti_df["n_s"]
target_names = ["Intuition","Sensing"]

X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=42)
X_train.head()

X_scaler = StandardScaler().fit(X_train)
X_train_scaled2 = X_scaler.transform(X_train)
X_test_scaled2 = X_scaler.transform(X_test)

label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical2 = to_categorical(encoded_y_train)
y_test_categorical2 = to_categorical(encoded_y_test)

model2 = Sequential()
model2.add(Dense(units=100, activation='relu', input_dim=11))
model2.add(Dense(units=100, activation='relu'))
model2.add(Dense(units=2, activation='softmax'))

model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model2.fit(X_train_scaled2, y_train_categorical2, epochs=100, shuffle=True, verbose=2)

model_loss, model_accuracy = model2.evaluate(X_test_scaled2, y_test_categorical2, verbose=2)
print(f"N vs. S - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
 - 1s - loss: 0.4255 - accuracy: 0.8560
Epoch 2/100
 - 1s - loss: 0.4029 - accuracy: 0.8632
Epoch 3/100
 - 1s - loss: 0.3975 - accuracy: 0.8632
Epoch 4/100
 - 1s - loss: 0.3942 - accuracy: 0.8634
Epoch 5/100
 - 1s - loss: 0.3928 - accuracy: 0.8630
Epoch 6/100
 - 1s - loss: 0.3908 - accuracy: 0.8634
Epoch 7/100
 - 1s - loss: 0.3904 - accuracy: 0.8632
Epoch 8/100
 - 1s - loss: 0.3895 - accuracy: 0.8635
Epoch 9/100
 - 1s - loss: 0.3866 - accuracy: 0.8632
Epoch 10/100
 - 1s - loss: 0.3870 - accuracy: 0.8630
Epoch 11/100
 - 1s - loss: 0.3853 - accuracy: 0.8632
Epoch 12/100
 - 1s - loss: 0.3839 - accuracy: 0.8637
Epoch 13/100
 - 1s - loss: 0.3833 - accuracy: 0.8632
Epoch 14/100
 - 1s - loss: 0.3812 - accuracy: 0.8632
Epoch 15/100
 - 1s - loss: 0.3799 - accuracy: 0.8634
Epoch 16/100
 - 1s - loss: 0.3787 - accuracy: 0.8632
Epoch 17/100
 - 1s - loss: 0.3767 - accuracy: 0.8638
Epoch 18/100
 - 1s - loss: 0.3756 - accuracy: 0.8632
Epoch 19/100
 - 1s - loss: 0.3730 - accuracy: 0.8635
Ep

# Hypertune

In [68]:
# Tune Batch Size and Number of Epochs
def create_model(init_mode='uniform'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation='relu', input_dim=11))
    model.add(Dense(units=100, activation='relu'))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
seed = 7
np.random.seed(seed)
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
model_CV1 = KerasClassifier(build_fn=create_model,verbose=2)
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid1 = GridSearchCV(model_CV1, param_grid, n_jobs=-1, cv=3)

In [69]:
grid_result1=grid1.fit(X_train_scaled2, y_train_categorical2)
print("Best: %f using %s" % (grid_result1.best_score_, grid_result1.best_params_))
means = grid_result1.cv_results_['mean_test_score']
stds = grid_result1.cv_results_['std_test_score']
params = grid_result1.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 1s - loss: 0.4396 - accuracy: 0.8552
Epoch 2/10
 - 0s - loss: 0.4067 - accuracy: 0.8632
Epoch 3/10
 - 0s - loss: 0.4000 - accuracy: 0.8632
Epoch 4/10
 - 0s - loss: 0.3970 - accuracy: 0.8630
Epoch 5/10
 - 0s - loss: 0.3942 - accuracy: 0.8632
Epoch 6/10
 - 0s - loss: 0.3917 - accuracy: 0.8635
Epoch 7/10
 - 0s - loss: 0.3912 - accuracy: 0.8632
Epoch 8/10
 - 0s - loss: 0.3896 - accuracy: 0.8634
Epoch 9/10
 - 0s - loss: 0.3864 - accuracy: 0.8635
Epoch 10/10
 - 0s - loss: 0.3862 - accuracy: 0.8635
Best: 0.863203 using {'batch_size': 80, 'epochs': 10}
0.862588 (0.003008) with: {'batch_size': 10, 'epochs': 10}
0.831694 (0.002876) with: {'batch_size': 10, 'epochs': 50}
0.802644 (0.012212) with: {'batch_size': 10, 'epochs': 100}
0.862896 (0.003084) with: {'batch_size': 20, 'epochs': 10}
0.835383 (0.003457) with: {'batch_size': 20, 'epochs': 50}
0.797571 (0.017496) with: {'batch_size': 20, 'epochs': 100}
0.862281 (0.003825) with: {'batch_size': 40, 'epochs': 10}
0.850753 (0.009098) 

In [70]:
# Tune the Training Optimization Algorithm
def create_model1(optimizer='adam'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation='relu', input_dim=11))
    model.add(Dense(units=100, activation='relu'))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
model_CV2 = KerasClassifier(build_fn=create_model1, epochs=10, batch_size=80, verbose=2)
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid1 = dict(optimizer=optimizer)
grid2 = GridSearchCV(estimator=model_CV2, param_grid=param_grid1, n_jobs=-1, cv=3)
grid_result2 = grid2.fit(X_train_scaled2, y_train_categorical2)

print("Best: %f using %s" % (grid_result2.best_score_, grid_result2.best_params_))
means = grid_result2.cv_results_['mean_test_score']
stds = grid_result2.cv_results_['std_test_score']
params = grid_result2.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 1s - loss: 0.5017 - accuracy: 0.8140
Epoch 2/10
 - 0s - loss: 0.4259 - accuracy: 0.8632
Epoch 3/10
 - 0s - loss: 0.4181 - accuracy: 0.8632
Epoch 4/10
 - 0s - loss: 0.4134 - accuracy: 0.8632
Epoch 5/10
 - 0s - loss: 0.4100 - accuracy: 0.8632
Epoch 6/10
 - 0s - loss: 0.4074 - accuracy: 0.8632
Epoch 7/10
 - 0s - loss: 0.4052 - accuracy: 0.8632
Epoch 8/10
 - 0s - loss: 0.4037 - accuracy: 0.8632
Epoch 9/10
 - 0s - loss: 0.4025 - accuracy: 0.8632
Epoch 10/10
 - 0s - loss: 0.4015 - accuracy: 0.8632
Best: 0.863203 using {'optimizer': 'SGD'}
0.863203 (0.002841) with: {'optimizer': 'SGD'}
0.863049 (0.002957) with: {'optimizer': 'RMSprop'}
0.862742 (0.002869) with: {'optimizer': 'Adagrad'}
0.863203 (0.002841) with: {'optimizer': 'Adadelta'}
0.862896 (0.003084) with: {'optimizer': 'Adam'}
0.862896 (0.003084) with: {'optimizer': 'Adamax'}
0.859514 (0.005986) with: {'optimizer': 'Nadam'}


In [71]:
# Tune the Neuron Activation Function
def create_model2(activation='relu'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation=activation, input_dim=11))
    model.add(Dense(units=100, activation=activation))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer='SGD',
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
model_CV3 = KerasClassifier(build_fn=create_model2, epochs=10, batch_size=80, verbose=2)
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid2 = dict(activation=activation)
grid3 = GridSearchCV(estimator=model_CV3, param_grid=param_grid2, n_jobs=-1, cv=3)

grid_result3 = grid3.fit(X_train_scaled2, y_train_categorical2)

print("Best: %f using %s" % (grid_result3.best_score_, grid_result3.best_params_))
means = grid_result3.cv_results_['mean_test_score']
stds = grid_result3.cv_results_['std_test_score']
params = grid_result3.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 1s - loss: 0.6015 - accuracy: 0.8632
Epoch 2/10
 - 0s - loss: 0.5022 - accuracy: 0.8632
Epoch 3/10
 - 0s - loss: 0.4553 - accuracy: 0.8632
Epoch 4/10
 - 0s - loss: 0.4312 - accuracy: 0.8632
Epoch 5/10
 - 0s - loss: 0.4184 - accuracy: 0.8632
Epoch 6/10
 - 0s - loss: 0.4108 - accuracy: 0.8632
Epoch 7/10
 - 0s - loss: 0.4064 - accuracy: 0.8632
Epoch 8/10
 - 0s - loss: 0.4038 - accuracy: 0.8632
Epoch 9/10
 - 0s - loss: 0.4021 - accuracy: 0.8632
Epoch 10/10
 - 0s - loss: 0.4010 - accuracy: 0.8632
Best: 0.863203 using {'activation': 'softmax'}
0.863203 (0.002841) with: {'activation': 'softmax'}
0.863203 (0.002841) with: {'activation': 'softplus'}
0.863203 (0.002841) with: {'activation': 'softsign'}
0.863203 (0.002841) with: {'activation': 'relu'}
0.863203 (0.002841) with: {'activation': 'tanh'}
0.863203 (0.002841) with: {'activation': 'sigmoid'}
0.863203 (0.002841) with: {'activation': 'hard_sigmoid'}
0.863203 (0.002841) with: {'activation': 'linear'}


In [79]:
model2 = Sequential()
model2.add(Dense(units=100, activation='softmax', input_dim=11))
model2.add(Dense(units=100, activation='softmax'))
model2.add(Dense(units=2, activation='softmax'))

model2.compile(
    optimizer='SGD',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model2.fit(X_train_scaled2, y_train_categorical2, batch_size=80, epochs=10, shuffle=True, verbose=2)

model_loss, model_accuracy = model2.evaluate(X_test_scaled2, y_test_categorical2, verbose=2)
print(f"N vs. S - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/10
 - 1s - loss: 0.6123 - accuracy: 0.8438
Epoch 2/10
 - 0s - loss: 0.5072 - accuracy: 0.8632
Epoch 3/10
 - 0s - loss: 0.4579 - accuracy: 0.8632
Epoch 4/10
 - 0s - loss: 0.4327 - accuracy: 0.8632
Epoch 5/10
 - 0s - loss: 0.4192 - accuracy: 0.8632
Epoch 6/10
 - 0s - loss: 0.4113 - accuracy: 0.8632
Epoch 7/10
 - 0s - loss: 0.4068 - accuracy: 0.8632
Epoch 8/10
 - 0s - loss: 0.4041 - accuracy: 0.8632
Epoch 9/10
 - 0s - loss: 0.4023 - accuracy: 0.8632
Epoch 10/10
 - 0s - loss: 0.4012 - accuracy: 0.8632
N vs. S - Loss: 0.4087249850446368, Accuracy: 0.8584601283073425


In [80]:
model1.save('Models/NN_ns.h5')

# Re-test for F vs. J

In [81]:
target = mbti_df["f_t"]
target_names = ["Feeling","Thinking"]

X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=42)
X_train.head()

X_scaler = StandardScaler().fit(X_train)
X_train_scaled3 = X_scaler.transform(X_train)
X_test_scaled3 = X_scaler.transform(X_test)

label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical3 = to_categorical(encoded_y_train)
y_test_categorical3 = to_categorical(encoded_y_test)

model3 = Sequential()
model3.add(Dense(units=100, activation='relu', input_dim=11))
model3.add(Dense(units=100, activation='relu'))
model3.add(Dense(units=2, activation='softmax'))

model3.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model3.fit(X_train_scaled3, y_train_categorical3, epochs=100, shuffle=True, verbose=2)

model_loss, model_accuracy = model3.evaluate(X_test_scaled3, y_test_categorical3, verbose=2)
print(f"F vs. T - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
 - 1s - loss: 0.6677 - accuracy: 0.5941
Epoch 2/100
 - 0s - loss: 0.6549 - accuracy: 0.6111
Epoch 3/100
 - 1s - loss: 0.6510 - accuracy: 0.6148
Epoch 4/100
 - 1s - loss: 0.6498 - accuracy: 0.6134
Epoch 5/100
 - 1s - loss: 0.6454 - accuracy: 0.6190
Epoch 6/100
 - 1s - loss: 0.6411 - accuracy: 0.6251
Epoch 7/100
 - 1s - loss: 0.6413 - accuracy: 0.6290
Epoch 8/100
 - 1s - loss: 0.6414 - accuracy: 0.6190
Epoch 9/100
 - 1s - loss: 0.6355 - accuracy: 0.6280
Epoch 10/100
 - 1s - loss: 0.6350 - accuracy: 0.6360
Epoch 11/100
 - 1s - loss: 0.6327 - accuracy: 0.6314
Epoch 12/100
 - 1s - loss: 0.6309 - accuracy: 0.6397
Epoch 13/100
 - 1s - loss: 0.6303 - accuracy: 0.6402
Epoch 14/100
 - 1s - loss: 0.6269 - accuracy: 0.6339
Epoch 15/100
 - 1s - loss: 0.6257 - accuracy: 0.6425
Epoch 16/100
 - 1s - loss: 0.6233 - accuracy: 0.6480
Epoch 17/100
 - 1s - loss: 0.6212 - accuracy: 0.6434
Epoch 18/100
 - 1s - loss: 0.6199 - accuracy: 0.6488
Epoch 19/100
 - 1s - loss: 0.6186 - accuracy: 0.6505
Ep

# Hypertune

In [82]:
# Tune Batch Size and Number of Epochs
def create_model(init_mode='uniform'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation='relu', input_dim=11))
    model.add(Dense(units=100, activation='relu'))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
seed = 7
np.random.seed(seed)
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
model_CV1 = KerasClassifier(build_fn=create_model,verbose=2)
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid1 = GridSearchCV(model_CV1, param_grid, n_jobs=-1, cv=3)

In [83]:
grid_result1=grid1.fit(X_train_scaled3, y_train_categorical3)
print("Best: %f using %s" % (grid_result1.best_score_, grid_result1.best_params_))
means = grid_result1.cv_results_['mean_test_score']
stds = grid_result1.cv_results_['std_test_score']
params = grid_result1.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 1s - loss: 0.6684 - accuracy: 0.5868
Epoch 2/10
 - 0s - loss: 0.6555 - accuracy: 0.6091
Epoch 3/10
 - 0s - loss: 0.6514 - accuracy: 0.6147
Epoch 4/10
 - 0s - loss: 0.6508 - accuracy: 0.6088
Epoch 5/10
 - 0s - loss: 0.6478 - accuracy: 0.6105
Epoch 6/10
 - 0s - loss: 0.6466 - accuracy: 0.6214
Epoch 7/10
 - 0s - loss: 0.6475 - accuracy: 0.6137
Epoch 8/10
 - 0s - loss: 0.6434 - accuracy: 0.6200
Epoch 9/10
 - 0s - loss: 0.6410 - accuracy: 0.6257
Epoch 10/10
 - 0s - loss: 0.6385 - accuracy: 0.6280
Best: 0.601137 using {'batch_size': 100, 'epochs': 10}
0.600523 (0.004772) with: {'batch_size': 10, 'epochs': 10}
0.563326 (0.013165) with: {'batch_size': 10, 'epochs': 50}
0.559637 (0.007269) with: {'batch_size': 10, 'epochs': 100}
0.599293 (0.008580) with: {'batch_size': 20, 'epochs': 10}
0.567169 (0.002688) with: {'batch_size': 20, 'epochs': 50}
0.554872 (0.004981) with: {'batch_size': 20, 'epochs': 100}
0.600830 (0.006967) with: {'batch_size': 40, 'epochs': 10}
0.579465 (0.006050)

In [84]:
# Tune the Training Optimization Algorithm
def create_model1(optimizer='adam'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation='relu', input_dim=11))
    model.add(Dense(units=100, activation='relu'))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
model_CV2 = KerasClassifier(build_fn=create_model1, epochs=10, batch_size=100, verbose=2)
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid1 = dict(optimizer=optimizer)
grid2 = GridSearchCV(estimator=model_CV2, param_grid=param_grid1, n_jobs=-1, cv=3)
grid_result2 = grid2.fit(X_train_scaled3, y_train_categorical3)

print("Best: %f using %s" % (grid_result2.best_score_, grid_result2.best_params_))
means = grid_result2.cv_results_['mean_test_score']
stds = grid_result2.cv_results_['std_test_score']
params = grid_result2.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 0s - loss: 0.6770 - accuracy: 0.5808
Epoch 2/10
 - 0s - loss: 0.6556 - accuracy: 0.6085
Epoch 3/10
 - 0s - loss: 0.6516 - accuracy: 0.6130
Epoch 4/10
 - 0s - loss: 0.6486 - accuracy: 0.6199
Epoch 5/10
 - 0s - loss: 0.6458 - accuracy: 0.6197
Epoch 6/10
 - 0s - loss: 0.6432 - accuracy: 0.6253
Epoch 7/10
 - 0s - loss: 0.6423 - accuracy: 0.6236
Epoch 8/10
 - 0s - loss: 0.6410 - accuracy: 0.6260
Epoch 9/10
 - 0s - loss: 0.6410 - accuracy: 0.6242
Epoch 10/10
 - 0s - loss: 0.6388 - accuracy: 0.6290
Best: 0.606671 using {'optimizer': 'Adagrad'}
0.587611 (0.011876) with: {'optimizer': 'SGD'}
0.595604 (0.006766) with: {'optimizer': 'RMSprop'}
0.606671 (0.011108) with: {'optimizer': 'Adagrad'}
0.593760 (0.000678) with: {'optimizer': 'Adadelta'}
0.601752 (0.003428) with: {'optimizer': 'Adam'}
0.602521 (0.006442) with: {'optimizer': 'Adamax'}
0.588841 (0.007016) with: {'optimizer': 'Nadam'}


In [85]:
# Tune the Neuron Activation Function
def create_model2(activation='relu'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation=activation, input_dim=11))
    model.add(Dense(units=100, activation=activation))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer='Adagrad',
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
model_CV3 = KerasClassifier(build_fn=create_model2, epochs=10, batch_size=100, verbose=2)
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid2 = dict(activation=activation)
grid3 = GridSearchCV(estimator=model_CV3, param_grid=param_grid2, n_jobs=-1, cv=3)

grid_result3 = grid3.fit(X_train_scaled3, y_train_categorical3)

print("Best: %f using %s" % (grid_result3.best_score_, grid_result3.best_params_))
means = grid_result3.cv_results_['mean_test_score']
stds = grid_result3.cv_results_['std_test_score']
params = grid_result3.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 0s - loss: 0.6743 - accuracy: 0.5931
Epoch 2/10
 - 0s - loss: 0.6542 - accuracy: 0.6054
Epoch 3/10
 - 0s - loss: 0.6497 - accuracy: 0.6090
Epoch 4/10
 - 0s - loss: 0.6463 - accuracy: 0.6165
Epoch 5/10
 - 0s - loss: 0.6443 - accuracy: 0.6137
Epoch 6/10
 - 0s - loss: 0.6435 - accuracy: 0.6263
Epoch 7/10
 - 0s - loss: 0.6412 - accuracy: 0.6227
Epoch 8/10
 - 0s - loss: 0.6407 - accuracy: 0.6254
Epoch 9/10
 - 0s - loss: 0.6392 - accuracy: 0.6257
Epoch 10/10
 - 0s - loss: 0.6375 - accuracy: 0.6287
Best: 0.608054 using {'activation': 'relu'}
0.540271 (0.003805) with: {'activation': 'softmax'}
0.606824 (0.017051) with: {'activation': 'softplus'}
0.604365 (0.017703) with: {'activation': 'softsign'}
0.608054 (0.007418) with: {'activation': 'relu'}
0.606978 (0.020915) with: {'activation': 'tanh'}
0.593299 (0.014545) with: {'activation': 'sigmoid'}
0.597449 (0.016622) with: {'activation': 'hard_sigmoid'}
0.593760 (0.011485) with: {'activation': 'linear'}


In [91]:
model3 = Sequential()
model3.add(Dense(units=100, activation='relu', input_dim=11))
model3.add(Dense(units=100, activation='relu'))
model3.add(Dense(units=2, activation='softmax'))

model3.compile(
    optimizer='Adagrad',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model3.fit(X_train_scaled3, y_train_categorical3, batch_size=100, epochs=10, shuffle=True, verbose=2)

model_loss, model_accuracy = model3.evaluate(X_test_scaled3, y_test_categorical3, verbose=2)
print(f"F vs. T - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/10
 - 0s - loss: 0.6714 - accuracy: 0.5915
Epoch 2/10
 - 0s - loss: 0.6531 - accuracy: 0.6053
Epoch 3/10
 - 0s - loss: 0.6497 - accuracy: 0.6159
Epoch 4/10
 - 0s - loss: 0.6469 - accuracy: 0.6223
Epoch 5/10
 - 0s - loss: 0.6443 - accuracy: 0.6242
Epoch 6/10
 - 0s - loss: 0.6423 - accuracy: 0.6293
Epoch 7/10
 - 0s - loss: 0.6412 - accuracy: 0.6254
Epoch 8/10
 - 0s - loss: 0.6401 - accuracy: 0.6257
Epoch 9/10
 - 0s - loss: 0.6399 - accuracy: 0.6288
Epoch 10/10
 - 0s - loss: 0.6378 - accuracy: 0.6306
F vs. T - Loss: 0.6595578840274644, Accuracy: 0.6067312359809875


In [92]:
model1.save('Models/NN_ft.h5')

# Re-test for J vs. P

In [88]:
target = mbti_df["j_p"]
target_names = ["Judging","Perceiving"]

X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=42)
X_train.head()

X_scaler = StandardScaler().fit(X_train)
X_train_scaled4 = X_scaler.transform(X_train)
X_test_scaled4 = X_scaler.transform(X_test)

label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical4 = to_categorical(encoded_y_train)
y_test_categorical4 = to_categorical(encoded_y_test)

model4 = Sequential()
model4.add(Dense(units=100, activation='relu', input_dim=11))
model4.add(Dense(units=100, activation='relu'))
model4.add(Dense(units=2, activation='softmax'))

model4.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model4.fit(X_train_scaled4, y_train_categorical4, epochs=100, shuffle=True, verbose=2)

model_loss, model_accuracy = model4.evaluate(X_test_scaled4, y_test_categorical4, verbose=2)
print(f"J vs. P - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
 - 1s - loss: 0.6782 - accuracy: 0.5959
Epoch 2/100
 - 0s - loss: 0.6714 - accuracy: 0.5951
Epoch 3/100
 - 0s - loss: 0.6690 - accuracy: 0.5968
Epoch 4/100
 - 0s - loss: 0.6665 - accuracy: 0.6031
Epoch 5/100
 - 0s - loss: 0.6658 - accuracy: 0.6008
Epoch 6/100
 - 0s - loss: 0.6650 - accuracy: 0.6044
Epoch 7/100
 - 1s - loss: 0.6626 - accuracy: 0.6076
Epoch 8/100
 - 0s - loss: 0.6608 - accuracy: 0.6048
Epoch 9/100
 - 0s - loss: 0.6604 - accuracy: 0.6084
Epoch 10/100
 - 1s - loss: 0.6593 - accuracy: 0.6107
Epoch 11/100
 - 1s - loss: 0.6577 - accuracy: 0.6111
Epoch 12/100
 - 0s - loss: 0.6551 - accuracy: 0.6187
Epoch 13/100
 - 0s - loss: 0.6530 - accuracy: 0.6190
Epoch 14/100
 - 1s - loss: 0.6514 - accuracy: 0.6190
Epoch 15/100
 - 1s - loss: 0.6497 - accuracy: 0.6228
Epoch 16/100
 - 1s - loss: 0.6483 - accuracy: 0.6234
Epoch 17/100
 - 0s - loss: 0.6442 - accuracy: 0.6267
Epoch 18/100
 - 0s - loss: 0.6443 - accuracy: 0.6320
Epoch 19/100
 - 0s - loss: 0.6401 - accuracy: 0.6336
Ep

# Hypertune

In [89]:
# Tune Batch Size and Number of Epochs
def create_model(init_mode='uniform'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation='relu', input_dim=11))
    model.add(Dense(units=100, activation='relu'))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
seed = 7
np.random.seed(seed)
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
model_CV1 = KerasClassifier(build_fn=create_model,verbose=2)
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid1 = GridSearchCV(model_CV1, param_grid, n_jobs=-1, cv=3)

In [90]:
grid_result1=grid1.fit(X_train_scaled4, y_train_categorical4)
print("Best: %f using %s" % (grid_result1.best_score_, grid_result1.best_params_))
means = grid_result1.cv_results_['mean_test_score']
stds = grid_result1.cv_results_['std_test_score']
params = grid_result1.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 1s - loss: 0.6848 - accuracy: 0.5813
Epoch 2/10
 - 0s - loss: 0.6725 - accuracy: 0.5988
Epoch 3/10
 - 0s - loss: 0.6686 - accuracy: 0.6013
Epoch 4/10
 - 0s - loss: 0.6692 - accuracy: 0.6024
Epoch 5/10
 - 0s - loss: 0.6679 - accuracy: 0.5988
Epoch 6/10
 - 0s - loss: 0.6660 - accuracy: 0.6010
Epoch 7/10
 - 0s - loss: 0.6654 - accuracy: 0.6019
Epoch 8/10
 - 0s - loss: 0.6638 - accuracy: 0.6033
Epoch 9/10
 - 0s - loss: 0.6624 - accuracy: 0.6073
Epoch 10/10
 - 0s - loss: 0.6622 - accuracy: 0.6030
Best: 0.596065 using {'batch_size': 100, 'epochs': 10}
0.579311 (0.016284) with: {'batch_size': 10, 'epochs': 10}
0.536274 (0.013647) with: {'batch_size': 10, 'epochs': 50}
0.533508 (0.006453) with: {'batch_size': 10, 'epochs': 100}
0.585613 (0.011352) with: {'batch_size': 20, 'epochs': 10}
0.545035 (0.014079) with: {'batch_size': 20, 'epochs': 50}
0.529511 (0.012933) with: {'batch_size': 20, 'epochs': 100}
0.585613 (0.004257) with: {'batch_size': 40, 'epochs': 10}
0.555948 (0.011431)

In [93]:
# Tune the Training Optimization Algorithm
def create_model1(optimizer='adam'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation='relu', input_dim=11))
    model.add(Dense(units=100, activation='relu'))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
model_CV2 = KerasClassifier(build_fn=create_model1, epochs=10, batch_size=100, verbose=2)
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid1 = dict(optimizer=optimizer)
grid2 = GridSearchCV(estimator=model_CV2, param_grid=param_grid1, n_jobs=-1, cv=3)
grid_result2 = grid2.fit(X_train_scaled4, y_train_categorical4)

print("Best: %f using %s" % (grid_result2.best_score_, grid_result2.best_params_))
means = grid_result2.cv_results_['mean_test_score']
stds = grid_result2.cv_results_['std_test_score']
params = grid_result2.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 0s - loss: 0.6768 - accuracy: 0.5936
Epoch 2/10
 - 0s - loss: 0.6748 - accuracy: 0.6007
Epoch 3/10
 - 0s - loss: 0.6736 - accuracy: 0.6025
Epoch 4/10
 - 0s - loss: 0.6727 - accuracy: 0.6022
Epoch 5/10
 - 0s - loss: 0.6722 - accuracy: 0.6018
Epoch 6/10
 - 0s - loss: 0.6716 - accuracy: 0.6019
Epoch 7/10
 - 0s - loss: 0.6712 - accuracy: 0.6024
Epoch 8/10
 - 0s - loss: 0.6709 - accuracy: 0.6025
Epoch 9/10
 - 0s - loss: 0.6705 - accuracy: 0.6024
Epoch 10/10
 - 0s - loss: 0.6702 - accuracy: 0.6027
Best: 0.601137 using {'optimizer': 'SGD'}
0.601137 (0.003553) with: {'optimizer': 'SGD'}
0.586228 (0.008644) with: {'optimizer': 'RMSprop'}
0.593452 (0.006462) with: {'optimizer': 'Adagrad'}
0.574393 (0.003668) with: {'optimizer': 'Adadelta'}
0.587765 (0.006600) with: {'optimizer': 'Adam'}
0.590993 (0.003518) with: {'optimizer': 'Adamax'}
0.578235 (0.015531) with: {'optimizer': 'Nadam'}


In [94]:
# Tune the Neuron Activation Function
def create_model2(activation='relu'):
    # define model
    model = Sequential()
    model.add(Dense(units=100, activation=activation, input_dim=11))
    model.add(Dense(units=100, activation=activation))
    model.add(Dense(units=2, activation='softmax'))
    # compile model
    model.compile(
    optimizer='SGD',
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
    return model
model_CV3 = KerasClassifier(build_fn=create_model2, epochs=10, batch_size=100, verbose=2)
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid2 = dict(activation=activation)
grid3 = GridSearchCV(estimator=model_CV3, param_grid=param_grid2, n_jobs=-1, cv=3)

grid_result3 = grid3.fit(X_train_scaled4, y_train_categorical4)

print("Best: %f using %s" % (grid_result3.best_score_, grid_result3.best_params_))
means = grid_result3.cv_results_['mean_test_score']
stds = grid_result3.cv_results_['std_test_score']
params = grid_result3.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/10
 - 1s - loss: 0.6868 - accuracy: 0.6016
Epoch 2/10
 - 0s - loss: 0.6798 - accuracy: 0.6016
Epoch 3/10
 - 0s - loss: 0.6762 - accuracy: 0.6016
Epoch 4/10
 - 0s - loss: 0.6744 - accuracy: 0.6016
Epoch 5/10
 - 0s - loss: 0.6734 - accuracy: 0.6016
Epoch 6/10
 - 0s - loss: 0.6729 - accuracy: 0.6016
Epoch 7/10
 - 0s - loss: 0.6727 - accuracy: 0.6016
Epoch 8/10
 - 0s - loss: 0.6725 - accuracy: 0.6016
Epoch 9/10
 - 0s - loss: 0.6724 - accuracy: 0.6016
Epoch 10/10
 - 0s - loss: 0.6724 - accuracy: 0.6016
Best: 0.601599 using {'activation': 'softmax'}
0.601599 (0.003928) with: {'activation': 'softmax'}
0.568398 (0.044231) with: {'activation': 'softplus'}
0.599908 (0.005997) with: {'activation': 'softsign'}
0.600676 (0.004224) with: {'activation': 'relu'}
0.601599 (0.003600) with: {'activation': 'tanh'}
0.601599 (0.003928) with: {'activation': 'sigmoid'}
0.601599 (0.003928) with: {'activation': 'hard_sigmoid'}
0.599754 (0.007584) with: {'activation': 'linear'}


In [95]:
model4 = Sequential()
model4.add(Dense(units=100, activation='softmax', input_dim=11))
model4.add(Dense(units=100, activation='softmax'))
model4.add(Dense(units=2, activation='softmax'))

model4.compile(
    optimizer='SGD',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model4.fit(X_train_scaled4, y_train_categorical4, batch_size=100, epochs=10, shuffle=True, verbose=2)

model_loss, model_accuracy = model4.evaluate(X_test_scaled4, y_test_categorical4, verbose=2)
print(f"J vs. P - Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/10
 - 0s - loss: 0.6869 - accuracy: 0.6016
Epoch 2/10
 - 0s - loss: 0.6801 - accuracy: 0.6016
Epoch 3/10
 - 0s - loss: 0.6765 - accuracy: 0.6016
Epoch 4/10
 - 0s - loss: 0.6745 - accuracy: 0.6016
Epoch 5/10
 - 0s - loss: 0.6734 - accuracy: 0.6016
Epoch 6/10
 - 0s - loss: 0.6729 - accuracy: 0.6016
Epoch 7/10
 - 0s - loss: 0.6727 - accuracy: 0.6016
Epoch 8/10
 - 0s - loss: 0.6725 - accuracy: 0.6016
Epoch 9/10
 - 0s - loss: 0.6724 - accuracy: 0.6016
Epoch 10/10
 - 0s - loss: 0.6724 - accuracy: 0.6016
J vs. P - Loss: 0.6683037888899329, Accuracy: 0.6118026971817017


In [96]:
model1.save('Models/NN_jp.h5')

# Test

In [97]:
i_e = load_model('Models/NN_ie.h5')
n_s = load_model('Models/NN_ns.h5')
f_t = load_model('Models/NN_ft.h5')
j_p = load_model('Models/NN_jp.h5')



In [None]:
pred = test.predict_classes(X_test_scaled1[:15])

In [None]:
y_test_categorical1[:15]