In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
import numpy as np

In [2]:
mbti= pd.read_csv('./final_project/mbti_changed.csv')

#### Binary prediction 

- Introvert: 1 - Extrovert: 0
- Intuition:1 - Sensing: 0
- Thinking: 1 - Sensing: 0
- Judging 1 - Perceiving: 0

In [3]:
def introvert(personality):
    
    if 'I' in personality:
        return 1
    else:
        return 0
mbti['introv_extrov']= mbti.apply(lambda x: introvert(x['type']), axis=1)

def intuition(personality):
    
    if 'N' in personality:
        return 1
    else:
        return 0
mbti['intuit_sensin']= mbti.apply(lambda x: intuition(x['type']), axis=1)

def thinking(personality):
    
    if 'T' in personality:
        return 1
    else:
        return 0
mbti['thinkin_feelin']= mbti.apply(lambda x: thinking(x['type']), axis=1)

def judging(personality):
    
    if 'J' in personality:
        return 1
    else:
        return 0
mbti['judg_percev']= mbti.apply(lambda x: judging(x['type']), axis=1)


In [4]:
mbti.head()

Unnamed: 0,type,posts,type_index,cleaned_text,introv_extrov,intuit_sensin,thinkin_feelin,judg_percev
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...,8,enfp and intj moments sportscenter not t...,1,1,0,1
1,ENTP,'I'm finding the lack of me in these posts ver...,3,im finding the lack of me in these pos...,0,1,1,0
2,INTP,'Good one _____ https://www.youtube.com/wat...,11,good one of course to which i say i ...,1,1,1,0
3,INTJ,"'Dear INTP, I enjoyed our conversation the o...",10,dear intp i enjoyed our conversation the...,1,1,1,1
4,ENTJ,'You're fired.|||That's another silly misconce...,2,youre fired thats another silly misconcep...,0,1,1,1


In [5]:
train,test= train_test_split(mbti)
train,val= train_test_split(train)

print(train.shape)
print(val.shape)
print(test.shape)

(4879, 8)
(1627, 8)
(2169, 8)


In [6]:
embed= hub.load("https://tfhub.dev/google/nnlm-en-dim50/2")
hub_layer=hub.KerasLayer(embed, input_shape=[], dtype=tf.string, trainable=True)

In [7]:
model=tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(16, activation='softmax'))
model.add(tf.keras.layers.Dense(1))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 50)                48190600  
_________________________________________________________________
dense (Dense)                (None, 16)                816       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 48,191,433
Trainable params: 48,191,433
Non-trainable params: 0
_________________________________________________________________


In [8]:
model.compile(optimizer='adam', 
             loss= tf.keras.losses.BinaryCrossentropy(from_logits=True),
             metrics=tf.metrics.BinaryAccuracy())

#### Introvertion vs Extrovertion

In [9]:
intro_extr= np.array(train['introv_extrov'])
intro_extr_val= np.array(val['introv_extrov'])

In [10]:
train_array= np.array(train['cleaned_text'])
val_array= np.array(val['cleaned_text'])

In [11]:
model.fit(train_array, intro_extr, validation_data=(val_array, intro_extr_val), verbose=1, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f8ffa6ca430>

In [12]:
intro_extr= np.array(test['introv_extrov'])
test_array= np.array(test['cleaned_text'])

model.evaluate(test_array, intro_extr, verbose=1)



[0.39215466380119324, 0.8354080319404602]

#### Intuition vs Sensing

In [13]:
intuit_sens= np.array(train['intuit_sensin'])
intuit_sens_val= np.array(val['intuit_sensin'])
intuit_sens_test=np.array(test['intuit_sensin'])

In [14]:
model.fit(train_array, intuit_sens, validation_data=(val_array, intuit_sens_val), verbose=1, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f8f9653e0d0>

In [15]:
model.evaluate(test_array, intuit_sens_test, verbose=1)



[0.4194318354129791, 0.8524665832519531]

#### Thinking vs Feeling

In [16]:
think_feel= np.array(train['thinkin_feelin'])
think_feel_val= np.array(val['thinkin_feelin'])
think_feel_test=np.array(test['thinkin_feelin'])

In [17]:
model.fit(train_array, think_feel, validation_data=(val_array, think_feel_val), verbose=1, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f8f96511be0>

In [18]:
model.evaluate(test_array, think_feel_test, verbose=1)



[0.47073987126350403, 0.8086676001548767]

#### Judging vs Perceiving

In [19]:
judg_perceiv= np.array(train['judg_percev'])
judg_perceiv_val= np.array(val['judg_percev'])
judg_perceiv_test=np.array(test['judg_percev'])

In [20]:
model.fit(train_array, judg_perceiv, validation_data=(val_array, judg_perceiv_val), verbose=1, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f8f96539f40>

In [21]:
model.evaluate(test_array, judg_perceiv_test, verbose=1)



[0.6695128083229065, 0.6177962422370911]