In [1]:

import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

import matplotlib.pyplot as plt
from tqdm import tqdm


In [2]:
d = {'INTJ':'You are thoughtful, rational, quick-witted and independent. However, sometimes you are known to be overly critical, and have a combative side to yourself.', 'INTP':'You are unique, creative, inventive and imaginative. However, sometimes you are known to be a bit insensitive and impatient with others.', 'ENTJ':'You are determined, charismatic, confident and authoritative. However, sometimes you are known to  be intolerant of other people\'s weaknesses, and slightly arrogant.', 'ENTP':'You are audacious, bold, playful and rebellious. However, sometimes you can find it difficult to focus, and dislike talking about practical matters.', 'INFJ':'You are creative, insightful, passionate and have strong morals. However, sometimes you are a bit of a perfectionist, and find it reluctant to open up to other people.','INFP':'You are empathetic, generous, creative and passionate. However, sometimes your goals are a bit unrealistic and you tend to lack focus sometimes.', 'ENFJ':'You are passionate, reliable, charismatic and very receptive. However, sometimes you can be overly empathetic and condescending toward other people.', 'ENFP' : 'You are enthusiastic, festive, good-natured and excellent at communicating. However, you sometimes focus on being a people pleaser and disorganized.', 'ISTJ': 'You are very responsible, strong-willed, calm and enforce order. However, you are known to be stubborn and are somewhat judgemental sometimes', 'ISFJ': 'You are reliable, observant, enthusiastic and supportive. However, you are known to be overly humble and tend to take things personally', 'ESTJ' : 'You are dedicated, strong-willed, loyal and reliable. However, you find it difficult to relax, or share what you\'re feeling with other people.', 'ESFJ': 'You are very loyal, sensitive to other people\'s feelings, and have strong practical skills. However, you are sometimes worried about your social status and tend to be vulnerable to criticism.', 'ISTP': 'You are spotaneous, rational, optimistic and know how to prioritize things. However, you are known to be stubborn and get bored very easily.',  'ISFP' : 'You are charming, imaginative, passionate and sensitive to others. However, you are fiercely independent and get stressed out pretty easily.', 'ESTP': 'You are perceptive, direct, bold and rational. However, you tend to be defiant and may sometimes miss the bigger picture in favor of smaller victories.', 'ESFP' : 'You are observant, practical, have excellent people skills and are fond of showmanship. However, you are very sensitive and sometimes avoid conflict entirely.'}


In [3]:
mbti_df = pd.read_csv("mbti_1.csv")



mind_names = ["E" , "I"]
mind = []  # E or I

energy_names = ["N",  "S"]
energy = [] # N or S

nature_names = ["F", "T"]
nature = [] # F or T

tactics_names = ["J", "P"]

tactics = [] # J OR P

for t in mbti_df.type:
    mind.append(mind_names.index(t[0]))
    energy.append(energy_names.index(t[1]))
    nature.append(nature_names.index(t[2]))
    tactics.append(tactics_names.index(t[3]))

mbti_df['mind'] = mind
mbti_df['energy'] = energy
mbti_df['nature'] = nature
mbti_df['tactics'] = tactics


In [4]:
from googletrans import Translator
import re
import nltk
from nltk.stem import WordNetLemmatizer


from nltk.corpus import stopwords


translator = Translator()


def translate_text(text):
    result=""
    try:
        for sentence in text.split("."):
            result=result+translator.translate(sentence,dest='ko').text
    
        print(result)
        return result
    except Exception:
        return text

def replace_text(text):
    lemmatizer=WordNetLemmatizer()
    stop_words = set(stopwords.words('english')) # Load stop words
    pers_types = ['INFP' ,'INFJ', 'INTP', 'INTJ', 'ENTP', 'ENFP', 'ISTP' ,'ISFP' ,'ENTJ', 'ISTJ','ENFJ', 'ISFJ' ,'ESTP', 'ESFP' ,'ESFJ' ,'ESTJ']
    pers_types = [p.lower() for p in pers_types]  
    try:
        
        text=re.sub('https?://[^\s<>"]+|www\.[^\s<>"]+',' ',text)
        
        text=re.sub('[^0-9a-z]',' ',text)
        text=text.lower()    
        text = " ".join([word for word in text.split() if word not in stop_words]) # Remove stop words
        #print(len(sentence))
        
        for p in pers_types:
            text = re.sub(p, '', text)
        #print(len(sentence))
        
        text = lemmatizer.lemmatize(text) # Lemmatize words
    except:
        ''''''    
    return text    








In [5]:
mbti_df['posts']=mbti_df['posts'].apply(replace_text)
# mbti_df['posts']=mbti_df['posts'].apply(translate_text)
# mbti_df.to_csv('mbti_1_kor.csv')

# mbti_df.to_csv('mbti_1_replace.csv')

In [6]:
mbti_df

Unnamed: 0,type,posts,mind,energy,nature,tactics
0,INFJ,moments sportscenter top ten plays pranks hat...,1,0,0,0
1,ENTP,finding lack posts alarming ex boring position...,0,0,1,1
2,INTP,ood one course say know blessing curse oes abs...,1,0,1,1
3,INTJ,ear enjoyed conversation day soteric gabbing n...,1,0,1,0
4,ENTJ,ou fired hat another silly misconception hat a...,0,0,1,0
...,...,...,...,...,...,...
8670,ISFP,always think cats doms reason websites become ...,1,1,0,1
8671,ENFP,thread already exists someplace else heck dele...,0,0,0,1
8672,INTP,many questions things would take purple pill i...,1,0,1,1
8673,INFP,conflicted right comes wanting children honest...,1,0,0,1


In [7]:
from sklearn.model_selection import train_test_split
max_len=50000
max_words=10000
label_mind = mbti_df.mind
label_energy = mbti_df.energy
label_nature = mbti_df.nature
label_tactics = mbti_df.tactics
feature = mbti_df.posts

tokenizer=tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(feature)

feature=tokenizer.texts_to_sequences(feature)

max_length=max([len(seq) for seq in feature])
vocabulary_size=len(tokenizer.word_index)+1
embedding_dim=100


feature=tf.keras.preprocessing.sequence.pad_sequences(feature,maxlen=max_length)

label_mind = np.array(label_mind)
label_energy = np.array(label_energy)
label_nature = np.array(label_nature)
label_tactics = np.array(label_tactics)



feature_train, feature_test, labelm_train, labelm_test = train_test_split (feature, label_mind, test_size =.3 , random_state= 42, stratify= label_mind)
feature_train, feature_test, labele_train, labele_test = train_test_split (feature, label_energy, test_size =.3 , random_state= 42, stratify= label_energy)
feature_train, feature_test, labeln_train, labeln_test = train_test_split (feature, label_nature, test_size =.3 , random_state= 42, stratify= label_nature)
feature_train, feature_test, labelt_train, labelt_test = train_test_split (feature, label_tactics, test_size =.3 , random_state= 42, stratify= label_nature)



In [8]:

def createModel():
  model = tf.keras.Sequential()
  # model.add(hub_layer)
  model.add(tf.keras.layers.Embedding(input_dim=vocabulary_size,output_dim=embedding_dim,input_length=max_length))
  # model.add(tf.keras.layers.Embedding(vocab_len+1,embedding_dim,input_length=max_length))
  model.add(tf.keras.layers.Dense(16, activation='relu'))
  model.add(tf.keras.layers.Dense(1,))

  # model.summary()
  model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])
  return model

In [9]:
def normalize(res, range, minima):
  normalized_vals = []
  for arr in res:
    normalized_vals.append((arr[0] + abs(minima))/range)
  return normalized_vals

In [10]:
feature
# feature_val = feature_train[3036:]
# partial_feature_train = feature_train[:3036]

array([[    0,     0,     0, ...,   181,   121,    32],
       [    0,     0,     0, ...,    44,    44,   117],
       [    0,     0,     0, ...,  1264, 58338, 58339],
       ...,
       [    0,     0,     0, ...,  2609,   590,  1538],
       [    0,     0,     0, ...,  1301,  1197,   115],
       [    0,     0,     0, ...,    91,    56,    50]])

In [11]:
tokenizer.word_index
import csv
with open('word_index.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['word', 'index'])
    for word, index in tokenizer.word_index.items():
        writer.writerow([word, index])



In [12]:

# m_val = labelm_train[3036:]
# partial_m_train = labelm_train[:3036]

# e_val = labele_train[3036:]
# partial_e_train = labele_train[:3036]

# n_val = labeln_train[3036:]
# partial_n_train = labeln_train[:3036]

# t_val = labelt_train[3036:]
# partial_t_train = labelt_train[:3036]

In [13]:
def float_to_mind(float_results):
    res = []
    for num in float_results:
        if(num < 0.5):
            res.append("E")
        else:
            res.append("I")
    return res


def float_to_energy(float_results):
    res = []
    for num in float_results:
        if(num < 0.5):
            res.append("N")
        else:
            res.append("S")
    return res


def float_to_nature(float_results):
    res = []
    for num in float_results:
        if(num < 0.5):
            res.append("F")
        else:
            res.append("T")
    return res   

def float_to_tactics(float_results):
    res = []
    for num in float_results:
        if(num < 0.5):
            res.append("J")
        else:
            res.append("P")
    return res

In [14]:

mind = createModel()
history = mind.fit(feature,
                    label_mind,
                    epochs=40,
                    batch_size=512,
                     validation_split=0.2,
                    )

# tfjs.converters.save_keras_model(mind, "mind")

Epoch 1/40


ResourceExhaustedError: Graph execution error:

Detected at node 'Cast_19' defined at (most recent call last):
    File "c:\Users\taegi\anaconda3\envs\env38\lib\runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\taegi\anaconda3\envs\env38\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\traitlets\config\application.py", line 982, in launch_instance
      app.start()
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\taegi\anaconda3\envs\env38\lib\asyncio\base_events.py", line 570, in run_forever
      self._run_once()
    File "c:\Users\taegi\anaconda3\envs\env38\lib\asyncio\base_events.py", line 1859, in _run_once
      handle._run()
    File "c:\Users\taegi\anaconda3\envs\env38\lib\asyncio\events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\ipykernel\ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\IPython\core\interactiveshell.py", line 2940, in run_cell
      result = self._run_cell(
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\IPython\core\interactiveshell.py", line 2995, in _run_cell
      return runner(coro)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\IPython\core\interactiveshell.py", line 3194, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\IPython\core\interactiveshell.py", line 3373, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\IPython\core\interactiveshell.py", line 3433, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\taegi\AppData\Local\Temp\ipykernel_23424\1944590311.py", line 2, in <module>
      history = mind.fit(feature,
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\engine\training.py", line 1685, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\engine\training.py", line 1284, in train_function
      return step_function(self, iterator)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\engine\training.py", line 1268, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\engine\training.py", line 1249, in run_step
      outputs = model.train_step(data)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\engine\training.py", line 1055, in train_step
      return self.compute_metrics(x, y, y_pred, sample_weight)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\engine\training.py", line 1149, in compute_metrics
      self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\engine\compile_utils.py", line 605, in update_state
      metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\utils\metrics_utils.py", line 77, in decorated
      update_op = update_state_fn(*args, **kwargs)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\metrics\base_metric.py", line 140, in update_state_fn
      return ag_update_state(*args, **kwargs)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\metrics\base_metric.py", line 691, in update_state
      matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\metrics\accuracy_metrics.py", line 395, in binary_accuracy
      metrics_utils.binary_matches(y_true, y_pred, threshold), axis=-1
    File "c:\Users\taegi\anaconda3\envs\env38\lib\site-packages\keras\utils\metrics_utils.py", line 934, in binary_matches
      return tf.cast(tf.equal(y_true, y_pred), backend.floatx())
Node: 'Cast_19'
OOM when allocating tensor with shape[512,907,512] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[{{node Cast_19}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_1173]

In [None]:
energy = createModel()
history = energy.fit(feature,
                    label_energy,
                    epochs=40,
                    batch_size=512,
                    validation_split=0.2,
                   )
# tfjs.converters.save_keras_model(energy, "energy")

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:
nature = createModel()
history = nature.fit(feature,
                    label_nature,
                    epochs=40,
                    batch_size=512,
                    validation_split=0.2,
                   )
# tfjs.converters.save_keras_model(model, "nature")

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:
tactics = createModel()
history = tactics.fit(feature,
                    label_tactics,
                    epochs=40,
                    batch_size=512,
                    validation_split=0.2,
                   )
#tfjs.converters.save_keras_model(model, "tactics")


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:

# Mind
res1 = mind.predict(feature_train)
m_minima = float((min(res1))[0])
m_maxima = float((max(res1))[0])
m_range = m_maxima-m_minima

# Energy
res1 = energy.predict(feature_train)
e_minima = float((min(res1))[0])
e_maxima = float((max(res1))[0])
e_range = e_maxima-e_minima

# Nature
res1 = nature.predict(feature_train)
n_minima = float((min(res1))[0])
n_maxima = float((max(res1))[0])
n_range = n_maxima-n_minima

# Tactics
res1 = tactics.predict(feature_train)
t_minima = float((min(res1))[0])
t_maxima = float((max(res1))[0])
t_range = t_maxima-t_minima



ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:

def predict(input_string):
  input_arr = []
  input_arr.append(input_string)

  # Mind
  results = mind.predict(input_arr)
  mind_res = (float_to_mind(normalize(results, m_range, m_minima)))[0]

  # Energy
  results = energy.predict(input_arr)
  energy_res = (float_to_energy(normalize(results, e_range, e_minima)))[0]
  
  # Nature
  results = nature.predict(input_arr)
  nature_res = (float_to_nature(normalize(results, n_range, n_minima)))[0]

  # Tactics
  results = tactics.predict(input_arr)
  tactics_res = (float_to_tactics(normalize(results, t_range, t_minima)))[0]

  return mind_res + energy_res + nature_res + tactics_res

In [None]:
results = predict(input())
print(results)
print(d[results])

# mind.save('mind.h5')
# energy.save('energy.h5')
# nature.save('nature.h5')
# tactics.save('tactics.h5')

In [None]:
# energy.save('./energys', save_format='tf')

# mind.save('./minds', save_format='tf')
# nature.save('./natures', save_format='tf')
# tactics.save('./tacticss', save_format='tf')




INFO:tensorflow:Assets written to: ./energys\assets


INFO:tensorflow:Assets written to: ./energys\assets


INFO:tensorflow:Assets written to: ./minds\assets


INFO:tensorflow:Assets written to: ./minds\assets


INFO:tensorflow:Assets written to: ./natures\assets


INFO:tensorflow:Assets written to: ./natures\assets


INFO:tensorflow:Assets written to: ./tacticss\assets


INFO:tensorflow:Assets written to: ./tacticss\assets
