In [28]:
#necessary
import pandas as pd
import numpy as np
import pickle
#plotting
import matplotlib.pyplot as plt
import seaborn as sns

#NPL
import re
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import LabelEncoder
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
#Ml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings("ignore")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [29]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [30]:
df2 = pd.read_csv('/content/drive/MyDrive/MBIT_data/mbti_1.csv')
data=df2.copy()

In [31]:
# MBTI unique types and binary translation dictionaries
unique_type_list = ['infj', 'entp', 'intp', 'intj', 'entj', 'enfj', 'infp', 'enfp', 'istp', 'isfp', 'isfj', 'istj', 'estp', 'esfp', 'estj', 'esfj']
b_Pers = {'I': 0, 'E': 1, 'N': 0, 'S': 1, 'F': 0, 'T': 1, 'J': 0, 'P': 1}
b_Pers_list = [{0: 'I', 1: 'E'}, {0: 'N', 1: 'S'}, {0: 'F', 1: 'T'}, {0: 'J', 1: 'P'}]

def translate_personality(personality):
    # Transform mbti to binary vector
    return [b_Pers[l] for l in personality]

# To show result output for personality prediction
def translate_back(personality):
    # Transform binary vector to mbti personality
    s = ""
    for i, l in enumerate(personality):
        s += b_Pers_list[i][l]
    return s


# Posts preprocessor

In [32]:
# Define stop words and lemmatizer
useless_words = set(stopwords.words('english'))
lemmatiser = WordNetLemmatizer()

def preprocess_posts(posts, remove_stop_words=True, remove_mbti_profiles=True):
    # Remove and clean comments
    temp = posts

    # Remove url links
    temp = re.sub('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', ' ', temp)

    # Remove Non-words - keep only words
    temp = re.sub("[^a-zA-Z]", " ", temp)

    # Remove spaces > 1
    temp = re.sub(' +', ' ', temp).lower()

    # Remove multiple letter repeating words
    temp = re.sub(r'([a-z])\1{2,}[\s|\w]*', '', temp)

    # Remove stop words
    if remove_stop_words:
        temp = " ".join([lemmatiser.lemmatize(w) for w in temp.split(' ') if w not in useless_words])
    else:
        temp = " ".join([lemmatiser.lemmatize(w) for w in temp.split(' ')])

    # Remove MBTI personality words from posts
    if remove_mbti_profiles:
        for t in unique_type_list:
            temp = temp.replace(t, "")

    return temp


# MBIT to binary val transformer

In [33]:
def transform_mbti_to_binary(data):
    list_personality = []
    len_data = len(data)
    i = 0

    for row in data.iterrows():
        # Transform mbti to binary vector
        type_labelized = translate_personality(row[1].type)  # or use lab_encoder.transform([row[1].type])[0]
        list_personality.append(type_labelized)

    list_personality = np.array(list_personality)
    return list_personality

# To combine 2 functoins. Use for training

In [34]:
def combined_preprocessing(data, remove_stop_words=True, remove_mbti_profiles=True):
    list_posts = []
    len_data = len(data)
    i = 0

    for row in data.iterrows():
        # check code working
        # i+=1
        # if (i % 500 == 0 or i == 1 or i == len_data):
        #     print("%s of %s rows" % (i, len_data))

        # Preprocess posts
        temp = preprocess_posts(row[1].posts, remove_stop_words, remove_mbti_profiles)
        list_posts.append(temp)

    list_posts = np.array(list_posts)
    list_personality = transform_mbti_to_binary(data)

    return list_posts, list_personality


In [35]:
list_posts, list_personality = combined_preprocessing(data, remove_stop_words=True, remove_mbti_profiles=True)

print("Example:")
print("\nPost before preprocessing:\n\n", data.posts.iloc[0])
print("\nPost after preprocessing:\n\n", list_posts[0])
print("\nMBTI before preprocessing:\n\n", data.type.iloc[0])
print("\nMBTI after preprocessing:\n\n", list_personality[0])


Example:

Post before preprocessing:

 'http://www.youtube.com/watch?v=qsXHcwe3krw|||http://41.media.tumblr.com/tumblr_lfouy03PMA1qa1rooo1_500.jpg|||enfp and intj moments  https://www.youtube.com/watch?v=iz7lE1g4XM4  sportscenter not top ten plays  https://www.youtube.com/watch?v=uCdfze1etec  pranks|||What has been the most life-changing experience in your life?|||http://www.youtube.com/watch?v=vXZeYwwRDw8   http://www.youtube.com/watch?v=u8ejam5DP3E  On repeat for most of today.|||May the PerC Experience immerse you.|||The last thing my INFJ friend posted on his facebook before committing suicide the next day. Rest in peace~   http://vimeo.com/22842206|||Hello ENFJ7. Sorry to hear of your distress. It's only natural for a relationship to not be perfection all the time in every moment of existence. Try to figure the hard times as times of growth, as...|||84389  84390  http://wallpaperpassion.com/upload/23700/friendship-boy-and-girl-wallpaper.jpg  http://assets.dornob.com/wp-content/upl

# 👍

In [36]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Define the vectorizer
tfidf_vectorizer = TfidfVectorizer(
    analyzer="word",
    max_features=1000,
    max_df=0.7,
    min_df=0.1
)

# Fit and transform the list_posts directly to a TF-IDF representation
print("Using TfidfVectorizer:")
X_tfidf = tfidf_vectorizer.fit_transform(list_posts)

# The shape of the TF-IDF matrix
print("Now the dataset size is as below")
print(X_tfidf.shape)

# Feature names
feature_names = tfidf_vectorizer.get_feature_names_out()
print("10 feature names can be seen below")
print(list(enumerate(feature_names[:10])))


Using TfidfVectorizer:
Now the dataset size is as below
(8675, 595)
10 feature names can be seen below
[(0, 'ability'), (1, 'able'), (2, 'absolutely'), (3, 'across'), (4, 'act'), (5, 'action'), (6, 'actually'), (7, 'add'), (8, 'advice'), (9, 'afraid')]


In [37]:
# Initialize personality types
personality_type = [ "IE: Introversion (I) / Extroversion (E)", "NS: Intuition (N) / Sensing (S)",
                   "FT: Feeling (F) / Thinking (T)", "JP: Judging (J) / Perceiving (P)"  ]

for l in range(len(personality_type)):
    print(personality_type[l])

IE: Introversion (I) / Extroversion (E)
NS: Intuition (N) / Sensing (S)
FT: Feeling (F) / Thinking (T)
JP: Judging (J) / Perceiving (P)


In [38]:
print("For MBTI personality type : %s" % translate_back(list_personality[2,:]))
print("Y : Binarized MBTI 1st row: %s" % list_personality[2,:])

For MBTI personality type : INTP
Y : Binarized MBTI 1st row: [0 0 1 1]


#👍

In [39]:
models = {}
X=X_tfidf.toarray()
# Individually training each MBTI personality type
for l in range(len(personality_type)):

    Y = list_personality[:, l]

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=7)

    # Fit model on training data
    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Save the trained model to the dictionary
    models[personality_type[l]] = model

    # Make predictions for test data
    y_pred = model.predict(X_test)

    predictions = [round(value) for value in y_pred]

    # Evaluate predictions
    accuracy = accuracy_score(y_test, predictions)

    print("%s Accuracy: %.2f%%" % (personality_type[l], accuracy * 100.0))

IE: Introversion (I) / Extroversion (E) Accuracy: 77.54%
NS: Intuition (N) / Sensing (S) Accuracy: 86.06%
FT: Feeling (F) / Thinking (T) Accuracy: 72.44%
JP: Judging (J) / Perceiving (P) Accuracy: 64.51%


In [40]:
# Save the models to disk
with open('mbti_models.pkl', 'wb') as f:
    pickle.dump(models, f)

In [41]:
# Save the tfidf vectorizer to disk
with open('tfidf_vectorizer.pkl', 'wb') as f:
    pickle.dump(tfidf_vectorizer, f)

# You can start from here

In [42]:
import re
import numpy as np
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Defining stop words and lemmatizer
useless_words = set(stopwords.words('english'))
lemmatiser = WordNetLemmatizer()

# Load the trained models
with open('../pkls/mbti_models.pkl', 'rb') as f:
    models = pickle.load(f)

# Load the trained TF-IDF vectorizer
with open('../pkls/tfidf_vectorizer.pkl', 'rb') as f:
    tfidf_vectorizer = pickle.load(f)

personality_type = [ "IE: Introversion (I) / Extroversion (E)", "NS: Intuition (N) / Sensing (S)",
                   "FT: Feeling (F) / Thinking (T)", "JP: Judging (J) / Perceiving (P)"  ]

unique_type_list = ['infj', 'entp', 'intp', 'intj', 'entj', 'enfj', 'infp', 'enfp', 'istp', 'isfp', 'isfj', 'istj', 'estp', 'esfp', 'estj', 'esfj']

In [43]:
def preprocess_posts(posts, remove_stop_words=True, remove_mbti_profiles=True):
    # Remove and clean comments
    temp = posts

    # Remove url links
    temp = re.sub('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', ' ', temp)

    # Remove Non-words - keep only words
    temp = re.sub("[^a-zA-Z]", " ", temp)

    # Remove spaces > 1
    temp = re.sub(' +', ' ', temp).lower()

    # Remove multiple letter repeating words
    temp = re.sub(r'([a-z])\1{2,}[\s|\w]*', '', temp)

    # Remove stop words
    if remove_stop_words:
        temp = " ".join([lemmatiser.lemmatize(w) for w in temp.split(' ') if w not in useless_words])
    else:
        temp = " ".join([lemmatiser.lemmatize(w) for w in temp.split(' ')])

    # Remove MBTI personality words from posts
    if remove_mbti_profiles:
        for t in unique_type_list:
            temp = temp.replace(t, "")

    return temp

In [44]:
def predict_personality(input_text):
    # Preprocess the input text
    preprocessed_text = preprocess_posts(input_text)

    # Transform the preprocessed text using the loaded TF-IDF vectorizer
    transformed_text = tfidf_vectorizer.transform([preprocessed_text])

    # Dictionary to store predictions and their probabilities
    predictions = {}

    # Predict using the loaded models
    for personality in personality_type:
        model = models[personality]
        prediction = model.predict(transformed_text)[0]
        probability = model.predict_proba(transformed_text)[0]

        predictions[personality] = {
            'prediction': prediction,
            'probability': probability[1] if prediction == 1 else probability[0]
        }

    return predictions

In [55]:
input_text1 =""" They act like they care They tell me to share But when I carve the stories on my arm The doctor just calls it self harm I’m not asking for attention There’s a reason I have apprehensions I just need you to see What has become of me||| I know I’m going crazy But they think my thoughts are just hazy When in that chaos, in that confusion I’m crying out for help, to escape my delusions||| Mental health is a state of mind How does one keep that up when assistance is denied All my failed attempts to fight the blaze You treat it like its a passing phase||| Well stop, its not, because mental illness is real Understand that we’re all not made of steel Because when you brush these issues under the carpet You make it seem like its our mistake we’re not guarded||| Don’t you realise that its a problem that needs to be addressed Starting at home, in our nest Why do you keep your mouths shut about such things Instead of caring for those with broken wings||| What use is this social stigma When mental illness is not even such an enigma Look around and you’ll see the numbers of the affected hiding under the covers ||| This is an issue that needs to be discussed Not looked down upon with disgust Mental illness needs to be accepted So that people can be protected ||| Let me give you some direction People need affection The darkness must be escaped Only then the lost can be saved||| Bring in a change Something not very strange The new year is here Its time to eradicate fear||| Recognise the wrists under the knives To stop mental illness from taking more lives Let’s break the convention Start ‘suicide prevention’.||| Hoping the festival of lights drives the darkness of mental illness away"""


In [53]:
input_text2 = """ I dont think anyone would be able to live 300 years i am not talking about the physical ability to do so but the mental fortitude unless you decide to live away from civilization it simply is not possible.|||Believe me you would not want to live for that long alone , unless there are others who can live for 300 years as well.|||You cannot enjoy something if you say something to yourself like ‘I wanna enjoy this , i think this thing is gonna be fun’ believe me it doesn’t work.|||I think this problem might be face by a lot of people.|||Firstly you should only study stuff that interests you . (obvious)|||Now there are subjects that you school forces you to take and you have no option but to some how score in those subjects. (What i used to think is subjects like history , geography and most of all Hindi are utterly useless , i wanna be a programmer why do i study these)|||But because i had no choice i thought lets give these a try. I started questioning things and when i dug deep into the events of history and and why each event took place or how it was of benefit to the benefactor. This questioning and finding out the reasons made me like history.|||My point is unless ya’ll start questioning and researching further than whats necessary for exams you wont like that subject. All the subjects are beautiful , its what you choose to see. Basically give everything a real shot in life , everything works out. (my advice seems ironical as if you dont like the subject and i am telling you to research further but try it once )|||And also never study because there is exam or because you have to do an assignment or because someone is told you to or because ….|||But please ONLY STUDY BECAUSE YOU FEEL LIKE AND BECAUSE YOU WANT TO. Until you develop this sense of want to study it will be hard for you to like it. You must like it so much , that you know when people say after studying maths for 1 hour they took a break of 20 mins watching their favorite tv show(lets say friends)|||For you it should be like after 1 hour of x(that you dont like but HAVE to do) activity you take a break of 20 mins and you study , (like i like reading article on ai so i do that) you might like bio you will do that what i want to say is that is what it means to like something and only then you can truly enjoy it.|||If only something known as luck existed. (no offence to the readers or person who asked the question)|||Luck is a really interesting term , a really complex illusion. What i am saying is there is nothing known as luck that exists. Something simply doesnt just happen. It happens for a reason and with a reason.|||Some over here might claim that if it is not luck then what is it that cause (cause a child to be born in a rich family or a person to be saved by weirdest phenomenon and escape death.)|||What i want ya’ll to know is firstly that being born rich cannot be called as “lucky” like we cannot say to be born in a rich family is particularly a good thing there are many reasons to this (some people like to work their way up , Some want to experience the life troubles, well whatever the reasons might be) So firstly when we say something to be lucky we just CONSIDER that what happened was good. Same goes with the case of being saved from and awful accident. We still dont know the purpose of life or our existence and hence we dont know if living is a boon. this might be harsh for some but Reality is Harsh.|||What want ya’ll to know is never feel bad if something good(in your perspective) happens to someone as It might as well not turn out to be good if you see the BIG picture.|||Besides its also a good thing to think this way as its boosts up your hopes , like you might consider that everything that has happened to you has made you what you are and even if you don’t appreciate your conditions there is someone somewhere who would want to be in that.|||I think the all of us are 100% selfish. (no offence)|||The thing is even if we say we care about someone and then we help that person in reality we are just making ourselves happy by helping that someone.|||What i mean to say is even when people talk about sacrifices for others the reality is that sacrifice made that person or those persons happy which you cared for and thus those people being happy makes you happy.|||Everything comes down to you. You can try and deny it but you all know it.|||Now about those people who sacrifice their lives for others that is a peculiar case , and here too (this might be hard for some to believe) but they sacrifice life for someone they loved (they thought they loved) but the truth is in a situation where a person sacrifices his life for another the truth is that if he hadnt dont that he couldn’t have survived without that person anyways and then there are always some who seek glory.|||I hope you get the point. Even when you say that people spend 30s and 40s the truth is making their kids life perfect gives them happiness. There are people without kids too cause for these making their kids life perfect doesnt give them as much happiness as focusing on their own goals might.|||Now i believe there might be many who thought that making kids life perfect might give them happiness but it turns out to be false and then they are stuck there fulfilling moral obligations. It all comes down to your resolves and how firm you are in you decisions.|||Isn’t it fun to watch our disciples fight among themselves to prove that only one of us exists!|||I tend to believe that everything in this universe HAS TO HAVE A PURPOSE. Rather than thinking that the universe is a useless place and we have no purpose i would rather think we are just too stupid and dont know or cannot find the purpose. I have always wondered that what would be our reason to exist , once i thought of us (humans on earth) a crop created by aliens that takes this long to grow (i mean may be it would be not possible to create humans by a process other than evolution) so the aliens started the life on earth and are now just waiting for us to evolve , and so our purpose according to that is nothing but to serve as food or may be what ever they want , now then the question arises is what would their purpose be , And all such hypothetical situation lead to to scenario to go into infinity.|||After a lot of such crazy thought i came to 2 conclusions , 1 is pretty simple our purpose of life is find a purpose for our lives , and the weird thing is unlike other things once you find what your were looking for , the process ends there. The paradox is after you find your purpose of life did your succeed in finishing your purpose of life or did you just begun ?|||The 2nd one is what most people should agree is happiness. Now I think that this happiness should be confined to YOUR’s and ONLY your Happiness. Now the thing is some people gain happiness by giving people happiness.|||Let happiness be a quantifiable entity. We shall say that we start 0 oh hp . |||The zone where we dont feel happy or sad is 0 . Anything above 0 means you are happy and less than 0 is you are sad .|||Now one might think that if we suppose attain 100 hp , and do nothing after that we shall remain happy , the problem is that after a while our bar above which we remain moves up (simple adaptation) so now anything below 100hp is sad . This is the main problem with happiness , and So we need to keep doing stuff and increasing our hp . So maybe this counts as a purpose of life.|||Like when we are told about those saints and stuff who abandon society and live their life alone in discrete places where no one can disturb them. I believe the simple reason for this they have found happiness in doing nothing . Like someone finds happiness in making more money (there is a possibility that you might not make and hence be sad ) So these saint type people do something that has no opposite , like i know when they do nothing (i know its opposite is everything) and thats the very reason they go away from the civilization where they literally will have to do nothing . I also tend to think of these people as cowards who fear that they might loose at point.|||But the very point arises is what did you gain by gaining happiness. So again there is no end so Keep chasing the infinity its pointless but Keeps you busy(till the point you are alive) then after you are dead i guess nothing matters.|||But anyways thats the most easy thing to obtain happiness without sadness.|||But the very point arises is what did you gain by gaining happiness. So again there is no end so Keep chasing the infinity its pointless but Keeps you busy(till the point you are alive) then after you are dead i guess nothing matters.|||But if the life after death theory is true , and if by chance are memories are also transferred every time we are reborn. We all are fucked We are for infinity stuck chasing the infinity . (I wonder what happens when infinity chases infinity It will be fun to watch !) |||I think rather than worrying about these things we should just enjoy life . Because there will always something that we dont know that we dont know and thus we shall never know that. this is just 2 loops of not know , you can reach it , but Do it infinitely . Infinity is a bitch.|||Well To all beings good luck finding a purpose and to all those who know that it doesnt exist ya’ll are fucked."""

In [54]:
temp=[]
# Get the predictions
predictions = predict_personality(input_text2)
# Print the results
for personality, result in predictions.items():
    print(f"Personality Type: {personality}")
    print(f"Prediction: {(result['prediction'])}")
    temp.append(result['prediction'])
    print(f"Probability: {result['probability']:.2f}")
    print()
print(translate_back(temp))

Personality Type: IE: Introversion (I) / Extroversion (E)
Prediction: 0
Probability: 0.86

Personality Type: NS: Intuition (N) / Sensing (S)
Prediction: 0
Probability: 0.91

Personality Type: FT: Feeling (F) / Thinking (T)
Prediction: 1
Probability: 0.58

Personality Type: JP: Judging (J) / Perceiving (P)
Prediction: 0
Probability: 0.55

INTJ
