In [1]:
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import precision_score, f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imblearn.over_sampling import SMOTE

Load data & Split training and testing data

In [2]:
Y=np.load('Y.npy')
X=np.load('X.npy')
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y,random_state=671)

In [3]:
personality_type = [ "IE: Introversion (I) | Extroversion (E)", "NS: Intuition    (N) | Sensing      (S)", 
                   "FT: Feeling      (F) | Thinking     (T)", "JP: Judging      (J) | Perceiving   (P)"  ]

Random Forest

In [4]:
for l in range(len(personality_type)):

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y[:,l], test_size=0.2, stratify=Y[:,l],random_state=671)
    smote = SMOTE(sampling_strategy='auto', random_state=671)
    X_resampled, Y_resampled = smote.fit_resample(X_train, Y_train)
    
    # Define parameter grid
    param_grid = {
        'n_estimators': [100, 200, 500],
        'max_depth' : [4,5,6,7,8],
        'criterion' :['gini', 'entropy']
    }
    
    # Optimize random forests using grid search
    clf_rf = RandomForestClassifier(random_state=671)
    clf_rf_cv = GridSearchCV(clf_rf, param_grid, cv=5)
    clf_rf_cv.fit(X_resampled, Y_resampled)

    # Output optimal parameters
    print("Best parameters: ", clf_rf_cv.best_params_)

    # Predictions are made using models with optimal parameters
    Y_pred = clf_rf_cv.predict(X_test)
    
    predictions = [round(value) for value in Y_pred]
    # eva
    accuracy = accuracy_score(Y_test, predictions)
    
    print("%s Accuracy: %.2f%%" % (personality_type[l], accuracy * 100.0))
    print("%s Classification report for Test Data" % (personality_type[l]))
    print(classification_report(Y_test, Y_pred, zero_division=1))
    print("%s Confusion Matrix for Test Data" % (personality_type[l]))
    print(confusion_matrix(Y_test, predictions))
    print("\n")

Best parameters:  {'criterion': 'gini', 'max_depth': 8, 'n_estimators': 500}
IE: Introversion (I) | Extroversion (E) Accuracy: 70.95%
IE: Introversion (I) | Extroversion (E) Classification report for Test Data
              precision    recall  f1-score   support

           0       0.79      0.84      0.82      1335
           1       0.34      0.27      0.30       400

    accuracy                           0.71      1735
   macro avg       0.57      0.56      0.56      1735
weighted avg       0.69      0.71      0.70      1735

IE: Introversion (I) | Extroversion (E) Confusion Matrix for Test Data
[[1122  213]
 [ 291  109]]


Best parameters:  {'criterion': 'gini', 'max_depth': 8, 'n_estimators': 500}
NS: Intuition    (N) | Sensing      (S) Accuracy: 77.23%
NS: Intuition    (N) | Sensing      (S) Classification report for Test Data
              precision    recall  f1-score   support

           0       0.87      0.87      0.87      1496
           1       0.18      0.19      0.19 

SVM

In [5]:
for l in range(len(personality_type)):

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y[:,l], test_size=0.2, stratify=Y[:,l],random_state=671)
    smote = SMOTE(sampling_strategy='auto', random_state=671)
    X_resampled, Y_resampled = smote.fit_resample(X_train, Y_train)
    
    # grid parameter
    param_grid = {
        'C': [0.1, 1, 10, 100],
        'gamma': [1, 0.1, 0.01, 0.001],
        'kernel': ['rbf', 'poly', 'sigmoid']
    }
    
    # Optimize SVM using grid search
    clf_svm = SVC(random_state=671)
    clf_svm_cv = GridSearchCV(clf_svm, param_grid, cv=5)
    clf_svm_cv.fit(X_resampled, Y_resampled)

    # Output optimal parameters
    print("Best parameters: ", clf_svm_cv.best_params_)

    # Predictions are made using models with optimal parameters
    Y_pred = clf_svm_cv.predict(X_test)
    
    predictions = [round(value) for value in Y_pred]
    # Evaluate results
    accuracy = accuracy_score(Y_test, predictions)
    
    print("%s Accuracy: %.2f%%" % (personality_type[l], accuracy * 100.0))
    print("%s Classification report for Test Data" % (personality_type[l]))
    print(classification_report(Y_test, Y_pred, zero_division=1))
    print("%s Confusion Matrix for Test Data" % (personality_type[l]))
    print(confusion_matrix(Y_test, predictions))
    print("\n")


Best parameters:  {'C': 100, 'gamma': 1, 'kernel': 'rbf'}
IE: Introversion (I) | Extroversion (E) Accuracy: 77.12%
IE: Introversion (I) | Extroversion (E) Classification report for Test Data
              precision    recall  f1-score   support

           0       0.79      0.95      0.86      1335
           1       0.51      0.18      0.27       400

    accuracy                           0.77      1735
   macro avg       0.65      0.56      0.57      1735
weighted avg       0.73      0.77      0.73      1735

IE: Introversion (I) | Extroversion (E) Confusion Matrix for Test Data
[[1266   69]
 [ 328   72]]


Best parameters:  {'C': 10, 'gamma': 1, 'kernel': 'rbf'}
NS: Intuition    (N) | Sensing      (S) Accuracy: 85.19%
NS: Intuition    (N) | Sensing      (S) Classification report for Test Data
              precision    recall  f1-score   support

           0       0.87      0.98      0.92      1496
           1       0.29      0.05      0.09       239

    accuracy                

MLP

In [8]:
from sklearn.neural_network import MLPClassifier
for l in range(len(personality_type)):

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y[:,l], test_size=0.2, stratify=Y[:,l],random_state=671)
    smote = SMOTE(sampling_strategy='auto', random_state=671)
    X_resampled, Y_resampled = smote.fit_resample(X_train, Y_train)
    
    # Use MLP for training
    clf_mlp = MLPClassifier(max_iter=1000,  random_state=671)

    clf_mlp.fit(X_resampled, Y_resampled)
    

    # Make predictions on test data
    Y_pred = clf_mlp.predict(X_test)
    
    predictions = [round(value) for value in Y_pred]
    # evaluation
    accuracy = accuracy_score(Y_test, predictions)
    
    print("%s Accuracy: %.2f%%" % (personality_type[l], accuracy * 100.0))
    print("%s Classification report for Test Data" % (personality_type[l]))
    print(classification_report(Y_test, Y_pred, zero_division=1))
    print("%s Confusion Matrix for Test Data" % (personality_type[l]))
    print(confusion_matrix(Y_test, predictions))
    print("\n")

IE: Introversion (I) | Extroversion (E) Accuracy: 70.32%
IE: Introversion (I) | Extroversion (E) Classification report for Test Data
              precision    recall  f1-score   support

           0       0.81      0.80      0.81      1335
           1       0.36      0.37      0.36       400

    accuracy                           0.70      1735
   macro avg       0.58      0.59      0.59      1735
weighted avg       0.71      0.70      0.70      1735

IE: Introversion (I) | Extroversion (E) Confusion Matrix for Test Data
[[1072  263]
 [ 252  148]]


NS: Intuition    (N) | Sensing      (S) Accuracy: 77.35%
NS: Intuition    (N) | Sensing      (S) Classification report for Test Data
              precision    recall  f1-score   support

           0       0.87      0.86      0.87      1496
           1       0.20      0.21      0.21       239

    accuracy                           0.77      1735
   macro avg       0.54      0.54      0.54      1735
weighted avg       0.78      0.77  