In [1]:
## Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score,accuracy_score,confusion_matrix,classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression,LogisticRegression
from sklearn.ensemble import RandomForestRegressor,RandomForestClassifier
from sklearn.neighbors import KNeighborsRegressor,KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_selector as selector
from sklearn.ensemble import VotingRegressor, VotingClassifier
from sklearn.multioutput import MultiOutputClassifier

In [2]:
## Importing Dataset
df = pd.read_csv('Myers Briggs Table_S1.csv')
df.head()

Unnamed: 0,S No,AGE,HEIGHT,WEIGHT,SEX,ACTIVITY LEVEL,PAIN 1,PAIN 2,PAIN 3,PAIN 4,MBTI,E,I,S,N,T,F,J,P,POSTURE
0,1,53,62,125,Female,Low,0.0,0.0,0.0,0.0,ESFJ,18,3,17,9,9,13,18,4,A
1,2,52,69,157,Male,High,7.0,8.0,5.0,3.0,ISTJ,6,15,14,12,21,3,13,9,B
2,3,30,69,200,Male,High,0.0,0.0,0.0,0.0,ESTJ,15,6,16,10,15,9,12,10,A
3,4,51,66,175,Male,Moderate,9.5,9.5,9.5,1.5,ISTJ,6,15,21,5,13,11,19,3,D
4,5,45,63,199,Female,Moderate,4.0,5.0,2.0,2.0,ENFJ,14,7,20,6,9,15,16,6,A


In [3]:
## copying data set and droping unwanted columns
df1 = df.copy()
df1.drop(['S No','AGE','HEIGHT','WEIGHT','ACTIVITY LEVEL'],axis=1,inplace=True)
df1.head()

Unnamed: 0,SEX,PAIN 1,PAIN 2,PAIN 3,PAIN 4,MBTI,E,I,S,N,T,F,J,P,POSTURE
0,Female,0.0,0.0,0.0,0.0,ESFJ,18,3,17,9,9,13,18,4,A
1,Male,7.0,8.0,5.0,3.0,ISTJ,6,15,14,12,21,3,13,9,B
2,Male,0.0,0.0,0.0,0.0,ESTJ,15,6,16,10,15,9,12,10,A
3,Male,9.5,9.5,9.5,1.5,ISTJ,6,15,21,5,13,11,19,3,D
4,Female,4.0,5.0,2.0,2.0,ENFJ,14,7,20,6,9,15,16,6,A


In [4]:
## Label Encoding
le = LabelEncoder()
df1['SEX'] = le.fit_transform(df1['SEX'])

df1.head()

Unnamed: 0,SEX,PAIN 1,PAIN 2,PAIN 3,PAIN 4,MBTI,E,I,S,N,T,F,J,P,POSTURE
0,0,0.0,0.0,0.0,0.0,ESFJ,18,3,17,9,9,13,18,4,A
1,1,7.0,8.0,5.0,3.0,ISTJ,6,15,14,12,21,3,13,9,B
2,1,0.0,0.0,0.0,0.0,ESTJ,15,6,16,10,15,9,12,10,A
3,1,9.5,9.5,9.5,1.5,ISTJ,6,15,21,5,13,11,19,3,D
4,0,4.0,5.0,2.0,2.0,ENFJ,14,7,20,6,9,15,16,6,A


In [5]:
## splitting data into X and y
Xr = df1[['SEX','E','I','N','S','T','F','J','P']]
yr = df1[['PAIN 1','PAIN 2','PAIN 3','PAIN 4']]


In [6]:
Xc = df1[['E','I','N','S','T','F','J','P']]
yc = df1[['MBTI','POSTURE']]

In [7]:
E=np.std(df1['E'])
I=np.std(df1['I'])
N=np.std(df1['N'])
S=np.std(df1['S'])
T=np.std(df1['T'])
F=np.std(df1['F'])
J=np.std(df1['J'])
P=np.std(df1['P'])


In [8]:
dataset_b=[]
for _,row in df1.iterrows():
        temp={
            'E':row['E'],
            'I':row['I'],
            'N':row['N'],
            'S':row['S'],
            'T':row['T'],
            'F':row['F'],
            'J':row['J'],
            'P':row['P'],
            'MBTI':row['MBTI'],
            'POSTURE':row['POSTURE']
            }
        dataset_b.append(temp)
for _ in range(1000):
    for _,row in df1.iterrows():
        temp={
                'E':row['E']+np.random.normal(E),
                'I':row['I']+np.random.normal(I),
                'N':row['N']+np.random.normal(N),
                'S':row['S']+np.random.normal(S),
                'T':row['T']+np.random.normal(T),
                'F':row['F']+np.random.normal(F),
                'J':row['J']+np.random.normal(J),
                'P':row['P']+np.random.normal(P),
                'MBTI':row['MBTI'],
                'POSTURE':row['POSTURE']
                }
        dataset_b.append(temp)
df2=pd.DataFrame(dataset_b)
df2.head()

Unnamed: 0,E,I,N,S,T,F,J,P,MBTI,POSTURE
0,18.0,3.0,9.0,17.0,9.0,13.0,18.0,4.0,ESFJ,A
1,6.0,15.0,12.0,14.0,21.0,3.0,13.0,9.0,ISTJ,B
2,15.0,6.0,10.0,16.0,15.0,9.0,12.0,10.0,ESTJ,A
3,6.0,15.0,5.0,21.0,13.0,11.0,19.0,3.0,ISTJ,D
4,14.0,7.0,6.0,20.0,9.0,15.0,16.0,6.0,ENFJ,A


In [9]:
## splitting data into X and y
Xc = df2[['E','I','N','S','T','F','J','P']]
yc = df2[['MBTI','POSTURE']]

In [10]:
x_orignal = df1[['E','I','N','S','T','F','J','P']]
y_orignal = df1[['MBTI','POSTURE']]

In [11]:
## splitting data into train and test
x_orignal_train,x_orignal_test,y_orignal_train,y_orignal_test = train_test_split(x_orignal,y_orignal,test_size=0.2,random_state= 0)

In [12]:
Xc_train,Xc_test,yc_train,yc_test = train_test_split(Xc,yc,test_size=0.2,random_state=0)

In [15]:
rf = MultiOutputClassifier(RandomForestClassifier())


In [16]:
rf.fit(Xc_train,yc_train)
rf.predict(Xc_test)
rf.score(Xc_test,yc_test)

0.9750772399588054

In [18]:
rf.score(x_orignal_test,y_orignal_test)

0.9