# Dataset: Extrovert vs. Introvert Behavior Data

https://www.kaggle.com/datasets/rakeshkapilavai/extrovert-vs-introvert-behavior-data


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle

In [2]:
## Read personality_dataset.csv into a DataFrame and preview its first five rows
data = pd.read_csv(filepath_or_buffer="personality_dataset.csv")
data.head(n=5)

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,4.0,No,4.0,6.0,No,13.0,5.0,Extrovert
1,9.0,Yes,0.0,0.0,Yes,0.0,3.0,Introvert
2,9.0,Yes,1.0,2.0,Yes,5.0,2.0,Introvert
3,0.0,No,6.0,7.0,No,14.0,8.0,Extrovert
4,3.0,No,9.0,4.0,No,8.0,5.0,Extrovert


In [3]:
## Replace the string labels in 'Stage_fear' with integer codes.
## The fitted encoder is kept in `label_encoder_stage` so it can be pickled later.
label_encoder_stage = LabelEncoder()
stage_fear_codes = label_encoder_stage.fit_transform(data['Stage_fear'])
data['Stage_fear'] = stage_fear_codes
data

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,4.0,0,4.0,6.0,No,13.0,5.0,Extrovert
1,9.0,1,0.0,0.0,Yes,0.0,3.0,Introvert
2,9.0,1,1.0,2.0,Yes,5.0,2.0,Introvert
3,0.0,0,6.0,7.0,No,14.0,8.0,Extrovert
4,3.0,0,9.0,4.0,No,8.0,5.0,Extrovert
...,...,...,...,...,...,...,...,...
2895,3.0,0,7.0,6.0,No,6.0,6.0,Extrovert
2896,3.0,0,8.0,3.0,No,14.0,9.0,Extrovert
2897,4.0,1,1.0,1.0,Yes,4.0,0.0,Introvert
2898,11.0,1,1.0,3.0,Yes,2.0,0.0,Introvert


In [4]:
## Replace the string labels in 'Drained_after_socializing' with integer codes.
## The fitted encoder is kept in `label_encoder_drained` so it can be pickled later.
label_encoder_drained = LabelEncoder()
drained_codes = label_encoder_drained.fit_transform(data['Drained_after_socializing'])
data['Drained_after_socializing'] = drained_codes
data

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,4.0,0,4.0,6.0,0,13.0,5.0,Extrovert
1,9.0,1,0.0,0.0,1,0.0,3.0,Introvert
2,9.0,1,1.0,2.0,1,5.0,2.0,Introvert
3,0.0,0,6.0,7.0,0,14.0,8.0,Extrovert
4,3.0,0,9.0,4.0,0,8.0,5.0,Extrovert
...,...,...,...,...,...,...,...,...
2895,3.0,0,7.0,6.0,0,6.0,6.0,Extrovert
2896,3.0,0,8.0,3.0,0,14.0,9.0,Extrovert
2897,4.0,1,1.0,1.0,1,4.0,0.0,Introvert
2898,11.0,1,1.0,3.0,1,2.0,0.0,Introvert


In [5]:
## Persist both fitted label encoders with pickle.
## (No scaler is written here: the StandardScaler is only created in a later cell.)
encoders_to_save = (
    ('label_encoder_drained.pkl', label_encoder_drained),
    ('label_encoder_stage.pkl', label_encoder_stage),
)
for pickle_path, fitted_encoder in encoders_to_save:
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)

In [6]:
## Show the distinct values of the target column
personality_labels = data['Personality'].unique()
print(personality_labels)

['Extrovert' 'Introvert']


In [7]:
## Encode the target as integers: Extrovert -> 1, Introvert -> 0.
## The dtype guard makes this cell idempotent: calling .map() a second time on
## the already-encoded column would silently turn every target into NaN.
personality_codes = {'Extrovert': 1, 'Introvert': 0}
if not pd.api.types.is_numeric_dtype(data['Personality']):
    encoded_personality = data['Personality'].map(personality_codes)
    # Fail loudly on an unmapped label instead of passing NaN targets downstream.
    if encoded_personality.isna().any():
        raise ValueError(
            "Unexpected value in 'Personality'; expected only 'Extrovert' or 'Introvert'"
        )
    data['Personality'] = encoded_personality

In [8]:
## Divide the dataset into independent (X) and dependent (y) features
X = data.drop('Personality', axis=1)
y = data['Personality']

## Split the data into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=40)

## Scale the features; the scaler statistics come from the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Persist the fitted scaler: the network is trained on scaled inputs, so the
## same transformation has to be available wherever the saved model is used.
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)


### ANN Implementation

In [9]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import datetime




In [10]:
## Shape tuple for a single sample: one entry per feature column of X_train
n_features = X_train.shape[1]
(n_features,)

(7,)

In [11]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from tensorflow.keras.layers import Dense, Input

## Build the ANN Model
def create_model(input_dim=7):
    """Build and compile the feed-forward binary classifier.

    Layers: Input(input_dim) -> Dense(22, relu) -> Dense(42, relu)
    -> Dense(12, relu) -> Dense(1, sigmoid). The model is compiled with the
    Adam optimizer, binary cross-entropy loss and an accuracy metric.

    Args:
        input_dim: Number of input features per sample. Defaults to 7, the
            number of feature columns used in this notebook, so existing
            zero-argument calls behave exactly as before.

    Returns:
        The compiled Sequential model (untrained).
    """
    model = Sequential()
    model.add(Input(shape=(input_dim,)))
    model.add(Dense(22, activation='relu'))
    model.add(Dense(42, activation='relu'))
    model.add(Dense(12, activation='relu'))
    # Single sigmoid unit: outputs the probability of the positive class.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Wrap the model so it can be used as a scikit-learn estimator
clf = KerasClassifier(
    model=create_model,
    epochs=200,
    # EarlyStopping watches `val_loss`, which only exists when validation data
    # is supplied. Without this hold-out the callback can never trigger and
    # every fit runs for the full `epochs`.
    validation_split=0.2,
    callbacks=[
        EarlyStopping(
            monitor='val_loss',
            patience=20,
            # Roll back to the best epoch instead of keeping the last one.
            restore_best_weights=True,
        )
    ],
    verbose=1
)

In [12]:
import numpy as np
from sklearn.pipeline import make_pipeline

# Define cross-validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# X holds the raw (unscaled) features, while the final model is trained on
# standardized inputs. Putting the scaler in a pipeline standardizes the data
# inside every fold, fitted on that fold's training part only, so the CV score
# reflects the same preprocessing without leaking validation statistics.
cv_pipeline = make_pipeline(StandardScaler(), clf)

# Run cross-validation
scores = cross_val_score(cv_pipeline, X, y, cv=cv, scoring='accuracy', error_score='raise')

print("CV Accuracy scores:", scores)
print("Mean CV Accuracy:", np.mean(scores))



Epoch 1/200


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoc

In [13]:
# A freshly built network only has random, untrained weights. Save it under its
# own file name so it can never be mistaken for (or loaded in place of) the
# trained model that is written to 'model.keras'.
create_model().save('model_untrained.keras')

In [17]:
## Train the wrapped network on the scaled training split
clf.fit(X=X_train, y=y_train)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

0,1,2
,model,<function cre...002524F1BC700>
,build_fn,
,warm_start,False
,random_state,
,optimizer,'rmsprop'
,loss,
,metrics,
,batch_size,
,validation_batch_size,
,verbose,1


In [18]:
## Export the Keras model held by the fitted wrapper
trained_model = clf.model_
trained_model.save("model.keras")
