In [45]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import warnings
warnings.filterwarnings('ignore')
import category_encoders as ce
# Two empty module-level lists, presumably meant to collect scores and the
# matching model names for a later comparison.
# NOTE(review): nothing visible in this notebook appends to either list.
accuracy, model = [], []

In [46]:
# Read the dataset into a DataFrame; the path is resolved relative to the
# notebook's working directory.
DATA_PATH = 'psyc.csv'
ch_data = pd.read_csv(DATA_PATH)

In [47]:
# Peek at the first five rows to confirm the columns loaded as expected.
ch_data.head()

Unnamed: 0,gender,age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality
0,Female,20,7,9,9,5,5,dependable
1,Male,17,5,4,5,2,4,serious
2,Female,25,5,5,7,2,4,serious
3,Female,18,6,2,7,4,7,serious
4,Female,19,2,4,7,1,3,responsible


In [48]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
ch_data.describe()

Unnamed: 0,age,openness,neuroticism,conscientiousness,agreeableness,extraversion
count,315.0,315.0,315.0,315.0,315.0,315.0
mean,20.244444,4.850794,4.584127,4.812698,4.844444,4.926984
std,2.616811,1.537211,1.818623,1.786315,1.718555,1.466527
min,5.0,1.0,1.0,1.0,1.0,1.0
25%,18.0,4.0,3.0,4.0,4.0,4.0
50%,20.0,5.0,5.0,5.0,5.0,5.0
75%,22.0,6.0,6.0,6.0,6.0,6.0
max,28.0,8.0,9.0,9.0,8.0,8.0


In [49]:
# (rows, columns) of the loaded frame.
ch_data.shape

(315, 8)

In [6]:
# Number of distinct values per column; shows how many target classes exist.
ch_data.nunique()

gender                 2
age                   13
openness               8
neuroticism            9
conscientiousness      9
agreeableness          8
extraversion           8
Personality            5
dtype: int64

In [7]:
# Count missing entries in every column (isnull is the pandas alias of isna).
ch_data.isnull().sum()

gender                0
age                   0
openness              0
neuroticism           0
conscientiousness     0
agreeableness         0
extraversion          0
Personality           0
dtype: int64

In [8]:
# Same five-row preview as earlier, shown again just before the label column
# is encoded.  NOTE(review): duplicates the earlier head() cell.
ch_data.head()

Unnamed: 0,gender,age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality
0,Female,20,7,9,9,5,5,dependable
1,Male,17,5,4,5,2,4,serious
2,Female,25,5,5,7,2,4,serious
3,Female,18,6,2,7,4,7,serious
4,Female,19,2,4,7,1,3,responsible


In [9]:
from sklearn.preprocessing import LabelEncoder

# Convert the text labels in "Personality" to integer codes, keeping the
# fitted encoder around so codes can be translated back to names.
label_encoder = LabelEncoder()
disorder_column = ch_data['Personality']
encoded_values = label_encoder.fit(disorder_column).transform(disorder_column)

# Lookup table: class name -> integer code assigned by the encoder.
class_names = label_encoder.classes_
label_mapping = dict(zip(class_names, label_encoder.transform(class_names)))

# Replace the text column in place with its encoded form.
ch_data['Personality'] = encoded_values

In [10]:
# Column dtypes after label encoding; "Personality" is now integer-typed.
ch_data.dtypes

gender                object
age                    int64
openness               int64
neuroticism            int64
conscientiousness      int64
agreeableness          int64
extraversion           int64
Personality            int64
dtype: object

In [11]:
# Show which integer code each personality label was assigned.
label_mapping

{'dependable': 0,
 'extraverted': 1,
 'lively': 2,
 'responsible': 3,
 'serious': 4}

In [12]:
# Ordinal-encode only the gender column.
# The column list has to be passed as `cols=`: the first positional parameter
# of ce.OrdinalEncoder is `verbose`, so a bare positional list never reaches
# `cols` and the encoder silently falls back to "every object column".
# The header is matched after stripping whitespace because the feature
# headers in this dataset carry trailing spaces.
gender_columns = [name for name in ch_data.columns if str(name).strip() == "gender"]
if not gender_columns:
    # Fail loudly instead of building an encoder that encodes nothing.
    raise KeyError("no 'gender' column found in ch_data")
encoder=ce.OrdinalEncoder(cols=gender_columns)

In [13]:
# Fit the ordinal encoder and rebind ch_data to the encoded frame.
# NOTE(review): this overwrites the raw frame; re-load the CSV to get the
# original text values back.
ch_data=encoder.fit_transform(ch_data)

In [14]:
# Display the fully encoded frame (pandas abbreviates the middle rows).
ch_data

Unnamed: 0,gender,age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality
0,1,20,7,9,9,5,5,0
1,2,17,5,4,5,2,4,4
2,1,25,5,5,7,2,4,4
3,1,18,6,2,7,4,7,4
4,1,19,2,4,7,1,3,3
...,...,...,...,...,...,...,...,...
310,1,19,6,5,6,4,3,1
311,2,18,2,5,8,3,7,0
312,2,18,7,5,6,2,7,4
313,2,23,6,7,5,4,3,1


In [15]:
# Pairwise column correlations (pandas default method) on the encoded frame.
corr=ch_data.corr()

In [16]:
# Correlation of every column with the encoded target, ascending.
# NOTE(review): the target codes are arbitrary label indices, so these
# coefficients are only a rough indication.
corr["Personality"].sort_values()

neuroticism          -0.347764
openness             -0.188905
age                  -0.038495
agreeableness         0.019730
conscientiousness     0.132049
extraversion          0.222841
gender                0.276026
Personality           1.000000
Name: Personality, dtype: float64

In [17]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
data_train,data_test=train_test_split(ch_data,test_size=0.2,random_state=123)

In [18]:
# Inspect the training partition (original row indices are preserved).
data_train

Unnamed: 0,gender,age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality
206,2,18,6,3,6,6,6,4
239,1,21,4,5,7,7,5,4
53,2,24,4,5,5,6,2,2
22,1,20,4,5,6,5,5,4
265,2,20,5,5,3,4,3,4
...,...,...,...,...,...,...,...,...
106,1,22,6,2,1,6,6,4
83,1,26,5,6,5,7,3,1
17,2,18,5,3,5,6,7,4
230,1,20,2,6,5,5,3,1


In [19]:
# Inspect the held-out test partition.
data_test

Unnamed: 0,gender,age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality
42,1,22,3,6,6,1,6,4
11,2,19,4,4,4,4,3,3
94,1,20,6,2,5,6,7,4
114,2,25,5,5,6,6,5,1
266,1,18,5,4,4,6,6,4
...,...,...,...,...,...,...,...,...
75,1,19,4,6,5,4,4,1
306,1,17,6,3,6,4,6,4
30,2,19,4,4,6,4,3,4
19,1,23,5,4,4,5,4,1


In [20]:
# Separate predictors from the target column in each partition.
TARGET = "Personality"
X_train, y_train = data_train.drop(columns=[TARGET]), data_train[TARGET]
X_test, y_test = data_test.drop(columns=[TARGET]), data_test[TARGET]

In [21]:
# k-nearest neighbours: 11 neighbours, distance-weighted votes, KD-tree search.
knn = KNeighborsClassifier(algorithm="kd_tree", weights="distance", n_neighbors=11)
y_knn_p = knn.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy on the first line, confusion matrix below it.
print(accuracy_score(y_test, y_knn_p), confusion_matrix(y_test, y_knn_p), sep="\n")


0.6507936507936508
[[ 0  0  0  0  2]
 [ 0 14  0  0  6]
 [ 0  0  0  0  3]
 [ 0  4  0  1  6]
 [ 0  1  0  0 26]]


In [22]:
# Support vector classifier with a polynomial kernel and C=4.
svc = SVC(C=4, kernel="poly", gamma='scale')
y_svc_p = svc.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy on the first line, confusion matrix below it.
print(accuracy_score(y_test, y_svc_p), confusion_matrix(y_test, y_svc_p), sep="\n")


0.8095238095238095
[[ 0  0  0  0  2]
 [ 0 17  0  1  2]
 [ 0  1  2  0  0]
 [ 0  3  0  5  3]
 [ 0  0  0  0 27]]


In [23]:
# Multinomial naive Bayes with default settings.
# NOTE(review): this model is designed for count-like features; age and the
# encoded gender are not counts, so confirm it is an appropriate baseline.
nb = MultinomialNB()
y_nb_p = nb.fit(X_train, y_train).predict(X_test)

# Hold-out accuracy on the first line, confusion matrix below it.
print(accuracy_score(y_test, y_nb_p), confusion_matrix(y_test, y_nb_p), sep="\n")


0.5396825396825397
[[ 0  0  0  0  2]
 [ 0  7  0  0 13]
 [ 0  0  0  0  3]
 [ 0  2  0  0  9]
 [ 0  0  0  0 27]]


In [24]:
# Decision tree baseline with default hyperparameters.
# random_state is pinned (same seed as the train/test split) because tree
# construction is randomised; without it the reported score changes from
# run to run.
dt=DecisionTreeClassifier(random_state=123)
dt.fit(X_train,y_train)
y_p=dt.predict(X_test)

# Hold-out accuracy followed by the confusion matrix.
print(accuracy_score(y_test,y_p))
print(confusion_matrix(y_test,y_p))

0.5238095238095238
[[ 0  0  0  0  2]
 [ 3 11  2  1  3]
 [ 0  0  1  1  1]
 [ 0  1  1  4  5]
 [ 2  6  1  1 17]]


In [25]:
from sklearn.ensemble import RandomForestClassifier

# Random forest baseline with default hyperparameters.
# random_state is pinned (same seed as the train/test split) because both
# the bootstrap sampling and the feature sub-sampling are random; without it
# the reported score is not reproducible.
rfc=RandomForestClassifier(random_state=123)
rfc.fit(X_train,y_train)
y_p1=rfc.predict(X_test)

# Hold-out accuracy followed by the confusion matrix.
print(accuracy_score(y_test,y_p1))
print(confusion_matrix(y_test,y_p1))

0.6825396825396826
[[ 0  0  0  0  2]
 [ 0 15  1  0  4]
 [ 0  0  1  1  1]
 [ 0  4  0  1  6]
 [ 0  1  0  0 26]]


In [26]:
from sklearn.model_selection import GridSearchCV

In [27]:
# Hyperparameter search for the support vector classifier.
svm_model = SVC()

# Candidate values: regularisation strength, kernel family and kernel
# coefficient (3 x 3 x 2 = 18 combinations).
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

# Exhaustive search scored by 5-fold cross-validation on the training split.
grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Best combination and the estimator refit with it on the full training split.
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Score the tuned model on the held-out test split.
y_pred = best_model.predict(X_test)

# Stored under its own name: assigning to `accuracy` would replace the
# module-level `accuracy` list with a float.
svm_test_accuracy = accuracy_score(y_test, y_pred)
print(f'Best Parameters: {best_params}')
print(f'Accuracy on Test Set: {svm_test_accuracy:.2f}')


Best Parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'linear'}
Accuracy on Test Set: 0.75


In [28]:
# Hyperparameter search for the decision tree.
# The estimator is seeded so every candidate is evaluated on deterministic
# trees and the selected parameters are reproducible across runs.
dt_classifier = DecisionTreeClassifier(random_state=123)

# Candidate values: split criterion, depth limit and the two minimum-sample
# constraints (2 x 4 x 3 x 3 = 72 combinations).
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Exhaustive search scored by 5-fold cross-validated accuracy on the training split.
grid_search = GridSearchCV(estimator=dt_classifier, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best combination and the estimator refit with it on the full training split.
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Score the tuned model on the held-out test split.
y_pred = best_model.predict(X_test)

# Stored under its own name: assigning to `accuracy` would replace the
# module-level `accuracy` list with a float.
dt_test_accuracy = accuracy_score(y_test, y_pred)
print(f'Best Parameters: {best_params}')
print(f'Accuracy on Test Set: {dt_test_accuracy:.2f}')

Best Parameters: {'criterion': 'gini', 'max_depth': 30, 'min_samples_leaf': 2, 'min_samples_split': 10}
Accuracy on Test Set: 0.60


In [29]:
import streamlit as st

In [30]:
# Page title and sidebar heading for the Streamlit front end.
# NOTE(review): the recorded output shows Streamlit pointing at the
# `streamlit run` command; these calls presumably render nothing when
# executed from the notebook kernel — confirm and move to a .py script.
st.title('Machine Learning Model Deployment')
st.sidebar.header('User Input Features')

2023-12-30 17:59:12.486 
  command:

    streamlit run /Users/namanmuktha/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator(_root_container=1, _parent=DeltaGenerator())

In [31]:
def _sidebar_int_slider(label, low, high, default):
    """Create one integer sidebar slider and return its current value."""
    return st.sidebar.slider(label, min_value=low, max_value=high, value=default)


# One slider per model input; labels keep the trailing space used by the
# training column names.
feature1 = _sidebar_int_slider('gender ', 1, 2, 1)
feature2 = _sidebar_int_slider('age ', 2, 100, 50)
feature3 = _sidebar_int_slider('openness ', 1, 10, 3)
feature4 = _sidebar_int_slider('neuroticism ', 1, 10, 3)
feature5 = _sidebar_int_slider('conscientiousness ', 1, 10, 3)
feature6 = _sidebar_int_slider('agreeableness ', 1, 10, 3)
feature7 = _sidebar_int_slider('extraversion ', 1, 10, 3)

In [32]:
# List the exact feature names the models were trained on; note the trailing
# space in each name, which any prediction input has to reproduce.
X_train.columns

Index(['gender ', 'age ', 'openness ', 'neuroticism ', 'conscientiousness ',
       'agreeableness ', 'extraversion '],
      dtype='object')

In [40]:
# Assemble the slider values into a single-row frame whose column names and
# order match the training features (trailing spaces included).
_user_row = {
    'gender ': feature1,
    'age ': feature2,
    'openness ': feature3,
    'neuroticism ': feature4,
    'conscientiousness ': feature5,
    'agreeableness ': feature6,
    'extraversion ': feature7,
}
input_data = pd.DataFrame([_user_row])

In [41]:
# Classify the user's row with the polynomial-kernel SVC fitted earlier;
# the result is an array holding one encoded class label.
prediction=svc.predict(input_data)

In [42]:
# Show the predicted personality by name.  The model predicts the integer
# codes produced by label_encoder, so decode back to the original label
# instead of displaying a bare number.
st.write(label_encoder.inverse_transform(prediction)[0])

In [43]:
# Launch the Streamlit app from a script file.
# NOTE(review): the recorded output reports a missing file and names app.py
# rather than c.py, so the output looks stale — confirm the script exists.
!streamlit run c.py


Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Invalid value: File does not exist: app.py


In [44]:
# Attempt to serve the notebook itself on port 8501.
# NOTE(review): per the recorded output this fails — Streamlit only accepts
# raw .py files — and server.enableCORS=false was overridden to true.
!streamlit run --server.enableCORS=false --server.port=8501 character.ipynb


2023-12-30 18:08:53.638 
As a result, 'server.enableCORS' is being overridden to 'true'.

More information:
In order to protect against CSRF attacks, we send a cookie with each request.
To do so, we must specify allowable origins, which places a restriction on
cross-origin resource sharing.

If cross origin resource sharing is required, please disable server.enableXsrfProtection.
            
Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Streamlit requires raw Python (.py) files, not .ipynb.
For more information, please see https://docs.streamlit.io
