In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [2]:
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings ("ignore")

In [3]:
# read training data
data = pd.read_csv('panic_disorder_dataset_training.csv')
data.head()
train_data_set = pd.DataFrame(data)
train_data_set.head()

Unnamed: 0,Participant ID,Age,Gender,Family History,Personal History,Current Stressors,Symptoms,Severity,Impact on Life,Demographics,Medical History,Psychiatric History,Substance Use,Coping Mechanisms,Social Support,Lifestyle Factors,Panic Disorder Diagnosis
0,1,38,Male,No,Yes,Moderate,Shortness of breath,Mild,Mild,Rural,Diabetes,Bipolar disorder,,Socializing,High,Sleep quality,0
1,2,51,Male,No,No,High,Panic attacks,Mild,Mild,Urban,Asthma,Anxiety disorder,Drugs,Exercise,High,Sleep quality,0
2,3,32,Female,Yes,No,High,Panic attacks,Mild,Significant,Urban,Diabetes,Depressive disorder,,Seeking therapy,Moderate,Exercise,0
3,4,64,Female,No,No,Moderate,Chest pain,Moderate,Moderate,Rural,Diabetes,,,Meditation,High,Exercise,0
4,5,31,Male,Yes,No,Moderate,Panic attacks,Mild,Moderate,Rural,Asthma,,Drugs,Seeking therapy,Low,Sleep quality,0


In [4]:
# read test data
data = pd.read_csv('panic_disorder_dataset_testing.csv')
test_data_set = pd.DataFrame(data)
test_data_set.head()

Unnamed: 0,Participant ID,Age,Gender,Family History,Personal History,Current Stressors,Symptoms,Severity,Impact on Life,Demographics,Medical History,Psychiatric History,Substance Use,Coping Mechanisms,Social Support,Lifestyle Factors,Panic Disorder Diagnosis
0,1,41,Male,Yes,No,High,Shortness of breath,Mild,Mild,Urban,Diabetes,Bipolar disorder,Alcohol,Seeking therapy,Low,Exercise,0
1,2,20,Female,Yes,No,Low,Shortness of breath,Mild,Significant,Urban,Asthma,Anxiety disorder,Drugs,Exercise,High,Diet,0
2,3,32,Male,Yes,Yes,High,Panic attacks,Severe,Mild,Rural,Heart disease,Bipolar disorder,Drugs,Meditation,Moderate,Exercise,0
3,4,41,Female,Yes,Yes,Moderate,Shortness of breath,Moderate,Significant,Urban,Heart disease,Anxiety disorder,,Exercise,High,Sleep quality,0
4,5,36,Female,Yes,No,High,Chest pain,Severe,Significant,Rural,Asthma,Depressive disorder,,Seeking therapy,Low,Exercise,0


In [5]:
#strore of the data except xAttack of training dataset
train_x=train_data_set.drop(['Panic Disorder Diagnosis','Participant ID'],axis=1)
#store xAttack columns data of training dataset
train_y=train_data_set['Panic Disorder Diagnosis']
#type casting to integer
train_y =train_y.astype('int')
#strore of the data except xAttack of test dataset
test_x=test_data_set.drop(['Panic Disorder Diagnosis','Participant ID'],axis=1)
#store xAttack columns data of testing dataset
test_y=test_data_set['Panic Disorder Diagnosis']
#type casting to integer
test_y =test_y.astype('int')

In [6]:
label_encoder = LabelEncoder()
# Iterate over each column in the dataset
for column in train_x.columns:
    # Check if the column contains string values
    if train_x[column].dtype == "object":
        # Fit label encoder on the column
        label_encoder.fit(train_x[column])
        # Transform the string values to integers
        train_x[column] = label_encoder.transform(train_x[column])
for column in test_x.columns:
    # Check if the column contains string values
    if test_x[column].dtype == "object":
        # Fit label encoder on the column
        label_encoder.fit(test_x[column])
        # Transform the string values to integers
        test_x[column] = label_encoder.transform(test_x[column])

In [7]:
#preprocessing using StandardScaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
#scaling of training dataset
X_train_scaled = scaler.fit_transform(train_x)
X_test_scaled = scaler.fit_transform(test_x)

## AdaBoostClassifier

In [9]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn import metrics
abc = AdaBoostClassifier(n_estimators=100, learning_rate=0.1)
ada_model = abc.fit(X_train_scaled, train_y)
y_pred = ada_model.predict(X_test_scaled)
print("Accuracy:",metrics.accuracy_score(test_y, y_pred))
print('Classification Report:')
print(classification_report(test_y,y_pred))

Accuracy: 0.96205
Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98     19159
           1       0.70      0.17      0.28       841

    accuracy                           0.96     20000
   macro avg       0.83      0.58      0.63     20000
weighted avg       0.95      0.96      0.95     20000



In [10]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, cohen_kappa_score, accuracy_score, classification_report

# Confusion matrix
cm=confusion_matrix(test_y, y_pred.round())
print(cm)

[[19096    63]
 [  696   145]]


## Random Forest

In [12]:
#random forest classifier using SKlearn module
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100, random_state=0)
#fittness of the model on training dataset
rf.fit(train_x, train_y)
print("Accuracy on training set: {:.3f}".format(rf.score(train_x, train_y)))
print("Accuracy on test set: {:.3f}".format(rf.score(test_x, test_y)))
print('Classification Report:')
print(classification_report(test_y,y_pred))
print('Confusion matrix:')
cm=confusion_matrix(test_y, y_pred.round())
print(cm)

Accuracy on training set: 1.000
Accuracy on test set: 0.998
Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98     19159
           1       0.70      0.17      0.28       841

    accuracy                           0.96     20000
   macro avg       0.83      0.58      0.63     20000
weighted avg       0.95      0.96      0.95     20000

Confusion matrix:
[[19096    63]
 [  696   145]]


## Decision Tree

In [14]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
dt = DecisionTreeClassifier()
sv = SVC(kernel ='poly', degree=2)

In [15]:
# Apply Decision Tree on dataset and good the best Scores
dt.fit(X_train_scaled, train_y)
print("Accuracy on training set: {:.3f}".format(dt.score(X_train_scaled, train_y)))
print("Accuracy on test set: {:.3f}".format(dt.score(X_test_scaled, test_y)))
y_pred = dt.predict(X_test_scaled)
print('Classification Report:')
print(classification_report(test_y,y_pred))
print('Confusion matrix:')
cm=confusion_matrix(test_y, y_pred.round())
print(cm)

Accuracy on training set: 1.000
Accuracy on test set: 1.000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     19159
           1       1.00      0.99      1.00       841

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000

Confusion matrix:
[[19157     2]
 [    5   836]]


## SVM

In [17]:
# Apply support vector machine
sv.fit(X_train_scaled, train_y)
print("Accuracy on training set: {:.3f}".format(sv.score(X_train_scaled, train_y)))
print("Accuracy on test set: {:.3f}".format(sv.score(X_test_scaled, test_y)))

y_pred = sv.predict(X_test_scaled)
print('Classification Report:')
print(classification_report(test_y,y_pred))
print('Confusion matrix:')
cm=confusion_matrix(test_y, y_pred.round())
print(cm)

Accuracy on training set: 0.957
Accuracy on test set: 0.958
Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98     19159
           1       0.00      0.00      0.00       841

    accuracy                           0.96     20000
   macro avg       0.48      0.50      0.49     20000
weighted avg       0.92      0.96      0.94     20000

Confusion matrix:
[[19159     0]
 [  841     0]]


## KNN

In [19]:
# Apply KNN for Pridctionon dataset
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train_scaled, train_y)
y_pred=knn.predict(X_test_scaled)
print('Classification Report:')
print(classification_report(test_y,y_pred))
print('Confusion matrix:')
cm=confusion_matrix(test_y, y_pred.round())
print(cm)

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.99     19159
           1       0.80      0.47      0.60       841

    accuracy                           0.97     20000
   macro avg       0.89      0.73      0.79     20000
weighted avg       0.97      0.97      0.97     20000

Confusion matrix:
[[19060    99]
 [  442   399]]


## Naive Bays

In [21]:
# Apply Naive Bays on same dataset
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train_scaled, train_y)
print("Accuracy on training set: {:.3f}".format(gnb.score(X_train_scaled, train_y)))
print("Accuracy on test set: {:.3f}".format(gnb.score(X_test_scaled, test_y)))
y_pred=gnb.predict(X_test_scaled)
print('Classification Report:')
print(classification_report(test_y,y_pred))
print('Confusion matrix:')
cm=confusion_matrix(test_y, y_pred.round())
print(cm)

Accuracy on training set: 0.712
Accuracy on test set: 0.958
Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98     19159
           1       0.00      0.00      0.00       841

    accuracy                           0.96     20000
   macro avg       0.48      0.50      0.49     20000
weighted avg       0.92      0.96      0.94     20000

Confusion matrix:
[[19159     0]
 [  841     0]]


## feed forward deep neural network

In [23]:
# Apply feed forward Deep neural network 
mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=(45,30,15),activation='relu', random_state=1, learning_rate_init=0.2, alpha =0.09,
                    learning_rate='invscaling',momentum=0.9, max_iter=100,early_stopping=True, validation_fraction=0.1,verbose=True, 
                   )
#MLP model fittness on traning dataset
mlp.fit(X_train_scaled, train_y)
#accuracy on training dataset
pred_mlp = mlp.predict(X_test_scaled)

print(classification_report(test_y , pred_mlp))

print("Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, train_y)))
#accuracy on test dataset
print("Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, test_y)))


Iteration 1, loss = 0.07891917
Validation score: 0.986400
Iteration 2, loss = 0.05789485
Validation score: 0.984900
Iteration 3, loss = 0.05195191
Validation score: 0.987200
Iteration 4, loss = 0.04906689
Validation score: 0.989000
Iteration 5, loss = 0.04735534
Validation score: 0.989800
Iteration 6, loss = 0.04622966
Validation score: 0.990600
Iteration 7, loss = 0.04541915
Validation score: 0.990900
Iteration 8, loss = 0.04480274
Validation score: 0.991700
Iteration 9, loss = 0.04430475
Validation score: 0.991900
Iteration 10, loss = 0.04388719
Validation score: 0.992000
Iteration 11, loss = 0.04352666
Validation score: 0.992000
Iteration 12, loss = 0.04321298
Validation score: 0.992300
Iteration 13, loss = 0.04293355
Validation score: 0.992700
Iteration 14, loss = 0.04268301
Validation score: 0.993100
Iteration 15, loss = 0.04245283
Validation score: 0.993400
Iteration 16, loss = 0.04224224
Validation score: 0.993600
Iteration 17, loss = 0.04204626
Validation score: 0.993900
Iterat