In [1]:
import numpy as np
import pandas as pd

# Locations of the training and testing CSV files.
trainset_file_path = './data/panic_disorder_dataset_training.csv'
testset_file_path = './data/panic_disorder_dataset_testing.csv'

# Read both files with identical parsing options; the literal string 'NA'
# is treated as a missing value.
trainData, testData = (
    pd.read_csv(csv_path, na_values='NA')
    for csv_path in (trainset_file_path, testset_file_path)
)

# Preview the first rows of the training frame.
trainData.head()

Unnamed: 0,Participant ID,Age,Gender,Family History,Personal History,Current Stressors,Symptoms,Severity,Impact on Life,Demographics,Medical History,Psychiatric History,Substance Use,Coping Mechanisms,Social Support,Lifestyle Factors,Panic Disorder Diagnosis
0,1,38,Male,No,Yes,Moderate,Shortness of breath,Mild,Mild,Rural,Diabetes,Bipolar disorder,,Socializing,High,Sleep quality,0
1,2,51,Male,No,No,High,Panic attacks,Mild,Mild,Urban,Asthma,Anxiety disorder,Drugs,Exercise,High,Sleep quality,0
2,3,32,Female,Yes,No,High,Panic attacks,Mild,Significant,Urban,Diabetes,Depressive disorder,,Seeking therapy,Moderate,Exercise,0
3,4,64,Female,No,No,Moderate,Chest pain,Moderate,Moderate,Rural,Diabetes,,,Meditation,High,Exercise,0
4,5,31,Male,Yes,No,Moderate,Panic attacks,Mild,Moderate,Rural,Asthma,,Drugs,Seeking therapy,Low,Sleep quality,0


In [2]:
import random

# Seed both global generators used by Python-level code. NumPy keeps its own
# global generator that `random.seed` does not touch, so it is seeded
# explicitly as well (`np` is imported in the first cell).
random.seed(0)
np.random.seed(0)

In [3]:
# Features: every column except the label and the participant identifier.
# Labels: the 'Panic Disorder Diagnosis' column cast to integers.
train_x = trainData.drop(columns=['Panic Disorder Diagnosis', 'Participant ID'])
train_y = trainData['Panic Disorder Diagnosis'].astype('int')

# The test frame is split in exactly the same way.
test_x = testData.drop(columns=['Panic Disorder Diagnosis', 'Participant ID'])
test_y = testData['Panic Disorder Diagnosis'].astype('int')

# Preview the training features.
train_x.head()

Unnamed: 0,Age,Gender,Family History,Personal History,Current Stressors,Symptoms,Severity,Impact on Life,Demographics,Medical History,Psychiatric History,Substance Use,Coping Mechanisms,Social Support,Lifestyle Factors
0,38,Male,No,Yes,Moderate,Shortness of breath,Mild,Mild,Rural,Diabetes,Bipolar disorder,,Socializing,High,Sleep quality
1,51,Male,No,No,High,Panic attacks,Mild,Mild,Urban,Asthma,Anxiety disorder,Drugs,Exercise,High,Sleep quality
2,32,Female,Yes,No,High,Panic attacks,Mild,Significant,Urban,Diabetes,Depressive disorder,,Seeking therapy,Moderate,Exercise
3,64,Female,No,No,Moderate,Chest pain,Moderate,Moderate,Rural,Diabetes,,,Meditation,High,Exercise
4,31,Male,Yes,No,Moderate,Panic attacks,Mild,Moderate,Rural,Asthma,,Drugs,Seeking therapy,Low,Sleep quality


In [4]:
# One-hot encode the categorical columns. The training frame defines the
# feature space; `drop_first=True` removes one baseline level per column.
train_x = pd.get_dummies(train_x, drop_first=True)

# Encode the test frame WITHOUT `drop_first` and let `reindex` select exactly
# the training columns. Dropping the first level independently on the test
# frame is unsafe: if the test split happens not to contain the training
# baseline level, a different level is dropped, `reindex` then re-creates that
# column filled with 0, and every test row holding that level is silently
# encoded as the baseline. Columns the training frame does not have (its
# baseline levels, or levels unseen during training) are discarded by
# `reindex`; training columns absent from the test frame are filled with 0.
test_x = pd.get_dummies(test_x).reindex(columns=train_x.columns, fill_value=0)


In [5]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features only, then apply the same fitted
# transformation to both splits.
# NOTE: train_x / test_x are overwritten here, so re-running this cell would
# fit the scaler on data that has already been scaled.
scaler = StandardScaler().fit(train_x)
train_x = scaler.transform(train_x)
test_x = scaler.transform(test_x)


### Save scaler

In [None]:
# Save the fitted scaler to models/scaler.pkl.
import os
import pickle

scaler_file_path = './models/scaler.pkl'

# Make sure the target directory exists so the write does not fail on a
# fresh checkout.
os.makedirs(os.path.dirname(scaler_file_path), exist_ok=True)

# The context manager flushes and closes the file even if pickling raises;
# the previous bare open() left the handle open.
with open(scaler_file_path, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

### Machine learning models

In [6]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression (seed pinned, other settings left at their
# defaults) and predict labels for the test features.
logistic_model = LogisticRegression(random_state=0).fit(train_x, train_y)
logistic_predictions = logistic_model.predict(test_x)


In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
def report_classification_metrics(y_true, y_pred):
    """Compute and print accuracy, precision, recall, F1 and the confusion matrix.

    The same five-part report is needed after every model in this notebook,
    so it lives in one function instead of being copy-pasted per model.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted for the same rows.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, conf_matrix)`` exactly as returned
        by the corresponding ``sklearn.metrics`` functions.
    """
    scores = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred),
        "Recall": recall_score(y_true, y_pred),
        "F1 Score": f1_score(y_true, y_pred),
    }
    matrix = confusion_matrix(y_true, y_pred)

    # Print order and text match the report this cell produced before.
    for label, value in scores.items():
        print(f"{label}: {value}")
    print(f"Confusion Matrix:\n{matrix}")

    return (*scores.values(), matrix)


# Keep the module-level names this cell has always defined.
accuracy, precision, recall, f1, conf_matrix = report_classification_metrics(
    test_y, logistic_predictions
)


Accuracy: 0.96825
Precision: 0.6584615384615384
Recall: 0.5089179548156956
F1 Score: 0.5741113346747149
Confusion Matrix:
[[18937   222]
 [  413   428]]


In [8]:
from sklearn.svm import SVC

# Support vector classifier with the seed pinned and every other setting
# left at its default.
svm_model = SVC(random_state=0)
# Kept as the cell's last expression so the fitted estimator is still shown.
svm_model.fit(X=train_x, y=train_y)



### Save model

In [11]:
# Save the trained SVM model to models/best_model.pkl.
import os
import pickle

# Make sure the target directory exists so the write does not fail on a
# fresh checkout.
os.makedirs('./models', exist_ok=True)

# The context manager flushes and closes the file even if pickling raises;
# the previous bare open() left the handle open.
with open('./models/best_model.pkl', 'wb') as model_file:
    pickle.dump(svm_model, model_file)

### Load model

In [13]:
# Load the pickled model back into svm_model.
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# files written by this notebook or another trusted source.
# The context manager closes the file handle, which the previous bare open()
# never did.
with open('./models/best_model.pkl', 'rb') as model_file:
    svm_model = pickle.load(model_file)

In [12]:
# Predict with the SVM and score the predictions against the test labels.
svm_predictions = svm_model.predict(test_x)
accuracy, precision, recall, f1 = (
    score(test_y, svm_predictions)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(test_y, svm_predictions)

# One print call with a newline separator emits the same five-part report.
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Confusion Matrix:\n{conf_matrix}",
    sep="\n",
)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
[[19159     0]
 [    0   841]]


In [11]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree (seed pinned, other settings left at their defaults)
# and predict labels for the test features.
decision_tree_model = DecisionTreeClassifier(random_state=0).fit(train_x, train_y)
decision_tree_predictions = decision_tree_model.predict(test_x)


In [12]:
# Score the decision-tree predictions against the test labels.
accuracy, precision, recall, f1 = (
    score(test_y, decision_tree_predictions)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(test_y, decision_tree_predictions)

# One print call with a newline separator emits the same five-part report.
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Confusion Matrix:\n{conf_matrix}",
    sep="\n",
)

Accuracy: 0.999
Precision: 0.9881093935790726
Recall: 0.9881093935790726
F1 Score: 0.9881093935790726
Confusion Matrix:
[[19149    10]
 [   10   831]]


In [13]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest (seed pinned, other settings left at their defaults)
# and predict labels for the test features.
random_forest_model = RandomForestClassifier(random_state=0).fit(train_x, train_y)
random_forest_predictions = random_forest_model.predict(test_x)


In [14]:
# Score the random-forest predictions against the test labels.
accuracy, precision, recall, f1 = (
    score(test_y, random_forest_predictions)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(test_y, random_forest_predictions)

# One print call with a newline separator emits the same five-part report.
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Confusion Matrix:\n{conf_matrix}",
    sep="\n",
)

Accuracy: 0.9982
Precision: 0.9975278121137207
Recall: 0.9595719381688466
F1 Score: 0.9781818181818182
Confusion Matrix:
[[19157     2]
 [   34   807]]


In [15]:
from sklearn.ensemble import GradientBoostingClassifier

# Fit gradient boosting (seed pinned, other settings left at their defaults)
# and predict labels for the test features.
gradient_boosting_model = GradientBoostingClassifier(random_state=0).fit(train_x, train_y)
gradient_boosting_predictions = gradient_boosting_model.predict(test_x)


In [16]:
# Score the gradient-boosting predictions against the test labels.
accuracy, precision, recall, f1 = (
    score(test_y, gradient_boosting_predictions)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(test_y, gradient_boosting_predictions)

# One print call with a newline separator emits the same five-part report.
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Confusion Matrix:\n{conf_matrix}",
    sep="\n",
)

Accuracy: 0.9987
Precision: 0.9721900347624566
Recall: 0.9976218787158145
F1 Score: 0.9847417840375586
Confusion Matrix:
[[19135    24]
 [    2   839]]


### Neural network

In [17]:
import tensorflow as tf
# Pin TensorFlow's global random seed.
tf.random.set_seed(seed=0)


In [18]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam


In [19]:
# Start from an empty sequential stack and append the layers one by one.
model = Sequential()

# First layer: 12 ReLU units; the input width is the number of feature columns.
model.add(Dense(units=12, activation='relu', input_shape=(train_x.shape[1],)))

# Hidden layer: 8 ReLU units.
model.add(Dense(units=8, activation='relu'))

# Output layer: one sigmoid unit.
model.add(Dense(units=1, activation='sigmoid'))

# Binary cross-entropy loss, Adam with its default settings, and accuracy as
# the tracked metric.
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)




In [20]:
# Train for 5 epochs with a batch size of 1.
# NOTE(review): batch_size=1 means one optimizer step per training row, which
# is slow on a large training set — confirm this is intentional.
model.fit(
    x=train_x,
    y=train_y,
    batch_size=1,
    epochs=5,
    verbose=1,
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x2de27a890>

In [21]:
# Evaluate on the test set; the two returned values are unpacked as loss and
# accuracy.
loss, accuracy = model.evaluate(x=test_x, y=test_y, verbose=0)
# Report accuracy as a percentage with two decimals.
print("Model Accuracy: %.2f%%" % (100 * accuracy))


Model Accuracy: 100.00%
