## Importing the basic libraries

In [11]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Reading the dataset and splitting into the dependent and independent variables

In [12]:
dataset = pd.read_excel("Dataset 6 145.xlsx")

feature_names = dataset.columns[:-3]

X = dataset.iloc[:, :-4]
y = dataset.iloc[:, -4]  

## Checking the dependent variable

In [13]:
print(y.head)
print(y.describe())

<bound method NDFrame.head of 0      1
1      1
2      1
3      1
4      1
      ..
140    0
141    0
142    0
143    0
144    0
Name: PERIODONTITIS, Length: 145, dtype: int64>
count    145.000000
mean       0.551724
std        0.499041
min        0.000000
25%        0.000000
50%        1.000000
75%        1.000000
max        1.000000
Name: PERIODONTITIS, dtype: float64


## Training and Testing data split 

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


## Feature scaling

In [15]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


## Dimensionality Reduction

In [16]:
from sklearn.decomposition import PCA
pca = PCA(n_components=10)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
component_weights = pca.components_


## Extracting the important feature names

In [17]:
feature_weights_mapping = {}
for i, component in enumerate(component_weights):
    component_feature_weights = zip(feature_names, component)
    sorted_feature_weights = sorted(
        component_feature_weights, key=lambda x: abs(x[1]), reverse=True
    )
    feature_weights_mapping[f"Component {i+1}"] = sorted_feature_weights
print("Feature names contributing to Principal Components:")
for component, weights in feature_weights_mapping.items():
    print(f"{component}:")
    for feature, weight in weights[:5]: 
        print(f"  {feature}: {weight:.4f}")
    print()


Feature names contributing to Principal Components:
Component 1:
  BCAL 32: 0.1131
  PCAL 21: 0.1103
  MCAL 32: 0.1101
  LCAL 32: 0.1099
  DPD 32: 0.1096

Component 2:
  LCAL 37: 0.1195
  LPD 37: 0.1189
  MCAL 15: 0.1086
  MPD 15: 0.1084
  PCAL 15: 0.1078

Component 3:
  BPD 31: 0.1315
  LPD 31: 0.1307
  DPD 31: 0.1294
  LCAL 31: 0.1272
  MPD 31: 0.1266

Component 4:
  LCAL 33: 0.1472
  PPD 11: -0.1385
  BPD 33: 0.1376
  BPD 11: -0.1348
  MCAL 11: -0.1346

Component 5:
  MCAL 24: 0.1554
  DCAL 24: 0.1525
  PPD 24: 0.1382
  BPD 24: 0.1378
  PCAL 24: 0.1343

Component 6:
  BCAL 37: 0.1533
  DPD 37: 0.1523
  MCAL 37: 0.1514
  DCAL 37: 0.1513
  MPD 37: 0.1493

Component 7:
  LPD 45: 0.1724
  BPD 45: 0.1683
  BCAL 45: 0.1665
  LCAL 45: 0.1625
  MCAL 45: 0.1619

Component 8:
  BPD 16: 0.1981
  PPD 16: 0.1974
  MPD 16: 0.1955
  BCAL 16: 0.1943
  MCAL 16: 0.1879

Component 9:
  LCAL 35: 0.1744
  LPD 35: 0.1676
  MPD 35: 0.1632
  BPD 35: 0.1626
  DCAL 35: 0.1592

Component 10:
  MPD 13: 0.2216


## Random Forest Classifier

In [18]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10,criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[52  0]
 [ 1 63]]
0.9913793103448276
Testing Accuracy:
[[12  1]
 [ 3 13]]
0.8620689655172413
Sensitivity: 0.8125
Specificity: 0.9231


## Decision Tree

In [19]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[52  0]
 [ 0 64]]
1.0
Testing Accuracy:
[[12  1]
 [ 2 14]]
0.896551724137931
Sensitivity: 0.8750
Specificity: 0.9231


## Support Vector Machine

In [20]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[49  3]
 [ 6 58]]
0.9224137931034483
Testing Accuracy:
[[13  0]
 [ 2 14]]
0.9310344827586207
Sensitivity: 0.8750
Specificity: 1.0000


## Kernel SVM

In [21]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[46  6]
 [12 52]]
0.8448275862068966
Testing Accuracy:
[[11  2]
 [ 4 12]]
0.7931034482758621
Sensitivity: 0.7500
Specificity: 0.8462


## K-Nearest Neighbors

In [22]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[43  9]
 [ 6 58]]
0.8706896551724138
Testing Accuracy:
[[10  3]
 [ 2 14]]
0.8275862068965517
Sensitivity: 0.8750
Specificity: 0.7692


## Logistic Regression

In [23]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[48  4]
 [ 7 57]]
0.9051724137931034
Testing Accuracy:
[[13  0]
 [ 2 14]]
0.9310344827586207
Sensitivity: 0.8750
Specificity: 1.0000


## Naive Bayes

In [24]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[46  6]
 [28 36]]
0.7068965517241379
Testing Accuracy:
[[11  2]
 [ 9  7]]
0.6206896551724138
Sensitivity: 0.4375
Specificity: 0.8462


## XGBoost

In [25]:
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[52  0]
 [ 0 64]]
1.0
Testing Accuracy:
[[11  2]
 [ 5 11]]
0.7586206896551724
Sensitivity: 0.6875
Specificity: 0.8462


## CatBoost

In [26]:
from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Learning rate set to 0.004106
0:	learn: 0.6905008	total: 164ms	remaining: 2m 43s
1:	learn: 0.6876657	total: 166ms	remaining: 1m 22s
2:	learn: 0.6844354	total: 168ms	remaining: 56s
3:	learn: 0.6816704	total: 171ms	remaining: 42.5s
4:	learn: 0.6779447	total: 173ms	remaining: 34.3s
5:	learn: 0.6735903	total: 175ms	remaining: 28.9s
6:	learn: 0.6701050	total: 177ms	remaining: 25.1s
7:	learn: 0.6671002	total: 178ms	remaining: 22.1s
8:	learn: 0.6635582	total: 180ms	remaining: 19.8s
9:	learn: 0.6599637	total: 182ms	remaining: 18s
10:	learn: 0.6570428	total: 184ms	remaining: 16.5s
11:	learn: 0.6541835	total: 185ms	remaining: 15.3s
12:	learn: 0.6505018	total: 187ms	remaining: 14.2s
13:	learn: 0.6478364	total: 190ms	remaining: 13.4s
14:	learn: 0.6441104	total: 192ms	remaining: 12.6s
15:	learn: 0.6416493	total: 193ms	remaining: 11.9s
16:	learn: 0.6385893	total: 195ms	remaining: 11.3s
17:	learn: 0.6355184	total: 197ms	remaining: 10.8s
18:	learn: 0.6325186	total: 199ms	remaining: 10.3s
19:	learn: 0.

# ANN

In [27]:
import tensorflow as tf
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=12, activation='relu'))
ann.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann.add(tf.keras.layers.Dense(units=12, activation='relu'))
ann.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann.add(tf.keras.layers.Dropout(rate=0.2))
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
ann.fit(X_train, y_train, batch_size = 32, epochs = 100)

# Predicting the Test set results
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(accuracy_score(y_test, y_pred))
tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5575 - loss: 0.8115
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5395 - loss: 0.7509 
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5305 - loss: 0.7405 
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4950 - loss: 0.7451 
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5425 - loss: 0.6835 
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5817 - loss: 0.6639 
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6205 - loss: 0.6415 
Epoch 8/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5886 - loss: 0.6175 
Epoch 9/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37