## Importing the basic libraries

In [17]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Reading the dataset and splitting into the dependent and independent variables

In [18]:
dataset = pd.read_excel("Dataset 6 186.xlsx")

feature_names = dataset.columns[:-4]

X = dataset.iloc[:, :-4]
y = dataset.iloc[:, -4]
print(dataset.iloc[:,-4])
print(feature_names)

0      1
1      1
2      1
3      1
4      1
      ..
195    1
196    1
197    1
198    1
199    0
Name: PERIODONTITIS, Length: 200, dtype: int64
Index(['AGE', 'SEX', 'ORAL HABITS', 'bleeding/ swollen gums ',
       'food enlodgement +pain ', 'pain during chewing', 'tooth mobility',
       '           bad odor', 'others(non periodontal reasons)',
       'brushing habits',
       ...
       'MPD 47', 'MCAL 47', 'BPD 47', 'BCAL 47', 'DPD 47', 'DCAL 47', 'LPD 47',
       'LCAL 47', 'DIABETES ASSESSMENT', 'GINGIVITIS'],
      dtype='object', length=247)


## Checking the dependent variable

In [19]:
print(y.head)
print(y.describe())

<bound method NDFrame.head of 0      1
1      1
2      1
3      1
4      1
      ..
195    1
196    1
197    1
198    1
199    0
Name: PERIODONTITIS, Length: 200, dtype: int64>
count    200.00000
mean       0.54500
std        0.49922
min        0.00000
25%        0.00000
50%        1.00000
75%        1.00000
max        1.00000
Name: PERIODONTITIS, dtype: float64


## Training and Testing data split

In [20]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


## Feature scaling

In [21]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


## Dimensionality Reduction

In [22]:
from sklearn.decomposition import PCA
pca = PCA(n_components=10)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
component_weights = pca.components_


## Extracting the important feature names

In [23]:
feature_weights_mapping = {}
for i, component in enumerate(component_weights):
    component_feature_weights = zip(feature_names, component)
    sorted_feature_weights = sorted(
        component_feature_weights, key=lambda x: abs(x[1]), reverse=True
    )
    feature_weights_mapping[f"Component {i+1}"] = sorted_feature_weights
print("Feature names contributing to Principal Components:")
for component, weights in feature_weights_mapping.items():
    print(f"{component}:")
    for feature, weight in weights[:5]:
        print(f"  {feature}: {weight:.4f}")
    print()


Feature names contributing to Principal Components:
Component 1:
  BCAL 23: 0.1039
  BCAL 32: 0.1034
  MCAL 32: 0.1010
  DCAL 32: 0.1008
  LCAL 32: 0.0996

Component 2:
  ORAL HABITS: 0.1106
  MCAL 15: -0.1052
  PCAL 15: -0.1002
  PPD 15: -0.0994
  BCAL 43: 0.0984

Component 3:
  MPD 33: 0.1538
  LPD 33: 0.1530
  BPD 33: 0.1525
  DPD 33: 0.1521
  MCAL 33: 0.1457

Component 4:
  DPD 31: 0.1386
  MPD 31: 0.1385
  LPD 31: 0.1369
  BPD 31: 0.1367
  BCAL 31: 0.1304

Component 5:
  BPD 14: 0.1520
  PPD 14: 0.1518
  DPD 14: 0.1511
  PCAL 14: 0.1499
  DPD 25: 0.1493

Component 6:
  LCAL 41: 0.1606
  DCAL 41: 0.1578
  MCAL 41: 0.1566
  DPD 41: 0.1563
  MPD 41: 0.1529

Component 7:
  DPD 24: 0.2233
  MPD 24: 0.2184
  BPD 24: 0.2175
  PPD 24: 0.2172
  PCAL 24: 0.2141

Component 8:
  MCAL 13: 0.1903
  DCAL 13: 0.1886
  PPD 13: 0.1877
  MPD 13: 0.1864
  DPD 13: 0.1856

Component 9:
  LCAL 46: 0.1664
  DPD 45: -0.1428
  BPD 45: -0.1416
  MCAL 47: 0.1376
  BCAL 45: -0.1361

Component 10:
  MCAL 23: 0

## Random Forest Classifier

In [24]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10,criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[76  0]
 [ 5 79]]
0.96875
Testing Accuracy:
[[14  1]
 [ 4 21]]
0.875
Sensitivity: 0.8400
Specificity: 0.9333


## Decision Tree

In [25]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[76  0]
 [ 0 84]]
1.0
Testing Accuracy:
[[13  2]
 [ 4 21]]
0.85
Sensitivity: 0.8400
Specificity: 0.8667


## Support Vector Machine

In [26]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[72  4]
 [10 74]]
0.9125
Testing Accuracy:
[[14  1]
 [ 3 22]]
0.9
Sensitivity: 0.8800
Specificity: 0.9333


## Kernel SVM

In [27]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[72  4]
 [17 67]]
0.86875
Testing Accuracy:
[[14  1]
 [ 4 21]]
0.875
Sensitivity: 0.8400
Specificity: 0.9333


## K-Nearest Neighbors

In [28]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[66 10]
 [10 74]]
0.875
Testing Accuracy:
[[13  2]
 [ 2 23]]
0.9
Sensitivity: 0.9200
Specificity: 0.8667


## Logistic Regression

In [29]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[72  4]
 [10 74]]
0.9125
Testing Accuracy:
[[14  1]
 [ 2 23]]
0.925
Sensitivity: 0.9200
Specificity: 0.9333


## Naive Bayes

In [30]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[67  9]
 [41 43]]
0.6875
Testing Accuracy:
[[13  2]
 [17  8]]
0.525
Sensitivity: 0.3200
Specificity: 0.8667


## XGBoost

In [31]:
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[76  0]
 [ 0 84]]
1.0
Testing Accuracy:
[[12  3]
 [ 2 23]]
0.875
Sensitivity: 0.9200
Specificity: 0.8000


## CatBoost

In [32]:
! pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [33]:
from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Learning rate set to 0.004711
0:	learn: 0.6880960	total: 49.3ms	remaining: 49.2s
1:	learn: 0.6840971	total: 53.6ms	remaining: 26.7s
2:	learn: 0.6792577	total: 56ms	remaining: 18.6s
3:	learn: 0.6753773	total: 62.4ms	remaining: 15.5s
4:	learn: 0.6714375	total: 65.5ms	remaining: 13s
5:	learn: 0.6671962	total: 71ms	remaining: 11.8s
6:	learn: 0.6639368	total: 74ms	remaining: 10.5s
7:	learn: 0.6599647	total: 83.1ms	remaining: 10.3s
8:	learn: 0.6559535	total: 85.8ms	remaining: 9.45s
9:	learn: 0.6515251	total: 89.1ms	remaining: 8.82s
10:	learn: 0.6480176	total: 91.5ms	remaining: 8.23s
11:	learn: 0.6449701	total: 103ms	remaining: 8.45s
12:	learn: 0.6417492	total: 107ms	remaining: 8.09s
13:	learn: 0.6391551	total: 110ms	remaining: 7.77s
14:	learn: 0.6354133	total: 119ms	remaining: 7.81s
15:	learn: 0.6324455	total: 121ms	remaining: 7.47s
16:	learn: 0.6284192	total: 124ms	remaining: 7.16s
17:	learn: 0.6251811	total: 132ms	remaining: 7.23s
18:	learn: 0.6212998	total: 138ms	remaining: 7.14s
19:	lear

# ANN

In [34]:
import tensorflow as tf
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=12, activation='relu'))
ann.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann.add(tf.keras.layers.Dense(units=12, activation='relu'))
ann.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann.add(tf.keras.layers.Dropout(rate=0.2))
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
ann.fit(X_train, y_train, batch_size = 32, epochs = 100)

# Predicting the Test set results
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(accuracy_score(y_test, y_pred))
tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 16ms/step - accuracy: 0.5440 - loss: 0.7269
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.4740 - loss: 0.6875
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.4699 - loss: 0.8274
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.5276 - loss: 0.7437
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.5406 - loss: 0.6837 
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.5854 - loss: 0.6461 
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.5648 - loss: 0.6638 
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.6219 - loss: 0.6507
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m