## Importing the basic libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Reading the dataset and splitting into the dependent and independent variables

In [2]:
dataset = pd.read_excel("Dataset 6 186.xlsx")

feature_names = dataset.columns[:-4]

X = dataset.iloc[:, :-4]
y = dataset.iloc[:, -4]
print(feature_names)

Index(['AGE', 'SEX', 'ORAL HABITS', 'bleeding/ swollen gums ',
       'food enlodgement +pain ', 'pain during chewing', 'tooth mobility',
       '           bad odor', 'others(non periodontal reasons)',
       'brushing habits',
       ...
       'MPD 47', 'MCAL 47', 'BPD 47', 'BCAL 47', 'DPD 47', 'DCAL 47', 'LPD 47',
       'LCAL 47', 'DIABETES ASSESSMENT', 'GINGIVITIS'],
      dtype='object', length=247)


## Checking the dependent variable

In [3]:
print(y.head)
print(y.describe())

<bound method NDFrame.head of 0      1
1      1
2      1
3      1
4      1
      ..
180    0
181    0
182    0
183    0
184    0
Name: PERIODONTITIS, Length: 185, dtype: int64>
count    185.000000
mean       0.551351
std        0.498706
min        0.000000
25%        0.000000
50%        1.000000
75%        1.000000
max        1.000000
Name: PERIODONTITIS, dtype: float64


## Training and Testing data split

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


## Feature scaling

In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


## Dimensionality Reduction

In [6]:
from sklearn.decomposition import PCA
pca = PCA(n_components=10)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
component_weights = pca.components_


## Extracting the important feature names

In [7]:
feature_weights_mapping = {}
for i, component in enumerate(component_weights):
    component_feature_weights = zip(feature_names, component)
    sorted_feature_weights = sorted(
        component_feature_weights, key=lambda x: abs(x[1]), reverse=True
    )
    feature_weights_mapping[f"Component {i+1}"] = sorted_feature_weights
print("Feature names contributing to Principal Components:")
for component, weights in feature_weights_mapping.items():
    print(f"{component}:")
    for feature, weight in weights[:5]:
        print(f"  {feature}: {weight:.4f}")
    print()


Feature names contributing to Principal Components:
Component 1:
  BCAL 32: 0.1123
  MCAL 32: 0.1105
  DCAL 32: 0.1099
  LCAL 32: 0.1098
  DPD 32: 0.1078

Component 2:
  LCAL 37: 0.1125
  LPD 37: 0.1122
  MCAL 15: 0.1035
  BPD 26: 0.0996
  MPD 26: 0.0995

Component 3:
  LPD 33: 0.1699
  BPD 33: 0.1691
  DPD 33: 0.1670
  MPD 33: 0.1659
  LCAL 33: 0.1636

Component 4:
  BPD 13: 0.1580
  PCAL 13: 0.1565
  BCAL 13: 0.1556
  DPD 13: 0.1551
  DCAL 13: 0.1535

Component 5:
  PCAL 11: 0.1277
  BPD 11: 0.1254
  DCAL 37: 0.1244
  PPD 11: 0.1237
  BCAL 11: 0.1231

Component 6:
  BCAL 17: 0.1669
  MPD 17: 0.1649
  PPD 17: 0.1647
  BPD 17: 0.1641
  MCAL 17: 0.1629

Component 7:
  BPD 22: 0.1636
  DPD 22: 0.1626
  DCAL 22: 0.1605
  MPD 22: 0.1597
  MCAL 22: 0.1596

Component 8:
  DCAL 24: 0.1778
  DPD 24: 0.1777
  MPD 24: 0.1729
  MCAL 24: 0.1729
  BPD 24: 0.1715

Component 9:
  LCAL 47: 0.1780
  DCAL 47: 0.1776
  DPD 47: 0.1767
  BPD 47: 0.1763
  MPD 47: 0.1762

Component 10:
  BPD 45: 0.1744
  DPD

## Random Forest Classifier

In [8]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10,criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[64  0]
 [ 3 81]]
0.9797297297297297
Testing Accuracy:
[[16  3]
 [ 1 17]]
0.8918918918918919
Sensitivity: 0.9444
Specificity: 0.8421


## Decision Tree

In [9]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[64  0]
 [ 0 84]]
1.0
Testing Accuracy:
[[16  3]
 [ 3 15]]
0.8378378378378378
Sensitivity: 0.8333
Specificity: 0.8421


## Support Vector Machine

In [10]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[60  4]
 [ 8 76]]
0.918918918918919
Testing Accuracy:
[[17  2]
 [ 2 16]]
0.8918918918918919
Sensitivity: 0.8889
Specificity: 0.8947


## Kernel SVM

In [11]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[62  2]
 [20 64]]
0.8513513513513513
Testing Accuracy:
[[15  4]
 [ 5 13]]
0.7567567567567568
Sensitivity: 0.7222
Specificity: 0.7895


## K-Nearest Neighbors

In [12]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[52 12]
 [ 6 78]]
0.8783783783783784
Testing Accuracy:
[[14  5]
 [ 2 16]]
0.8108108108108109
Sensitivity: 0.8889
Specificity: 0.7368


## Logistic Regression

In [13]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[60  4]
 [ 8 76]]
0.918918918918919
Testing Accuracy:
[[16  3]
 [ 2 16]]
0.8648648648648649
Sensitivity: 0.8889
Specificity: 0.8421


## Naive Bayes

In [14]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[58  6]
 [44 40]]
0.6621621621621622
Testing Accuracy:
[[15  4]
 [ 8 10]]
0.6756756756756757
Sensitivity: 0.5556
Specificity: 0.7895


## XGBoost

In [15]:
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[64  0]
 [ 0 84]]
1.0
Testing Accuracy:
[[16  3]
 [ 1 17]]
0.8918918918918919
Sensitivity: 0.9444
Specificity: 0.8421


## CatBoost

In [18]:
from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Learning rate set to 0.004557
0:	learn: 0.6897905	total: 64.9ms	remaining: 1m 4s
1:	learn: 0.6858488	total: 74.5ms	remaining: 37.2s
2:	learn: 0.6823983	total: 81.5ms	remaining: 27.1s
3:	learn: 0.6790863	total: 86.9ms	remaining: 21.6s
4:	learn: 0.6753013	total: 107ms	remaining: 21.3s
5:	learn: 0.6716459	total: 113ms	remaining: 18.8s
6:	learn: 0.6680984	total: 122ms	remaining: 17.3s
7:	learn: 0.6644646	total: 125ms	remaining: 15.6s
8:	learn: 0.6605268	total: 131ms	remaining: 14.4s
9:	learn: 0.6573400	total: 138ms	remaining: 13.6s
10:	learn: 0.6532878	total: 141ms	remaining: 12.7s
11:	learn: 0.6501043	total: 151ms	remaining: 12.4s
12:	learn: 0.6460775	total: 156ms	remaining: 11.8s
13:	learn: 0.6428278	total: 164ms	remaining: 11.6s
14:	learn: 0.6394820	total: 167ms	remaining: 11s
15:	learn: 0.6358872	total: 175ms	remaining: 10.8s
16:	learn: 0.6324331	total: 178ms	remaining: 10.3s
17:	learn: 0.6298873	total: 188ms	remaining: 10.3s
18:	learn: 0.6263584	total: 199ms	remaining: 10.3s
19:	learn

In [17]:
! pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


# ANN

In [19]:
import tensorflow as tf
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=12, activation='relu'))
ann.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann.add(tf.keras.layers.Dense(units=12, activation='relu'))
ann.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann.add(tf.keras.layers.Dropout(rate=0.2))
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
ann.fit(X_train, y_train, batch_size = 32, epochs = 100)

# Predicting the Test set results
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(accuracy_score(y_test, y_pred))
tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.4651 - loss: 1.2057
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4828 - loss: 0.9640  
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5098 - loss: 0.8226 
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5333 - loss: 0.8118 
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4949 - loss: 0.7629 
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5545 - loss: 0.7706 
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5466 - loss: 0.7513 
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6210 - loss: 0.7226 
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[