## Importing the basic libraries

In [41]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Reading the dataset and splitting into the dependent and independent variables

In [42]:
dataset = pd.read_excel("Dataset4_with radiograph.xlsx")

feature_names = dataset.columns[:-4]

X = dataset.iloc[:, :-4]
y = dataset.iloc[:, -4]  

## Checking the dependent variable

In [43]:
print(y.head)

<bound method NDFrame.head of 0      1
1      1
2      1
3      1
4      1
      ..
100    0
101    0
102    0
103    0
104    0
Name: PERIODONTITIS, Length: 105, dtype: int64>


## Training and Testing data split 

In [44]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


## Feature scaling

In [45]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


## Dimensionality Reduction

In [46]:
from sklearn.decomposition import PCA
pca = PCA(n_components=10)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
component_weights = pca.components_


## Extracting the important feature names

In [47]:
feature_weights_mapping = {}
for i, component in enumerate(component_weights):
    component_feature_weights = zip(feature_names, component)
    sorted_feature_weights = sorted(
        component_feature_weights, key=lambda x: abs(x[1]), reverse=True
    )
    feature_weights_mapping[f"Component {i+1}"] = sorted_feature_weights
print("Feature names contributing to Principal Components:")
for component, weights in feature_weights_mapping.items():
    print(f"{component}:")
    for feature, weight in weights[:5]: 
        print(f"  {feature}: {weight:.4f}")
    print()


Feature names contributing to Principal Components:
Component 1:
  BCAL 32: 0.0800
  MPS 32: 0.0783
  DPD 32: 0.0783
  DPS 32: 0.0783
  BGI 32: 0.0783

Component 2:
  ORAL HABITS: 0.0748
  DCAL 43: 0.0695
  DCS 33: 0.0690
  MCS 33: 0.0690
  LCS 33: 0.0690

Component 3:
  LCAL 33: 0.0911
  BPD 33: 0.0909
  LPS 43: 0.0907
  LPS 33: 0.0905
  MPS 33: 0.0905

Component 4:
  DPD 11: -0.0840
  MPD 11: -0.0822
  PPD 11: -0.0814
  PGI 11: -0.0801
  DGI 11: -0.0799

Component 5:
  PGI 24: 0.0848
  MCS 24: 0.0847
  DGI 24: 0.0838
  DCS 24: 0.0837
  PCS 24: 0.0837

Component 6:
  DPD 24: 0.1166
  MPD 24: 0.1157
  DCAL 24: 0.1137
  MPS 24: 0.1127
  MCAL 24: 0.1127

Component 7:
  LCAL 35: 0.1172
  BCAL 35: 0.1148
  DCS 35: 0.1142
  MCS 35: 0.1142
  LCS 35: 0.1142

Component 8:
  MCAL 27: 0.0907
  DCAL 27: 0.0907
  BPD 27: 0.0902
  MCS 27: 0.0901
  DCS 27: 0.0901

Component 9:
  MGI 23: 0.1259
  DGI 23: 0.1221
  BGI 44: 0.1206
  BGI 23: 0.1202
  BCS 23: 0.1201

Component 10:
  BCAL 16: -0.1171
  BPS

## Random Forest Classifier

In [48]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10,criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[42  0]
 [ 1 41]]
0.9880952380952381
Testing Accuracy:
[[ 9  1]
 [ 1 10]]
0.9047619047619048
Sensitivity: 0.9091
Specificity: 0.9000


## Decision Tree

In [49]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[42  0]
 [ 0 42]]
1.0
Testing Accuracy:
[[ 9  1]
 [ 1 10]]
0.9047619047619048
Sensitivity: 0.9091
Specificity: 0.9000


## Support Vector Machine

In [50]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[37  5]
 [ 4 38]]
0.8928571428571429
Testing Accuracy:
[[ 9  1]
 [ 0 11]]
0.9523809523809523
Sensitivity: 1.0000
Specificity: 0.9000


## Kernel SVM

In [53]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[38  4]
 [11 31]]
0.8214285714285714
Testing Accuracy:
[[9 1]
 [5 6]]
0.7142857142857143
Sensitivity: 0.5455
Specificity: 0.9000


## K-Nearest Neighbors

In [54]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[33  9]
 [ 4 38]]
0.8452380952380952
Testing Accuracy:
[[ 8  2]
 [ 1 10]]
0.8571428571428571
Sensitivity: 0.9091
Specificity: 0.8000


## Logistic Regression

In [55]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[37  5]
 [ 6 36]]
0.8690476190476191
Testing Accuracy:
[[ 9  1]
 [ 1 10]]
0.9047619047619048
Sensitivity: 0.9091
Specificity: 0.9000


## Naive Bayes

In [56]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[38  4]
 [19 23]]
0.7261904761904762
Testing Accuracy:
[[9 1]
 [9 2]]
0.5238095238095238
Sensitivity: 0.1818
Specificity: 0.9000


## XGBoost

In [57]:
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Training Accuracy:
[[42  0]
 [ 0 42]]
1.0
Testing Accuracy:
[[ 9  1]
 [ 1 10]]
0.9047619047619048
Sensitivity: 0.9091
Specificity: 0.9000


## CatBoost

In [58]:
from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))

tn, fp, fn, tp = cm_test.ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")


Learning rate set to 0.003578
0:	learn: 0.6905405	total: 144ms	remaining: 2m 24s
1:	learn: 0.6884132	total: 146ms	remaining: 1m 12s
2:	learn: 0.6854058	total: 149ms	remaining: 49.4s
3:	learn: 0.6823447	total: 152ms	remaining: 37.8s
4:	learn: 0.6791116	total: 154ms	remaining: 30.6s
5:	learn: 0.6762238	total: 156ms	remaining: 25.9s
6:	learn: 0.6742588	total: 158ms	remaining: 22.4s
7:	learn: 0.6720510	total: 160ms	remaining: 19.8s
8:	learn: 0.6696261	total: 162ms	remaining: 17.8s
9:	learn: 0.6675395	total: 164ms	remaining: 16.3s
10:	learn: 0.6649811	total: 167ms	remaining: 15s
11:	learn: 0.6628615	total: 169ms	remaining: 13.9s
12:	learn: 0.6603615	total: 170ms	remaining: 12.9s
13:	learn: 0.6574105	total: 172ms	remaining: 12.1s
14:	learn: 0.6546670	total: 174ms	remaining: 11.4s
15:	learn: 0.6526609	total: 175ms	remaining: 10.8s
16:	learn: 0.6502340	total: 177ms	remaining: 10.2s
17:	learn: 0.6476194	total: 178ms	remaining: 9.71s
18:	learn: 0.6444585	total: 179ms	remaining: 9.27s
19:	learn: 