## Importing the basic libraries

In [18]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Reading the dataset and splitting into the dependent and independent variables

In [19]:
dataset = pd.read_excel("Dataset 6 145.xlsx")

feature_names = dataset.columns[:-3]

X = dataset.iloc[:, :-3]
y = dataset.iloc[:, -3]  

## Checking the dependent variable

In [20]:
print(y.head)
print(y.describe())

<bound method NDFrame.head of 0      4
1      4
2      4
3      2
4      3
      ..
140    0
141    0
142    0
143    0
144    0
Name: STAGING OF PERIODONTITIS, Length: 145, dtype: int64>
count    145.000000
mean       1.834483
std        1.795108
min        0.000000
25%        0.000000
50%        2.000000
75%        4.000000
max        4.000000
Name: STAGING OF PERIODONTITIS, dtype: float64


## Training and Testing data split 

In [21]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


## Feature scaling

In [22]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


## Dimensionality Reduction

In [23]:
from sklearn.decomposition import PCA
pca = PCA(n_components=10)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
component_weights = pca.components_

## Extracting the important feature names

In [24]:
feature_weights_mapping = {}
for i, component in enumerate(component_weights):
    component_feature_weights = zip(feature_names, component)
    sorted_feature_weights = sorted(
        component_feature_weights, key=lambda x: abs(x[1]), reverse=True
    )
    feature_weights_mapping[f"Component {i+1}"] = sorted_feature_weights
print("Feature names contributing to Principal Components:")
for component, weights in feature_weights_mapping.items():
    print(f"{component}:")
    for feature, weight in weights[:5]: 
        print(f"  {feature}: {weight:.4f}")
    print()


Feature names contributing to Principal Components:
Component 1:
  BCAL 32: 0.1126
  PCAL 21: 0.1095
  MCAL 32: 0.1095
  LCAL 32: 0.1093
  DPD 32: 0.1091

Component 2:
  LCAL 37: 0.1193
  LPD 37: 0.1187
  MCAL 15: 0.1083
  MPD 15: 0.1082
  PCAL 15: 0.1076

Component 3:
  BPD 31: 0.1313
  LPD 31: 0.1305
  DPD 31: 0.1291
  LCAL 31: 0.1270
  MPD 31: 0.1264

Component 4:
  LCAL 33: 0.1467
  PPD 11: -0.1385
  BPD 33: 0.1370
  BPD 11: -0.1348
  MCAL 11: -0.1345

Component 5:
  MCAL 24: 0.1543
  DCAL 24: 0.1515
  PPD 24: 0.1372
  BPD 24: 0.1369
  PCAL 24: 0.1334

Component 6:
  BCAL 37: 0.1513
  DPD 37: 0.1507
  MCAL 37: 0.1491
  DCAL 37: 0.1491
  MPD 37: 0.1476

Component 7:
  LPD 45: 0.1725
  BPD 45: 0.1690
  BCAL 45: 0.1674
  LCAL 45: 0.1637
  MCAL 45: 0.1630

Component 8:
  BPD 16: 0.1982
  PPD 16: 0.1976
  MPD 16: 0.1954
  BCAL 16: 0.1946
  MCAL 16: 0.1883

Component 9:
  LCAL 35: 0.1717
  LPD 35: 0.1643
  MPD 35: 0.1601
  BPD 35: 0.1597
  DCAL 35: 0.1556

Component 10:
  MPD 13: 0.2205


## Random Forest Classifier

In [25]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10,criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[52  0  0  0  0]
 [ 0  3  0  0  0]
 [ 0  0  5  0  0]
 [ 0  0  0 17  0]
 [ 0  0  0  0 39]]
1.0
Testing Accuracy:
[[12  0  0  1  0]
 [ 2  1  0  0  0]
 [ 0  0  0  2  0]
 [ 0  0  0  3  2]
 [ 0  0  0  0  6]]
0.7586206896551724


## Decision Tree

In [26]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[52  0  0  0  0]
 [ 0  3  0  0  0]
 [ 0  0  5  0  0]
 [ 0  0  0 17  0]
 [ 0  0  0  0 39]]
1.0
Testing Accuracy:
[[10  0  0  1  2]
 [ 0  1  1  0  1]
 [ 0  1  0  0  1]
 [ 0  0  1  2  2]
 [ 0  0  0  1  5]]
0.6206896551724138


## Support Vector Machine

In [27]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[50  0  0  2  0]
 [ 0  3  0  0  0]
 [ 3  0  1  0  1]
 [ 2  0  0 13  2]
 [ 1  0  0  4 34]]
0.8706896551724138
Testing Accuracy:
[[13  0  0  0  0]
 [ 2  1  0  0  0]
 [ 0  0  0  1  1]
 [ 0  0  1  2  2]
 [ 0  0  0  1  5]]
0.7241379310344828


## Kernel SVM

In [28]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))



Training Accuracy:
[[50  0  0  0  2]
 [ 2  1  0  0  0]
 [ 3  0  0  0  2]
 [ 7  0  0  4  6]
 [ 4  0  0  0 35]]
0.7758620689655172
Testing Accuracy:
[[12  0  0  0  1]
 [ 3  0  0  0  0]
 [ 0  0  0  0  2]
 [ 3  0  0  0  2]
 [ 0  0  0  0  6]]
0.6206896551724138


## K-Nearest Neighbors

In [29]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[46  0  0  3  3]
 [ 2  0  0  0  1]
 [ 3  0  1  0  1]
 [ 4  0  0  9  4]
 [ 3  0  0  2 34]]
0.7758620689655172
Testing Accuracy:
[[13  0  0  0  0]
 [ 3  0  0  0  0]
 [ 0  0  0  2  0]
 [ 0  0  0  2  3]
 [ 3  0  0  1  2]]
0.5862068965517241


## Logistic Regression

In [30]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[49  0  0  3  0]
 [ 1  2  0  0  0]
 [ 3  0  1  0  1]
 [ 1  0  0 13  3]
 [ 1  0  0  2 36]]
0.8706896551724138
Testing Accuracy:
[[13  0  0  0  0]
 [ 3  0  0  0  0]
 [ 0  0  0  1  1]
 [ 1  0  0  0  4]
 [ 1  1  0  0  4]]
0.5862068965517241


## Naive Bayes

In [31]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[46  0  0  3  3]
 [ 1  2  0  0  0]
 [ 3  0  2  0  0]
 [11  0  0  5  1]
 [12  0  2  6 19]]
0.6379310344827587
Testing Accuracy:
[[11  0  0  2  0]
 [ 3  0  0  0  0]
 [ 1  0  0  1  0]
 [ 4  0  1  0  0]
 [ 1  0  0  3  2]]
0.4482758620689655


## XGBoost

In [32]:
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[52  0  0  0  0]
 [ 0  3  0  0  0]
 [ 0  0  5  0  0]
 [ 0  0  0 17  0]
 [ 0  0  0  0 39]]
1.0
Testing Accuracy:
[[12  0  0  1  0]
 [ 1  1  0  0  1]
 [ 0  0  0  2  0]
 [ 1  0  0  1  3]
 [ 0  0  0  0  6]]
0.6896551724137931


## CatBoost

In [33]:
from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Learning rate set to 0.070894
0:	learn: 1.5460431	total: 6.08ms	remaining: 6.07s
1:	learn: 1.4945293	total: 8.9ms	remaining: 4.44s
2:	learn: 1.4505588	total: 11.8ms	remaining: 3.94s
3:	learn: 1.4084142	total: 14.4ms	remaining: 3.58s
4:	learn: 1.3550187	total: 17.3ms	remaining: 3.43s
5:	learn: 1.3122714	total: 20ms	remaining: 3.31s
6:	learn: 1.2753701	total: 22.7ms	remaining: 3.23s
7:	learn: 1.2392652	total: 25.3ms	remaining: 3.14s
8:	learn: 1.2054200	total: 28ms	remaining: 3.09s
9:	learn: 1.1717234	total: 30.7ms	remaining: 3.04s
10:	learn: 1.1412962	total: 33.3ms	remaining: 2.99s
11:	learn: 1.1188926	total: 35.9ms	remaining: 2.96s
12:	learn: 1.0937329	total: 38.3ms	remaining: 2.91s
13:	learn: 1.0669349	total: 41ms	remaining: 2.89s
14:	learn: 1.0427716	total: 43.8ms	remaining: 2.88s
15:	learn: 1.0178914	total: 46.5ms	remaining: 2.86s
16:	learn: 0.9922588	total: 49.2ms	remaining: 2.85s
17:	learn: 0.9733338	total: 52ms	remaining: 2.84s
18:	learn: 0.9488513	total: 54.5ms	remaining: 2.81s
1

# ANN


In [34]:
import tensorflow as tf
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=12, activation='relu'))
ann.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann.add(tf.keras.layers.Dense(units=12, activation='relu'))
ann.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann.add(tf.keras.layers.Dropout(rate=0.2))
ann.add(tf.keras.layers.Dense(units=5, activation='sigmoid'))
ann.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
ann.fit(X_train, y_train, batch_size = 32, epochs = 100)

# Predicting the Test set results
y_pred = ann.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1) 
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_classes)
print(cm)
print(accuracy_score(y_test, y_pred_classes))
for i in range(5):
    print(f"\nClass {i}:")
    tn, fp, fn, tp = confusion_matrix((y_test == i), (y_pred_classes == i)).ravel()
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    print(f"Sensitivity: {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.1703 - loss: 2.3224
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2413 - loss: 2.1368 
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2195 - loss: 2.2303 
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2091 - loss: 2.0977 
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2635 - loss: 1.8517 
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2163 - loss: 1.8987 
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3089 - loss: 1.8533 
Epoch 8/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2899 - loss: 1.7840 
Epoch 9/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37