## Importing the basic libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Reading the dataset and splitting into the dependent and independent variables

In [2]:
dataset = pd.read_excel("Dataset 6 186.xlsx")

feature_names = dataset.columns[:-3]

X = dataset.iloc[:, :-3]
y = dataset.iloc[:, -3]
print(feature_names)

Index(['AGE', 'SEX', 'ORAL HABITS', 'bleeding/ swollen gums ',
       'food enlodgement +pain ', 'pain during chewing', 'tooth mobility',
       '           bad odor', 'others(non periodontal reasons)',
       'brushing habits',
       ...
       'MCAL 47', 'BPD 47', 'BCAL 47', 'DPD 47', 'DCAL 47', 'LPD 47',
       'LCAL 47', 'DIABETES ASSESSMENT', 'GINGIVITIS', 'PERIODONTITIS'],
      dtype='object', length=248)


## Checking the dependent variable

In [3]:
print(y.head)
print(y.describe())

<bound method NDFrame.head of 0      4
1      4
2      4
3      2
4      3
      ..
195    4
196    3
197    3
198    3
199    0
Name: STAGING OF PERIODONTITIS, Length: 200, dtype: int64>
count    200.000000
mean       1.845000
std        1.793617
min        0.000000
25%        0.000000
50%        2.000000
75%        4.000000
max        4.000000
Name: STAGING OF PERIODONTITIS, dtype: float64


## Training and Testing data split

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


## Feature scaling

In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


## Dimensionality Reduction

In [6]:
from sklearn.decomposition import PCA
pca = PCA(n_components=10)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
component_weights = pca.components_

## Extracting the important feature names

In [7]:
feature_weights_mapping = {}
for i, component in enumerate(component_weights):
    component_feature_weights = zip(feature_names, component)
    sorted_feature_weights = sorted(
        component_feature_weights, key=lambda x: abs(x[1]), reverse=True
    )
    feature_weights_mapping[f"Component {i+1}"] = sorted_feature_weights
print("Feature names contributing to Principal Components:")
for component, weights in feature_weights_mapping.items():
    print(f"{component}:")
    for feature, weight in weights[:5]:
        print(f"  {feature}: {weight:.4f}")
    print()


Feature names contributing to Principal Components:
Component 1:
  BCAL 23: 0.1036
  BCAL 32: 0.1031
  MCAL 32: 0.1006
  DCAL 32: 0.1004
  LCAL 32: 0.0992

Component 2:
  ORAL HABITS: 0.1106
  MCAL 15: -0.1051
  PCAL 15: -0.1001
  PPD 15: -0.0994
  BCAL 43: 0.0985

Component 3:
  MPD 33: 0.1529
  LPD 33: 0.1519
  BPD 33: 0.1514
  DPD 33: 0.1512
  MCAL 33: 0.1447

Component 4:
  DPD 31: 0.1388
  MPD 31: 0.1388
  LPD 31: 0.1372
  BPD 31: 0.1370
  BCAL 31: 0.1307

Component 5:
  BPD 14: 0.1520
  PPD 14: 0.1518
  DPD 14: 0.1511
  PCAL 14: 0.1500
  DPD 25: 0.1493

Component 6:
  LCAL 41: 0.1601
  DCAL 41: 0.1579
  MCAL 41: 0.1568
  DPD 41: 0.1559
  MPD 41: 0.1526

Component 7:
  DPD 24: 0.2216
  MPD 24: 0.2169
  BPD 24: 0.2155
  PPD 24: 0.2151
  PCAL 24: 0.2113

Component 8:
  MCAL 13: 0.1861
  DCAL 13: 0.1845
  PPD 13: 0.1833
  MPD 13: 0.1822
  DPD 13: 0.1814

Component 9:
  LCAL 46: 0.1666
  DPD 45: -0.1424
  BPD 45: -0.1411
  MCAL 47: 0.1380
  BCAL 45: -0.1358

Component 10:
  MCAL 23: 0

## Random Forest Classifier

In [8]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10,criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[76  0  0  0  0]
 [ 0  4  0  0  0]
 [ 0  0  7  0  0]
 [ 0  0  0 28  0]
 [ 1  0  0  0 44]]
0.99375
Testing Accuracy:
[[13  0  0  1  1]
 [ 1  0  0  0  0]
 [ 0  0  0  0  1]
 [ 1  0  0  4  3]
 [ 2  0  0  5  8]]
0.625


## Decision Tree

In [9]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[76  0  0  0  0]
 [ 0  4  0  0  0]
 [ 0  0  7  0  0]
 [ 0  0  0 28  0]
 [ 0  0  0  0 45]]
1.0
Testing Accuracy:
[[13  0  0  1  1]
 [ 1  0  0  0  0]
 [ 0  0  0  0  1]
 [ 0  1  0  4  3]
 [ 2  0  1  2 10]]
0.675


## Support Vector Machine

In [10]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[74  0  0  2  0]
 [ 1  2  0  1  0]
 [ 4  0  1  1  1]
 [ 3  0  0 18  7]
 [ 1  0  0  5 39]]
0.8375
Testing Accuracy:
[[14  0  0  0  1]
 [ 1  0  0  0  0]
 [ 0  0  0  0  1]
 [ 0  0  0  6  2]
 [ 0  0  0  7  8]]
0.7


## Kernel SVM

In [11]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))



Training Accuracy:
[[76  0  0  0  0]
 [ 3  1  0  0  0]
 [ 5  0  0  1  1]
 [13  0  0  6  9]
 [ 4  0  0  1 40]]
0.76875
Testing Accuracy:
[[14  0  0  0  1]
 [ 1  0  0  0  0]
 [ 0  0  0  0  1]
 [ 4  0  0  2  2]
 [ 3  0  0  2 10]]
0.65


## K-Nearest Neighbors

In [12]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[69  0  0  2  5]
 [ 3  0  0  0  1]
 [ 5  0  0  1  1]
 [ 5  0  1 14  8]
 [ 4  0  1  7 33]]
0.725
Testing Accuracy:
[[14  0  0  0  1]
 [ 1  0  0  0  0]
 [ 0  0  0  0  1]
 [ 1  0  0  4  3]
 [ 2  0  0  8  5]]
0.575


## Logistic Regression

In [13]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[73  0  0  3  0]
 [ 1  2  0  1  0]
 [ 4  0  1  1  1]
 [ 4  0  0 18  6]
 [ 3  0  1  6 35]]
0.80625
Testing Accuracy:
[[14  0  0  0  1]
 [ 1  0  0  0  0]
 [ 0  0  0  0  1]
 [ 0  0  0  6  2]
 [ 1  0  0  6  8]]
0.7


## Naive Bayes

In [14]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[67  0  0  6  3]
 [ 2  1  0  1  0]
 [ 5  0  0  1  1]
 [19  0  0  5  4]
 [14  2  0 12 17]]
0.5625
Testing Accuracy:
[[13  0  0  0  2]
 [ 1  0  0  0  0]
 [ 0  0  0  0  1]
 [ 6  0  1  0  1]
 [ 9  1  0  1  4]]
0.425


## XGBoost

In [15]:
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[76  0  0  0  0]
 [ 0  4  0  0  0]
 [ 0  0  7  0  0]
 [ 0  0  0 28  0]
 [ 0  0  0  0 45]]
1.0
Testing Accuracy:
[[12  0  0  2  1]
 [ 1  0  0  0  0]
 [ 0  0  0  0  1]
 [ 1  0  0  5  2]
 [ 1  0  0  5  9]]
0.65


## CatBoost

In [16]:
! pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [17]:
from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Learning rate set to 0.072066
0:	learn: 1.5347669	total: 55ms	remaining: 55s
1:	learn: 1.4784519	total: 60.6ms	remaining: 30.2s
2:	learn: 1.4213513	total: 66.9ms	remaining: 22.2s
3:	learn: 1.3776180	total: 72.8ms	remaining: 18.1s
4:	learn: 1.3373312	total: 78.5ms	remaining: 15.6s
5:	learn: 1.2851228	total: 84ms	remaining: 13.9s
6:	learn: 1.2464317	total: 89.8ms	remaining: 12.7s
7:	learn: 1.2094091	total: 95.3ms	remaining: 11.8s
8:	learn: 1.1732284	total: 101ms	remaining: 11.1s
9:	learn: 1.1382992	total: 106ms	remaining: 10.5s
10:	learn: 1.1095212	total: 112ms	remaining: 10s
11:	learn: 1.0842204	total: 117ms	remaining: 9.66s
12:	learn: 1.0576448	total: 123ms	remaining: 9.35s
13:	learn: 1.0264327	total: 129ms	remaining: 9.07s
14:	learn: 0.9995819	total: 134ms	remaining: 8.81s
15:	learn: 0.9776958	total: 140ms	remaining: 8.6s
16:	learn: 0.9508866	total: 145ms	remaining: 8.41s
17:	learn: 0.9338848	total: 151ms	remaining: 8.26s
18:	learn: 0.9109228	total: 157ms	remaining: 8.1s
19:	learn: 0.

# ANN


In [18]:
import tensorflow as tf
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=12, activation='relu'))
ann.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann.add(tf.keras.layers.Dense(units=12, activation='relu'))
ann.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann.add(tf.keras.layers.Dropout(rate=0.2))
ann.add(tf.keras.layers.Dense(units=5, activation='sigmoid'))
ann.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
ann.fit(X_train, y_train, batch_size = 32, epochs = 100)

# Predicting the Test set results
y_pred = ann.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_classes)
print(cm)
print(accuracy_score(y_test, y_pred_classes))

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.2706 - loss: 1.6571
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.2761 - loss: 1.7752
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.3745 - loss: 1.7392
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.3923 - loss: 1.5464
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.4544 - loss: 1.4849 
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.4595 - loss: 1.4899
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.4438 - loss: 1.5327
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.4608 - loss: 1.4834
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[