## Importing the basic libraries

In [35]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Reading the dataset and splitting into the dependent and independent variables

In [36]:
dataset = pd.read_excel("Dataset4_with radiograph.xlsx")

feature_names = dataset.columns[:-4]

X = dataset.iloc[:, :-3]
y = dataset.iloc[:, -3]  

## Checking the dependent variable

In [37]:
print(y.head)
print(y.describe())

<bound method NDFrame.head of 0      4
1      4
2      4
3      2
4      3
      ..
100    0
101    0
102    0
103    0
104    0
Name: STAGING OF PERIODONTITIS, Length: 105, dtype: int64>
count    105.000000
mean       1.609524
std        1.751347
min        0.000000
25%        0.000000
50%        1.000000
75%        4.000000
max        4.000000
Name: STAGING OF PERIODONTITIS, dtype: float64


## Training and Testing data split 

In [38]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


## Feature scaling

In [39]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


## Dimensionality Reduction

In [40]:
from sklearn.decomposition import PCA
pca = PCA(n_components=10)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
component_weights = pca.components_

## Extracting the important feature names

In [41]:
feature_weights_mapping = {}
for i, component in enumerate(component_weights):
    component_feature_weights = zip(feature_names, component)
    sorted_feature_weights = sorted(
        component_feature_weights, key=lambda x: abs(x[1]), reverse=True
    )
    feature_weights_mapping[f"Component {i+1}"] = sorted_feature_weights
print("Feature names contributing to Principal Components:")
for component, weights in feature_weights_mapping.items():
    print(f"{component}:")
    for feature, weight in weights[:5]: 
        print(f"  {feature}: {weight:.4f}")
    print()


Feature names contributing to Principal Components:
Component 1:
  BCAL 32: 0.0799
  DPD 32: 0.0782
  MPS 32: 0.0782
  DPS 32: 0.0782
  BGI 32: 0.0781

Component 2:
  ORAL HABITS: 0.0749
  DCAL 43: 0.0696
  DCS 33: 0.0690
  MCS 33: 0.0690
  LCS 33: 0.0690

Component 3:
  LCAL 33: 0.0909
  BPD 33: 0.0907
  LPS 43: 0.0906
  LPS 33: 0.0903
  MPS 33: 0.0903

Component 4:
  DPD 11: -0.0840
  MPD 11: -0.0823
  PPD 11: -0.0814
  PGI 11: -0.0801
  DGI 11: -0.0799

Component 5:
  MCS 24: 0.0831
  PGI 24: 0.0830
  MCS 25: 0.0830
  DCS 25: 0.0830
  PCS 25: 0.0830

Component 6:
  DPD 24: 0.1176
  MPD 24: 0.1167
  DCAL 24: 0.1146
  MPS 24: 0.1137
  BPS 24: 0.1137

Component 7:
  LCAL 35: 0.1169
  BCAL 35: 0.1144
  DCS 35: 0.1137
  MCS 35: 0.1137
  LCS 35: 0.1137

Component 8:
  MCAL 27: 0.0912
  DCAL 27: 0.0912
  MCS 27: 0.0907
  DCS 27: 0.0907
  PCS 27: 0.0907

Component 9:
  MGI 23: 0.1252
  DGI 23: 0.1215
  BGI 44: 0.1198
  BGI 23: 0.1195
  MGI 44: 0.1189

Component 10:
  BCAL 16: -0.1179
  MPS 

## Random Forest Classifier

In [42]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10,criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[42  0  0  0  0]
 [ 0  4  0  0  0]
 [ 0  1  4  0  0]
 [ 0  0  0 11  0]
 [ 0  0  0  0 22]]
0.9880952380952381
Testing Accuracy:
[[9 0 0 0 1]
 [0 0 0 0 1]
 [0 0 0 1 1]
 [1 0 0 0 2]
 [0 1 0 0 4]]
0.6190476190476191


## Decision Tree

In [43]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[42  0  0  0  0]
 [ 0  4  0  0  0]
 [ 0  0  5  0  0]
 [ 0  0  0 11  0]
 [ 0  0  0  0 22]]
1.0
Testing Accuracy:
[[9 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 1 1]
 [0 0 0 1 2]
 [0 0 0 0 5]]
0.7142857142857143


## Support Vector Machine

In [44]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[42  0  0  0  0]
 [ 2  2  0  0  0]
 [ 2  0  3  0  0]
 [ 3  0  0  7  1]
 [ 4  0  0  0 18]]
0.8571428571428571
Testing Accuracy:
[[9 0 0 0 1]
 [1 0 0 0 0]
 [1 0 0 0 1]
 [1 0 1 0 1]
 [0 1 0 0 4]]
0.6190476190476191


## Kernel SVM

In [45]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))



Training Accuracy:
[[41  0  0  0  1]
 [ 3  1  0  0  0]
 [ 2  0  1  0  2]
 [ 5  0  0  5  1]
 [ 8  0  0  0 14]]
0.7380952380952381
Testing Accuracy:
[[9 0 0 0 1]
 [1 0 0 0 0]
 [2 0 0 0 0]
 [2 0 0 0 1]
 [4 0 0 0 1]]
0.47619047619047616


## K-Nearest Neighbors

In [46]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[38  0  0  0  4]
 [ 1  1  0  2  0]
 [ 2  0  1  1  1]
 [ 3  0  0  4  4]
 [ 5  0  0  1 16]]
0.7142857142857143
Testing Accuracy:
[[10  0  0  0  0]
 [ 0  0  0  0  1]
 [ 0  0  0  1  1]
 [ 1  0  1  0  1]
 [ 2  0  0  2  1]]
0.5238095238095238


## Logistic Regression

In [47]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[38  0  0  1  3]
 [ 2  2  0  0  0]
 [ 2  0  3  0  0]
 [ 3  0  0  7  1]
 [ 4  0  0  0 18]]
0.8095238095238095
Testing Accuracy:
[[9 0 0 0 1]
 [1 0 0 0 0]
 [2 0 0 0 0]
 [1 0 1 0 1]
 [0 1 0 0 4]]
0.6190476190476191


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## Naive Bayes

In [48]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[38  0  0  2  2]
 [ 2  2  0  0  0]
 [ 2  0  2  1  0]
 [ 6  0  0  4  1]
 [ 9  0  2  3  8]]
0.6428571428571429
Testing Accuracy:
[[9 0 0 1 0]
 [1 0 0 0 0]
 [2 0 0 0 0]
 [2 0 1 0 0]
 [3 1 0 1 0]]
0.42857142857142855


## XGBoost

In [49]:
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[42  0  0  0  0]
 [ 0  4  0  0  0]
 [ 0  0  5  0  0]
 [ 0  0  0 11  0]
 [ 0  0  0  0 22]]
1.0
Testing Accuracy:
[[10  0  0  0  0]
 [ 0  0  0  0  1]
 [ 0  0  0  1  1]
 [ 1  0  0  0  2]
 [ 0  0  1  0  4]]
0.6666666666666666


## CatBoost

In [50]:
from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Learning rate set to 0.069737
0:	learn: 1.5692496	total: 2.13ms	remaining: 2.13s
1:	learn: 1.5253181	total: 3.97ms	remaining: 1.98s
2:	learn: 1.4848980	total: 5.9ms	remaining: 1.96s
3:	learn: 1.4389188	total: 7.66ms	remaining: 1.91s
4:	learn: 1.4072183	total: 9.24ms	remaining: 1.84s
5:	learn: 1.3717990	total: 10.8ms	remaining: 1.8s
6:	learn: 1.3365025	total: 12.6ms	remaining: 1.78s
7:	learn: 1.3086866	total: 14.2ms	remaining: 1.77s
8:	learn: 1.2796461	total: 16.1ms	remaining: 1.78s
9:	learn: 1.2516343	total: 17.8ms	remaining: 1.76s
10:	learn: 1.2302709	total: 19.5ms	remaining: 1.75s
11:	learn: 1.2093972	total: 21.1ms	remaining: 1.74s
12:	learn: 1.1861277	total: 22.8ms	remaining: 1.73s
13:	learn: 1.1597616	total: 24.7ms	remaining: 1.74s
14:	learn: 1.1410013	total: 26.4ms	remaining: 1.73s
15:	learn: 1.1231939	total: 28.1ms	remaining: 1.73s
16:	learn: 1.1042122	total: 29.8ms	remaining: 1.72s
17:	learn: 1.0888109	total: 31.6ms	remaining: 1.72s
18:	learn: 1.0707235	total: 33.3ms	remaining: 