## Importing the basic libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Reading the dataset and splitting into the dependent and independent variables

In [2]:
dataset = pd.read_excel("Dataset4_with radiograph.xlsx")

feature_names = dataset.columns[:-4]

X = dataset.iloc[:, :-2]
y = dataset.iloc[:, -2]

## Checking the dependent variable

In [3]:
print(y.head)
print(y.describe())

<bound method NDFrame.head of 0      3
1      3
2      3
3      1
4      1
      ..
100    0
101    0
102    0
103    0
104    0
Name: GRADING OF PERIODONTITIS, Length: 105, dtype: int64>
count    105.000000
mean       1.000000
std        1.126601
min        0.000000
25%        0.000000
50%        1.000000
75%        2.000000
max        3.000000
Name: GRADING OF PERIODONTITIS, dtype: float64


## Training and Testing data split

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


## Feature scaling

In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


## Dimensionality Reduction

In [6]:
from sklearn.decomposition import PCA
pca = PCA(n_components=10)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
component_weights = pca.components_

## Extracting the important feature names

In [7]:
feature_weights_mapping = {}
for i, component in enumerate(component_weights):
    component_feature_weights = zip(feature_names, component)
    sorted_feature_weights = sorted(
        component_feature_weights, key=lambda x: abs(x[1]), reverse=True
    )
    feature_weights_mapping[f"Component {i+1}"] = sorted_feature_weights
print("Feature names contributing to Principal Components:")
for component, weights in feature_weights_mapping.items():
    print(f"{component}:")
    for feature, weight in weights[:5]:
        print(f"  {feature}: {weight:.4f}")
    print()


Feature names contributing to Principal Components:
Component 1:
  BCAL 32: 0.0797
  DPD 32: 0.0780
  MPS 32: 0.0780
  DPS 32: 0.0780
  BGI 32: 0.0779

Component 2:
  ORAL HABITS: 0.0749
  DCAL 43: 0.0696
  DCS 33: 0.0691
  MCS 33: 0.0691
  LCS 33: 0.0691

Component 3:
  LCAL 33: 0.0908
  BPD 33: 0.0906
  LPS 43: 0.0905
  LPS 33: 0.0903
  MPS 33: 0.0902

Component 4:
  DPD 11: -0.0841
  MPD 11: -0.0823
  PPD 11: -0.0815
  PGI 11: -0.0802
  DGI 11: -0.0800

Component 5:
  MCS 25: 0.0829
  DCS 25: 0.0829
  PCS 25: 0.0829
  DCAL 25: 0.0827
  MGI 25: 0.0827

Component 6:
  DPD 24: 0.1182
  MPD 24: 0.1173
  DCAL 24: 0.1150
  MPS 24: 0.1143
  BPS 24: 0.1142

Component 7:
  LCAL 35: 0.1166
  BCAL 35: 0.1141
  DCS 35: 0.1134
  MCS 35: 0.1134
  LCS 35: 0.1134

Component 8:
  MCAL 27: 0.0915
  DCAL 27: 0.0915
  MCS 27: 0.0909
  DCS 27: 0.0909
  PCS 27: 0.0909

Component 9:
  MGI 23: -0.1244
  DGI 23: -0.1208
  BGI 44: -0.1189
  BGI 23: -0.1187
  PGI 23: -0.1181

Component 10:
  BCAL 16: -0.1198


## Random Forest Classifier

In [8]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10,criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[42  0  0  0]
 [ 0 13  0  0]
 [ 0  1 17  0]
 [ 0  0  0 11]]
0.9880952380952381
Testing Accuracy:
[[10  0  0  0]
 [ 1  0  0  1]
 [ 1  2  1  2]
 [ 1  0  2  0]]
0.5238095238095238


## Decision Tree

In [9]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[42  0  0  0]
 [ 0 13  0  0]
 [ 0  0 18  0]
 [ 0  0  0 11]]
1.0
Testing Accuracy:
[[10  0  0  0]
 [ 0  1  1  0]
 [ 0  1  3  2]
 [ 1  0  2  0]]
0.6666666666666666


## Support Vector Machine

In [10]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[41  0  1  0]
 [ 4  7  2  0]
 [ 2  2 14  0]
 [ 0  3  1  7]]
0.8214285714285714
Testing Accuracy:
[[9 1 0 0]
 [1 1 0 0]
 [2 2 1 1]
 [0 0 3 0]]
0.5238095238095238


## Kernel SVM

In [11]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))



Training Accuracy:
[[41  0  1  0]
 [ 8  2  3  0]
 [ 8  0 10  0]
 [ 3  0  5  3]]
0.6666666666666666
Testing Accuracy:
[[10  0  0  0]
 [ 2  0  0  0]
 [ 3  0  3  0]
 [ 3  0  0  0]]
0.6190476190476191


## K-Nearest Neighbors

In [12]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[41  1  0  0]
 [ 3  8  2  0]
 [ 4  5  9  0]
 [ 4  0  4  3]]
0.7261904761904762
Testing Accuracy:
[[10  0  0  0]
 [ 1  0  1  0]
 [ 1  2  0  3]
 [ 2  1  0  0]]
0.47619047619047616


## Logistic Regression

In [13]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[38  0  4  0]
 [ 6  3  4  0]
 [ 2  3 13  0]
 [ 2  0  2  7]]
0.7261904761904762
Testing Accuracy:
[[9 0 0 1]
 [2 0 0 0]
 [2 1 1 2]
 [0 1 2 0]]
0.47619047619047616


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## Naive Bayes

In [14]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[38  1  1  2]
 [ 8  3  2  0]
 [ 7  2  9  0]
 [ 3  1  4  3]]
0.6309523809523809
Testing Accuracy:
[[9 0 1 0]
 [2 0 0 0]
 [4 1 1 0]
 [3 0 0 0]]
0.47619047619047616


## XGBoost

In [15]:
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Training Accuracy:
[[42  0  0  0]
 [ 0 13  0  0]
 [ 0  0 18  0]
 [ 0  0  0 11]]
1.0
Testing Accuracy:
[[10  0  0  0]
 [ 0  1  0  1]
 [ 0  0  5  1]
 [ 1  0  2  0]]
0.7619047619047619


## CatBoost

In [16]:
from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)
from sklearn.metrics import confusion_matrix, accuracy_score
print("Training Accuracy:")
y_pred_train = classifier.predict(X_train)
cm_train = confusion_matrix(y_train, y_pred_train)
print(cm_train)
print(accuracy_score(y_train, y_pred_train))

print("Testing Accuracy:")
y_pred_test = classifier.predict(X_test)
cm_test = confusion_matrix(y_test, y_pred_test)
print(cm_test)
print(accuracy_score(y_test, y_pred_test))


Learning rate set to 0.069737
0:	learn: 1.3551784	total: 145ms	remaining: 2m 25s
1:	learn: 1.3239294	total: 148ms	remaining: 1m 13s
2:	learn: 1.2972645	total: 150ms	remaining: 49.8s
3:	learn: 1.2612413	total: 151ms	remaining: 37.7s
4:	learn: 1.2280032	total: 153ms	remaining: 30.4s
5:	learn: 1.2011215	total: 154ms	remaining: 25.6s
6:	learn: 1.1736670	total: 156ms	remaining: 22.1s
7:	learn: 1.1502298	total: 158ms	remaining: 19.6s
8:	learn: 1.1258456	total: 159ms	remaining: 17.5s
9:	learn: 1.1025862	total: 160ms	remaining: 15.8s
10:	learn: 1.0840730	total: 161ms	remaining: 14.5s
11:	learn: 1.0647567	total: 163ms	remaining: 13.4s
12:	learn: 1.0469323	total: 165ms	remaining: 12.5s
13:	learn: 1.0334000	total: 166ms	remaining: 11.7s
14:	learn: 1.0183371	total: 168ms	remaining: 11s
15:	learn: 1.0004348	total: 169ms	remaining: 10.4s
16:	learn: 0.9868614	total: 171ms	remaining: 9.86s
17:	learn: 0.9727540	total: 172ms	remaining: 9.38s
18:	learn: 0.9586528	total: 173ms	remaining: 8.95s
19:	learn: 