In [5]:
#without pca 
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.feature_selection import RFE
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Load dataset
df = pd.read_csv(r"D:\chronic_kidney_disease_formatted.csv")

# Separate features and target
X = df.drop(columns=['class']).values  
y = df['class'].values  

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Base Gradient Boosting model for RFE
base_gb = GradientBoostingClassifier(random_state=42, n_estimators=100, learning_rate=0.1, subsample=0.8, max_depth=3)

# Recursive Feature Elimination with 6 features
rfe = RFE(estimator=base_gb, n_features_to_select=6)
X_rfe = rfe.fit_transform(X_scaled, y)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_rfe, y, test_size=0.2, random_state=42, stratify=y)

# Final Gradient Boosting model
gb_model = GradientBoostingClassifier(
    n_estimators=80,
    learning_rate=0.05,
    subsample=0.8,
    max_depth=2,
    min_samples_leaf=5,
    min_samples_split=10,
    random_state=42
)
gb_model.fit(X_train, y_train)
y_pred = gb_model.predict(X_test)

# Evaluation metrics
train_accuracy = accuracy_score(y_train, gb_model.predict(X_train))
test_accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Stratified Cross-Validation
cv_stratified = StratifiedKFold(n_splits=15, shuffle=True, random_state=42)
stratified_scores = cross_val_score(gb_model, X_rfe, y, cv=cv_stratified, scoring='accuracy')

# Output results
print("Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Training Accuracy: 0.9906542056074766
Testing Accuracy: 0.9753086419753086
Confusion Matrix:
 [[29  1]
 [ 1 50]]
Precision: 0.9753086419753086
Recall: 0.9753086419753086
F1-score: 0.9753086419753086


In [7]:
#with pca
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.feature_selection import RFE
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Load dataset
df = pd.read_csv(r"D:\chronic_kidney_disease_formatted.csv")

# Separate features and target
X = df.drop(columns=['class']).values  
y = df['class'].values  

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components=0.95, random_state=42)
X_pca = pca.fit_transform(X_scaled)

# Base Gradient Boosting model for RFE
base_gb = GradientBoostingClassifier(
    n_estimators=80,
    learning_rate=0.05,
    subsample=0.8,
    max_depth=2,
    min_samples_leaf=5,
    min_samples_split=10,
    random_state=42
)

# Apply RFE after PCA
rfe = RFE(estimator=base_gb, n_features_to_select=6)
X_rfe = rfe.fit_transform(X_pca, y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_rfe, y, test_size=0.2, random_state=42, stratify=y)

# Final model training
gb_model = GradientBoostingClassifier(
    n_estimators=80,
    learning_rate=0.05,
    subsample=0.8,
    max_depth=2,
    min_samples_leaf=5,
    min_samples_split=10,
    random_state=42
)
gb_model.fit(X_train, y_train)
y_pred = gb_model.predict(X_test)

# Evaluation metrics
train_accuracy = accuracy_score(y_train, gb_model.predict(X_train))
test_accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Cross-validation
cv_stratified = StratifiedKFold(n_splits=15, shuffle=True, random_state=42)
stratified_scores = cross_val_score(gb_model, X_rfe, y, cv=cv_stratified, scoring='accuracy')

# Output
print("Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)



Training Accuracy: 1.0
Testing Accuracy: 0.9753086419753086
Confusion Matrix:
 [[30  0]
 [ 2 49]]
Precision: 0.9768518518518519
Recall: 0.9753086419753086
F1-score: 0.9754599761051372
