In [8]:
#without pca
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

df = pd.read_csv(r"D:\chronic_kidney_disease_formatted.csv")

X = df.drop(columns=['class']).values
y = df['class'].values

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, stratify=y, random_state=42)

gb_model = GradientBoostingClassifier(
    n_estimators=50,
    learning_rate=0.015,
    subsample=0.7,          
    max_depth=2,
    min_samples_leaf=8,
    min_samples_split=15,
    random_state=42
)

rf_model = RandomForestClassifier(
    n_estimators=50,
    max_depth=3,
    min_samples_leaf=8,
    min_samples_split=15,
    max_features='sqrt',    # Dropout-like effect
    random_state=42
)
# Meta-model with stronger regularization
meta_model = LogisticRegression(
    max_iter=1000,
    penalty='l2',
    C=0.1,
    solver='lbfgs',
    random_state=42
)

stacking_model = StackingClassifier(
    estimators=[
        ('gb', gb_model),
        ('rf', rf_model)
    ],
    final_estimator=meta_model,
    passthrough=False,
    cv=5,
    n_jobs=-1
)

# Perform cross-validation
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Cross-validation score 
cv_scores = cross_val_score(stacking_model, X_scaled, y, cv=cv, scoring='accuracy')


# Train the stacking model on the full training data
stacking_model.fit(X_train, y_train)

y_pred = stacking_model.predict(X_test)

train_accuracy = accuracy_score(y_train, stacking_model.predict(X_train))
test_accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Output evaluation metrics
print("\nTraining Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)



Training Accuracy: 0.9875389408099688
Testing Accuracy: 0.9753086419753086
Confusion Matrix:
 [[29  1]
 [ 1 50]]
Precision: 0.9753086419753086
Recall: 0.9753086419753086
F1-score: 0.9753086419753086


In [7]:
#with pca
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

df = pd.read_csv(r"D:\chronic_kidney_disease_formatted.csv")

X = df.drop(columns=['class']).values
y = df['class'].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA to reduce dimensionality 
pca = PCA(n_components=0.95, random_state=42)  
X_pca = pca.fit_transform(X_scaled)

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, stratify=y, random_state=42)

gb_model = GradientBoostingClassifier(
    n_estimators=50,
    learning_rate=0.015,
    subsample=0.7,          
    max_depth=2,
    min_samples_leaf=8,
    min_samples_split=15,
    random_state=42
)

rf_model = RandomForestClassifier(
    n_estimators=50,
    max_depth=3,
    min_samples_leaf=8,
    min_samples_split=15,
    max_features='sqrt',    # Dropout-like effect
    random_state=42
)

# Meta-model with stronger regularization
meta_model = LogisticRegression(
    max_iter=1000,
    penalty='l2',
    C=0.1,
    solver='lbfgs',
    random_state=42
)

# Stacking model with passthrough turned OFF
stacking_model = StackingClassifier(
    estimators=[
        ('gb', gb_model),
        ('rf', rf_model)
    ],
    final_estimator=meta_model,
    passthrough=False,
    cv=5,
    n_jobs=-1
)

# Perform cross-validation
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Cross-validation score 
cv_scores = cross_val_score(stacking_model, X_pca, y, cv=cv, scoring='accuracy')


# Train the stacking model on the full training data
stacking_model.fit(X_train, y_train)

# Predict and evaluate on the test set
y_pred = stacking_model.predict(X_test)

train_accuracy = accuracy_score(y_train, stacking_model.predict(X_train))
test_accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Output evaluation metrics
print("\nTraining Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)



Training Accuracy: 0.9937694704049844
Testing Accuracy: 0.9629629629629629
Confusion Matrix:
 [[29  1]
 [ 2 49]]
Precision: 0.9635125448028672
Recall: 0.9629629629629629
F1-score: 0.9630831935652582
