In [1]:
pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.


In [2]:
from ucimlrepo import fetch_ucirepo



from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
chronic_kidney_disease = fetch_ucirepo(id=336) 
  
# data (as pandas dataframes) 
X = chronic_kidney_disease.data.features 
y = chronic_kidney_disease.data.targets 
  
# metadata 
print(chronic_kidney_disease.metadata) 
  
# variable information 
print(chronic_kidney_disease.variables)

{'uci_id': 336, 'name': 'Chronic Kidney Disease', 'repository_url': 'https://archive.ics.uci.edu/dataset/336/chronic+kidney+disease', 'data_url': 'https://archive.ics.uci.edu/static/public/336/data.csv', 'abstract': 'This dataset can be used to predict the chronic kidney disease and it can be collected from the hospital nearly 2 months of period.', 'area': 'Other', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 400, 'num_features': 24, 'feature_types': ['Real'], 'demographics': ['Age'], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 2015, 'last_updated': 'Mon Mar 04 2024', 'dataset_doi': '10.24432/C5G020', 'creators': ['L. Rubini', 'P. Soundarapandian', 'P. Eswaran'], 'intro_paper': None, 'additional_info': {'summary': 'We use the following representation to collect the dataset\r\n                        age\t\t-\tage\t\r\n\t\t\tbp\t\t-\tblood pressure\r\n\t\t\tsg\t

In [3]:
from sklearn.impute import SimpleImputer

# En sık tekrar eden değer ile doldurma
imputer = SimpleImputer(strategy='most_frequent')
X_filled_mode = imputer.fit_transform(X)

In [4]:
print(X_filled_mode.shape)

(400, 24)


In [6]:
from sklearn.preprocessing import OneHotEncoder

# Kategorik sütunları belirleme
categorical_column_indices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

# Kategorik sütunları dönüştürme
encoder = OneHotEncoder()  # Remove sparse=False
X_categorical = encoder.fit_transform(X_filled_mode[:, categorical_column_indices])


In [8]:
from sklearn.model_selection import train_test_split  # Importing train_test_split

# Bağımsız değişkenlerin ve bağımlı değişkenin eğitim ve test setlerine bölünmesi
X_train, X_test, y_train, y_test = train_test_split(X_categorical, y, test_size=0.2, random_state=42)

# Etiket verilerini sütun vektörüne dönüştürme
y_train = y_train.values.ravel()
y_test = y_test.values.ravel()

from sklearn.svm import SVC
# Modeli tanımlama ve eğitme
model = SVC()
model.fit(X_train, y_train)

# Eğitilmiş modelle test seti üzerinde tahmin yapma
y_pred = model.predict(X_test)

# Modelin performansını değerlendirme
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

         ckd       1.00      0.96      0.98        52
      notckd       0.93      1.00      0.97        28

    accuracy                           0.97        80
   macro avg       0.97      0.98      0.97        80
weighted avg       0.98      0.97      0.98        80



In [11]:

from sklearn.ensemble import RandomForestClassifier

# Modeli tanımlama ve eğitme
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

# Eğitilmiş modelle test seti üzerinde tahmin yapma
rf_y_pred = rf_model.predict(X_test)

# Modelin performansını değerlendirme
print("Random Forest Modeli Performansı:")
print(classification_report(y_test, rf_y_pred))

Random Forest Modeli Performansı:
              precision    recall  f1-score   support

         ckd       1.00      0.94      0.97        52
      notckd       0.90      1.00      0.95        28

    accuracy                           0.96        80
   macro avg       0.95      0.97      0.96        80
weighted avg       0.97      0.96      0.96        80



In [12]:
from sklearn.neighbors import KNeighborsClassifier

# Modeli tanımlama ve eğitme
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

# Eğitilmiş modelle test seti üzerinde tahmin yapma
knn_y_pred = knn_model.predict(X_test)

# Modelin performansını değerlendirme
print("K-Nearest Neighbors (KNN) Modeli Performansı:")
print(classification_report(y_test, knn_y_pred))

K-Nearest Neighbors (KNN) Modeli Performansı:
              precision    recall  f1-score   support

         ckd       0.96      0.83      0.89        52
      notckd       0.74      0.93      0.83        28

    accuracy                           0.86        80
   macro avg       0.85      0.88      0.86        80
weighted avg       0.88      0.86      0.87        80



In [13]:
from sklearn.tree import DecisionTreeClassifier

# Modeli tanımlama ve eğitme
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)

# Eğitilmiş modelle test seti üzerinde tahmin yapma
dt_y_pred = dt_model.predict(X_test)

# Modelin performansını değerlendirme
print("Decision Trees (Karar Ağaçları) Modeli Performansı:")
print(classification_report(y_test, dt_y_pred))

Decision Trees (Karar Ağaçları) Modeli Performansı:
              precision    recall  f1-score   support

         ckd       0.92      0.88      0.90        52
      notckd       0.80      0.86      0.83        28

    accuracy                           0.88        80
   macro avg       0.86      0.87      0.86        80
weighted avg       0.88      0.88      0.88        80



In [14]:
from sklearn.linear_model import LogisticRegression

# Modeli tanımlama ve eğitme
logreg_model = LogisticRegression()
logreg_model.fit(X_train, y_train)

# Eğitilmiş modelle test seti üzerinde tahmin yapma
logreg_y_pred = logreg_model.predict(X_test)

# Modelin performansını değerlendirme
print("Lojistik Regresyon Modeli Performansı:")
print(classification_report(y_test, logreg_y_pred))

Lojistik Regresyon Modeli Performansı:
              precision    recall  f1-score   support

         ckd       1.00      0.96      0.98        52
      notckd       0.93      1.00      0.97        28

    accuracy                           0.97        80
   macro avg       0.97      0.98      0.97        80
weighted avg       0.98      0.97      0.98        80



In [15]:
from sklearn.neural_network import MLPClassifier

# Yapay sinir ağı modelini tanımlama ve parametrelerini belirleme
model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500)

# Modeli eğitme
model.fit(X_train, y_train)

# Eğitilmiş modelle test seti üzerinde tahmin yapma
y_pred = model.predict(X_test)

# Modelin performansını değerlendirme
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         ckd       1.00      0.94      0.97        52
      notckd       0.90      1.00      0.95        28

    accuracy                           0.96        80
   macro avg       0.95      0.97      0.96        80
weighted avg       0.97      0.96      0.96        80

