In [79]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [80]:
df = pd.read_csv('data/diabetes.csv')
X, y = df[df.columns[:-1]], df[df.columns[-1]]

In [81]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# LogisticRegression

In [82]:
logistic_regression = LogisticRegression(max_iter=1000)
logistic_regression.fit(X_train, y_train)
logistic_regression_predict = logistic_regression.predict(X_test)

In [83]:
print(classification_report(y_test, logistic_regression_predict))

              precision    recall  f1-score   support

           0       0.81      0.84      0.82       103
           1       0.65      0.59      0.62        51

    accuracy                           0.76       154
   macro avg       0.73      0.72      0.72       154
weighted avg       0.75      0.76      0.76       154



# KNeighborsClassifier

In [84]:
k_neighbors_classifier = KNeighborsClassifier()
k_neighbors_classifier.fit(X_train, y_train)
k_neighbors_classifier_predict = k_neighbors_classifier.predict(X_test)

In [85]:
print(classification_report(y_test, k_neighbors_classifier_predict))

              precision    recall  f1-score   support

           0       0.82      0.79      0.80       103
           1       0.60      0.65      0.62        51

    accuracy                           0.74       154
   macro avg       0.71      0.72      0.71       154
weighted avg       0.75      0.74      0.74       154



# DecisionTreeClassifier

In [86]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_train, y_train)
decision_tree_classifier_predict = decision_tree_classifier.predict(X_test)

In [87]:
print(classification_report(y_test, decision_tree_classifier_predict))

              precision    recall  f1-score   support

           0       0.79      0.74      0.76       103
           1       0.53      0.61      0.57        51

    accuracy                           0.69       154
   macro avg       0.66      0.67      0.67       154
weighted avg       0.71      0.69      0.70       154



# Accuracy сравнение

In [88]:
comparison = pd.DataFrame({
    'LogisticRegression': [accuracy_score(y_test, logistic_regression_predict)],
    'KNeighborsClassifier': [accuracy_score(y_test, k_neighbors_classifier_predict)],
    'DecisionTreeClassifier': [accuracy_score(y_test, decision_tree_classifier_predict)],
}, index=['accuracy']).transpose()
comparison.sort_values('accuracy', ascending=False)

Unnamed: 0,accuracy
LogisticRegression,0.75974
KNeighborsClassifier,0.74026
DecisionTreeClassifier,0.694805


In [89]:
print(f'Лучшая модель: {comparison.idxmax().item()}')
print(f'Худшая модель: {comparison.idxmin().item()}')

Лучшая модель: LogisticRegression
Худшая модель: DecisionTreeClassifier
