In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [26]:
df=pd.read_csv("heart.csv")

In [27]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [28]:
df.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [29]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [30]:
def model_olustur(X, y):
    X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.20, random_state=111, stratify = y)
    logreg_model = LogisticRegression()
    logreg_model.fit(X_train, y_train)

    tahmin_eğitim = logreg_model.predict(X_train)
    tahmin_test = logreg_model.predict(X_test)
    hata_matrisi_eğitim = confusion_matrix(y_train, tahmin_eğitim)
    hata_matrisi_test = confusion_matrix(y_test, tahmin_test)
    print("Modelin doğruluk değeri : ",  logreg_model.score(X_test, y_test))
    print("Eğitim veri kümesi")
    print(classification_report(y_train,tahmin_eğitim) )
    print("Test veri kümesi")
    print(classification_report(y_test,tahmin_test) )
    return  None

In [31]:
X = df.drop('target', axis=1)
y = df['target']
model_olustur(X,y)

Modelin doğruluk değeri :  0.7213114754098361
Eğitim veri kümesi
              precision    recall  f1-score   support

           0       0.91      0.80      0.85       110
           1       0.85      0.93      0.89       132

    accuracy                           0.87       242
   macro avg       0.88      0.87      0.87       242
weighted avg       0.88      0.87      0.87       242

Test veri kümesi
              precision    recall  f1-score   support

           0       0.70      0.68      0.69        28
           1       0.74      0.76      0.75        33

    accuracy                           0.72        61
   macro avg       0.72      0.72      0.72        61
weighted avg       0.72      0.72      0.72        61





In [33]:
hasta_kisiler = df[df.target == 1]
saglikli_kisiler = df[df.target == 0]
print('Sağlıklı sayısı:', len(saglikli_kisiler))
print('Hasta Sayısı:', len(hasta_kisiler))

Sağlıklı sayısı: 138
Hasta Sayısı: 165


#### Veri kümemiz pek dengesiz bir veri kümesi değildir fakat yinede örneklem artırma ve azaltma yöntemlerini deneyeceğiz.

### Örneklem Arttırma

In [43]:
from sklearn.utils import resample

saglikli_arttırılmıs = resample(saglikli_kisiler,
                                     replace = True,
                                     n_samples = len(hasta_kisiler),
                                     random_state = 11)

artırılmıs_df = pd.concat([hasta_kisiler, saglikli_arttırılmıs])
artırılmıs_df.target.value_counts()

1    165
0    165
Name: target, dtype: int64

In [44]:
X = artırılmıs_df.drop('target', axis=1)
y = artırılmıs_df['target']
model_olustur(X,y)

Modelin doğruluk değeri :  0.8484848484848485
Eğitim veri kümesi
              precision    recall  f1-score   support

           0       0.88      0.80      0.84       132
           1       0.82      0.89      0.86       132

    accuracy                           0.85       264
   macro avg       0.85      0.85      0.85       264
weighted avg       0.85      0.85      0.85       264

Test veri kümesi
              precision    recall  f1-score   support

           0       0.90      0.79      0.84        33
           1       0.81      0.91      0.86        33

    accuracy                           0.85        66
   macro avg       0.85      0.85      0.85        66
weighted avg       0.85      0.85      0.85        66





### Örneklem Azaltma

In [47]:
hasta_azaltılmıs = resample(hasta_kisiler,
                                     replace = True,
                                     n_samples = len(saglikli_kisiler),
                                     random_state = 10)

azaltılmıs_df = pd.concat([saglikli_kisiler, hasta_azaltılmıs])
azaltılmıs_df.target.value_counts()

1    138
0    138
Name: target, dtype: int64

In [48]:
X = azaltılmıs_df.drop('target', axis=1)
y = azaltılmıs_df['target']
model_olustur(X,y)

Modelin doğruluk değeri :  0.8214285714285714
Eğitim veri kümesi
              precision    recall  f1-score   support

           0       0.86      0.83      0.84       110
           1       0.83      0.86      0.85       110

    accuracy                           0.85       220
   macro avg       0.85      0.85      0.85       220
weighted avg       0.85      0.85      0.85       220

Test veri kümesi
              precision    recall  f1-score   support

           0       0.91      0.71      0.80        28
           1       0.76      0.93      0.84        28

    accuracy                           0.82        56
   macro avg       0.84      0.82      0.82        56
weighted avg       0.84      0.82      0.82        56





### Smote

In [51]:
from imblearn.over_sampling import SMOTE
y = df.target
X = df.drop('target', axis=1)

sm = SMOTE(random_state=27, ratio=1.0)
X_smote, y_smote = sm.fit_sample(X, y)
model_olustur(X_smote, y_smote)

Modelin doğruluk değeri :  0.8484848484848485
Eğitim veri kümesi
              precision    recall  f1-score   support

           0       0.87      0.83      0.84       132
           1       0.83      0.87      0.85       132

    accuracy                           0.85       264
   macro avg       0.85      0.85      0.85       264
weighted avg       0.85      0.85      0.85       264

Test veri kümesi
              precision    recall  f1-score   support

           0       0.87      0.82      0.84        33
           1       0.83      0.88      0.85        33

    accuracy                           0.85        66
   macro avg       0.85      0.85      0.85        66
weighted avg       0.85      0.85      0.85        66





### Adasyn

In [54]:
from imblearn.over_sampling import ADASYN
y = df.target
X = df.drop('target', axis=1)

ad = ADASYN()
X_adasyn, y_adasyn = ad.fit_sample(X, y)
model_olustur(X_adasyn, y_adasyn)

ValueError: No samples will be generated with the provided ratio settings.