# *K-Means* İle Kalp Hastalığı Teşhisi yapmak

### Izlenecek adimlar:

1) Gerekli kütüphaneleri yüklemek

2) *Dataseti* inceleyip Analiz yapmak

3) Modeli Çekip *dataseti* modele tanıtmak

4) Modeli test etmek 

5) Çıkan sonuçlar karşılaştırarak doğruluk oranı belirlemek (*accuracy score*)


In [None]:
# pandas tablo halinde gösteren önemli bir kütüphanedir
# numpy kısacası Numerıcal python diye geçen dizileri matris haline getiren bir kütüphanedir
# k_means modelimizdir 
# MAE (mean absolute error) testin doğruluk oranını ölçer
# train_test_split içine ayarları yaptığımız bir komuttur
# 1.adim
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import adjusted_rand_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error


In [None]:
# 2.adim
# bir değişkene dataseti tanımladık
df = pd.read_csv("heart.csv")

In [3]:
# tablo sklinde goruntuleyebiliyoruz
df.head(30)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
5,58,0,0,100,248,0,0,122,0,1.0,1,0,2,1
6,58,1,0,114,318,0,2,140,0,4.4,0,3,1,0
7,55,1,0,160,289,0,0,145,1,0.8,1,1,3,0
8,46,1,0,120,249,0,0,144,0,0.8,2,0,3,0
9,54,1,0,122,286,0,0,116,1,3.2,1,2,2,0


In [4]:
# datasetin hakkinda bilgiler verir
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      1025 non-null   int64  
 5   fbs       1025 non-null   int64  
 6   restecg   1025 non-null   int64  
 7   thalach   1025 non-null   int64  
 8   exang     1025 non-null   int64  
 9   oldpeak   1025 non-null   float64
 10  slope     1025 non-null   int64  
 11  ca        1025 non-null   int64  
 12  thal      1025 non-null   int64  
 13  target    1025 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 112.2 KB


In [5]:
# featurlari bize numpyarry formatinda verir
df.columns.infer_objects()

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [6]:
# X train degiskenimize featurlari tanimliyoruz 
# stadartscaler promptu bize feature degerlerini daha tutarili ve makinenin daha net bir deger tahmini yapmasini sagliyor 
X_cluster = df[['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal']]
# fit_transform ise donusum yapar
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_cluster)

In [None]:
# k-means modelimizi cagirip ayarlarini duzenliyoruz
# n_cluster kumelenecek kume sayisini belirtiyoruz
# random_state bir degiskendir ve herhangi bir deger girilebilir

# 3.adim ve 4.adim (fit_predict)
kmeans = KMeans(n_clusters=2, random_state=25)
cluster_labels = kmeans.fit_predict(X_scaled)

# fir predict promptu ile hem degerleri modele taniyoruz hem tahmin yapmasini sagliyoruz
# ve yeni bir "cluster" adinda yeni bir target seklinde yerlestimesini sagliyoruz
df["cluster"]= cluster_labels

df.head(30)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target,cluster
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0,1
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0,1
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0,1
5,58,0,0,100,248,0,0,122,0,1.0,1,0,2,1,0
6,58,1,0,114,318,0,2,140,0,4.4,0,3,1,0,1
7,55,1,0,160,289,0,0,145,1,0.8,1,1,3,0,1
8,46,1,0,120,249,0,0,144,0,0.8,2,0,3,0,0
9,54,1,0,122,286,0,0,116,1,3.2,1,2,2,0,1


In [None]:
centers = kmeans.cluster_centers_
# n kume kadar her kumenin icerigine bakiyoruz 
for i,centers in enumerate(centers):
    print(
        f"kume {i}: "
        f"Ortalama yas = {centers[0]:.2f}, "
        f"ortalama kalp basıncı = {centers[1]:.2f}, "
        f"ortalama kolesterol = {centers[2]:.2f}"
    )


kume 0: Ortalama yas = -0.26, ortalama kalp basıncı = -0.06, ortalama kolesterol = 0.40
kume 1: Ortalama yas = 0.45, ortalama kalp basıncı = 0.11, ortalama kolesterol = -0.68


In [None]:
cluster_2 = df[df["cluster"]== 1]
# kume 1 icin tablo halinde gostermesini istiyoruz
print(cluster_2)

      age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
1      53    1   0       140   203    1        0      155      1      3.1   
2      70    1   0       145   174    0        1      125      1      2.6   
4      62    0   0       138   294    1        1      106      0      1.9   
6      58    1   0       114   318    0        2      140      0      4.4   
7      55    1   0       160   289    0        0      145      1      0.8   
...   ...  ...  ..       ...   ...  ...      ...      ...    ...      ...   
1015   58    1   0       128   216    0        0      131      1      2.2   
1017   53    1   0       123   282    0        1       95      1      2.0   
1021   60    1   0       125   258    0        0      141      1      2.8   
1022   47    1   0       110   275    0        0      118      1      1.0   
1024   54    1   0       120   188    0        1      113      0      1.4   

      slope  ca  thal  target  cluster  
1         0   0     3       0     

In [None]:
# 5.adim ve son adim modelin yaptigi isin dogrulugunu bulmak icin bu yontemi kullaniriz
arc = mean_squared_error(df["target"], cluster_labels)
print(arc)

0.7931707317073171
