In [18]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

In [19]:
# Load the dataset from the CSV file in the current working directory.
df = pd.read_csv(filepath_or_buffer='Heart.csv')

In [20]:
# Drop the 'Unnamed: 0' column (presumably the row index written out when the
# CSV was saved -- confirm against the file).
# errors='ignore' keeps this cell re-runnable: because it rebinds `df`, a second
# execution would otherwise raise KeyError once the column is already gone.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [21]:
# Count the missing values in each column.
df.isna().sum()

Age          0
Sex          0
ChestPain    0
RestBP       0
Chol         0
Fbs          0
RestECG      0
MaxHR        0
ExAng        0
Oldpeak      0
Slope        0
Ca           4
Thal         2
AHD          0
dtype: int64

In [22]:
from sklearn.model_selection import train_test_split

# Set aside 20% of the rows as `test`; the remaining 80% become `train`.
# A fixed random_state makes the shuffle deterministic, so the split (and every
# number computed from it) is identical after Restart Kernel -> Run All.
train, test = train_test_split(df, test_size=0.2, random_state=0)

In [23]:
# Show the dimensions of the `test` split as a (rows, columns) tuple.
test.shape

(61, 14)

In [24]:
# Feature matrix: every column of `train` except the target column "AHD".
# The column is named via `columns=` because passing the axis positionally
# (`drop("AHD", 1)`) was deprecated in pandas 1.3 and raises TypeError in 2.0+.
X = train.drop(columns="AHD")
X.head()

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal
280,57,1,asymptomatic,110,335,0,0,143,1,3.0,2,1.0,reversable
138,35,1,asymptomatic,120,198,0,0,130,1,1.6,2,0.0,reversable
22,58,1,nontypical,120,284,0,2,160,0,1.8,2,0.0,normal
50,41,0,nontypical,105,198,0,0,168,0,0.0,1,1.0,normal
68,59,1,asymptomatic,170,326,0,2,140,1,3.4,3,0.0,reversable


In [25]:
# Target vector: the "AHD" column of the training split.
y = train.loc[:, "AHD"]

In [26]:
from sklearn.model_selection import train_test_split

# Split the features/target taken from `train` once more: 80% to fit the models,
# 20% to score them. A fixed random_state makes this split deterministic, so the
# accuracy figures below can be reproduced on a fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [27]:
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Numeric columns: fill gaps with the column median, then standardise.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill gaps with the literal label 'missing', then one-hot
# encode; categories not seen during fit are ignored at transform time.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

In [28]:
from sklearn.compose import ColumnTransformer

# Pick the column groups by dtype from the training split. The target "AHD" is
# an object column, so it is removed from the categorical group explicitly.
numeric_features = train.select_dtypes(include=['int64', 'float64']).columns
categorical_features = train.select_dtypes(include=['object']).columns.drop('AHD')

# Route each column group through its matching transformer.
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

In [29]:
# Display the assembled ColumnTransformer so its configuration can be reviewed.
preprocessor

ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
         transformer_weights=None,
         transformers=[('num', Pipeline(memory=None,
     steps=[('imputer', SimpleImputer(copy=True, fill_value=None, missing_values=nan,
       strategy='median', verbose=0)), ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True))]), Index(['Age', 'Sex', 'RestBP', 'Chol', 'Fbs', 'RestECG', 'M...nown='ignore',
       n_values=None, sparse=True))]), Index(['ChestPain', 'Thal'], dtype='object'))])

In [30]:
from sklearn.linear_model import LogisticRegression

# Preprocessing followed by a logistic-regression classifier with default settings.
lr = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
])

# Fit on the training part, predict the validation part, report accuracy.
y_pred = lr.fit(X_train, y_train).predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)



0.8163265306122449

In [31]:
from sklearn.ensemble import RandomForestClassifier

# Preprocessing followed by a random forest. The forest draws random bootstrap
# samples and feature subsets, so random_state is fixed to make the reported
# accuracy reproducible across runs.
rf = Pipeline(steps=[('preprocessor', preprocessor),
                     ('classifier', RandomForestClassifier(random_state=0))])
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
accuracy_score(y_test, y_pred)



0.8571428571428571

In [32]:
from sklearn.tree import DecisionTreeClassifier

# Preprocessing followed by a single decision tree. Split selection in the tree
# involves randomness (features are permuted at each split), so random_state is
# fixed to make the reported accuracy reproducible across runs.
dt = Pipeline(steps=[('preprocessor', preprocessor),
                     ('classifier', DecisionTreeClassifier(random_state=0))])
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)
accuracy_score(y_test, y_pred)

0.7959183673469388

In [33]:
from sklearn.neighbors import KNeighborsClassifier

# Preprocessing followed by k-nearest neighbours: 5 neighbours, Minkowski
# metric with p=2.
knn = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)),
])

# Fit on the training part, predict the validation part, report accuracy.
y_pred = knn.fit(X_train, y_train).predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8775510204081632

In [34]:
from sklearn.svm import SVC

# Preprocessing followed by a linear-kernel support vector classifier.
svc = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', SVC(kernel='linear', random_state=0)),
])

# Fit on the training part, predict the validation part, report accuracy.
y_pred = svc.fit(X_train, y_train).predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7959183673469388