In [None]:
# Libraries
import pandas as pd


In [32]:
dataset = pd.read_csv("./datasets/heart_data.csv")
dataset.drop(columns=['index', 'id'], axis=1, inplace=True)

# Preparing data

In [33]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

y = dataset['cardio'].values
cardio = dataset.drop(['cardio'], axis=1)
cat_attribs = ['gender','cholesterol', 'gluc', 'smoke', 'alco', 'active']
cardio_num = cardio.drop(cat_attribs, axis=1)
num_attribs = list(cardio_num)

num_pipeline = Pipeline([('std_scaler', StandardScaler())])
full_pipeline = ColumnTransformer([
 ("num", num_pipeline, num_attribs), #num_pipeline
 ("cat", OneHotEncoder(), cat_attribs), #one hot encoder
 ])
cardio_prepared = full_pipeline.fit_transform(cardio)

# Split into train and test set

In [36]:
from sklearn.model_selection import train_test_split

y = dataset['cardio'].values
X = cardio_prepared.copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Binary Classifier

In [38]:
from sklearn.linear_model import SGDClassifier
sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X_train, y_train)

# Validating Training Set

In [44]:
# cross validation
from sklearn.model_selection import cross_val_score
cross_val_score(sgd_clf, X_train, y_train, cv=3, scoring="accuracy")

array([0.7166122 , 0.71548722, 0.72511518])

In [45]:
#confusing matrix
from sklearn.model_selection import cross_val_predict
y_train_pred = cross_val_predict(sgd_clf, X_train, y_train, cv=3)

from sklearn.metrics import confusion_matrix
confusion_matrix(y_train, y_train_pred)


array([[22395,  5638],
       [10094, 17873]], dtype=int64)

In [49]:
#precision
from sklearn.metrics import precision_score, recall_score, f1_score
precision = precision_score(y_train, y_train_pred)
recall = recall_score(y_train, y_train_pred)
f1 = f1_score(y_train, y_train_pred)

print("Precision: ", precision)
print("Recall: ", recall)
print("f1: ", f1)

Precision:  0.7601973544298414
Recall:  0.6390746236636036
f1:  0.6943937215898053


In [50]:
y_scores = cross_val_predict(sgd_clf, X_train, y_train, cv=3,method="decision_function")


In [51]:
y_scores

array([ 0.77830726,  0.02836163,  0.87736326, ...,  0.07242128,
       -0.27818007, -1.05666699])