In [0]:
import os

if not os.path.exists("heart-attack-prediction.csv"):
    !rm -rf sample_data
    from google.colab import files
    files.upload()
    !xz -d *.xz
    print("Uploaded files:")
    !ls
else:
    print("Already have files:")
    !ls

In [0]:
# Prepare dataframe
import pandas as pd
import numpy as np

# Load the CSV, treating "?" entries as missing values.
heart_raw = pd.read_csv("heart-attack-prediction.csv", na_values="?")

# drop columns w/ too many nulls
df = heart_raw.drop(columns=["ca", "thal", "slope"])

# Nullable integer dtype; presumably so the dummy columns are named
# "restecg_1"/"restecg_2" rather than "restecg_1.0"/"restecg_2.0" -- confirm.
df["restecg"] = df["restecg"].astype("Int64")

# One-hot encode cp and restecg, dropping the first level of each.
# NOTE: get_dummies is called without dummy_na, so a row whose category is
# missing gets 0 in every indicator column.
cp_dummies = pd.get_dummies(df["cp"], prefix="cp", drop_first=True)  # Don't drop first if using SVM
restecg_dummies = pd.get_dummies(df["restecg"], prefix="restecg", drop_first=True)
df = pd.concat([df, cp_dummies, restecg_dummies], axis=1)

# Keep only the modelling columns, in a fixed order with the target "num" last.
model_columns = [
    "age", "sex",
    "cp_2", "cp_3", "cp_4",
    "trestbps", "chol", "fbs",
    "restecg_1", "restecg_2",
    "thalach", "exang", "oldpeak",
    "num",
]
df = df[model_columns]

df.info()

In [0]:
from sklearn.impute import SimpleImputer

# Mean-impute missing values in the feature columns only.
#
# The target "num" is deliberately excluded: averaging class labels would
# fabricate a fractional, non-existent class. Rows with a missing label are
# dropped instead, and "num" keeps its original dtype.
#
# NOTE(review): the imputer is still fitted on every row before the
# train/test split, so test-set statistics leak into the training features;
# consider fitting it on the training split only.
imputer = SimpleImputer(strategy="mean")

column_order = df.columns
feature_cols = column_order.drop("num")

# Unlabelled rows cannot be used for supervised training.
df = df.dropna(subset=["num"]).reset_index(drop=True)

imputer.fit(df[feature_cols])

imputed_features = pd.DataFrame(
    imputer.transform(df[feature_cols]),
    columns=feature_cols,
    index=df.index,
)

# Re-attach the untouched target and restore the original column order.
df = imputed_features.join(df["num"])[column_order]

df.head()

In [0]:
from sklearn.model_selection import train_test_split

# Split into features/target, then into train (80%) and test (20%) sets.
# A fixed seed makes the split, and every metric computed from it,
# reproducible across "Restart & Run All".
RANDOM_STATE = 42

x = df.drop(columns="num")
y = df["num"]

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

print(len(x_train), 'train examples')
print(len(x_test), 'test examples')

In [0]:
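# Classifier code here: fit a model on x_train / y_train and store its
# predictions for x_test in `y_predict` (read by the confusion-matrix cell below).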

In [0]:
from sklearn.metrics import confusion_matrix

# Arguments follow scikit-learn's (y_true, y_pred) order and the result is
# transposed, so rows index the predicted label and columns the true label.
# The metric formulas in the next cell rely on that orientation.
cm = confusion_matrix(y_test, y_predict).T

cm

In [0]:
# Binary-classification metrics from the 2x2 confusion matrix `cm`.
# Assumes rows index the predicted label and columns the true label, i.e. the
# orientation produced by the confusion-matrix cell above.
true_neg, false_neg = cm[0][0], cm[0][1]
false_pos, true_pos = cm[1][0], cm[1][1]

acc = (true_neg + true_pos) / cm.sum()
prec = true_pos / (false_pos + true_pos)
rec = true_pos / (false_neg + true_pos)

# F-beta = (1 + beta^2) * P * R / (beta^2 * P + R), for beta = 1 and beta = 2.
f1 = 2*prec*rec/(prec+rec)
f2 = 5*prec*rec/(4*prec+rec)

metrics = pd.DataFrame({
    "Metric": ["f1", "f2", "precision", "recall", "accuracy"],
    "Score": [f1, f2, prec, rec, acc],
})
metrics