In [135]:
import pandas as pd

csv_path = 'Heart_Attack_Classification.csv'
df = pd.read_csv(csv_path, sep=';', quotechar='"')
print(df.head())

   Age     Sex           Chest pain  Blood pressure  Cholestoral Diabetes  \
0   59  Female         Asymptomatic             140          177       No   
1   60  Female         Asymptomatic             125          258       No   
2   64  Female         Asymptomatic             128          263       No   
3   76    Male  Non-atypical angina             140          197       No   
4   65    Male         Asymptomatic             150          225       No   

   Max heart rate Exercice induced angina  Number of vessels  \
0             162                     Yes                  1   
1             141                     Yes                  1   
2             105                     Yes                  1   
3             116                      No                  0   
4             114                      No                  3   

         Other observations Risk of heart attack  
0  Reversible vessel defect                   No  
1  Reversible vessel defect                   No  

Create a heatmap of the data to see the correlation between features

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

replace_dict = {
  'Yes': 1, 
  'No': 0, 
  'Female': 1, 
  'Male': 0,
  'Typical angina': 0,
  'Atypical angina': 0,
  'Non-atypical angina': 0, 
  'Asymptomatic': 1,
  'Blood disorder': 1,
  'Reversible vessel defect': 1,
  'Fixed vessel defect': 0,
  'Normal blood flow' : 0
}

df.replace(replace_dict, inplace=True)

df.fillna(0, inplace=True)

print(df.head())

corr_matrix = df.corr(numeric_only=False)
sns.heatmap(corr_matrix, annot=True)

sns.pairplot(df)
plt.show()

### Pre-process data

In [165]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Risk of heart attack is the target variable
X = df.drop('Risk of heart attack', axis=1) # Features
y = df['Risk of heart attack'] # Target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

In [173]:
# Initialize classifier
rf_clf = RandomForestClassifier()

# Fit model
rf_clf.fit(X_train, y_train)

# Predict
y_pred = rf_clf.predict(X_test)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
print(f'Random Forest Classifier Accuracy: {accuracy}')

Random Forest Classifier Accuracy: 0.7875


In [80]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Initialize classifier
svm_clf = make_pipeline(
  PolynomialFeatures(degree=2),
  StandardScaler(),
  LinearSVC(C=5, max_iter=10_000, random_state=42)
)

# Fit the model
svm_clf.fit(X_train, y_train)

# Predict
y_pred = svm_clf.predict(X_test)

# Evaluate
accuracy = accuracy_score(y_test, y_pred)
print(f"SVM Classifier Accuracy: {accuracy}")


SVM Classifier Accuracy: 0.6833333333333333


