<a href="https://colab.research.google.com/github/innocentmatutu/Machine-learning/blob/main/Heart_disease_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

# Load the dataset (the path points at a .zip archive handed straight to read_csv)
df = pd.read_csv("/content/archive (9).zip")

# Replace every object-dtype column with the integer codes produced by a
# LabelEncoder fitted on that column alone
object_columns = df.select_dtypes(include=['object']).columns
for column_name in object_columns:
  df[column_name] = LabelEncoder().fit_transform(df[column_name])

#Filling missing values with the mean in the numerical columns
def impute_with_extension(X_train,X_valid):
  """Mean-impute missing values and add ``<col>_was_missing`` indicator columns.

  Args:
    X_train: Training feature DataFrame. It determines which columns receive
      an indicator, and the imputer is fitted on it.
    X_valid: Validation feature DataFrame with the same columns as X_train.
      It is only transformed with the imputer fitted on X_train.

  Returns:
    A tuple ``(imputed_X_train, imputed_X_valid)`` of new DataFrames. Each has
    the original columns plus one ``<col>_was_missing`` column (1 where the
    value was missing, 0 otherwise) for every column that had a missing value
    in X_train. The inputs are not modified, and the row index of each input
    is carried over to the corresponding output.
  """
  # Inspect the training split passed in, not the module-level frame, so the
  # function depends only on its arguments.
  cols_with_missing = [col for col in X_train.columns if X_train[col].isnull().any()]

  # Work on copies so the caller's frames are left untouched.
  X_train_plus = X_train.copy()
  X_valid_plus = X_valid.copy()

  for col in cols_with_missing:
    # Per-column mask (the whole-frame mask cannot be stored in one column).
    # Cast to int so the frame stays purely numeric for the imputer.
    X_train_plus[col + '_was_missing'] = X_train_plus[col].isnull().astype(int)
    X_valid_plus[col + '_was_missing'] = X_valid_plus[col].isnull().astype(int)

  # 'mean' matches the stated intent; 'constant' would fill numeric gaps with 0.
  impute = SimpleImputer(strategy='mean')

  # The imputer returns bare arrays: restore the column names and the original
  # row index so the result still aligns by label with the target series.
  imputed_X_train = pd.DataFrame(impute.fit_transform(X_train_plus),
                                 columns=X_train_plus.columns,
                                 index=X_train_plus.index)
  imputed_X_valid = pd.DataFrame(impute.transform(X_valid_plus),
                                 columns=X_valid_plus.columns,
                                 index=X_valid_plus.index)

  return imputed_X_train,imputed_X_valid

# Target: pop returns the 'HeartDisease' column and removes it from df in place
y = df.pop('HeartDisease')

# Predictor columns handed to the model
features = [
    'Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
    'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
]
X = df[features]

# Hold out 20% of the rows for validation, using a fixed seed
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

# Impute both splits with the helper defined earlier in this file
imputed_X_train, imputed_X_valid = impute_with_extension(X_train, X_valid)

# Fit an XGBoost classifier on the imputed training split and predict the
# validation split
model = xgb.XGBClassifier(learning_rate=0.05, n_estimators=500)
model.fit(imputed_X_train, y_train)
prediction = model.predict(imputed_X_valid)

# Validation metrics, each computed and printed in turn
accuracy = accuracy_score(y_valid, prediction)
print(f'Accuracy score: {accuracy}')

precision = precision_score(y_valid, prediction)
print(f'Precision score: {precision}')

recall = recall_score(y_valid, prediction)
print(f'Recall score: {recall}')

f1 = f1_score(y_valid, prediction)
print(f'F1 score: {f1}')


Accuracy score: 0.8858695652173914
Precision score: 0.908256880733945
Recall score: 0.9
F1 score: 0.9041095890410958
