In [42]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

from sklearn.feature_selection import SelectKBest , chi2 , RFE
from sklearn.metrics import accuracy_score , classification_report , confusion_matrix
from sklearn.model_selection import train_test_split

In [43]:
# Load heart.csv (resolved relative to the current working directory) into a DataFrame.
df = pd.read_csv("heart.csv")

In [44]:
# Preview the first rows to sanity-check that the file loaded as expected.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


# Top features

In [45]:
# List the available column names before choosing the predictors and the label.
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [46]:
# Predictor columns fed to feature scoring and to both models; `target` is the label.
# NOTE(review): `thal` is listed in df.columns but is not selected here —
# confirm that leaving it out is intentional.
feature_columns = [
    "age", "sex", "cp", "trestbps", "chol", "fbs",
    "restecg", "thalach", "exang", "oldpeak", "slope", "ca",
]
x = df[feature_columns]
y = df["target"]

In [47]:
# Score every predictor against the label with the chi-squared statistic.
# k=10 is the number of top-scoring features the selector would keep; in the
# cells shown here only the fitted scores are read, the selector is never
# used to transform x.
skb = SelectKBest(chi2, k=10)
bf = skb.fit(x, y)

In [48]:
# One chi-squared score per predictor, as a single-column frame.
feature_scores = pd.DataFrame({"Feature Scores": bf.scores_})

In [49]:
# Predictor names as a single-column frame, in the column order of x.
feature_cn = pd.DataFrame({"Feature Names": x.columns})

In [50]:
# Put names and scores side by side (aligned on the default integer index),
# then rank the predictors from highest to lowest score.
best_features = (
    pd.concat([feature_cn, feature_scores], axis="columns")
    .sort_values("Feature Scores", ascending=False)
)

In [66]:
# Display the predictors ranked by chi-squared score, highest first.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours and the recorded table has 10 rows while x has 12 columns —
# re-run the notebook top to bottom to confirm the displayed output.
best_features

Unnamed: 0,Feature Names,Feature Scores
7,thalach,188.320472
9,oldpeak,72.644253
11,ca,66.440765
2,cp,62.598098
8,exang,38.914377
4,chol,23.936394
0,age,23.286624
3,trestbps,14.823925
10,slope,9.804095
1,sex,7.576835


In [52]:
# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=101,
)

# Logistic regression

In [53]:
# Fit a logistic-regression classifier on the training split; max_iter=1000
# gives the solver room to converge on these unscaled features.
lr = LogisticRegression(max_iter=1000).fit(x_train, y_train)

# Mean accuracy on the held-out split (shown as the cell's output).
lr.score(x_test, y_test)

0.86

In [58]:
# Predict the held-out rows with the logistic-regression model and print the
# per-class precision / recall / F1 summary.
y_pred = lr.predict(x_test)
lr_report = classification_report(y_test, y_pred)
print(lr_report)

              precision    recall  f1-score   support

           0       0.89      0.81      0.85        48
           1       0.84      0.90      0.87        52

    accuracy                           0.86       100
   macro avg       0.86      0.86      0.86       100
weighted avg       0.86      0.86      0.86       100



# Random forest

In [60]:
# Fit a 200-tree random forest on the training split.
# random_state pins the forest's internal randomness so that a fresh
# Restart & Run All reproduces the same model and the same scores; the seed
# matches the one used for the train/test split.
rt = RandomForestClassifier(n_estimators=200, random_state=101)
rt.fit(x_train, y_train)

RandomForestClassifier(n_estimators=200)

In [61]:
# Mean accuracy of the random forest on the held-out split.
rt.score(X=x_test, y=y_test)

0.84

In [62]:
# Predict the held-out rows with the random forest and print the per-class
# precision / recall / F1 summary.
y_pre = rt.predict(x_test)
rf_report = classification_report(y_test, y_pre)
print(rf_report)

              precision    recall  f1-score   support

           0       0.88      0.77      0.82        48
           1       0.81      0.90      0.85        52

    accuracy                           0.84       100
   macro avg       0.85      0.84      0.84       100
weighted avg       0.84      0.84      0.84       100



# Saving the models

In [None]:
import joblib

In [None]:
# Persist the fitted logistic-regression model to "lr_model" (relative to the
# current working directory). The context manager closes the file handle even
# if joblib.dump raises, which the manual open/close pair did not guarantee.
with open("lr_model", "wb") as file_lr:
    joblib.dump(lr, file_lr)


In [None]:
# Persist the fitted random-forest model to "rf_model" (relative to the
# current working directory). The context manager closes the file handle even
# if joblib.dump raises, which the manual open/close pair did not guarantee.
with open("rf_model", "wb") as file_rt:
    joblib.dump(rt, file_rt)