# Imports

In [75]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import math

# Creating DataFrame

In [76]:
heart_data= {
    "Age": [29, 54, 37, 45, 63, 50, 48, 40, 58, 42, 55, 47, 35, 62, 53],
    "Cholesterol": [200, 230, 180, 195, 240, 210, 190, 220, 210, 205, 250, 215, 185, 245, 225],
    "Max_Heart_Rate": [150, 120, 165, 140, 110, 155, 145, 135, 125, 145, 115, 130, 160, 118, 135],
    "Exercise_Induced_Angina": [1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0],
    "Heart_Disease": [1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1],  # Balanced target variable
}

In [77]:
df=pd.DataFrame(heart_data)
df

Unnamed: 0,Age,Cholesterol,Max_Heart_Rate,Exercise_Induced_Angina,Heart_Disease
0,29,200,150,1,1
1,54,230,120,0,1
2,37,180,165,0,0
3,45,195,140,1,0
4,63,240,110,1,1
5,50,210,155,0,0
6,48,190,145,1,1
7,40,220,135,0,0
8,58,210,125,1,1
9,42,205,145,0,0


# DF INFO

In [78]:
df.shape

(15, 5)

In [79]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype
---  ------                   --------------  -----
 0   Age                      15 non-null     int64
 1   Cholesterol              15 non-null     int64
 2   Max_Heart_Rate           15 non-null     int64
 3   Exercise_Induced_Angina  15 non-null     int64
 4   Heart_Disease            15 non-null     int64
dtypes: int64(5)
memory usage: 732.0 bytes


In [80]:
df.describe()

Unnamed: 0,Age,Cholesterol,Max_Heart_Rate,Exercise_Induced_Angina,Heart_Disease
count,15.0,15.0,15.0,15.0,15.0
mean,47.866667,213.333333,136.533333,0.466667,0.533333
std,9.98475,21.684974,16.910971,0.516398,0.516398
min,29.0,180.0,110.0,0.0,0.0
25%,41.0,197.5,122.5,0.0,0.0
50%,48.0,210.0,135.0,0.0,1.0
75%,54.5,227.5,147.5,1.0,1.0
max,63.0,250.0,165.0,1.0,1.0


# Handling Missing Values

In [81]:
df.isna().sum()

Age                        0
Cholesterol                0
Max_Heart_Rate             0
Exercise_Induced_Angina    0
Heart_Disease              0
dtype: int64

In [None]:
df['Age']=df['Age'].fillna(df['Age'].mode()[0])

In [83]:
df['Cholesterol']=df['Cholesterol'].fillna(math.floor(df['Cholesterol'].mean()))

In [84]:
df['Max_Heart_Rate']=df['Max_Heart_Rate'].fillna(df['Max_Heart_Rate'].median())

In [85]:
df['Exercise_Induced_Angina']=df['Exercise_Induced_Angina'].ffill()

In [86]:
df.isna().sum()

Age                        0
Cholesterol                0
Max_Heart_Rate             0
Exercise_Induced_Angina    0
Heart_Disease              0
dtype: int64

# Encoding

In [87]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype
---  ------                   --------------  -----
 0   Age                      15 non-null     int64
 1   Cholesterol              15 non-null     int64
 2   Max_Heart_Rate           15 non-null     int64
 3   Exercise_Induced_Angina  15 non-null     int64
 4   Heart_Disease            15 non-null     int64
dtypes: int64(5)
memory usage: 732.0 bytes


### No object cols to encode only we have to convert bool col to int

In [88]:
bool_col=df.select_dtypes(include=bool).columns
df[bool_col]=df[bool_col].astype(int)

In [89]:
df

Unnamed: 0,Age,Cholesterol,Max_Heart_Rate,Exercise_Induced_Angina,Heart_Disease
0,29,200,150,1,1
1,54,230,120,0,1
2,37,180,165,0,0
3,45,195,140,1,0
4,63,240,110,1,1
5,50,210,155,0,0
6,48,190,145,1,1
7,40,220,135,0,0
8,58,210,125,1,1
9,42,205,145,0,0


# Splitting

In [90]:
y=df['Heart_Disease']
x=df.drop(columns=['Heart_Disease'])

In [91]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

# Normalisation

In [92]:
normalizer=MinMaxScaler()

In [93]:
x_train=pd.DataFrame(normalizer.fit_transform(x_train),columns=x_train.columns)
x_test=pd.DataFrame(normalizer.transform(x_test),columns=x_test.columns)

In [94]:
x_test

Unnamed: 0,Age,Cholesterol,Max_Heart_Rate,Exercise_Induced_Angina
0,0.25,0.357143,0.636364,0.0
1,0.428571,0.5,0.363636,0.0
2,-0.214286,0.285714,0.727273,1.0


# Training

In [95]:
model=LogisticRegression()

In [96]:
model.fit(x_train,y_train)

# Evaluation

In [97]:
y_pred=model.predict(x_test)

In [98]:
y_pred

array([0, 1, 0])

In [99]:
y_test

9     0
11    0
0     1
Name: Heart_Disease, dtype: int64

In [100]:
accuracy=accuracy_score(y_test,y_pred)

In [101]:
accuracy

0.3333333333333333