In [1]:
import pandas as pd

# Data preprocessing

In [2]:
# Read the heart-failure records from the CSV file (path is relative to the
# notebook's working directory) into the DataFrame used by every later cell.
hf = pd.read_csv('Heart_Failure_Details.csv')
# Bare last expression: display the frame as the cell output.
hf 

Unnamed: 0,S no.,age,anaemia,creatinine phosphokinase,diabetes,ejection fraction,high bp,platelets,serum creatinine,sex,smoking,death
0,1,75.0,0,582,0,20,1,265000.00,1.9,1,0,1
1,2,55.0,0,7861,0,38,0,263358.03,1.1,1,0,1
2,3,65.0,0,146,0,20,0,162000.00,1.3,1,1,1
3,4,50.0,1,111,0,20,0,210000.00,1.9,1,0,1
4,5,65.0,1,160,1,20,0,327000.00,2.7,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
294,295,62.0,0,61,1,38,1,155000.00,1.1,1,1,0
295,296,55.0,0,1820,0,38,0,270000.00,1.2,0,0,0
296,297,45.0,0,2060,1,60,0,742000.00,0.8,0,0,0
297,298,45.0,0,2413,0,38,0,140000.00,1.4,1,1,0


In [3]:
# Split the columns of `hf` into two name lists, preserving column order.
# A column counts as categorical when it has object dtype or fewer than
# 10 distinct values; every other column is treated as continuous.
is_categorical = [
    hf[name].dtype == 'object' or hf[name].nunique() < 10
    for name in hf.columns
]
categorical_cols = [name for name, flag in zip(hf.columns, is_categorical) if flag]
continuous_cols = [name for name, flag in zip(hf.columns, is_categorical) if not flag]

print('Categorical columns:', categorical_cols)
print('Continuous columns:', continuous_cols)

Categorical columns: ['anaemia', 'diabetes', 'high bp', 'sex', 'smoking', 'death']
Continuous columns: ['S no.', 'age', 'creatinine phosphokinase', 'ejection fraction', 'platelets', 'serum creatinine']


In [4]:
# Count the missing (null) entries in each column of `hf`.
hf.isnull().sum() 

S no.                       0
age                         0
anaemia                     0
creatinine phosphokinase    0
diabetes                    0
ejection fraction           0
high bp                     0
platelets                   0
serum creatinine            0
sex                         0
smoking                     0
death                       0
dtype: int64

In [5]:
# Show the column labels of `hf`.
hf.columns

Index(['S no.', 'age', 'anaemia', 'creatinine phosphokinase', 'diabetes',
       'ejection fraction', 'high bp', 'platelets', 'serum creatinine', 'sex',
       'smoking', 'death'],
      dtype='object')

In [6]:
# Show the dimensions of `hf` as (number of rows, number of columns).
hf.shape 

(299, 12)

In [7]:
# Count rows that are exact repeats of an earlier row (all columns compared).
hf.duplicated().sum() 

0

# Exploratory analysis

## Value counts

In [8]:
import plotly.graph_objs as go
from plotly.offline import iplot

# Pie chart of how many rows carry each `sex` code.
# The counts are computed once and reused for both the slice labels and the
# slice sizes, instead of calling value_counts() separately for each.
sex_counts = hf['sex'].value_counts()
trace = go.Pie(labels=sex_counts.index, values=sex_counts.values)
data = [trace]
layout = go.Layout(title='Heart Failure Dataset - Sex')
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [9]:
# Bar chart of how many rows fall on each distinct `age` value.
# The counts are computed once and reused for both axes, instead of calling
# value_counts() separately for x and y.
age_counts = hf['age'].value_counts()
trace = go.Bar(x=age_counts.index, y=age_counts.values, marker=dict(color='black'))
data = [trace]
layout = go.Layout(title='Heart Failure Dataset - age')
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## High bp

In [10]:
# Bar chart with one bar segment per row: x is the row's age, y is its
# `high bp` value.
layout = go.Layout(
    title='Age vs High Blood Pressure',
    xaxis={'title': 'Age'},
    yaxis={'title': 'High Blood Pressure'},
)
graph = go.Bar(x=hf['age'], y=hf['high bp'], marker={'color': 'black'})
data = [graph]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [11]:
# Donut chart of the summed `high bp` column within each `sex` group.
# groupby() returns its keys in ascending order (0, then 1), so the slice
# labels are derived from the grouped index itself. Hard-coding ['1', '0']
# would attach each total to the opposite group.
high_bp_by_sex = hf.groupby('sex')['high bp'].sum()
high_bp_by_sex.index = high_bp_by_sex.index.astype(str)
trace1 = go.Pie(labels=high_bp_by_sex.index, values=high_bp_by_sex.values, hole=.3)
data = [trace1]
# A pie trace has no cartesian axes, so only the chart title is set.
layout = go.Layout(title='High Blood Pressure by Sex')
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Death

In [12]:
# Bar chart with one bar segment per row: x is the row's age, y is its
# `death` value.
layout = go.Layout(
    title='Age vs Death',
    xaxis={'title': 'Age'},
    yaxis={'title': 'Death'},
)
graphing = go.Bar(x=hf['age'], y=hf['death'], marker={'color': 'black'})
data = [graphing]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [13]:

# Donut chart of the summed `death` column within each `sex` group.
# groupby() returns its keys in ascending order (0, then 1), so the slice
# labels are derived from the grouped index itself. Hard-coding ['1', '0']
# would attach each total to the opposite group.
death_by_sex = hf.groupby('sex')['death'].sum()
death_by_sex.index = death_by_sex.index.astype(str)
trace1 = go.Pie(labels=death_by_sex.index, values=death_by_sex.values, hole=.3)
data = [trace1]
# A pie trace has no cartesian axes, so only the chart title is set.
layout = go.Layout(title='Death by Sex')
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Diabetes

In [14]:
# Donut chart of the summed `diabetes` column within each `sex` group.
# groupby() returns its keys in ascending order (0, then 1), so the slice
# labels are derived from the grouped index itself. Hard-coding ['1', '0']
# would attach each total to the opposite group.
diabetes_by_sex = hf.groupby('sex')['diabetes'].sum()
diabetes_by_sex.index = diabetes_by_sex.index.astype(str)
trace1 = go.Pie(labels=diabetes_by_sex.index, values=diabetes_by_sex.values, hole=.3)
data = [trace1]
# A pie trace has no cartesian axes, so only the chart title is set.
layout = go.Layout(title='Diabetes by Sex')
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [15]:
# Bar chart with one bar segment per row: x is the row's age, y is its
# `diabetes` value.
layout = go.Layout(
    title='Age vs Diabetes',
    xaxis={'title': 'Age'},
    yaxis={'title': 'Diabetes'},
)
graphing = go.Bar(x=hf['age'], y=hf['diabetes'], marker={'color': 'black'})
data = [graphing]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Anaemia

In [16]:
# Donut chart of the summed `anaemia` column within each `sex` group.
# The trace is built from `anaemia_by_sex` (computed here), not from a series
# left over from another cell, so the chart matches its title.
# groupby() returns its keys in ascending order (0, then 1), so the slice
# labels are derived from the grouped index itself. Hard-coding ['1', '0']
# would attach each total to the opposite group.
anaemia_by_sex = hf.groupby('sex')['anaemia'].sum()
anaemia_by_sex.index = anaemia_by_sex.index.astype(str)
trace1 = go.Pie(labels=anaemia_by_sex.index, values=anaemia_by_sex.values, hole=.4)
data = [trace1]
# A pie trace has no cartesian axes, so only the chart title is set.
layout = go.Layout(title='anaemia by Sex')
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [17]:
# Bar chart with one bar segment per row: x is the row's age, y is its
# `anaemia` value.
layout = go.Layout(
    title='Age vs Anaemia',
    xaxis={'title': 'Age'},
    yaxis={'title': 'Anaemia'},
)
graphing = go.Bar(x=hf['age'], y=hf['anaemia'], marker={'color': 'black'})
data = [graphing]
fig = go.Figure(data=data, layout=layout)
iplot(fig)