In [None]:
import numpy as np 
import pandas as pd

In [None]:
# Location of the input CSV, kept in one named constant so the path can be
# changed in a single place when the notebook is run outside of /content.
DATA_PATH = '/content/hungarian.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows of the loaded frame.
df.head(5)

In [None]:
# List the column labels available in the frame.
df.columns

In [None]:
from bokeh.io import output_notebook
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.transform import cumsum
from bokeh.palettes import Spectral6
from bokeh.models import ColumnDataSource
from bokeh.layouts import gridplot
from math import pi

In [None]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [None]:
# Number of rows for each distinct value of the 'target' column.
df['target'].value_counts()

In [None]:
# Row count for the entry keyed 0 in the value-count table.
# NOTE(review): presumably 'target' holds integer labels 0/1, making this a
# label lookup rather than a positional one — confirm against the data.
df['target'].value_counts()[0]

In [None]:
# Inputs for the class-count bar chart: one bar per target value.
unique = ["0", '1']

# Compute the value counts once and reuse them for both bars.
target_counts = df['target'].value_counts()
top = [target_counts[0], target_counts[1]]

# All ColumnDataSource columns must have the same length, so take exactly as
# many palette colors as there are bars rather than the full six-color palette.
bar_colors = list(Spectral6[:len(unique)])
source = ColumnDataSource(data = dict(Target = unique, counts = top, color = bar_colors))

In [None]:
# Bar chart of the class counts held in `source`, one bar per target value.
# `height`/`width` are used instead of `plot_height`/`plot_width`: the latter
# were deprecated in Bokeh 2.4 and removed in Bokeh 3.0, where figure()
# rejects them as unexpected attributes.
p = figure(
    x_range = unique,
    height = 500,
    width = 500,
    x_axis_label = 'Target',
    y_axis_label = 'Count(Target)',
    title = 'Count of People Having Heart Disease and Not Having Heart Disease',
    tools = "hover", tooltips="@Target: @counts"
)

# Bars start at zero; x position, height and color are read from `source` columns.
p.vbar(
    x = 'Target',
    top = 'counts',
    bottom = 0,
    width = 0.9,
    source = source,
    color = 'color'
)

In [None]:
# Pie-chart inputs: map each outcome label to its row count.
target = dict(zip(
    ('No Heart Disease', 'Have Heart Disease'),
    (df['target'].value_counts()[0], df['target'].value_counts()[1]),
))

# One row per outcome with columns 'target' (label) and 'value' (count).
data = (
    pd.Series(target)
    .reset_index(name = 'value')
    .rename(columns = {'index': 'target'})
)

# 'angle' is each outcome's share of the total scaled to a full circle
# (2*pi radians); 'color' is the fill color of the matching wedge.
data = data.assign(
    angle = data['value'] / data['value'].sum() * 2 * pi,
    color = ['skyblue', 'salmon'],
)

In [None]:
# Pie chart of the outcome proportions held in `data`.
# `height`/`width` are used instead of `plot_height`/`plot_width`: the latter
# were deprecated in Bokeh 2.4 and removed in Bokeh 3.0, where figure()
# rejects them as unexpected attributes.
p1 = figure(
            height = 500,
            width = 500,
            title = "Proportion of People Having Heart Disease and not Having Heart Disease",
            toolbar_location = None,
            tools = "hover",
            tooltips = "@target: @value",
            x_range = (-0.5, 1.0)
            )

# Each wedge runs from the cumulative angle of the previous rows
# (include_zero=True starts the first wedge at 0) to its own cumulative angle.
p1.wedge(
        x = 0, y = 1, radius = 0.4,
        start_angle = cumsum('angle', include_zero=True),
        end_angle = cumsum('angle'),
        line_color = "white",
        fill_color = 'color',
        legend_field = 'target',
        source = data
        )

p1.legend.location = "top_right"

p1.legend.label_text_font_size = '5pt'

In [None]:
# Display both charts in one grid: each inner list is a grid row, so the bar
# chart `p` sits above the pie chart `p1`.
show(gridplot([[p], [p1]]))

In [None]:
# Share of each outcome among the rows labelled 0 or 1, as a whole percentage.
# The counts are computed once, and the ratio is scaled to percent *before*
# rounding: round(x, 2) * 100 reintroduces binary floating-point noise
# (for example 0.57 * 100 evaluates to 56.99999999999999).
outcome_counts = df['target'].value_counts()
n_without_disease = outcome_counts[0]
n_with_disease = outcome_counts[1]
n_total = n_without_disease + n_with_disease

print("Percentage of people having Heart Disease", round(n_with_disease / n_total * 100, 0))
print("Percentage of people not having Heart Disease", round(n_without_disease / n_total * 100, 0))

In [None]:
# Count the missing entries in each column.
df.isnull().sum()

In [None]:
# Split the columns into categorical vs continuous by cardinality: a column
# with at most 10 distinct values is treated as categorical.
# The result lists are created here so that the cell runs on a fresh kernel
# (they were previously appended to without being defined, which raises
# NameError) and so that re-running the cell does not accumulate duplicates.
categorical_var = []
continuous_var = []

for column in df.columns:
    distinct_values = df[column].unique()
    if len(distinct_values) <= 10:
        # Show the distinct values so the category encoding can be inspected.
        print(f"{column} : {distinct_values}")
        categorical_var.append(column)
        print()
    else:
        continuous_var.append(column)

print("Categorical Variables are: ", categorical_var)
print("Continuous Variables are: ", continuous_var)

In [None]:
def count_of_each_category(column_name):
    """
    Build a Bokeh bar chart of how often each distinct value of a column occurs.

    The counts are taken from the notebook-level DataFrame ``df``.

    Parameters
    ----------
    column_name : str
        Name of the column in ``df`` whose value counts are plotted. It is
        also used as the x-axis label.

    Returns
    -------
    bokeh figure
        A 400x400 figure with one bar per distinct value. The figure is
        returned without being shown; the caller decides how to display it.

    Notes
    -----
    NOTE(review): the figure uses the default x range, so the distinct values
    are presumably numeric; string categories would likely need a categorical
    ``x_range`` — confirm against the columns this is called with.
    """
    # Compute the value counts once instead of once per category.
    counts = df[column_name].value_counts()
    classes = counts.index.tolist()
    top = counts.tolist()

    # All ColumnDataSource columns must have the same length: assign exactly
    # one color per bar, cycling through the palette if there are more bars
    # than palette entries.
    bar_colors = [Spectral6[i % len(Spectral6)] for i in range(len(classes))]
    source = ColumnDataSource(data = dict(Classes = classes, counts = top, color = bar_colors))

    # `height`/`width` replace `plot_height`/`plot_width`, which were
    # deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
    p2 = figure(
        height = 400,
        width = 400,
        x_axis_label = column_name,
        y_axis_label = 'Count(Classes)',
        tools="hover", tooltips="@Classes: @counts"
    )

    p2.vbar(
        x = 'Classes',
        top = 'counts',
        bottom = 0,
        width = 0.9,
        source = source,
        color = 'color'
    )

    return p2
    