## Importing Libraries and Visualization Design
Necessary Python libraries for data analysis and visualization are imported, and the seaborn style is configured, along with the definition of color palettes.

In [None]:
import pandas as pd
from ydata_profiling import ProfileReport
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from pywaffle.waffle import Waffle
%matplotlib inline
sns.set_style("darkgrid")

colors6 = sns.color_palette(['#1337f5', '#E80000', '#0f1e41', '#fd523e', '#404e5c', '#c9bbaa'], 6)
colors2 = sns.color_palette(['#1337f5', '#E80000'], 2)
colors1 = sns.color_palette(['#1337f5'], 1)

## Data Reading

The code reads a CSV file into a Pandas DataFrame and displays the first few rows.

In [None]:
# Location of the raw CSV, relative to the notebook's working directory.
dataset_path = "data/dataset.csv"

df = pd.read_csv(dataset_path)
df.head()

In [None]:
# Build a ydata-profiling report object for the whole DataFrame.
# NOTE(review): nothing in this cell renders or exports the report; if it is
# meant to be shown, a rendering call (e.g. profile.to_notebook_iframe()) or
# an export is still needed — confirm intent.
profile = ProfileReport(df, title="Profiling Report")

In [None]:
# Column names, dtypes, non-null counts and memory usage.
df.info()

This section shows statistical summary measures for the numerical columns in the Pandas DataFrame.

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

This section provides more information for the categorical columns in the Pandas DataFrame.

In [None]:
# Count, number of unique values, most frequent value and its frequency for the object columns.
df.describe(include="object")

## EDA
In exploratory data analysis (EDA), data patterns and insights are uncovered through the examination of key statistical measures, visualizations, and distributions, providing a foundational understanding of the heart disease indicators dataset's structure and characteristics.

In [None]:
def show_relation(col, according_to, type_='dis'):
    """Plot how ``col`` is distributed, optionally split by ``according_to``.

    Reads the notebook-level ``df`` and the ``colors1`` / ``colors2`` /
    ``colors6`` palettes. Draws on a single new 15x7 figure and returns nothing.

    Parameters
    ----------
    col : str
        Column of ``df`` placed on the x-axis.
    according_to : str or None
        Column used as the hue. Pass ``None`` for an unsplit plot.
    type_ : {'dis', 'count'}, default 'dis'
        ``'dis'``   - kernel density estimate of ``col``.
        ``'count'`` - with ``according_to``: for each value of ``col``, the
        share of each ``according_to`` value (bars within one ``col`` value
        sum to 1); without it: raw counts per value of ``col``.
        Bars are ordered by descending frequency of ``col``.

    Raises
    ------
    ValueError
        If ``type_`` is neither ``'dis'`` nor ``'count'``.
    """
    if type_ not in ('dis', 'count'):
        # Previously an unknown type silently produced an empty, titled figure.
        raise ValueError(f"type_ must be 'dis' or 'count', got {type_!r}")

    fig, ax = plt.subplots(figsize=(15, 7))

    if type_ == 'dis':
        # Axes-level kdeplot draws on the figure created above. The figure-level
        # sns.displot opens its own figure, which left this one blank.
        if according_to is not None:
            sns.kdeplot(data=df, x=col, hue=according_to, palette=colors2, ax=ax)
        else:
            # A palette is only meaningful together with hue; use a single colour.
            sns.kdeplot(data=df, x=col, color=colors1[0], ax=ax)
    else:
        order = df[col].value_counts().index
        if according_to is not None:
            perc = (
                df.groupby(col)[according_to]
                .value_counts(normalize=True)
                .reset_index(name='Percentage')
            )
            sns.barplot(data=perc, x=col, y='Percentage', hue=according_to,
                        palette=colors6, order=order, ax=ax)
        else:
            sns.countplot(data=df, x=col, color=colors1[0], order=order, ax=ax)

    if according_to is None:
        ax.set_title(f'{col}')
    else:
        ax.set_title(f'{col} in relation to {according_to}')

Percentage of people in the dataset with heart disease

In [None]:
# `disease_size` was not defined anywhere above this cell, so it failed with a
# NameError on a fresh kernel. Build it here: number of rows per HeartDisease label.
disease_size = df['HeartDisease'].value_counts().to_dict()

# Materialise the dict views as lists: plt.pie converts the wedge sizes to a
# float array, which does not accept a dict_values view.
plt.pie(list(disease_size.values()), labels=list(disease_size.keys()),
        autopct='%1.1f%%', colors=colors2)
plt.title('Heart Disease Percentage')
plt.show()

Exploring the relationship between BMI and heart disease

In [None]:
# Split the columns by dtype. Later cells pick columns from these indexes by
# position (e.g. obj_cols[4], num_cols[0]), so the printed order matters.
# The [1:] slice drops the first object column.
# NOTE(review): presumably that first column is the 'HeartDisease' target — verify against the output.
obj_cols = df.select_dtypes(include='object').columns[1:]
num_cols = df.select_dtypes(exclude='object').columns
print(f'Object columns : {obj_cols}', end='\n\n')
print(f'Numerical columns : {num_cols}')

In [None]:
# Density of the first numeric column, split by heart-disease status.
# NOTE(review): the heading expects num_cols[0] to be BMI — confirm with the printed column list.
show_relation(col=num_cols[0], according_to='HeartDisease', type_='dis');

Exploring the relationship between BMI and the diseases/illnesses in the dataset

In [None]:
# Conditions whose 'Yes' respondents each get a BMI density curve; the order
# determines which colors6 entry each curve receives.
conditions = ["HeartDisease", "KidneyDisease", "SkinCancer", "Asthma", "Stroke", "Diabetic"]

fig, ax = plt.subplots(figsize=(14, 6))
for condition, curve_color in zip(conditions, colors6):
    bmi_with_condition = df.loc[df[condition] == 'Yes', "BMI"]
    # `fill` is the current name of the deprecated `shade` keyword.
    sns.kdeplot(bmi_with_condition, alpha=1, fill=False, color=curve_color,
                label=condition, ax=ax)

ax.set_title("BMI Distribution by Condition")
ax.set_xlabel("BMI")
# A KDE's y-axis is a probability density, not a frequency count.
ax.set_ylabel("Density")
ax.legend(bbox_to_anchor=(1.02, 1), loc=2, borderaxespad=0.)
plt.show()

Exploring the relationship between Mental health and heart disease

In [None]:
# Density of the third numeric column, split by heart-disease status.
# NOTE(review): the heading expects num_cols[2] to be the mental-health column — confirm with the printed column list.
show_relation(col=num_cols[2], according_to='HeartDisease', type_='dis')

Exploring the relationship between Age and heart disease

In [None]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(20, 6))

# One panel per heart-disease label: (axes, label, bar colour, title).
# `color` must be a single colour, so index into the palettes; passing the
# whole one-element `colors1` palette is not a valid matplotlib colour.
panels = [
    (ax1, 'No', colors1[0], "Age Distribution of People Without Heart Disease"),
    (ax2, 'Yes', colors2[1], "Age Distribution of Heart Disease Patients"),
]

for panel_ax, label, bar_color, title in panels:
    # Sort so the age categories appear in ascending order along the x-axis.
    subset = df.loc[df.HeartDisease == label].sort_values("AgeCategory")
    sns.histplot(data=subset, x='AgeCategory', color=bar_color, ax=panel_ax)
    panel_ax.set_title(title)

fig.tight_layout()

Exploring the distribution of heart disease between genders

In [None]:
# Share of each heart-disease answer within every value of the fifth object column.
# NOTE(review): the heading expects obj_cols[4] to be the gender column — confirm with the printed column list.
show_relation(col=obj_cols[4], according_to='HeartDisease', type_='count')

Exploring the distribution of heart disease incidents among races

In [None]:
# Within each race, the fraction of rows per HeartDisease label; keep only the
# 'Yes' rows and rank races from highest to lowest share.
shares_by_race = (
    df.groupby('Race')
    .HeartDisease.value_counts(normalize=True)
    .reset_index(name='Percentage')
)
heart_disease_by_race = (
    shares_by_race[shares_by_race.HeartDisease == 'Yes']
    .sort_values('Percentage', ascending=False)
)

plt.figure(figsize=(16, 6), dpi=80)
sns.barplot(data=heart_disease_by_race, x='Race', y='Percentage', palette=colors6);

Exploring the relationship between individual general health perception and heart disease

In [None]:
# Rank of each self-reported health level, used to sort rows from best to worst.
health_rank = {'Excellent': 0,
               'Very good': 1,
               'Good': 2,
               'Fair': 3,
               'Poor': 4}

# Share of each GenHealth answer within each HeartDisease group.
gen_health_share = (
    df.groupby('HeartDisease')
    .GenHealth.value_counts(normalize=True)
    .reset_index(name='Percentage')
    .sort_values(by='GenHealth', key=lambda levels: levels.map(health_rank))
)

# Express the shares as percentages with one decimal place.
gen_health_share['Percentage'] = (gen_health_share['Percentage'] * 100).round(1)
percent_labels = gen_health_share['Percentage'].map(lambda value: str(value) + '%')

fig = px.bar(
    data_frame=gen_health_share,
    x='Percentage',
    y='HeartDisease',
    color='GenHealth',
    text=percent_labels,
    color_discrete_sequence=['#1337f5', '#E80000', '#0f1e41', '#fd523e', '#404e5c', '#c9bbaa'],
)
fig.update_layout(title="What Individuals Say Their General Health Is", title_x=0.5)

fig.show()

Exploring the relationship between Sleep hours and incidents of Heart Disease in the dataset

In [None]:
# Share of each SleepTime value within each HeartDisease group.
sleep_share = (
    df.groupby('HeartDisease')
    .SleepTime.value_counts(normalize=True)
    .reset_index(name='Percentage')
)

plt.figure(figsize=(16, 6), dpi=80)
ax = sns.barplot(data=sleep_share, x='SleepTime', y='Percentage',
                 hue='HeartDisease', palette=colors2)
ax.set_title("Percentage of Sleep Times by Heart Disease");

Exploring the relationship between Smoking and heart disease

In [None]:
# Share of each heart-disease answer within every value of the first object column.
# NOTE(review): the heading expects obj_cols[0] to be the smoking column — confirm with the printed column list.
show_relation(col=obj_cols[0], according_to='HeartDisease', type_='count')

Exploring the perception smokers have of their general health

In [None]:
# For each GenHealth level, the share of each Smoking answer.
show_relation(col='GenHealth', according_to='Smoking', type_='count')

Exploring the relationship between Stroke incidents and Heart Disease

In [None]:
# Share of each heart-disease answer within every value of the third object column.
# NOTE(review): the heading expects obj_cols[2] to be the stroke column — confirm with the printed column list.
show_relation(col=obj_cols[2], according_to='HeartDisease', type_='count')

Exploring the relationship between Diabetes and Heart Disease

In [None]:
# Share of each heart-disease answer within every value of the eighth object column.
# NOTE(review): the heading expects obj_cols[7] to be the diabetes column — confirm with the printed column list.
show_relation(col=obj_cols[7], according_to='HeartDisease', type_='count')

Exploring the relationship between Asthma and Heart Disease

In [None]:
# Share of each heart-disease answer within every value of the eleventh object column.
# NOTE(review): the heading expects obj_cols[10] to be the asthma column — confirm with the printed column list.
show_relation(col=obj_cols[10], according_to='HeartDisease', type_='count')

Exploring the relationship between Kidney Disease and Heart Disease

In [None]:
# Share of each heart-disease answer within every value of the twelfth object column.
# NOTE(review): the heading expects obj_cols[11] to be the kidney-disease column — confirm with the printed column list.
show_relation(col=obj_cols[11], according_to='HeartDisease', type_='count')