In [None]:
import pandas as pd
import plotly.graph_objects as go
from pathlib import Path
import os
from plotly.subplots import make_subplots


In [None]:
file_dir = Path().resolve().parent
file_dir = os.path.join(file_dir, 'data')
file_dir = os.path.join(file_dir, 'heart_2020_cleaned.csv')
data = pd.read_csv(file_dir)

In [None]:
data.info()

In [None]:
data.info()

In [None]:
for i in data.columns:
    print(i)
    print(data[i].unique())

In [None]:
# HeartDisease
# BMI            
# Smoking        
# AlcoholDrinking
# Stroke         
# PhysicalHealth 
# MentalHealth   
# DiffWalking    
# Sex            
# AgeCategory    
# Race           
# Diabetic       
# PhysicalActivit
# GenHealth      
# SleepTime      
# Asthma         
# KidneyDisease  
# SkinCancer     

In [None]:
data2 = data.groupby(by='SleepTime')['PhysicalHealth']

In [None]:
fig_num = make_subplots(rows=4,cols=1,shared_xaxes=False,subplot_titles=['Sleep Time','Physical Health','Mental Health','BMI'])
row = 1
for i in ['SleepTime','PhysicalHealth','MentalHealth']:
    x = data.loc[data['HeartDisease']=='Yes',i]
    y = x.value_counts(normalize=True)
    fig_num.add_trace(go.Bar(x=y.keys(),y=y.values,name=(i+ ' and heart disease'),legendgroup=row),row=row,col=1)
    x = data.loc[data['HeartDisease']=='No',i]
    y = x.value_counts(normalize=True)
    fig_num.add_trace(go.Bar(x=y.keys(),y=y.values,name=(i+ ' and no heart disease'),legendgroup=row),row=row,col=1)    
    row = row+1

x = data.loc[data['HeartDisease']=='Yes','BMI']
y = x.value_counts(normalize=True)
fig_num.add_trace(go.Histogram(x=y.keys(),y=y.values,name=('BMI'+ ' and heart disease'),legendgroup=row),row=row,col=1)
x = data.loc[data['HeartDisease']=='No','BMI']
y = x.value_counts(normalize=True)
fig_num.add_trace(go.Histogram(x=y.keys(),y=y.values,name=('BMI'+ ' and no heart disease'),legendgroup=row),row=row,col=1)    
fig_num.update_layout(
    title="Numeric data normalized",
    legend=dict(
        orientation="v",
        y = 1,
        x=1,
        tracegroupgap=310,
    ),
    hovermode="x unified",
    plot_bgcolor="#EEE",
    width = 1000,
    height = 1400,
    )
fig_num.show()


In [None]:
fig_norm = make_subplots(rows=4,cols=1,shared_xaxes=False,subplot_titles=['Sleep Time','Physical Health','Mental Health','BMI'])
row = 1
for i in ['SleepTime','PhysicalHealth','MentalHealth']:
    x = data.loc[data['HeartDisease']=='Yes',i]
    y = x.value_counts()
    fig_norm.add_trace(go.Bar(x=y.keys(),y=y.values,name=(i+ ' and heart disease'),legendgroup=row),row=row,col=1)
    x = data.loc[data['HeartDisease']=='No',i]
    y = x.value_counts()
    fig_norm.add_trace(go.Bar(x=y.keys(),y=y.values,name=(i+ ' and no heart disease'),legendgroup=row),row=row,col=1)    
    row = row+1

x = data.loc[data['HeartDisease']=='Yes','BMI']
y = x.value_counts()
fig_norm.add_trace(go.Histogram(x=y.keys(),y=y.values,name=('BMI'+ ' and heart disease'),legendgroup=row),row=row,col=1)
x = data.loc[data['HeartDisease']=='No','BMI']
y = x.value_counts()
fig_norm.add_trace(go.Histogram(x=y.keys(),y=y.values,name=('BMI'+ ' and no heart disease'),legendgroup=row),row=row,col=1) 
fig_norm.update_layout(
    title="Numeric data plots",
    legend=dict(
        groupclick = 'toggleitem',
        orientation="v",
        y = 1,
        x=1,
        tracegroupgap=310,
    ),
    hovermode="x unified",
    plot_bgcolor="#EEE",
    width = 1000,
    height = 1400,
    )
fig_norm.show()   


In [None]:
lista = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory',
       'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth',
       'Asthma', 'KidneyDisease', 'SkinCancer']
select_bin_columns=[]
for i in lista:
    if len(data[i].unique())<=2:
        print(i)
        select_bin_columns.append(i)

In [None]:
fig_categorical = make_subplots(rows=len(lista),cols=1,shared_xaxes=False,subplot_titles=lista)
row = 1
for i in lista:
    x = data.loc[data['HeartDisease']=='Yes',i]
    y = x.value_counts()
    fig_categorical.add_trace(go.Bar(x=y.keys(),y=y.values,name=(i+ ' and heart disease'),legendgroup=row),row=row,col=1)
    x = data.loc[data['HeartDisease']=='No',i]
    y = x.value_counts()
    fig_categorical.add_trace(go.Bar(x=y.keys(),y=y.values,name=(i+ ' and no heart disease'),legendgroup=row),row=row,col=1)    
    row = row+1

fig_categorical.update_layout(
    title="Categorical data plots",
    barmode = 'stack',
    legend=dict(
        groupclick = 'toggleitem',
        orientation="v",
        y = 1,
        x=1,
        tracegroupgap=270,
    ),
    hovermode="x unified",
    plot_bgcolor="#EEE",
    width = 1000,
    height = 4200,
    )
fig_categorical.show()   

In [None]:
import wandb

In [2]:
!wandb login --relogin

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
Aborted!


In [None]:
run = wandb.init(project="project_heart",save_code=True)

In [None]:
run.log(
    {
        "categorical eda": wandb.Plotly(fig_categorical)
    }
)
run.log(
    {
        "numerical eda": wandb.Plotly(fig_num)
    }
)
run.log(
    {
        "numerical normalizado eda": wandb.Plotly(fig_norm)
    }
)