<a href="https://colab.research.google.com/github/alexcpass/python_ad/blob/main/medical_data_visualizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# Load the medical examination records from the Colab working directory.
# The file is comma-separated, which is the read_csv default delimiter.
df = pd.read_csv('/content/medical_examination.csv')

In [None]:
# Preview the first two rows to check that the file loaded as expected.
df.head(n=2)

Graphic 1

In [None]:
# Split the records into two groups by the value of the 'cardio' column (1 and 0)
df_cardio_1 = df.loc[df['cardio'] == 1]
df_cardio_0 = df.loc[df['cardio'] == 0]

def count_values(data, column):
    """Count how often each distinct value occurs in one column.

    Args:
        data: DataFrame holding the column.
        column: Name of the column to tally.

    Returns:
        A Series indexed by the distinct values (in ascending index order)
        whose entries are the number of occurrences of each value.
    """
    frequencies = data[column].value_counts()
    return frequencies.sort_index()

# Columns whose value counts are compared between the two cardio groups
columns = ['cholesterol', 'gluc', 'alco', 'active', 'smoke']

# Grid of bar charts: one row per column, cardio = 1 on the left and
# cardio = 0 on the right
fig, axs = plt.subplots(nrows=len(columns), ncols=2, figsize=(10, 15))

for row_axes, col in zip(axs, columns):
    ax_cardio_1, ax_cardio_0 = row_axes

    tally_1 = count_values(df_cardio_1, col)
    ax_cardio_1.bar(tally_1.index, tally_1.values)
    ax_cardio_1.set_title(f'Cardio = 1 - {col}')

    tally_0 = count_values(df_cardio_0, col)
    ax_cardio_0.bar(tally_0.index, tally_0.values)
    ax_cardio_0.set_title(f'Cardio = 0 - {col}')

# Adjust subplot spacing so titles and tick labels do not overlap
plt.tight_layout()

# Render the figure
plt.show()


Add an overweight column to the data.

In [None]:
# Body mass index (stored as 'imc'): weight divided by the square of
# height / 100
df['imc'] = df['weight'] / (
    (df['height'] / 100) ** 2
)

def calculate_overweight(imc):
    """Return 1 when the given BMI value is strictly above 25, otherwise 0."""
    return 1 if imc > 25 else 0

# Build the "overweight" column by flagging every BMI value (1 = above 25, else 0)
df['overweight'] = [calculate_overweight(bmi) for bmi in df['imc']]

df.head()


Normalize

In [None]:
def normalize_value(value):
    """Collapse a level into a 0/1 flag.

    A value equal to 1 becomes 0, any value greater than 1 becomes 1, and
    every other value (anything below 1) is returned unchanged.
    """
    if value > 1:
        return 1
    return 0 if value == 1 else value

# Normalize 'cholesterol' and 'gluc' in place with normalize_value.
# Run this cell only once per kernel session: a second run would map the 1s
# produced by the first run to 0.
for level_col in ('cholesterol', 'gluc'):
    df[level_col] = df[level_col].apply(normalize_value)

# Show the updated data
df.head()


Graphic 2

In [None]:
# Reshape to long format: each row holds the cardio value plus one
# (feature, value) pair taken from the listed columns
df_long = df.melt(
    id_vars=['cardio'],
    value_vars=['cholesterol', 'gluc', 'alco', 'active', 'smoke'],
    var_name='feature',
    value_name='value',
)

# Count plot built with Seaborn's catplot(): one panel per cardio value,
# bars grouped by value and coloured by feature
sns.catplot(data=df_long, x='value', hue='feature', col='cardio', kind='count')
plt.show()


Filter out rows where diastolic pressure is higher than systolic pressure

In [None]:
# Start the cleaned frame: keep only the rows whose ap_lo reading does not
# exceed the ap_hi reading
df_clean = df.loc[df['ap_lo'] <= df['ap_hi']]
df_clean.head()


Filter out rows where height is less than the 2.5th percentile

In [None]:
# Drop rows whose height is below the 2.5th percentile.
# The threshold is computed on the full dataset, but the filter is applied to
# the existing df_clean so the cleaning done so far is kept rather than
# being overwritten by a fresh selection from df.
df_clean = df_clean[df_clean['height'] >= df['height'].quantile(0.025)]
df_clean.head()


Filter out rows where height is more than the 97.5th percentile

In [None]:
# Drop rows whose height is above the 97.5th percentile.
# The original comparison (>=) kept ONLY those tall outliers; the rows to keep
# are the ones at or below the threshold.
# The threshold is computed on the full dataset, but the filter is applied to
# the existing df_clean so the cleaning done so far is kept rather than
# being overwritten by a fresh selection from df.
df_clean = df_clean[df_clean['height'] <= df['height'].quantile(0.975)]
df_clean.head()


Filter out rows where weight is less than the 2.5th percentile

In [None]:
# Drop rows whose weight is below the 2.5th percentile.
# The original comparison (<=) kept ONLY those light outliers; the rows to keep
# are the ones at or above the threshold.
# The threshold is computed on the full dataset, but the filter is applied to
# the existing df_clean so the cleaning done so far is kept rather than
# being overwritten by a fresh selection from df.
df_clean = df_clean[df_clean['weight'] >= df['weight'].quantile(0.025)]
df_clean.head()


Filter out rows where weight is more than the 97.5th percentile

In [None]:
# Drop rows whose weight is above the 97.5th percentile.
# The original comparison (>) kept ONLY those heavy outliers; the rows to keep
# are the ones at or below the threshold.
# The threshold is computed on the full dataset, but the filter is applied to
# the existing df_clean so the cleaning done so far is kept rather than
# being overwritten by a fresh selection from df.
df_clean = df_clean[df_clean['weight'] <= df['weight'].quantile(0.975)]
df_clean.head()


Create a correlation matrix using the dataset

In [None]:
# Replace missing values with 0 before computing the correlations.
# NOTE(review): the matrix is computed on df (every row and every column),
# not on df_clean - confirm that the unfiltered data is what is wanted here.
df = df.fillna(value=0)

# Pairwise correlation between all columns
correlation_matrix = df.corr()

# Boolean mask covering the upper triangle including the diagonal; masked
# cells are not drawn, so only the lower triangle of the matrix is shown
mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    mask=mask,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    cbar=True,
    square=True,
    ax=ax,
)
plt.tight_layout()
plt.show()
