<a href="https://colab.research.google.com/github/amirmsk/data.analysis.python.course/blob/main/medical_data_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Read the medical examination records from the CSV file in the working
# directory into the notebook-wide DataFrame `df`.
df = pd.read_csv(filepath_or_buffer='medical_examination.csv')


In [None]:
# Body-mass index is weight (kg) divided by the square of height in METRES.
# NOTE(review): this assumes `height` is stored in centimetres and `weight`
# in kilograms, as in the public freeCodeCamp medical examination dataset —
# confirm against the CSV. Dividing by the raw centimetre value squared
# yields BMIs around 0.002, so nobody would ever be flagged as overweight.
height_m = df['height'] / 100
df['bmi'] = df['weight'] / height_m ** 2

# Flag overweight patients (BMI above 25) as a binary 0/1 column
df['overweight'] = (df['bmi'] > 25).astype('int')


In [None]:
# Collapse cholesterol to a binary flag: a value of exactly 1 becomes 0,
# every other value becomes 1 (vectorised comparison instead of a per-row lambda).
is_cholesterol_elevated = df['cholesterol'] != 1
df['cholesterol'] = is_cholesterol_elevated.astype('int64')

# Same binary encoding for glucose.
# NOTE(review): the column name 'glucose' must match the CSV header; the
# public freeCodeCamp dataset calls it 'gluc' — confirm.
is_glucose_elevated = df['glucose'] != 1
df['glucose'] = is_glucose_elevated.astype('int64')


In [None]:
import seaborn as sns

# Categorical risk factors to compare between the two Cardio groups.
# NOTE(review): these names (and 'Cardio') must match the CSV header exactly;
# the public freeCodeCamp dataset uses 'gluc', 'alco' and 'cardio' — confirm.
categorical_features = ['cholesterol', 'glucose', 'alcohol', 'active', 'smoke']

# Reshape to long format: one row per (patient, feature) pair, with the
# feature name in 'variable' and its value in 'value'; 'Cardio' is kept as id.
long_df = df.melt(id_vars='Cardio', value_vars=categorical_features)

# A count plot tallies rows itself, so it accepts only ONE of x / y; passing
# both (as before, with y='value') makes seaborn raise an error. Count rows
# per feature instead, splitting bars by the feature's value (hue) and
# drawing one panel per Cardio group (col).
sns.catplot(x='variable', hue='value', col='Cardio', data=long_df, kind='count')


In [None]:
# Compute every percentile cut-off from the same frame BEFORE filtering.
# Filtering step by step (reassigning df between steps) makes each later
# quantile run on already-trimmed data, so the 2.5th / 97.5th percentile
# thresholds drift away from those of the full distribution.
height_low = df['height'].quantile(0.025)
height_high = df['height'].quantile(0.975)
weight_low = df['weight'].quantile(0.025)
weight_high = df['weight'].quantile(0.975)

# Keep a row only if it passes all checks:
#   - diastolic pressure (ap_lo) does not exceed systolic pressure (ap_hi)
#   - height lies within the 2.5th-97.5th percentile range (inclusive)
#   - weight lies within the 2.5th-97.5th percentile range (inclusive)
keep_rows = (
    (df['ap_lo'] <= df['ap_hi'])
    & (df['height'] >= height_low)
    & (df['height'] <= height_high)
    & (df['weight'] >= weight_low)
    & (df['weight'] <= weight_high)
)

# NOTE: this rebinds `df`, so re-running the cell trims the data again;
# reload the CSV before re-running.
df = df[keep_rows]


In [None]:
import numpy as np

# Correlate the feature columns (everything except the 'Cardio' column).
features = df.drop(['Cardio'], axis=1)

# np.corrcoef treats each ROW as a variable by default (rowvar=True), which
# would correlate patients with each other and build a huge
# n_patients x n_patients matrix. rowvar=False makes each COLUMN a variable,
# giving the intended n_features x n_features correlation matrix.
corr_matrix = np.corrcoef(features, rowvar=False)

# Hide the upper triangle (diagonal included): the matrix is symmetric, so
# the lower triangle carries all the information.
upper_triangle = np.triu(np.ones_like(corr_matrix, dtype=bool))

# corr_matrix is a plain ndarray, so pass the column names as tick labels.
feature_names = features.columns.tolist()
sns.heatmap(
    corr_matrix,
    mask=upper_triangle,
    xticklabels=feature_names,
    yticklabels=feature_names,
)


In [None]:
import numpy as np

# Names of all columns currently in the DataFrame
variables = df.columns.tolist()

# Print every column that contains at least one missing value.
# Series.mean() skips NaN by default, so its result is NaN only when the
# whole column is missing; with skipna=False a single NaN makes the mean NaN,
# which is what this check needs.
for variable in variables:
    if np.isnan(df[variable].mean(skipna=False)):
        print(variable)
