In [17]:
# Side-by-side barplot of age distribution
# Interactivity (tooltips): average duration of stay for each gender, alongside
# duration of stay, age, and test-result counts (inconclusive, normal, abnormal)
import pandas as pd
import altair as alt

# Load the cleaned dataset; every later step in this cell derives from it.
df = pd.read_csv("healthcare_dataset_cleaned.csv")

# Decade-wide age buckets. With right=False each bucket is [lower, upper),
# so an age below 20 or at/above 90 gets a missing age_group and is dropped
# by the groupby below (groupby skips missing keys by default).
bins = [20, 30, 40, 50, 60, 70, 80, 90]
labels = [f"{lower}–{upper}" for lower, upper in zip(bins[:-1], bins[1:])]

df['age_group'] = pd.cut(df['Age'], bins=bins, labels=labels, right=False)
df['result_type'] = df['Test Results']  # same values under a second column name

# One row per (age group, gender) pair.
# * observed=False is spelled out because age_group is categorical: relying on
#   the implicit default triggers a pandas FutureWarning. Passing it explicitly
#   keeps the existing behaviour (every bucket appears, even with no patients).
# * The mean uses the built-in aggregation and is rounded afterwards, so a
#   bucket whose mean is NaN stays NaN instead of raising inside round().
agg_df = (
    df.groupby(['age_group', 'Gender'], observed=False)
    .agg(
        count=('Name', 'count'),
        avg_duration=('Duration of Stay', 'mean'),
    )
    .reset_index()
)
agg_df['avg_duration'] = agg_df['avg_duration'].round()

# Bar colour per gender: keys feed the colour-scale domain, values its range.
color_map = {'Female': '#bebada',
             'Male': '#fdb462'}

chart = alt.Chart(agg_df).mark_bar().encode(
    x=alt.X('age_group:N', title='Age Group', axis=alt.Axis(labelAngle=0)),
    xOffset='Gender:N',  # makes the bars side-by-side
    y=alt.Y('count:Q', title='Number of Patients', scale=alt.Scale(zero=True)),
    color=alt.Color('Gender:N', scale=alt.Scale(domain=list(color_map.keys()), range=list(color_map.values()))),
    # Hovering a bar shows its bucket, gender, patient count and mean stay.
    tooltip=[
        alt.Tooltip('age_group:N', title='Age Group'),
        alt.Tooltip('Gender:N'),
        alt.Tooltip('count:Q', title='Number of Patients'),
        alt.Tooltip('avg_duration:Q', title='Avg. Duration of Stay')
    ]
).properties(
    title='Age Distribution by Gender With Average Duration Stay Info',
    width=700,
    height=400
)

# Render in the notebook, then write a standalone HTML copy next to it.
chart.show()
chart.save('Gender_Average_Duration.html')


  agg_df = df.groupby(['age_group', 'Gender']).agg(


In [None]:
# Count patients for every (gender, test result) pair; one row per pair.
# Relies on `df` (with its `result_type` column) from the earlier cell.
pair_sizes = df.groupby(['Gender', 'result_type']).size()
grouped = pair_sizes.reset_index(name='Count')

# Bar colour per gender: keys feed the colour-scale domain, values its range.
color_map = dict(Female='#8dd3c7', Male='#fb8072')

gender_color = alt.Color(
    'Gender:N',
    title='Gender',
    scale=alt.Scale(domain=list(color_map), range=list(color_map.values())),
)

chart = (
    alt.Chart(grouped)
    .mark_bar()
    .encode(
        x=alt.X('result_type:N', title='Test Result Type', axis=alt.Axis(labelAngle=0)),
        xOffset='Gender:N',  # one bar per gender within each result type
        y=alt.Y('Count:Q', title='Number of Patients'),
        color=gender_color,
    )
    .properties(title='Distribution of Test Result by Gender', width=500, height=400)
)

# Write the standalone HTML copy first, then render in the notebook.
chart.save("Gender_Test_Results.html")
chart.show()