In [None]:
import collections
import numpy as np
import pandas as pd
import csv
import altair as alt

In [None]:
def plotGroupDistribution(data, groups, attribute_name, custom_labels):
  """Bar-chart the share of records falling into each bin of one attribute.

  The values of ``data[attribute_name]`` are binned with ``pd.cut`` using
  ``right=False``, so every bin is left-closed and right-open:
  ``[groups[i], groups[i+1])``. Each bar is the bin count divided by the
  total number of rows in ``data``; rows that ``pd.cut`` leaves without a
  bin are not counted in any bar but still count in the denominator, so
  the bars can sum to less than 1.

  Side effect: the bin assignment is written onto ``data`` as a new column
  named ``attribute_name + '_group'`` (the caller's frame is modified).

  Args:
    data: DataFrame containing the column ``attribute_name``.
    groups: Bin edges, forwarded to ``pd.cut`` as ``bins``.
    attribute_name: Name of the column to bin; also used as the x-axis title.
    custom_labels: One label per bin, forwarded to ``pd.cut`` as ``labels``.

  Returns:
    An Altair bar chart with one bar per bin, drawn in bin order.
  """
  group_column = attribute_name + '_group'
  data[group_column] = pd.cut(data[attribute_name], bins=groups, labels=custom_labels, right=False)

  # value_counts() orders by frequency; sort_index() restores bin order.
  group_counts = data[group_column].value_counts().sort_index()
  total_records = len(data)
  group_proportions = group_counts / total_records

  group_names = group_proportions.index.astype(str).tolist()
  chart_data = pd.DataFrame({'Proportion': group_proportions.to_numpy(),
                             attribute_name: group_names})

  chart = alt.Chart(chart_data).mark_bar().encode(
      # Without an explicit sort Altair orders nominal values alphabetically,
      # which only matches bin order by coincidence of the label text.
      x=alt.X(attribute_name+':N', title=attribute_name, sort=group_names,
              axis=alt.Axis(labelAngle=0)),
      y=alt.Y('Proportion:Q', title='Proportion')
  ).properties(
      width=500)
  return chart

## Diabetes Group Distributions

In [None]:
# Placeholder location of the diabetes CSV; point this at the real file.
data_dir = '/path/to/diabetes.csv'

# Read the file, shuffle every row (frac=1 samples the whole frame; no
# random_state is given, so the order differs between runs) and renumber
# the index from 0.
diabetes_data = (
    pd.read_csv(data_dir)
    .sample(frac=1)
    .reset_index(drop=True)
)

In [None]:
# Bin edges passed to plotGroupDistribution; consecutive edges delimit one bin.
preg_groups = [0, 1, 3, 6, 9, 18]        # number of pregnancies
bmi_groups = [0, 18.5, 25, 30, 40, 100]  # body-mass index
age_groups = [20, 30, 40, 50, 100]       # age

In [None]:
# plotGroupDistribution bins with right=False, i.e. each group is
# [lower, upper). The BMI and Age labels are written to match that closure
# (a value sitting exactly on an edge belongs to the bin that starts there).
plot_preg = plotGroupDistribution(diabetes_data, preg_groups, 'Pregnancies', ["0", "1 − 2", "3 − 5", "6 − 8", "≥ 9"])
plot_bmi = plotGroupDistribution(diabetes_data, bmi_groups, 'BMI', ["0 ≤ x < 18.5", "18.5 ≤ x < 25", "25 ≤ x < 30", "30 ≤ x < 40", "x ≥ 40"])
plot_age = plotGroupDistribution(diabetes_data, age_groups, 'Age', ["20 ≤ x < 30", "30 ≤ x < 40", "40 ≤ x < 50", "x ≥ 50"])

plot_preg.show()
plot_bmi.show()
plot_age.show()

## Heart Group Distributions

In [None]:
# Placeholder location of the heart CSV; point this at the real file.
data_dir = '/path/to/heart.csv'

# Read the file, shuffle every row (frac=1 samples the whole frame; no
# random_state is given, so the order differs between runs) and renumber
# the index from 0.
heart_data = (
    pd.read_csv(data_dir)
    .sample(frac=1)
    .reset_index(drop=True)
)

In [None]:
# Age bin edges for the heart data (this rebinds age_groups and plot_age,
# which the diabetes section also assigns).
age_groups = [25,40,55,70,85]

# plotGroupDistribution bins with right=False, so each group is
# [lower, upper); the labels are written to match that closure.
plot_age = plotGroupDistribution(heart_data, age_groups, 'Age', ["25 ≤ x < 40", "40 ≤ x < 55", "55 ≤ x < 70", "x ≥ 70"])

plot_age.show()

## Laplace Distribution

In [None]:
x = np.linspace(-10, 10, 1000)
scales = [0.5, 1, 2, 4]
mu = 0  # location parameter

# Build one long-format frame per scale b; they are stacked below so the
# chart can colour the curves by the 'scale' column.
data = []
for scale in scales:
    # Laplace density: f(x) = (1 / (2b)) * exp(-|x - mu| / b)
    norm = 1 / (2 * scale)
    pdf = norm * np.exp(-np.abs(x - mu) / scale)

    # The label string is a scalar, so assign() repeats it on every row.
    df = pd.DataFrame({'x': x, 'density': pdf}).assign(scale=f'b = {scale}')
    data.append(df)

# Stack the per-scale frames into a single table with a fresh 0..n-1 index.
df_combined = pd.concat(data, ignore_index=True)

# Line chart: one curve per scale value. No chart title is set here, so the
# configure_title() font size only matters if a title is added later, e.g.
# title='Laplacian Distribution with Different Scale Parameters'.
plot = (
    alt.Chart(df_combined)
    .mark_line()
    .encode(
        x=alt.X('x', title='x'),
        y=alt.Y('density', title='Probability Density'),
        color=alt.Color('scale:N', title='Scale'),
        tooltip=['x', 'density', 'scale'],
    )
    .properties(width=600, height=400)
    .configure_axis(labelFontSize=12, titleFontSize=14)
    .configure_title(fontSize=16)
    .configure_legend(labelFontSize=12, titleFontSize=14)
)

# Last expression of the cell: renders the chart.
plot

## Gaussian Distribution

In [None]:
x = np.linspace(-10, 10, 1000)
std_devs = [0.5, 1, 2, 4]
mu = 0  # mean parameter

# Build one long-format frame per standard deviation; they are stacked below
# so the chart can colour the curves by the 'std_dev' column.
data = []
for sigma in std_devs:
    # Gaussian density: f(x) = (1 / (sigma * sqrt(2*pi))) * exp(-(x - mu)^2 / (2 * sigma^2))
    norm = 1 / (sigma * np.sqrt(2 * np.pi))
    pdf = norm * np.exp(-(x - mu)**2 / (2 * sigma**2))

    # The label string is a scalar, so assign() repeats it on every row.
    df = pd.DataFrame({'x': x, 'density': pdf}).assign(std_dev=f'σ = {sigma}')
    data.append(df)

# Stack the per-sigma frames into a single table with a fresh 0..n-1 index.
df_combined = pd.concat(data, ignore_index=True)

# Line chart: one curve per sigma value. No chart title is set here, so the
# configure_title() font size only matters if a title is added later, e.g.
# title='Gaussian Distribution with Different Standard Deviations'.
plot = (
    alt.Chart(df_combined)
    .mark_line()
    .encode(
        x=alt.X('x', title='x'),
        y=alt.Y('density', title='Probability Density'),
        color=alt.Color('std_dev:N', title='Standard Deviation'),
        tooltip=['x', 'density', 'std_dev'],
    )
    .properties(width=600, height=400)
    .configure_axis(labelFontSize=12, titleFontSize=14)
    .configure_title(fontSize=16)
    .configure_legend(labelFontSize=12, titleFontSize=14)
)

# Last expression of the cell: renders the chart.
plot