In [None]:
# Colab-only setup: install the required packages and enable the custom
# widget manager. Outside Colab the `google.colab` import below raises
# ModuleNotFoundError, so the rest of the block is skipped.
try:
    import warnings
    import logging
    from google.colab import output

    # Install daspi and ipywidgets in Colab
    !pip install daspi ipywidgets --quiet

    # Suppress matplotlib font warnings in Colab
    warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
    logging.getLogger('matplotlib.font_manager').setLevel(logging.ERROR)

    # Required in Colab so that the ipywidgets controls used below are rendered
    output.enable_custom_widget_manager()
except ModuleNotFoundError:
    # `google.colab` is not importable: not running in Colab, nothing to set up
    pass

In [None]:
import numpy as np
import daspi as dsp
import pandas as pd
from scipy.special import comb

from ipywidgets import interact
from ipywidgets import IntSlider
from ipywidgets import Dropdown

# Set daspi's language option to German
# (presumably controls the language of texts daspi generates - confirm in daspi docs)
dsp.STR.LANGUAGE = 'de'

# Seed NumPy's global random generator once; every later np.random call
# (including each re-run of the interactive plot) advances this shared state
np.random.seed(42)

def theoretical_dice_probability(n_dices: int) -> pd.DataFrame:
    """Calculate the theoretical distribution of the sum of n six-sided dice.

    The number of ways to reach a sum ``s`` with ``n`` dice is obtained by
    inclusion-exclusion::

        ways(s) = sum_{i=0}^{(s - n) // 6} (-1)**i * C(n, i) * C(s - 6*i - 1, n - 1)

    All counts are computed with exact integer arithmetic and are divided
    by the number of equally likely outcomes ``6**n`` only at the end.

    Parameters
    ----------
    n_dices : int
        Number of dice rolled together. Must be at least 1.

    Returns
    -------
    pd.DataFrame
        One row per possible sum (``n_dices`` .. ``6 * n_dices``) with the
        columns ``'sum'``, ``'theoretical_probability'`` and
        ``'theoretical_cum_probability'``.

    Raises
    ------
    ValueError
        If `n_dices` is smaller than 1.
    """
    if n_dices < 1:
        raise ValueError(f'n_dices must be at least 1, got {n_dices}')

    n_outcomes = 6**n_dices
    sums = list(range(n_dices, 6 * n_dices + 1))

    theoretical_probs = []
    for s in sums:
        # Terms with i > (s - n_dices) // 6 would need a binomial coefficient
        # with a top value below n_dices - 1, which is zero, so the loop
        # stops at the last non-vanishing term.
        ways = sum(
            (-1)**i
            * comb(n_dices, i, exact=True)
            * comb(s - 6*i - 1, n_dices - 1, exact=True)
            for i in range((s - n_dices) // 6 + 1))
        theoretical_probs.append(ways / n_outcomes)

    theoretical_data = pd.DataFrame({
        'sum': sums,
        'theoretical_probability': theoretical_probs
    })
    theoretical_data['theoretical_cum_probability'] = (
        theoretical_data['theoretical_probability'].cumsum())

    return theoretical_data

def plot_dice_probability_distribution(
        n_dices: int, exp_samples: int = 6, kind: str = 'Empirisch') -> None:
    """Plot the distribution of the sum of `n_dices` six-sided dice.

    Builds a 2x2 daspi ``JointChart`` with the targets count, cumulative
    count, probability and cumulative probability, each plotted against
    the dice sum.

    Parameters
    ----------
    n_dices : int
        Number of dice rolled together.
    exp_samples : int, optional
        Base-10 exponent of the number of rolls; ``10**exp_samples`` rolls
        are simulated (and used to scale the theoretical counts),
        by default 6.
    kind : str, optional
        Which data to show: ``'Empirisch'`` (simulated rolls only),
        ``'Theoretisch'`` (exact distribution only) or ``'Beide'`` (both,
        distinguished by the hue column ``'Datenquelle'``),
        by default ``'Empirisch'``.

    Raises
    ------
    ValueError
        If `kind` is not one of the three supported values.
    """
    sub_title_additions = {
        'Empirisch': ' (Empirisch)',
        'Theoretisch': ' (Theoretisch)',
        'Beide': ' (Empirisch vs. Theoretisch)',
    }
    if kind not in sub_title_additions:
        # Fail early with a clear message instead of letting pd.concat
        # complain about an empty list of data sets further down.
        raise ValueError(
            f'Unknown kind {kind!r}, expected one of {list(sub_title_additions)}')
    sub_title_addition = sub_title_additions[kind]

    show_both = kind == 'Beide'
    hue = 'Datenquelle' if show_both else None
    # Presumably one dodge flag per axes (4 axes in the 2x2 grid) - confirm
    # against the daspi JointChart documentation.
    dodge = (True, False, True, False) if show_both else False

    n_samples = 10**exp_samples
    datasets = []

    if kind in ('Empirisch', 'Beide'):
        # Simulate only when the empirical data is actually shown; each row
        # is one roll of all dice, summed to the total number of pips.
        dice_rolls = np.random.randint(
            1, 7, size=(n_samples, n_dices)).sum(axis=1)
        # np.unique returns the observed sums in ascending order together
        # with their frequencies, independent of the pandas version's
        # naming of value_counts() results.
        observed_sums, counts = np.unique(dice_rolls, return_counts=True)
        empirical_data = pd.DataFrame({'sum': observed_sums, 'count': counts})
        empirical_data['cum_count'] = empirical_data['count'].cumsum()
        empirical_data['probability'] = empirical_data['count'] / n_samples
        empirical_data['cum_probability'] = empirical_data['probability'].cumsum()
        empirical_data['Datenquelle'] = 'Empirisch'
        datasets.append(empirical_data)

    if kind in ('Theoretisch', 'Beide'):
        theoretical_data = theoretical_dice_probability(n_dices)
        # Scale probabilities to expected counts so both data sources share
        # the same target columns.
        theoretical_data['count'] = theoretical_data['theoretical_probability'] * n_samples
        theoretical_data['cum_count'] = theoretical_data['count'].cumsum()
        theoretical_data['probability'] = theoretical_data['theoretical_probability']
        theoretical_data['cum_probability'] = theoretical_data['theoretical_cum_probability']
        theoretical_data['Datenquelle'] = 'Theoretisch'
        datasets.append(theoretical_data)

    data = pd.concat(datasets, ignore_index=True)

    # Six plot calls on four axes: the two Line plots reuse the axes of the
    # preceding Scatter plot via on_last_axes=True.
    dsp.JointChart(
            source=data,
            target=('count', 'cum_count', 'probability', 'cum_probability'),
            feature='sum',
            hue=hue,
            dodge=dodge,
            sharex=True,
            categorical_feature=True,
            nrows=2,
            ncols=2,
            stretch_figsize=1.6,
            dpi=400,
        ).plot(
            dsp.Bar,
        ).plot(
            dsp.Scatter,
        ).plot(
            dsp.Line,
            on_last_axes=True,
        ).plot(
            dsp.Bar,
        ).plot(
            dsp.Scatter,
        ).plot(
            dsp.Line,
            on_last_axes=True,
        ).stripes(
            mean=True,
        ).label(
            fig_title='Wahrscheinlichkeitsverteilung der Augensumme von Würfeln',
            # Format with ',' as thousands separator, then swap it for "'"
            sub_title=f'{n_samples:,} Würfe mit {n_dices} sechsseitigen Würfeln{sub_title_addition}'.replace(',', "'"),
            target_label=('Anzahl Beobachtungen', 'Kumulative Beobachtungen', 'Wahrscheinlichkeit', 'Kumulative Wahrscheinlichkeit'),
            feature_label='Augensumme',
            )

In [None]:
# Interactive controls for the dice-sum chart: every widget change calls
# plot_dice_probability_distribution again with the current widget values.
interact(
    plot_dice_probability_distribution,
    n_dices=IntSlider(
        value=1, min=1, max=10, step=1, description='N Würfel:'),
    # The slider value is an exponent: the plot function simulates
    # 10**exp_samples rolls (10 up to 1,000,000).
    exp_samples=IntSlider(
        value=2, min=1, max=6, step=1, description='Stichprobengröße:'),
    kind=Dropdown(
        value='Empirisch',
        options=['Empirisch', 'Theoretisch', 'Beide'],
        description='Datenquelle:',))

In [None]:
def p_coin_same_face(n_coins: int, n_flips: int) -> float:
    """Return the joint probability for `n_coins` independent fair coins.

    The single-coin probability is ``p_0 * (1 - p_0)**(n_flips - 1)`` with
    ``p_0 = 1/2``, which equals ``0.5**n_flips`` (one given face on every
    one of `n_flips` flips). The coins are treated as independent, so the
    single-coin probability is raised to the power `n_coins`.

    Parameters
    ----------
    n_coins : int
        Number of independent coins.
    n_flips : int
        Number of flips per coin.

    Returns
    -------
    float
        The joint probability over all coins.
    """
    p_0 = 1 / 2
    p_single = p_0 * (1 - p_0)**(n_flips - 1)
    # `**` is right-associative: written as one chained expression the
    # exponent would become (n_flips - 1)**n_coins, so the single-coin
    # probability is computed first and only then raised to n_coins.
    return p_single**n_coins

def p_dice_same_face(n_dices: int, n_rolls: int) -> float:
    """Return the joint probability for `n_dices` independent fair dice.

    The single-die probability is ``p_0 * (1 - p_0)**(n_rolls - 1)`` with
    ``p_0 = 1/6``. The dice are treated as independent, so the single-die
    probability is raised to the power `n_dices`.

    NOTE(review): the single-die term is the probability that a given face
    does not show on the first ``n_rolls - 1`` rolls and shows on the last
    one; confirm that this (and not ``p_0**n_rolls``) is the intended
    event.

    Parameters
    ----------
    n_dices : int
        Number of independent dice.
    n_rolls : int
        Number of rolls per die.

    Returns
    -------
    float
        The joint probability over all dice.
    """
    p_0 = 1 / 6
    p_single = p_0 * (1 - p_0)**(n_rolls - 1)
    # `**` is right-associative: written as one chained expression the
    # exponent would become (n_rolls - 1)**n_dices, so the single-die
    # probability is computed first and only then raised to n_dices.
    return p_single**n_dices

# Print the probability for a single coin for 1 up to 9 flips,
# one line per number of flips, with ten decimal places.
for i in range(1, 10):
    probability = p_coin_same_face(1, i)
    print(
        f'Wahrscheinlichkeit, dass bei {1} Münzen, die {i} mal geworfen '
        f'werden, Zahl oben liegt: {probability:.10f}')