# Geometric distribution
## $P(X = m) = p\,(1-p)^{m-1}, \quad m = 1, 2, \dots$

### Глобальные переменные

In [10]:
# Tolerance used by define_probabilities: accumulation of probabilities stops
# once their running total reaches 1 - eps.
eps = 1e-7
# Trial numbers for which probabilities are evaluated: 1 through 200.
m = [*range(1, 201)]

### Вычисление теоретических вероятностей в m[i] 

In [11]:
def define_probabilities(m, p, tolerance=None):
    """Return the theoretical geometric probabilities for the trial numbers in ``m``.

    For every trial number ``k`` taken from ``m`` (in order) the probability
    ``p * (1 - p) ** (k - 1)`` is computed.  Evaluation stops early as soon as
    the running total of the computed probabilities reaches ``1 - tolerance``.

    Args:
        m: Iterable of trial numbers.  It is consumed only once, so one-shot
            iterators and generators are supported.
        p: Success probability of a single trial.
        tolerance: How close to 1 the running total must get before the
            remaining trial numbers are skipped.  When omitted, the
            module-level ``eps`` is used.

    Returns:
        dict mapping each processed trial number to its probability, in the
        order the trial numbers were encountered.
    """
    if tolerance is None:
        # Keep two-argument calls tied to the notebook-wide setting.
        tolerance = eps
    threshold = 1 - tolerance

    probabilities = {}
    cumulative = 0
    for trial in m:
        mass = p * (1 - p) ** (trial - 1)
        # Fill the mapping while iterating instead of zipping ``m`` afterwards:
        # a second pass over an already exhausted iterator would yield nothing.
        probabilities[trial] = mass
        cumulative += mass
        if cumulative >= threshold:
            break
    return probabilities

### Разбиение на интервалы (точки интервалов)

In [12]:
def define_points(p_list):
    """Turn a sequence of probabilities into partition points.

    Args:
        p_list: Iterable of probabilities, in the order they should be laid
            out.

    Returns:
        list that starts with 0, continues with the running totals of
        ``p_list`` and ends with an extra closing point 1.
    """
    points = [0]
    for probability in p_list:
        # Each new point is the previous one shifted by the next probability.
        points.append(points[-1] + probability)
    points.append(1)
    return points

### Выделение интервалов 

In [13]:
from collections import namedtuple
def define_intervals(points):
    """Build consecutive intervals from a list of partition points.

    Every pair of neighbouring points becomes one ``Interval`` named by its
    1-based position.  All intervals are open on the left; every interval
    except the last is closed on the right, the last one is open on both
    sides.

    Args:
        points: Sequence of interval boundaries.

    Returns:
        list of ``Interval`` named tuples with the fields ``name``, ``start``,
        ``end``, ``left_closed`` and ``right_closed``.
    """
    Interval = namedtuple('Interval', ['name','start', 'end', 'left_closed','right_closed'])
    last = len(points) - 1

    # Half-open intervals (start; end] for everything before the final pair.
    intervals = [
        Interval(name=k + 1, start=points[k], end=points[k + 1],
                 left_closed=False, right_closed=True)
        for k in range(last - 1)
    ]
    # The final pair of points forms an interval that is open on both sides.
    intervals.append(
        Interval(name=last, start=points[last - 1], end=points[last],
                 left_closed=False, right_closed=False)
    )
    return intervals

### Принадлежность точки к интервалу

In [14]:
def find_in_interval(value, intervals):
    """Return the first interval that contains ``value``.

    An interval's ``left_closed`` / ``right_closed`` flags decide whether the
    corresponding boundary itself counts as inside.

    Args:
        value: Number to locate.
        intervals: Iterable of objects exposing ``start``, ``end``,
            ``left_closed`` and ``right_closed``.

    Returns:
        The first matching interval, or ``None`` when no interval contains
        ``value``.
    """
    for candidate in intervals:
        if candidate.left_closed:
            above_start = value >= candidate.start
        else:
            above_start = value > candidate.start

        if candidate.right_closed:
            below_end = value <= candidate.end
        else:
            below_end = value < candidate.end

        if above_start and below_end:
            return candidate
    return None

### Количество вхождений случайных чисел в каждый интервал

In [15]:
def count_occurrences(intervals, numbers):
    """Count how many of ``numbers`` fall into each interval.

    Each number is credited to the first interval (in the given order) that
    contains it; numbers that belong to no interval are ignored.  Whether a
    boundary counts as inside is controlled by the interval's
    ``left_closed`` / ``right_closed`` flags.

    Args:
        intervals: Sequence of objects exposing ``name``, ``start``, ``end``,
            ``left_closed`` and ``right_closed``.
        numbers: Iterable of numbers to classify.

    Returns:
        dict mapping every interval name to its hit count (0 when no number
        landed in it).
    """
    counts = {interval.name: 0 for interval in intervals}

    for value in numbers:
        for interval in intervals:
            if interval.left_closed:
                above_start = interval.start <= value
            else:
                above_start = interval.start < value

            if interval.right_closed:
                below_end = value <= interval.end
            else:
                below_end = value < interval.end

            if above_start and below_end:
                counts[interval.name] += 1
                # A number is counted once, for the first interval that matches.
                break
    return counts

### Удаление незначащих вероятностей (0.0)

In [16]:
def remove_insignificants(counts):
    """Drop the trailing entries of ``counts`` whose value is zero.

    Only the zero-valued entries at the end of the mapping are removed; zeros
    that are followed by a non-zero value are kept.

    Args:
        counts: Mapping whose insertion order defines what "trailing" means.

    Returns:
        A new dict with the leading part of ``counts`` up to and including
        the last non-zero value.
    """
    entries = list(counts.items())
    keep = len(entries)
    # Walk back from the end while the values are zero.
    while keep > 0 and entries[keep - 1][1] == 0:
        keep -= 1
    return dict(entries[:keep])

## Визуализация

In [17]:
import ipywidgets as widgets
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
import pandas as pd

In [18]:
def geometrical_distribution(p, factor):
    """Simulate a geometric distribution and compare it with the theory.

    Draws ``factor`` uniform random numbers, assigns each one to an interval
    whose boundaries are the running totals of the theoretical
    probabilities, and then shows:

    * a grouped bar chart of relative frequencies next to the theoretical
      probabilities,
    * a printed list of the intervals,
    * a table of hit counts per interval,
    * a table of theoretical probabilities and relative frequencies.

    As a side effect the pandas float display format is set to seven decimal
    places.

    Args:
        p: Success probability of a single trial.
        factor: Number of random numbers to draw.
    """
    samples = np.random.random(factor)
    probabilities = define_probabilities(m, p)
    intervals = define_intervals(define_points(probabilities.values()))
    counts = count_occurrences(intervals, samples)

    names = list(counts.keys())
    frequencies = [hits / factor for hits in counts.values()]
    # The final interval has no theoretical counterpart, so its frequency is
    # left out of the comparison.
    practical_distribution = dict(zip(names, frequencies[:-1]))
    theoreticals = list(probabilities.values())[:len(frequencies)]
    theoretical_distribution = dict(zip(names, theoreticals))

    fig, ax = plt.subplots(figsize=(12, 8))
    practical_x = np.arange(len(practical_distribution))
    practical_bars = ax.bar(practical_x - 0.2, practical_distribution.values(), width=0.4,
                            label='Практические вероятности', color='#00b6b2')
    theoretical_x = np.arange(len(theoretical_distribution))
    theoretical_bars = ax.bar(theoretical_x + 0.2, theoretical_distribution.values(), width=0.4,
                              label='Теоретические вероятности', color='#ff5b1b')

    # Write each bar's height above it; the two series use different vertical
    # offsets for their labels.
    for bars, colour, offset in ((practical_bars, '#00b6b2', 3),
                                 (theoretical_bars, '#ff5b1b', 9)):
        for bar in bars:
            height = bar.get_height()
            ax.annotate(f'{height:.5f}',
                        xy=(bar.get_x() + bar.get_width()/2, height),
                        xytext=(0, offset),
                        textcoords="offset points",
                        ha='center', va='bottom', fontsize=6, color=colour)

    ax.set_xlabel('Интервалы')
    ax.set_ylabel('Вероятность')
    ax.set_title('Относительная частота геометрического распределения')
    ax.set_xticks(practical_x)
    ax.set_xticklabels(practical_distribution.keys())
    ax.legend()

    fig.tight_layout()

    plt.grid(True, axis='y', linestyle=':')
    plt.grid(True, axis='x', linestyle=':')
    plt.show()

    # List the intervals, marking the right boundary as closed only for
    # intervals that are open on the left and closed on the right.
    for i in intervals:
        if i.left_closed or not i.right_closed:
            print(f"{i.name} : ({i.start};{i.end})")
        else:
            print(f"{i.name} : ({i.start};{i.end}]")

    display(pd.DataFrame.from_dict(counts, orient='index', columns=['Кол-во вхождений']))

    pd.options.display.float_format = '{:.7f}'.format
    comparison = {'Теоретическая': theoretical_distribution.values(), 'Относительная': practical_distribution.values()}
    display(pd.DataFrame(data=comparison, index=theoretical_distribution.keys()))

# Slider for the success probability p, from 0.1 to 0.9 in steps of 0.01.
p_slider = widgets.FloatSlider(
    description='P: ',
    min=0.1,
    max=0.9,
    step=0.01,
    value=0.53,
)

# Drop-down for the number of random draws.
factor_dropdown = widgets.Dropdown(
    description='Количество попыток: ',
    options=[1000, 10000, 100000, 1000000],
    value=100000,
)

# Bind both controls to geometrical_distribution and show the resulting widget.
interactive_plot = widgets.interactive(
    geometrical_distribution,
    p=p_slider,
    factor=factor_dropdown,
)

display(interactive_plot)

interactive(children=(FloatSlider(value=0.53, description='P: ', max=0.9, min=0.1, step=0.01), Dropdown(descri…