# Lesson 2

## Common imports and settings

In [None]:
%matplotlib inline

import random
from itertools import islice
from typing import Generator

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

# Select seaborn's 'whitegrid' style as the notebook-wide plotting default.
sns.set(style='whitegrid')

## Generators from previous lesson

In [None]:
def geometric_generator(p: float = 0.5) -> Generator[int, None, None]:
    """Yield an endless stream of geometrically distributed integers.

    Each yielded value is the number of failed trials observed before the
    first success, where every trial succeeds independently with
    probability ``p``.

    Args:
        p: Success probability of a single trial. Must be greater than 0;
            values of 1 or more make every yielded value 0.

    Yields:
        Non-negative failure counts, one per simulated experiment.

    Raises:
        ValueError: If ``p`` is not positive. Because this is a generator
            function, the error surfaces on the first ``next()`` call.
    """
    # With p <= 0 a trial (practically) never succeeds, so the inner loop
    # below would spin forever instead of producing a value.
    if p <= 0:
        raise ValueError(f'p must be greater than 0, got {p!r}')
    while True:
        n = 0
        # random.random() is uniform on [0, 1), so the trial fails (and the
        # loop continues) with probability 1 - p.
        while random.random() > p:
            n += 1
        yield n

In [None]:
def binomial_generator(p: float = 0.5, n: int = 10) -> Generator[int, None, None]:
    """Yield an endless stream of binomially distributed integers.

    Each yielded value is the number of successes in ``n`` independent
    trials, where every trial succeeds with probability ``p``.

    Args:
        p: Success probability of a single trial.
        n: Number of trials per yielded value.

    Yields:
        Success counts in the range ``0..n``.
    """
    while True:
        # random.random() is uniform on [0, 1), so `random.random() < p` is
        # true with probability exactly p (never for p=0, always for p=1).
        # Summing the booleans counts the successful trials.
        yield sum(random.random() < p for _ in range(n))

## Exercise 1. Geometric distribution. Statistics.

In [None]:
# Sample sizes used in this exercise: 10^3, 10^5 and 10^7 points.
n_points = [10 ** exponent for exponent in (3, 5, 7)]

In [None]:
# Draw one sample per requested size, each from a fresh generator. The samples
# have different lengths, so they are kept in a plain list: wrapping ragged
# arrays in np.array() raises ValueError on NumPy >= 1.24.
points = [np.fromiter(geometric_generator(), dtype=int, count=n) for n in n_points]

In [None]:
# Summary statistics, one entry per sample in `points`.
mean = np.array([np.mean(p) for p in points])
# Variance of each sample. This was previously stored under the name `std`
# although it held the squared standard deviation.
var = np.array([np.var(p) for p in points])
# Relative fluctuation: standard deviation divided by the mean.
rf = np.sqrt(var) / mean

df = pd.DataFrame(dict(
    n_points=n_points,
    mean=mean,
    var=var,
    rf=rf,
))
# Bare expression so the notebook renders the table as the cell output.
df

## Exercise 2. Binomial distribution. Statistics.

In [None]:
# Sample sizes used in this exercise: 10^3, 10^5 and 10^7 points.
n_points = [10 ** exponent for exponent in (3, 5, 7)]

In [None]:
# Draw one sample per requested size, each from a fresh generator. The samples
# have different lengths, so they are kept in a plain list: wrapping ragged
# arrays in np.array() raises ValueError on NumPy >= 1.24.
points = [np.fromiter(binomial_generator(), dtype=int, count=n) for n in n_points]

In [None]:
# Summary statistics, one entry per sample in `points`.
mean = np.array([np.mean(p) for p in points])
# Variance of each sample. This was previously stored under the name `std`
# although it held the squared standard deviation.
var = np.array([np.var(p) for p in points])
# Relative fluctuation: standard deviation divided by the mean.
rf = np.sqrt(var) / mean

df = pd.DataFrame(dict(
    n_points=n_points,
    mean=mean,
    var=var,
    rf=rf,
))
# Bare expression so the notebook renders the table as the cell output.
df

## Exercise 3. Geometric distribution. Probabilities.

In [None]:
# One generator is shared by all sample sizes, so each sample continues the
# stream where the previous one stopped.
gen = geometric_generator()
for n in [10 ** 3, 10 ** 5, 10 ** 7]:
    # Pull exactly n values from the generator into an integer array.
    points = np.fromiter(islice(gen, n), dtype=int, count=n)
    # A single np.unique call returns the sorted distinct values together
    # with their occurrence counts, instead of rescanning the whole sample
    # once per distinct value.
    uniques, counts = np.unique(points, return_counts=True)
    df = pd.DataFrame(dict(
        value=uniques,
        p=counts / n,  # empirical probability of each value
    ))
    print(f'n_points={n}\n{df.to_string(index=False)}\n')

## Exercise 4. Binomial distribution. Probabilities.

In [None]:
# One generator is shared by all sample sizes, so each sample continues the
# stream where the previous one stopped.
gen = binomial_generator()
for n in [10 ** 3, 10 ** 5, 10 ** 7]:
    # Pull exactly n values from the generator into an integer array.
    points = np.fromiter(islice(gen, n), dtype=int, count=n)
    # A single np.unique call returns the sorted distinct values together
    # with their occurrence counts, instead of rescanning the whole sample
    # once per distinct value.
    uniques, counts = np.unique(points, return_counts=True)
    df = pd.DataFrame(dict(
        value=uniques,
        p=counts / n,  # empirical probability of each value
    ))
    print(f'n_points={n}\n{df.to_string(index=False)}\n')