# Исследование свойств характеристики

## Подготовим вспомогательные инструменты для дальнейших исследований

In [17]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
import networkx as nx

In [4]:
# Seed passed to the NumPy random generator created in this notebook.
SEED = 42

Генерация случайных величин, которые имеют распределение $Exp(λ)$ и $LogNormal(0, σ)$

In [63]:
from numpy.random import default_rng

# Shared NumPy random generator, seeded with SEED; the sampling helpers
# defined in this notebook draw from it.
rng = default_rng(seed=SEED)

In [64]:
def genrate_exp(n, lam):
    """Draw ``n`` samples from the exponential distribution with rate ``lam``.

    NumPy parameterises the exponential law by its scale, which is the
    reciprocal of the rate, so ``lam`` is inverted before sampling.  The
    samples are taken from the module-level generator ``rng``.
    """
    scale = 1 / lam
    sample = rng.exponential(size=n, scale=scale)
    return sample

def generate_lognormal(n, sigma):
    """Draw ``n`` samples from the log-normal distribution LogNormal(0, sigma).

    The mean of the underlying normal law is fixed at 0; ``sigma`` is its
    standard deviation.  The samples are taken from the module-level
    generator ``rng``.
    """
    sample = rng.lognormal(size=n, sigma=sigma, mean=0)
    return sample

**Построение KNN-графа** \
Построим KNN-граф на нашей случайной выборке $\Xi$

In [39]:
def build_knn_graph(k, vertices):
    """Build the k-nearest-neighbour graph of a one-dimensional sample.

    The graph has one vertex per sample point (labelled ``0 .. n-1``).  Vertex
    ``i`` is joined to each of its ``k`` nearest neighbours; because the graph
    is an ``nx.Graph``, an edge exists as soon as one endpoint lists the other
    among its neighbours.

    Args:
        k: number of neighbours requested for every point.
        vertices: array-like of sample values; reshaped to a column vector.

    Returns:
        The resulting ``nx.Graph``.
    """
    v = np.asarray(vertices, dtype=float).reshape(-1, 1)
    n = v.shape[0]

    # k + 1 neighbours are requested because each query point is normally
    # returned as its own nearest neighbour.
    nbrs = NearestNeighbors(n_neighbors=k + 1).fit(v)
    indices = nbrs.kneighbors(v, return_distance=False)

    G = nx.Graph()
    G.add_nodes_from(range(n))

    for i, row in enumerate(indices):
        # Drop the point itself explicitly instead of assuming it sits in
        # column 0: with duplicated values a twin may be listed first, which
        # would otherwise lose a real neighbour and create a self-loop.
        neighbours = [int(j) for j in row if j != i][:k]
        G.add_edges_from((i, j) for j in neighbours)

    return G

Функция, которая вычисляет минимальную степень вершины в графе

In [19]:
def min_degree(G: nx.Graph) -> int:
    """Return the smallest vertex degree found in ``G``.

    Raises:
        ValueError: if ``G`` has no vertices, because the minimum of an
            empty collection is undefined.
    """
    degree_by_node = dict(G.degree())
    return min(degree_by_node.values())

**Построение дистанционного графа** \
Построим дистанционный граф на нашей случайной выборке $\Xi$

In [37]:

def build_distance_graph(d, vertices):
    """Build the distance graph of a one-dimensional sample.

    The graph has one vertex per sample point (labelled ``0 .. n-1``); two
    distinct vertices are joined when the absolute difference of their values
    is at most ``d``.

    The sample is sorted once and, for every point, only the following points
    in sorted order are scanned until the gap exceeds ``d``.  This yields the
    same edge set as testing every pair, without the n(n-1)/2 comparisons.
    Intended for finite sample values; a NaN never passes the distance test.

    Args:
        d: inclusive distance threshold.
        vertices: array-like of real sample values (flattened to 1-D).

    Returns:
        The resulting ``nx.Graph``.
    """
    v = np.asarray(vertices, dtype=float).ravel()
    n = v.size

    G = nx.Graph()
    G.add_nodes_from(range(n))

    # ``order[p]`` is the original index of the p-th smallest value.
    order = np.argsort(v, kind="stable")
    values = v[order].tolist()
    order = order.tolist()

    for p in range(n):
        for q in range(p + 1, n):
            # In sorted order the gap to point p only grows with q, so the
            # first failure ends the scan.  Written as ``not (... <= d)`` so
            # that a NaN gap also stops it.
            if not (values[q] - values[p] <= d):
                break
            G.add_edge(order[p], order[q])

    return G

Функция, которая вычисляет кликовое число в графе

In [38]:
def get_clique_number(G: nx.Graph) -> int:
    """Return the size of the largest clique of ``G``.

    The sizes of all cliques produced by ``nx.find_cliques`` are compared;
    when it yields nothing the result is 0.
    """
    clique_sizes = map(len, nx.find_cliques(G))
    return max(clique_sizes, default=0)

**Метод Монте-Карло**

In [65]:
def monte_carlo(M, n, param, graph_param, gen_func, graph_func, res_func):
    """Repeat a sample -> graph -> statistic experiment ``M`` times.

    Args:
        M: number of independent repetitions.
        n: sample size passed to ``gen_func``.
        param: distribution parameter passed to ``gen_func``.
        graph_param: graph construction parameter passed to ``graph_func``.
        gen_func: callable ``(n, param) -> sample``.
        graph_func: callable ``(graph_param, sample) -> graph``.
        res_func: callable ``(graph) -> value`` computing the statistic.

    Returns:
        A list with the ``M`` statistic values, in the order they were computed.
    """
    def run_once():
        # One repetition: fresh sample, its graph, then the statistic.
        sample = gen_func(n, param)
        return res_func(graph_func(graph_param, sample))

    return [run_once() for _ in range(M)]

## Исследуем, как ведет себя $Τ$ в зависимости от параметров процедуры построения графа и размера выборки

Зафиксируем параметры распределений: \
* $Exp(λ)$, $\; λ_0 = \frac{1}{\sqrt{e^2 - e}}$
* $LogNormal(0, \sigma)$, $\; \sigma_0 = 1$

In [35]:
import math

# Rate of the exponential law: Var[Exp(lam0)] = 1 / lam0**2 = e**2 - e,
# which is also the variance of LogNormal(0, 1).
lam0 = 1/(math.sqrt(math.e**2 - math.e))
# Parameter sigma of the log-normal law LogNormal(0, sigma).
sigma0 = 1

#### Исследуем KNN-граф

In [67]:
# Number of Monte Carlo repetitions per parameter combination.
M = 500
# Sample sizes to examine.
n_array = [100, 500, 1000, 5000, 10000]
# Numbers of neighbours for the KNN graph; the experiment loops skip
# combinations with k >= n.
k_array = [2, 4, 7, 8, 9, 10, 12, 13, 16, 17, 22, 32, 64, 71, 100, 128, 224, 256, 316]

Для $Exp(\lambda_0)$

In [None]:
# Monte Carlo study of the minimum degree of the KNN graph on Exp(lam0)
# samples, over every (n, k) pair of the grid.
for n in n_array:
    for k in k_array:
        # Only combinations with fewer neighbours than points are simulated.
        if k < n:
            results = monte_carlo(M, n, lam0, k, genrate_exp, build_knn_graph, min_degree)

            # Sample mean and unbiased (ddof=1) sample variance over the M runs.
            mean_val, var_val = np.mean(results), np.var(results, ddof=1)
            std_val = math.sqrt(var_val)

            # Normal-approximation 95% confidence interval for the mean.
            se = std_val / math.sqrt(M)
            ci_lower, ci_upper = mean_val - 1.96 * se, mean_val + 1.96 * se

            print(f"n={n}, k={k} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

Для $LogNormal(0, \sigma_0)$

In [None]:
# Monte Carlo study of the minimum degree of the KNN graph on
# LogNormal(0, sigma0) samples, over every (n, k) pair of the grid.
for n in n_array:
    for k in k_array:
        # Only combinations with fewer neighbours than points are simulated.
        if k < n:
            results = monte_carlo(M, n, sigma0, k, generate_lognormal, build_knn_graph, min_degree)

            # Sample mean and unbiased (ddof=1) sample variance over the M runs.
            mean_val, var_val = np.mean(results), np.var(results, ddof=1)
            std_val = math.sqrt(var_val)

            # Normal-approximation 95% confidence interval for the mean.
            se = std_val / math.sqrt(M)
            ci_lower, ci_upper = mean_val - 1.96 * se, mean_val + 1.96 * se

            print(f"n={n}, k={k} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

#### Исследуем дистанционный граф

In [69]:
# Number of Monte Carlo repetitions per parameter combination.
M = 500
# Sample sizes to examine.
n_array = [100, 500, 1000, 5000, 10000]
# Distance thresholds: row i holds the values of d used with n_array[i].
d_array = [
    [0.0605, 0.1210, 0.2420],
    [0.0315, 0.0629, 0.1258],
    [0.0235, 0.0469, 0.0938],
    [0.0116, 0.0233, 0.0466],
    [0.0086, 0.0171, 0.0342]
]

Для $Exp(\lambda_0)$

In [71]:
# Monte Carlo study of the clique number of the distance graph on Exp(lam0)
# samples; each sample size n is paired with its own row of thresholds.
for i, n in enumerate(n_array):
    for d in d_array[i]:
        results = monte_carlo(M, n, lam0, d, genrate_exp, build_distance_graph, get_clique_number)

        # Sample mean and unbiased (ddof=1) sample variance over the M runs.
        mean_val, var_val = np.mean(results), np.var(results, ddof=1)
        std_val = math.sqrt(var_val)

        # Normal-approximation 95% confidence interval for the mean.
        se = std_val / math.sqrt(M)
        ci_lower, ci_upper = mean_val - 1.96 * se, mean_val + 1.96 * se

        print(f"n={n}, d={d} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

n=100, d=0.0605 -> mean = 6.5360, var = 1.2953, 95% CI = (6.4362, 6.6358)
n=100, d=0.121 -> mean = 9.3980, var = 2.2040, 95% CI = (9.2679, 9.5281)
n=100, d=0.242 -> mean = 14.6080, var = 4.7879, 95% CI = (14.4162, 14.7998)
n=500, d=0.0315 -> mean = 13.6620, var = 2.7493, 95% CI = (13.5167, 13.8073)
n=500, d=0.0629 -> mean = 21.3900, var = 5.5931, 95% CI = (21.1827, 21.5973)


KeyboardInterrupt: 

Для $LogNormal(0, \sigma_0)$

In [None]:
# Monte Carlo study of the clique number of the distance graph on
# LogNormal(0, sigma0) samples; each sample size n is paired with its own
# row of thresholds.
for i, n in enumerate(n_array):
    for d in d_array[i]:
        results = monte_carlo(M, n, sigma0, d, generate_lognormal, build_distance_graph, get_clique_number)

        # Sample mean and unbiased (ddof=1) sample variance over the M runs.
        mean_val, var_val = np.mean(results), np.var(results, ddof=1)
        std_val = math.sqrt(var_val)

        # Normal-approximation 95% confidence interval for the mean.
        se = std_val / math.sqrt(M)
        ci_lower, ci_upper = mean_val - 1.96 * se, mean_val + 1.96 * se

        print(f"n={n}, d={d} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

## Исследуем, как ведет себя $Τ$ в зависимости от параметров распределения

#### Исследуем KNN-граф

In [75]:
# Values held fixed while the distribution parameter is varied:
# sample size, number of neighbours (KNN graph) and distance threshold
# (distance graph).
n = 100
k = 10
d = 0.15

Для $Exp(\lambda)$

In [74]:
# Rates of the exponential distribution to examine.
lam_array = [0.1, 0.25, 0.5, 1.0, 2.0, 4.0]

In [None]:
# Monte Carlo study of the minimum degree of the KNN graph on Exp(lam)
# samples for every rate in lam_array, with n and k fixed.
for lam in lam_array:
    results = monte_carlo(M, n, lam, k, genrate_exp, build_knn_graph, min_degree)

    # Sample mean and unbiased (ddof=1) sample variance over the M runs.
    mean_val = np.mean(results)
    var_val = np.var(results, ddof=1)
    std_val = math.sqrt(var_val)

    # Normal-approximation 95% confidence interval for the mean.
    se = std_val / math.sqrt(M)
    ci_lower = mean_val - 1.96 * se
    ci_upper = mean_val + 1.96 * se

    # The swept rate is printed too; without it every output row carries the
    # same "n=..., k=..." prefix and the rows cannot be told apart.
    print(f"lam={lam}, n={n}, k={k} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

Для $LogNormal(0, \sigma)$

In [None]:
# Values of sigma for LogNormal(0, sigma) to examine.
sigma_array = [0.25, 0.5, 0.75, 1.0, 1.5, 2.0]

In [None]:
# Monte Carlo study of the minimum degree of the KNN graph on
# LogNormal(0, sigma) samples for every sigma in sigma_array, with n and k
# fixed.
for sigma in sigma_array:
    results = monte_carlo(M, n, sigma, k, generate_lognormal, build_knn_graph, min_degree)

    # Sample mean and unbiased (ddof=1) sample variance over the M runs.
    mean_val = np.mean(results)
    var_val = np.var(results, ddof=1)
    std_val = math.sqrt(var_val)

    # Normal-approximation 95% confidence interval for the mean.
    se = std_val / math.sqrt(M)
    ci_lower = mean_val - 1.96 * se
    ci_upper = mean_val + 1.96 * se

    # The swept sigma is printed too; without it every output row carries the
    # same "n=..., k=..." prefix and the rows cannot be told apart.
    print(f"sigma={sigma}, n={n}, k={k} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

#### Исследуем дистанционный граф

Для $Exp(\lambda)$

In [None]:
# Rates of the exponential distribution to examine.
lam_array = [0.1, 0.25, 0.5, 1.0, 2.0, 4.0]

In [None]:
# Monte Carlo study of the clique number of the distance graph on Exp(lam)
# samples for every rate in lam_array, with n and d fixed.
for lam in lam_array:
    results = monte_carlo(M, n, lam, d, genrate_exp, build_distance_graph, get_clique_number)

    # Sample mean and unbiased (ddof=1) sample variance over the M runs.
    mean_val = np.mean(results)
    var_val = np.var(results, ddof=1)
    std_val = math.sqrt(var_val)

    # Normal-approximation 95% confidence interval for the mean.
    se = std_val / math.sqrt(M)
    ci_lower = mean_val - 1.96 * se
    ci_upper = mean_val + 1.96 * se

    # The swept rate is printed too; without it every output row carries the
    # same "n=..., d=..." prefix and the rows cannot be told apart.
    print(f"lam={lam}, n={n}, d={d} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

Для $LogNormal(0, \sigma)$

In [None]:
# Values of sigma for LogNormal(0, sigma) to examine.
sigma_array = [0.25, 0.5, 0.75, 1.0, 1.5, 2.0]

In [None]:
# Monte Carlo study of the clique number of the distance graph on
# LogNormal(0, sigma) samples for every sigma in sigma_array, with n and d
# fixed.
for sigma in sigma_array:
    results = monte_carlo(M, n, sigma, d, generate_lognormal, build_distance_graph, get_clique_number)

    # Sample mean and unbiased (ddof=1) sample variance over the M runs.
    mean_val = np.mean(results)
    var_val = np.var(results, ddof=1)
    std_val = math.sqrt(var_val)

    # Normal-approximation 95% confidence interval for the mean.
    se = std_val / math.sqrt(M)
    ci_lower = mean_val - 1.96 * se
    ci_upper = mean_val + 1.96 * se

    # The swept sigma is printed too; without it every output row carries the
    # same "n=..., d=..." prefix and the rows cannot be told apart.
    print(f"sigma={sigma}, n={n}, d={d} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")