# Исследование свойст характеристики

## Подготовим вспомогательные инструменты для дальнейших исллдеований

In [1]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
import networkx as nx

In [2]:
SEED = 42

Генерация случайных величин, которые имеют распределение $Normal(0,σ)$ и $SkewNormal(α)$

In [28]:
from numpy.random import default_rng
from scipy.stats import skewnorm

rng = default_rng(seed=SEED)

In [29]:
def generate_normal(n, sigma):
  return rng.normal(0, sigma, n)

def generate_skewnormal(n, alpha):
  return skewnorm.rvs(alpha, size=n)

**Построение KNN-графа** \
Построим KNN-граф на нашей случайной выборке $\Xi$

In [5]:
def build_knn_graph(k, vertices):
    v = np.asarray(vertices, dtype=float).reshape(-1, 1)
    n = v.shape[0]

    nbrs = NearestNeighbors(n_neighbors=k+1).fit(v)
    distances, indices = nbrs.kneighbors(v)

    G = nx.Graph()
    G.add_nodes_from(range(n))

    for i in range(n):
        for j in indices[i][1:]:
            G.add_edge(i, j)

    return G

Функция, которая вычисляет число треугольников в графе

In [6]:
def count_triangles(G: nx.Graph) -> int:
    tr_per_node = nx.triangles(G)
    total_trs = sum(tr_per_node.values()) // 3
    return total_trs

**Построение дистанционного графа** \
Построим дистанционный граф на нашей случайной выборке $\Xi$

In [13]:
def build_distance_graph(d, vertices):
    v = np.asarray(vertices)
    n = v.size

    G = nx.Graph()
    G.add_nodes_from(range(n))

    for i in range(n):
        for j in range(i+1, n):
            if abs(v[i] - v[j]) <= d:
                G.add_edge(i, j)

    return G

Функция, которая вычисляет хроматическое число графа

In [14]:
def chromatic_number(G: nx.Graph) -> int:
    if len(G) == 0:
        return 0
    if G.number_of_edges() == 0:
        return 1
    coloring = nx.coloring.greedy_color(G, strategy="DSATUR")
    return max(coloring.values()) + 1

**метод Монте-Карло**

In [15]:
def monte_carlo(M, n, param, graph_param, gen_func, graph_func, res_func):
  results = []
  for _ in range(M):
    vertices = gen_func(n, param)
    G = graph_func(graph_param, vertices)
    results.append(res_func(G))
  return results

## Исследуем, как ведет себя $Τ$ в зависимости от параметров процедуры построения графа и размера выборки

Зафиксируем параметры распределения:
* $Normal(0,σ)$, $\; \sigma_0 = 1$
* $SkewNormal(α)$, $\; \alpha_0 = 1$

In [16]:
import math

sigma0 = 1
alpha0 = 1

#### Исследуем KNN-граф

In [23]:
M = 500
n_array = [100, 200, 300, 400, 500]
k_array = [2, 4, 7, 8, 9, 10]

Для $Normal(0,σ_0)$

In [24]:
for n in n_array:
    for k in k_array:
        if k >= n:
            continue
        results = monte_carlo(M, n, sigma0, k, generate_normal, build_knn_graph, count_triangles)
        mean_val = np.mean(results)
        var_val = np.var(results, ddof=1)
        std_val = math.sqrt(var_val)
        se = std_val / math.sqrt(M)
        ci_lower = mean_val - 1.96 * se
        ci_upper = mean_val + 1.96 * se

        print(f"n={n}, k={k} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

n=100, k=2 -> mean = 39.9980, var = 11.1723, 95% CI = (39.7050, 40.2910)
n=100, k=4 -> mean = 220.9980, var = 141.6493, 95% CI = (219.9548, 222.0412)
n=100, k=7 -> mean = 750.9200, var = 1177.5006, 95% CI = (747.9122, 753.9278)
n=100, k=8 -> mean = 994.8460, var = 1973.6135, 95% CI = (990.9519, 998.7401)
n=100, k=9 -> mean = 1281.9980, var = 3359.9940, 95% CI = (1276.9171, 1287.0789)
n=100, k=10 -> mean = 1603.9000, var = 4374.5711, 95% CI = (1598.1025, 1609.6975)
n=200, k=2 -> mean = 78.9740, var = 18.8711, 95% CI = (78.5932, 79.3548)
n=200, k=4 -> mean = 431.6500, var = 329.7751, 95% CI = (430.0582, 433.2418)
n=200, k=7 -> mean = 1442.7920, var = 2403.9847, 95% CI = (1438.4943, 1447.0897)
n=200, k=8 -> mean = 1904.6960, var = 3789.6950, 95% CI = (1899.3000, 1910.0920)
n=200, k=9 -> mean = 2437.3520, var = 6647.2105, 95% CI = (2430.2055, 2444.4985)
n=200, k=10 -> mean = 3037.9700, var = 10144.7065, 95% CI = (3029.1414, 3046.7986)
n=300, k=2 -> mean = 117.9520, var = 32.5588, 95% CI = 

Для $SkewNormal(α_0)$

In [30]:
for n in n_array:
    for k in k_array:
        if k >= n:
          continue
        results = monte_carlo(M, n, alpha0, k, generate_skewnormal, build_knn_graph, chromatic_number)
        mean_val = np.mean(results)
        var_val = np.var(results, ddof=1)
        std_val = math.sqrt(var_val)
        se = std_val / math.sqrt(M)
        ci_lower = mean_val - 1.96 * se
        ci_upper = mean_val + 1.96 * se

        print(f"n={n}, k={k} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

n=100, k=2 -> mean = 3.0000, var = 0.0000, 95% CI = (3.0000, 3.0000)
n=100, k=4 -> mean = 5.0000, var = 0.0000, 95% CI = (5.0000, 5.0000)
n=100, k=7 -> mean = 8.0000, var = 0.0000, 95% CI = (8.0000, 8.0000)
n=100, k=8 -> mean = 9.0000, var = 0.0000, 95% CI = (9.0000, 9.0000)
n=100, k=9 -> mean = 10.0000, var = 0.0000, 95% CI = (10.0000, 10.0000)
n=100, k=10 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=200, k=2 -> mean = 3.0000, var = 0.0000, 95% CI = (3.0000, 3.0000)


KeyboardInterrupt: 

#### Исследуем дистанционный-граф

In [31]:
d_array = [
    [0.0605, 0.1210, 0.2420],
    [0.0315, 0.0629, 0.1258],
    [0.0235, 0.0469, 0.0938],
    [0.0116, 0.0233, 0.0466],
    [0.0086, 0.0171, 0.0342]
]

Для $Normal(0,σ_0)$

In [32]:
for i, n in enumerate(n_array):
    for d in d_array[i]:
        results = monte_carlo(M, n, sigma0, k, generate_normal, build_knn_graph, count_triangles)
        mean_val = np.mean(results)
        var_val = np.var(results, ddof=1)
        std_val = math.sqrt(var_val)
        se = std_val / math.sqrt(M)
        ci_lower = mean_val - 1.96 * se
        ci_upper = mean_val + 1.96 * se

        print(f"n={n}, d={d} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

n=100, d=0.0605 -> mean = 220.1220, var = 156.4681, 95% CI = (219.0256, 221.2184)
n=100, d=0.121 -> mean = 221.2280, var = 148.7295, 95% CI = (220.1590, 222.2970)
n=100, d=0.242 -> mean = 221.6000, var = 166.6493, 95% CI = (220.4685, 222.7315)
n=200, d=0.0315 -> mean = 431.0660, var = 341.3924, 95% CI = (429.4464, 432.6856)
n=200, d=0.0629 -> mean = 430.2340, var = 304.9972, 95% CI = (428.7032, 431.7648)
n=200, d=0.1258 -> mean = 430.2340, var = 311.7107, 95% CI = (428.6864, 431.7816)
n=300, d=0.0235 -> mean = 640.9800, var = 458.1960, 95% CI = (639.1037, 642.8563)
n=300, d=0.0469 -> mean = 638.6720, var = 508.8501, 95% CI = (636.6947, 640.6493)
n=300, d=0.0938 -> mean = 639.1100, var = 476.3666, 95% CI = (637.1969, 641.0231)
n=400, d=0.0116 -> mean = 850.6380, var = 651.7224, 95% CI = (848.4003, 852.8757)
n=400, d=0.0233 -> mean = 849.7300, var = 567.2316, 95% CI = (847.6424, 851.8176)
n=400, d=0.0466 -> mean = 850.8940, var = 593.6340, 95% CI = (848.7583, 853.0297)
n=500, d=0.0086 ->

Для $SkewNormal(α_0)$

In [33]:
for i, n in enumerate(n_array):
    for d in d_array[i]:
        results = monte_carlo(M, n, alpha0, k, generate_skewnormal, build_knn_graph, chromatic_number)
        mean_val = np.mean(results)
        var_val = np.var(results, ddof=1)
        std_val = math.sqrt(var_val)
        se = std_val / math.sqrt(M)
        ci_lower = mean_val - 1.96 * se
        ci_upper = mean_val + 1.96 * se

        print(f"n={n}, d={d} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

n=100, d=0.0605 -> mean = 5.0000, var = 0.0000, 95% CI = (5.0000, 5.0000)
n=100, d=0.121 -> mean = 5.0000, var = 0.0000, 95% CI = (5.0000, 5.0000)
n=100, d=0.242 -> mean = 5.0000, var = 0.0000, 95% CI = (5.0000, 5.0000)
n=200, d=0.0315 -> mean = 5.0000, var = 0.0000, 95% CI = (5.0000, 5.0000)
n=200, d=0.0629 -> mean = 5.0000, var = 0.0000, 95% CI = (5.0000, 5.0000)
n=200, d=0.1258 -> mean = 5.0000, var = 0.0000, 95% CI = (5.0000, 5.0000)
n=300, d=0.0235 -> mean = 5.0000, var = 0.0000, 95% CI = (5.0000, 5.0000)
n=300, d=0.0469 -> mean = 5.0000, var = 0.0000, 95% CI = (5.0000, 5.0000)
n=300, d=0.0938 -> mean = 5.0000, var = 0.0000, 95% CI = (5.0000, 5.0000)


KeyboardInterrupt: 

## Исследуем, как ведет себя $Τ$ в зависимости от параметров распределения

#### Исследуем KNN-граф

In [34]:
n = 100
k = 10
d = 0.15

Для $Normal(0,σ)$

In [35]:
sigma_array = [0.1, 0.25, 0.5, 1.0, 2.0, 4.0]

In [36]:
for sigma in sigma_array:
    results = monte_carlo(M, n, sigma, k, generate_normal, build_knn_graph, count_triangles)
    mean_val = np.mean(results)
    var_val = np.var(results, ddof=1)
    std_val = math.sqrt(var_val)
    se = std_val / math.sqrt(M)
    ci_lower = mean_val - 1.96 * se
    ci_upper = mean_val + 1.96 * se

    print(f"n={n}, k={k} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

n=100, k=10 -> mean = 1605.1380, var = 5151.3497, 95% CI = (1598.8468, 1611.4292)
n=100, k=10 -> mean = 1605.8440, var = 5260.6850, 95% CI = (1599.4864, 1612.2016)
n=100, k=10 -> mean = 1601.0240, var = 4859.4022, 95% CI = (1594.9137, 1607.1343)
n=100, k=10 -> mean = 1605.4740, var = 5181.8530, 95% CI = (1599.1642, 1611.7838)
n=100, k=10 -> mean = 1592.2560, var = 4179.5937, 95% CI = (1586.5892, 1597.9228)
n=100, k=10 -> mean = 1604.3360, var = 4764.8007, 95% CI = (1598.2855, 1610.3865)


Для $SkewNormal(α)$

In [37]:
alpha_array = [0.25, 0.5, 0.75, 1.0, 1.5, 2.0]

In [38]:
for alpha in alpha_array:
    results = monte_carlo(M, n, alpha, k, generate_skewnormal, build_knn_graph, chromatic_number)
    mean_val = np.mean(results)
    var_val = np.var(results, ddof=1)
    std_val = math.sqrt(var_val)
    se = std_val / math.sqrt(M)
    ci_lower = mean_val - 1.96 * se
    ci_upper = mean_val + 1.96 * se

    print(f"n={n}, k={k} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

n=100, k=10 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=100, k=10 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=100, k=10 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=100, k=10 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=100, k=10 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=100, k=10 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)


#### Исследуем дистанционный-граф

Для $Normal(0,σ)$

In [39]:
sigma_array = [0.1, 0.25, 0.5, 1.0, 2.0, 4.0]

In [43]:
for sigma in sigma_array:
    results = monte_carlo(M, n, sigma, k, generate_normal, build_knn_graph, count_triangles)
    mean_val = np.mean(results)
    var_val = np.var(results, ddof=1)
    std_val = math.sqrt(var_val)
    se = std_val / math.sqrt(M)
    ci_lower = mean_val - 1.96 * se
    ci_upper = mean_val + 1.96 * se

    print(f"n={n}, d={d} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

n=100, d=0.15 -> mean = 1605.9100, var = 4794.9318, 95% CI = (1599.8404, 1611.9796)
n=100, d=0.15 -> mean = 1605.4740, var = 5089.5444, 95% CI = (1599.2207, 1611.7273)
n=100, d=0.15 -> mean = 1602.0140, var = 4456.5108, 95% CI = (1596.1625, 1607.8655)
n=100, d=0.15 -> mean = 1604.6020, var = 4254.0156, 95% CI = (1598.8850, 1610.3190)
n=100, d=0.15 -> mean = 1603.8980, var = 4788.5447, 95% CI = (1597.8324, 1609.9636)
n=100, d=0.15 -> mean = 1601.1540, var = 4681.5975, 95% CI = (1595.1565, 1607.1515)


Для $SkewNormal(α)$

In [44]:
alpha_array = [0.25, 0.5, 0.75, 1.0, 1.5, 2.0]

In [45]:
for alpha in alpha_array:
    results = monte_carlo(M, n, alpha, k, generate_skewnormal, build_knn_graph, chromatic_number)
    mean_val = np.mean(results)
    var_val = np.var(results, ddof=1)
    std_val = math.sqrt(var_val)
    se = std_val / math.sqrt(M)
    ci_lower = mean_val - 1.96 * se
    ci_upper = mean_val + 1.96 * se

    print(f"n={n}, d={d} -> mean = {mean_val:.4f}, var = {var_val:.4f}, 95% CI = ({ci_lower:.4f}, {ci_upper:.4f})")

n=100, d=0.15 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=100, d=0.15 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=100, d=0.15 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=100, d=0.15 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=100, d=0.15 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
n=100, d=0.15 -> mean = 11.0000, var = 0.0000, 95% CI = (11.0000, 11.0000)
