In [None]:
import numpy as np
from scipy import stats
from IPython.display import display, Latex

# Fix the seed of NumPy's global RNG so repeated runs draw the same samples
np.random.seed(42)

# Sample size passed to the np.random.normal calls in the cells below
n = 500

## 1. Критерий знаков

In [None]:
def sign_test(x, y):
    """Sign-test statistic for paired observations.

    A pair is counted as "-" when x > y and as "+" when x < y; pairs for
    which neither comparison holds (e.g. equal values) are counted in
    neither group.

    Returns:
        (t, minus_count, plus_count), where t is the smaller of the two counts.
    """
    minus_count = np.sum(x > y)
    plus_count = np.sum(x < y)
    t = plus_count if plus_count < minus_count else minus_count
    return t, minus_count, plus_count


### 1.1 выборки

In [None]:
# Two normal samples with the same mean (2) and standard deviations sqrt(5), sqrt(8)
x1 = np.random.normal(loc=2, scale=np.sqrt(5), size=n)
y1 = np.random.normal(loc=2, scale=np.sqrt(8), size=n)
t1, minus1, plus1 = sign_test(x1, y1)
print("1.1:", f"t = {t1}, '-' count = {minus1}, '+' count = {plus1}", sep="\n")

### 1.2 выборки

In [None]:
n = 500  # sample size used by the samples generated in this cell


def modified_sign_test(x, y):
    """Rank-weighted variant of the sign test for paired samples.

    The differences ``y - x`` are computed, zero differences are discarded,
    and the absolute values of the remaining differences are ranked (ties
    get average ranks, the ``scipy.stats.rankdata`` default). Every rank is
    duplicated, then the duplicated ranks are summed separately for positive
    and for negative differences.

    Args:
        x: first sample (array-like).
        y: second sample (array-like), paired element-wise with ``x``.

    Returns:
        (W, sum_pos, sum_neg, n_nonzero) where
        ``sum_pos`` is the rank sum over pairs with ``y - x > 0``,
        ``sum_neg`` is the rank sum over pairs with ``y - x < 0``,
        ``W`` is the smaller of the two sums and
        ``n_nonzero`` is the number of non-zero differences.
    """
    diffs = np.asarray(y) - np.asarray(x)

    # Zero differences carry no sign and are excluded from the ranking
    filtered_diffs = diffs[diffs != 0]
    abs_diffs = np.abs(filtered_diffs)

    ranks = stats.rankdata(abs_diffs)

    # Each pair contributes its rank twice
    expanded_ranks = np.repeat(ranks, 2)

    # Positive differences (y > x) go to the "+" group, negative to the "-" group.
    # The masks are repeated so they line up with the duplicated ranks.
    signs = np.sign(filtered_diffs)
    pos_ranks = expanded_ranks[np.repeat(signs > 0, 2)]
    neg_ranks = expanded_ranks[np.repeat(signs < 0, 2)]

    sum_pos = np.sum(pos_ranks) if len(pos_ranks) > 0 else 0
    sum_neg = np.sum(neg_ranks) if len(neg_ranks) > 0 else 0

    W = min(sum_pos, sum_neg)

    return W, sum_pos, sum_neg, len(filtered_diffs)

# Section 1.2 samples: means 2 and 2.2, standard deviations sqrt(5) and sqrt(7)
x2 = np.random.normal(loc=2, scale=np.sqrt(5), size=n)
y2 = np.random.normal(loc=2.2, scale=np.sqrt(7), size=n)
t, sum_pos2, sum_neg2, n_diff2 = modified_sign_test(x2, y2)

print(
    "1.2 Модифицированный критерий знаков:",
    f"t = {t}",
    f"Сумма рангов '+' = {sum_pos2}",
    f"Сумма рангов '-' = {sum_neg2}",
    f"Количество ненулевых разниц: {n_diff2}",
    # Multiplied by 2 because each non-zero difference contributes two ranks
    f"Общее количество рангов: {2 * n_diff2}",
    sep="\n",
)



## 2. Ранговый коэффициент корреляции Кендалла

In [None]:
def kendall_tau(x, y):
    """Thin wrapper around ``scipy.stats.kendalltau``.

    Returns:
        The pair (tau, p_value) taken from SciPy's result.
    """
    result = stats.kendalltau(x, y)
    return result[0], result[1]

### 2.1 a) Выборку пополам

In [None]:
x3 = np.random.normal(loc=2, scale=np.sqrt(5), size=n)
half = n // 2
# Compare the first `half` elements with the next `half` elements of the same sample
tau1, p1 = kendall_tau(x3[:half], x3[half:2 * half])
print("\n2.1 a):", f"Kendall's tau = {tau1}", sep="\n")


### 2.1 б) Две выборки

In [None]:
def print_if_in(val: np.float64, lbound: np.float64 = None, rbound: np.float64 = None):
    """Display a bound check for ``val`` as LaTeX and return its outcome.

    When ``lbound`` is None, only the upper bound is checked
    (``val <= rbound``) and the relation is rendered as ``<=`` or ``>``.
    Otherwise the inclusive check ``lbound <= val <= rbound`` is performed
    and rendered as membership (or non-membership) in ``(lbound; rbound)``.

    Returns:
        True when the check holds, False otherwise.
    """
    if lbound is None:
        holds = bool(val <= rbound)
        relation = "<=" if holds else ">"
        tex = str(val) + relation + f"{rbound}"
    else:
        holds = bool(lbound <= val <= rbound)
        relation = "\\in" if holds else "\\notin"
        tex = str(val) + relation + f"({lbound}; {rbound})"

    display(Latex(f"${tex}$"))
    return holds

In [None]:
x4 = np.random.normal(2, np.sqrt(5), n)
# y4 is a linear function of x4 plus normal noise with standard deviation 2
y4 = 3 * x4 - 5 + np.random.normal(0, 2, n)
tau2, p2 = kendall_tau(x4, y4)
print("\n2.1 б):")
print(f"Kendall's tau = {tau2}")

# Two-sided t-test of H0: the Pearson correlation between x4 and y4 is zero.
alpha = 0.95  # confidence level; the significance level is 1 - alpha
p = stats.pearsonr(x4, y4).statistic
# t = r * sqrt(n - 2) / sqrt(1 - r^2) follows Student's t with n - 2 degrees of freedom under H0.
# Plain ** is used instead of np.pow, which is only available in NumPy >= 2.0.
t = (p / np.sqrt(1 - p**2)) * np.sqrt(n - 2)
# Critical values are the (1 - alpha)/2 quantile and its mirror image,
# so the acceptance region covers probability alpha.
t_true_left = stats.t.ppf((1 - alpha) / 2, df=n - 2)
t_true_right = -t_true_left
# H0 is kept only when t lies between both critical values (two-sided check)
isn_cor = print_if_in(t, lbound=t_true_left, rbound=t_true_right)
if isn_cor:
    print("x1 and x2 are not correlated, H0 approved")
else:
    print("x1 and x2 are correlated, H0 disapproved")
display(Latex(f"$\\hat{{p}} = {p}$"))


### 2.2 Низкий коэффициент лин. зависимости

In [None]:
x5 = np.random.normal(2, np.sqrt(5), n)
# y5 depends on x5 only weakly (slope 0.05) and carries normal noise with standard deviation 2
y5 = 0.05 * x5 - 5 + np.random.normal(0, 2, n)
tau3, p3 = kendall_tau(x5, y5)
print("\n2.2:")
print(f"Kendall's tau = {tau3}")

# Two-sided t-test of H0: the Pearson correlation between x5 and y5 is zero.
alpha = 0.95  # confidence level; the significance level is 1 - alpha
p = stats.pearsonr(x5, y5).statistic
# t = r * sqrt(n - 2) / sqrt(1 - r^2) follows Student's t with n - 2 degrees of freedom under H0.
# Plain ** is used instead of np.pow, which is only available in NumPy >= 2.0.
t = (p / np.sqrt(1 - p**2)) * np.sqrt(n - 2)
# Critical values are the (1 - alpha)/2 quantile and its mirror image,
# so the acceptance region covers probability alpha.
t_true_left = stats.t.ppf((1 - alpha) / 2, df=n - 2)
t_true_right = -t_true_left
# H0 is kept only when t lies between both critical values (two-sided check)
isn_cor = print_if_in(t, lbound=t_true_left, rbound=t_true_right)
if isn_cor:
    print("x1 and x2 are not correlated, H0 approved")
else:
    print("x1 and x2 are correlated, H0 disapproved")
display(Latex(f"$\\hat{{p}} = {p}$"))


## 3. Критерий отсутствия автокорреляции ближайших соседей

In [None]:
# 3.1: statistic for a freshly drawn normal sample
x6 = np.random.normal(3, np.sqrt(7), n)
sum_x = np.sum(x6)
sum_x_sq = np.sum(x6**2)
# Sum of products of neighbouring elements x[i] * x[i+1]
sum_consec = np.sum(x6[:-1] * x6[1:])

# Lag-1 coefficient with the wrap-around pair (first, last) added:
#   (n * sum(x_i * x_{i+1}) - (sum x)^2 + n * x_1 * x_n) / (n * sum(x_i^2) - (sum x)^2)
numerator = n * sum_consec - sum_x**2 + n * x6[0] * x6[-1]
denominator = n * sum_x_sq - sum_x**2
p = numerator / denominator
# Mp and Dp are used below as the mean and variance that standardise p
Mp = -1 / (n - 1)
Dp = n * (n - 3) / (n + 1) / (n - 1) ** 2
t = (p - Mp) / np.sqrt(Dp)
print("3.1:", t)


# 3.2: overwrite x6 in place so that each element depends on its
# (already updated) predecessor, then recompute the same statistic
for j in range(1, n):
    x6[j] = x6[j] - x6[j-1] + 2 * np.random.normal(0, 1)
sum_x = np.sum(x6)
sum_x_sq = np.sum(x6**2)
sum_consec = np.sum(x6[:-1] * x6[1:])

# Same formula as in 3.1; sum_consec must be scaled by n here as well
numerator = n * sum_consec - sum_x**2 + n * x6[0] * x6[-1]
denominator = n * sum_x_sq - sum_x**2
p = numerator / denominator
Mp = -1 / (n - 1)
Dp = n * (n - 3) / (n + 1) / (n - 1) ** 2
t = (p - Mp) / np.sqrt(Dp)
print("3.2:", t)
