In [1]:
import numpy as np
import pandas as pd
from scipy.stats import spearmanr, mannwhitneyu, wilcoxon, friedmanchisquare, kruskal

设置测试数组的大小

In [2]:
# Candidate sizes for the first dimension (rows) of the random test array.
# Each entry doubles the previous one; 4096 corrects the former 4196 typo.
arr_sizes = [256, 512, 1024, 2048, 4096, 8192]
# Candidate sizes for the second dimension (columns) of the random test array.
features = [128, 256, 512, 1024, 2048, 4096, 8192]

初始化随机数组

In [3]:
# Seed NumPy's global generator so repeated runs draw the same data.
np.random.seed(42)

# Pick the benchmark dimensions: rows from arr_sizes, columns from features.
arr_size, feature = arr_sizes[2], features[3]

# Bounds for the random integers. np.random.randint treats the upper bound as
# exclusive, so the largest value that can actually be drawn is max_val - 1.
min_val, max_val = 0, 2**32 - 1

# Draw the (arr_size, feature) matrix of unsigned 32-bit integers.
random_arr = np.random.randint(
    low=min_val,
    high=max_val,
    size=(arr_size, feature),
    dtype=np.uint32,
)

print(arr_size, feature)
print(random_arr.shape)


1024 1024
(1024, 1024)


spearmanr计算时间统计

for循环版本

In [4]:
# Pairwise Spearman correlation between rows, one scipy call per ordered pair.
# The result matrix is allocated up front and filled in place.
spearmanr_res = np.zeros((arr_size, arr_size))

# Wall-clock timing using pandas timestamps.
start = pd.Timestamp.now()
for i in range(arr_size):
    row_i = random_arr[i]
    for j in range(arr_size):
        pair_result = spearmanr(row_i, random_arr[j])
        # Element 0 of the result is the correlation coefficient.
        spearmanr_res[i, j] = pair_result[0]
end = pd.Timestamp.now()

print('Spearmanr time cost: ', end - start)

Spearmanr time cost:  0 days 00:00:35.359321


matrix版本

In [12]:
# Vectorised variant: a single spearmanr call over the whole matrix.
# With axis=1 each row is treated as a variable, so the output covers every
# pair of rows at once.
start = pd.Timestamp.now()
matrix_result = spearmanr(random_arr, axis=1)
end = pd.Timestamp.now()

# Element 0 of the result is the correlation matrix.
spearmanr_res2 = matrix_result[0]
print(spearmanr_res2.shape)
print('Spearmanr time cost: ', end - start)

(8192, 8192)
Spearmanr time cost:  0 days 00:00:28.625532


Mann–Whitney U test计算时间统计

for循环版本

In [4]:
# Pairwise Mann-Whitney U statistic between rows, one scipy call per ordered
# pair. The result matrix is allocated up front and filled in place.
mannwhitneyu_res = np.zeros((arr_size, arr_size))

# Wall-clock timing using pandas timestamps.
start = pd.Timestamp.now()
for i in range(arr_size):
    sample_x = random_arr[i]
    for j in range(arr_size):
        sample_y = random_arr[j]
        # Element 0 of the result is the U statistic.
        mannwhitneyu_res[i, j] = mannwhitneyu(sample_x, sample_y)[0]
end = pd.Timestamp.now()

print('Mannwhitneyu shape: ', mannwhitneyu_res.shape)
print('Mannwhitneyu time cost: ', end - start)

Mannwhitneyu shape:  (1024, 1024)
Mannwhitneyu time cost:  0 days 00:20:28.956170


Wilcoxon signed-rank test 两变量检验版本

In [7]:
# Pairwise Wilcoxon signed-rank statistic for every ordered pair of rows.
wilcoxon_res = np.zeros((arr_size, arr_size))

# wilcoxon operates on the element-wise difference x - y. random_arr is
# unsigned 32-bit, so that subtraction would wrap around instead of going
# negative and corrupt the statistic. A signed 64-bit copy holds every
# possible difference exactly.
signed_arr = random_arr.astype(np.int64)

# Wall-clock timing using pandas timestamps.
start = pd.Timestamp.now()
for i in range(arr_size):
    for j in range(arr_size):
        if i != j:
            # Element 0 of the result is the test statistic.
            wilcoxon_res[i, j] = wilcoxon(signed_arr[i], signed_arr[j])[0]
        else:
            # A row paired with itself has all-zero differences, so the test
            # is skipped and the diagonal gets a fixed placeholder.
            # NOTE(review): 7.5 is carried over unchanged; its origin is not
            # evident from this notebook - confirm or replace with np.nan.
            wilcoxon_res[i, j] = 7.5
end = pd.Timestamp.now()

print('Wilcoxon time cost: ', end - start)

Wilcoxon time cost:  0 days 00:00:17.142066


friedman test多变量检验

In [13]:
# Friedman test over all rows at once: each row of random_arr is passed to
# friedmanchisquare as a separate sample.
friedman_res = 0  # initial placeholder, overwritten below

# Wall-clock timing using pandas timestamps.
start = pd.Timestamp.now()
friedman_out = friedmanchisquare(*random_arr)
end = pd.Timestamp.now()

# Element 0 of the result is the test statistic.
friedman_res = friedman_out[0]
print('Friedman time cost: ', end - start)

Friedman time cost:  0 days 00:00:01.521011


In [9]:
# Show the stored Friedman test statistic as this cell's output.
friedman_res

215.82563514560752

In [None]:
# Pairwise Kruskal-Wallis H statistic for every ordered pair of distinct rows.
# Diagonal entries are never computed and keep their initial value of 0.
kruskal_res = np.zeros((arr_size, arr_size))

# Wall-clock timing using pandas timestamps.
start = pd.Timestamp.now()
for i in range(arr_size):
    for j in range(arr_size):
        if i != j:
            # Pass the two rows directly as the two groups. This yields the
            # same samples as concatenating them and splitting by a 0/1 label
            # array, without rebuilding that array on every iteration.
            # Element 0 of the result is the H statistic.
            kruskal_res[i, j] = kruskal(random_arr[i], random_arr[j])[0]
end = pd.Timestamp.now()

print('Kruskal time cost: ', end - start)