# Imports:

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import chi2, norm, cauchy, pareto

# Check whether sigma_G^* converges:

In [2]:
np.random.seed(333)

In [3]:
def squared_sans_fourths(s):
    s2 = np.mean(s ** 2)
    s4 = np.mean(s ** 4)
    return ((s2 ** 2) - s4 / len(s)) / (s2)

In [4]:
def gen_norm_squared_sans_fourths(N, n):
    return [
        squared_sans_fourths(
            norm.rvs(size=n)
    ) for _ in range(N)]

def gen_chi_squared_sans_fourths(N, n):
    return [
        squared_sans_fourths(
            chi2.rvs(size=n, df = 1)
    ) for _ in range(N)]

def gen_cauchy_sans_fourths(N, n):
    return [
        squared_sans_fourths(
            cauchy.rvs(size=n)
    ) for _ in range(N)]


def gen_pareto_sans_fourths(N, n, b = 9):
    return [
        squared_sans_fourths(
            pareto.rvs(size=n, b = b)
    ) for _ in range(N)]

In [5]:
for i in range(1, 5):
    s = gen_norm_squared_sans_fourths(10_000, 10 ** i)
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(s))
    print(np.std(s))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
0.7534877987636056
0.3480344115534798
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
0.9717020666857672
0.13667590856081055
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
0.9969034088911914
0.04449942812519478
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
0.999671255277501
0.014044062640958473


In [6]:
for i in range(1, 5):
    s = gen_chi_squared_sans_fourths(10_000, 10 ** i)
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(s))
    print(np.std(s))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
1.4902268765795792
1.474954157498682
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
2.7151327900652227
0.8484966846379363
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
2.9690160535273162
0.3013465592982034
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
2.998704477005671
0.09730910429651954


In [7]:
for i in range(1, 5):
    s = gen_cauchy_sans_fourths(10_000, 10 ** i)
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(s))
    print(np.std(s))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
21.883572607418166
151.5010195712923
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
402.4562721311495
8969.109152547231
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
4633.683463417528
180504.04304569346
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
31804.210246460992
414545.5127911238


In [8]:
for i in range(1, 5):
    s = gen_pareto_sans_fourths(10_000, 10 ** i)
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(s))
    print(np.std(s))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
1.1480119148212793
0.0980342154074865
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
1.2718337448631698
0.03759151828671276
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
1.2843378350401586
0.012067555465863515
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
1.2855787802049417
0.0038817124467144604


In [9]:
for i in range(1, 5):
    s = gen_pareto_sans_fourths(10_000, 10 ** i, b = 5)
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(s))
    print(np.std(s))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
1.4447489066712136
0.2581450094168604
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
1.6389508887112465
0.12225884357835434
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
1.6632231597787384
0.04393536574075072
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
1.666344222867762
0.014369169324516413


In [10]:
for i in range(1, 5):
    s = gen_pareto_sans_fourths(10_000, 10 ** i, b = 3)
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(s))
    print(np.std(s))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
2.1239383798817135
0.7933975771043452
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
2.7241375146538367
0.5766452984599947
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
2.9171975525258773
0.3533283760409992
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
2.9742299503260368
0.20123084092287966


In [11]:
for i in range(1, 5):
    s = gen_pareto_sans_fourths(10_000, 10 ** i, b = 1)
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(s))
    print(np.std(s))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
89.99196022665795
2144.5171238457774
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
729.0757939903634
6688.097730308052
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
7852.239195577441
150693.2485103794
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
121453.82538958898
5525949.344751528


In [12]:
for i in range(1, 5):
    s = gen_pareto_sans_fourths(10_000, 10 ** i, b = 2.1)
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(s))
    print(np.std(s))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
3.6245546243564313
10.483824329454297
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
5.677811024347859
3.403382990967455
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
7.536218293314596
2.8507070024912133
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
9.140792869493794
2.8861869153863933


In [13]:
for i in range(1, 5):
    s = gen_pareto_sans_fourths(10_000, 10 ** i, b = 1.9)
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(s))
    print(np.std(s))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
4.289307414949223
8.52589610677051
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
7.875097930486944
5.622884141245383
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
11.742828838089595
15.60620378120335
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
15.996641359461751
10.00319802957601


# Check whether E_{SRS}[1/J^4] converges:

In [2]:
k = cauchy.rvs(size=10)

In [3]:
np.mean(np.random.choice(k, 3, replace=False))

np.float64(0.17787420539033327)

In [4]:
def jn_given_population(s_sq, n: int, sims: int = 1_000, smaller_factor: int = 10):
    actual_sims = max(sims, len(s_sq) // smaller_factor)
    return [1 / (np.mean(np.random.choice(s_sq, n, replace=False)) ** 4) for _ in range(sims)]

In [5]:
def gen_norm_jn(N, n, smaller_factor=10):
    return jn_given_population(
            norm.rvs(size=N) ** 2, n=n, smaller_factor=smaller_factor
    ) 

def gen_chi_squared_jn(N, n, smaller_factor=10):
    return jn_given_population(
            chi2.rvs(size=N, df = 1) ** 2, n=n, smaller_factor=smaller_factor
    )

def gen_cauchy_jn(N, n, smaller_factor=10):
    return jn_given_population(
            cauchy.rvs(size=N) ** 2, n=n, smaller_factor=smaller_factor
    )


def gen_pareto_jn(N, n, b = 9, smaller_factor=10):
    return jn_given_population(
            pareto.rvs(size=N, b = b) ** 2, n=n, smaller_factor=smaller_factor
    )

### constant n, increasing N:

In [77]:
super_sims = 20

#### normal RV:

expectation for below: no, or at least this does not satisfy the assumptions for classical u-statistic literature

In [78]:
for i in range(1, 6):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_norm_jn(10 ** i, 10)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
9.812403724645362
31.049082607868176
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
20.958897407501144
11.43605321926172
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
27.785525227285923
27.68242434989221
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
19.69212922686581
9.162119189761043
~~~~~~~~~~~~~~~~~5~~~~~~~~~~~~~~~
17.01346048806104
5.4941131176087845


In [6]:
super_sims = 20

expectation for below: no

In [7]:
for i in range(1, 6):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_norm_jn(10 ** i, 1)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
5.772519422675527e+19
2.5161828604502275e+20
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
8.630563277970939e+28
3.747677662235374e+29
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
3.7990319312076484e+30
1.6559544700078887e+31
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
4.814473664476591e+39
2.0985787821508508e+40
~~~~~~~~~~~~~~~~~5~~~~~~~~~~~~~~~
6.053098214803902e+27
2.6247767497503354e+28


expectation for below: no, or at least this does not satisfy the assumptions for classical u-statistic literature

In [14]:
for i in range(2, 7):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_norm_jn(10 ** i, 16, smaller_factor=100)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
3.824343048685351
1.9234223453573427
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
4.473501263073026
1.0255861787129987
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
4.67824009296177
0.8149904168776031
~~~~~~~~~~~~~~~~~5~~~~~~~~~~~~~~~
4.925026594226969
1.0592846608817503
~~~~~~~~~~~~~~~~~6~~~~~~~~~~~~~~~
4.545891759912805
0.5704473953998235


expectation for below: yes

In [15]:
for i in range(2, 7):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_norm_jn(10 ** i, 17, smaller_factor=100)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
4.733763047857109
4.42729573807164
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
4.256666760794833
0.8991267042960781
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
4.044469269989458
0.5168708854478925
~~~~~~~~~~~~~~~~~5~~~~~~~~~~~~~~~
4.747089886091889
1.2503558053900319
~~~~~~~~~~~~~~~~~6~~~~~~~~~~~~~~~
4.119926694246409
0.473646740117171


expectation for below: unknown

In [79]:
for i in range(1, 6):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_chi_squared_jn(10 ** i, 10)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
21.93829672442158
84.00572674823653
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
92297.35388463804
352119.66744369594
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
2167.8506491876183
4567.011453299365
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
807.2745460082654
1185.2137072458622
~~~~~~~~~~~~~~~~~5~~~~~~~~~~~~~~~
36612.78003161496
132872.3711163346


expectation for below: no, because mean of cauchy is cauchy, and inverse of cauchy is cauchy

In [80]:
for i in range(1, 6):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_cauchy_jn(10 ** i, 10)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
0.02468086196272672
0.08315081415236003
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
0.6802079428184559
1.2795725564461966
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
0.7119259028626338
0.9513325024014526
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
0.7609212799109267
0.5557379510680903
~~~~~~~~~~~~~~~~~5~~~~~~~~~~~~~~~
1.0772061986093253
1.0290038024883146


expectation for below: unknown

In [81]:
for i in range(1, 6):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_pareto_jn(10 ** i, 10)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
0.3989689188092359
0.11641220770696734
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
0.39319487655164387
0.041389119285670815
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
0.39519264314770874
0.012603198060720015
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
0.39384869120748967
0.004914932548372808
~~~~~~~~~~~~~~~~~5~~~~~~~~~~~~~~~
0.39614746047838484
0.004035528102153502


### increasing n, increasing N:

In [84]:
super_sims = 1_000

In [85]:
for i in range(1, 5):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_norm_jn(10 ** i, max(10, 10 ** (i-1)), smaller_factor=100)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    # print(temp_samples_of_e_srs)
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
21.661541158076805
301.27820434517764
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
20.843138912156537
31.016521333049127
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
1.23035306174074
0.22044163810598083
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
1.0196361589784628
0.0597953662181929


In [86]:
for i in range(1, 4):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_norm_jn(10 ** i, max(10, 10 ** (i-1)), smaller_factor=100)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(pd.Series(temp_samples_of_e_srs).describe())
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
count    1000.000000
mean       14.443078
std       116.183879
min         0.013301
25%         0.459657
50%         1.534231
75%         5.412838
max      3262.770017
dtype: float64
14.44307779686571
116.125772158119
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
count    1000.000000
mean       22.490956
std        37.155356
min         1.093401
25%         7.541124
50%        12.810825
75%        23.100518
max       635.491729
dtype: float64
22.490955719900587
37.13677333088183
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
count    1000.000000
mean        1.235461
std         0.228543
min         0.691061
25%         1.077208
50%         1.210459
75%         1.370055
max         2.867052
dtype: float64
1.2354613951735751
0.22842821116843562


In [87]:
for i in range(1, 5):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_chi_squared_jn(10 ** i, max(10, 10 ** (i-1)), smaller_factor=100)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
130.3727448817593
2093.0588469799573
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
144521.29634729322
2599330.4683671854
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
0.034738860935486726
0.01382525439607548
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
0.01365480021607723
0.001742751306045806


In [88]:
for i in range(1, 5):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_cauchy_jn(10 ** i, max(10, 10 ** (i-1)), smaller_factor=100)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
0.7128704284423009
10.45858939607403
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
1.3600905773437928
8.687304646769729
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
7.83136729155336e-06
7.815487056346134e-06
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
6.66781440929385e-10
5.735405866814633e-10


In [89]:
for i in range(1, 5):
    temp_samples_of_e_srs = []
    for _ in range(super_sims):
        s = gen_pareto_jn(10 ** i, max(10, 10 ** (i-1)), smaller_factor=100)
        temp_samples_of_e_srs.append(np.mean(s))
        
    print(f"~~~~~~~~~~~~~~~~~{i}~~~~~~~~~~~~~~~")
    print(np.mean(temp_samples_of_e_srs))
    print(np.std(temp_samples_of_e_srs))

~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~
0.3962805962303229
0.1191845354699322
~~~~~~~~~~~~~~~~~2~~~~~~~~~~~~~~~
0.3949324191487045
0.03752261568412233
~~~~~~~~~~~~~~~~~3~~~~~~~~~~~~~~~
0.36818142252137653
0.013614908573804761
~~~~~~~~~~~~~~~~~4~~~~~~~~~~~~~~~
0.36618338178274323
0.0043032681767819865
