https://x.com/enodon/status/1931248617672847786

In [1]:
# Google Colabと自分のパソコンの両方で使えるようにするための工夫

using Pkg

"""
Names of packages that need no `Pkg.add`: the entries of the standard-library
directory followed by the direct dependencies reported by `Pkg.dependencies()`.
Kept as a list so membership checks are fast.
"""
_packages_added = vcat(
    sort!(readdir(Sys.STDLIB)),
    [dep.name for dep in values(Pkg.dependencies()) if dep.is_direct_dep],
)

"""
    add_pkg_if_not_added_yet(pkg)

Run `Pkg.add(pkg)` unless `pkg` is already listed in `_packages_added`.

After a package has been added, its name is appended to `_packages_added` so that
later calls for the same package skip `Pkg.add` instead of invoking it again.
Always returns `nothing`.
"""
function add_pkg_if_not_added_yet(pkg)
    pkg in _packages_added && return nothing
    println(stderr, "# $(pkg).jl is not added yet, so let's add it.")
    Pkg.add(pkg)
    # Remember the package name; only plain names are cached, since the cache
    # holds package-name strings.
    pkg isa AbstractString && push!(_packages_added, pkg)
    return nothing
end

"""
    find_using_pkgs(expr::Expr)

Collect, in order of appearance, the module names mentioned by `using` statements
anywhere inside `expr`. Only single-component names are collected: `using A` and
`using A: x` yield `"A"`, while dotted paths such as `using A.B` or `using .A`
are skipped. Returns a `Vector{String}` (duplicates are kept).
"""
function find_using_pkgs(expr::Expr)
    names = String[]
    function visit(node::Expr)
        if node.head != :using
            # Not a `using` statement: descend into sub-expressions only.
            for child in node.args
                child isa Expr && visit(child)
            end
            return nothing
        end
        for item in node.args
            # `using A`    -> Expr(:., :A)
            # `using A: x` -> Expr(:(:), Expr(:., :A), ...)
            root = if item.head == :.
                item
            elseif item.head == :(:)
                item.args[1]
            else
                nothing
            end
            root === nothing && continue
            # A single component means a plain top-level module name.
            length(root.args) == 1 && push!(names, string(root.args[1]))
        end
        return nothing
    end
    visit(expr)
    return names
end

"""
    @autoadd expr

Evaluate `expr` after first calling `add_pkg_if_not_added_yet` on every package
name that `find_using_pkgs` extracts from the `using` statements inside `expr`.

`expr` is escaped so that it is evaluated in the caller's scope: names it
references resolve where the macro is used, and variables it assigns are not
renamed by macro hygiene.
"""
macro autoadd(expr)
    pkgs = find_using_pkgs(expr)
    # Only the user's code is escaped; the helper call stays hygienic so it
    # resolves in the module that defines this macro.
    :(add_pkg_if_not_added_yet.($(pkgs)); $(esc(expr)))
end

@autoadd (macro with 1 method)

In [2]:
# Load the packages used below; @autoadd adds any that are not added yet.
@autoadd begin
    using Distributions, QuadGK, Roots
    using Plots
    default(fmt=:png)
end

In [3]:
"""
    pvalue_bin_score(k, n, p)

Two-sided normal-approximation P-value for observing `k` successes in `n` trials
when the success probability is hypothesized to be `p`.

NOTE(review): the standard error is computed from the observed proportion `k/n`
(a Wald-type statistic), not from the hypothesized `p` as a score test would do.
Confirm that the name reflects the intended test.
"""
function pvalue_bin_score(k, n, p)
    observed = k / n
    se_observed = sqrt(observed * (1 - observed) / n)
    zscore = (observed - p) / se_observed
    return 2 * ccdf(Normal(), abs(zscore))
end

"""
    expectval(f, bin::Binomial)

Return the sum of `f(k) * pdf(bin, k)` over every `k` in `support(bin)`,
i.e. the expected value of `f` under `bin`.
"""
function expectval(f, bin::Binomial)
    # Kept as a lazy generator so the terms are summed in the same order.
    weighted_terms = (f(k) * pdf(bin, k) for k in support(bin))
    return sum(weighted_terms)
end

"""
    power_bin_score(n, p0, p1; alphamin=0.0, alphamax=0.05)

Probability, when the number of successes follows `Binomial(n, p1)`, that the
P-value `pvalue_bin_score(k, n, p0)` satisfies `alphamin ≤ P-value < alphamax`.
"""
function power_bin_score(n, p0, p1; alphamin=0.0, alphamax=0.05)
    # Indicator of the P-value landing in the half-open band [alphamin, alphamax).
    in_band(k) = alphamin ≤ pvalue_bin_score(k, n, p0) < alphamax
    return expectval(in_band, Binomial(n, p1))
end

"""
    find_p1(n, p0; power=0.8)

Search the bracket `(p0, 1.0)` with `find_zero` for the `p1` at which
`power_bin_score(n, p0, p1)` equals `power`.
"""
function find_p1(n, p0; power=0.8)
    power_gap = p1 -> power_bin_score(n, p0, p1) - power
    bracket = (p0, 1.0)
    return find_zero(power_gap, bracket)
end

find_p1 (generic function with 1 method)

In [4]:
@show n, p0 = 100, 0.3
@show p1 = find_p1(n, p0)
# (rejection rate at p1 = p0, power at p1,
#  share of that power with P < 0.005, share with 0.01 ≤ P < 0.05)
let power_at_p1 = power_bin_score(n, p0, p1)
    (
        power_bin_score(n, p0, p0),
        power_at_p1,
        power_bin_score(n, p0, p1; alphamax=0.005) / power_at_p1,
        power_bin_score(n, p0, p1; alphamin=0.01) / power_at_p1,
    )
end

(n, p0) = (100, 0.3) = (100, 0.3)
p1 = find_p1(n, p0) = 0.4368151627921558


(0.04981982912453117, 0.7999999999999999, 0.6411070821571047, 0.2593684742439168)

In [5]:
@show n, p0 = 1000, 0.3
@show p1 = find_p1(n, p0)
# (rejection rate at p1 = p0, power at p1,
#  share of that power with P < 0.005, share with 0.01 ≤ P < 0.05)
let power_at_p1 = power_bin_score(n, p0, p1)
    (
        power_bin_score(n, p0, p0),
        power_at_p1,
        power_bin_score(n, p0, p1; alphamax=0.005) / power_at_p1,
        power_bin_score(n, p0, p1; alphamin=0.01) / power_at_p1,
    )
end

(n, p0) = (1000, 0.3) = (1000, 0.3)
p1 = find_p1(n, p0) = 0.3421419901254067


(0.04962173978405629, 0.7999999999999982, 0.6113539910048822, 0.2567317272576154)

In [6]:
@show n, p0 = 10000, 0.3
@show p1 = find_p1(n, p0)
# (rejection rate at p1 = p0, power at p1,
#  share of that power with P < 0.005, share with 0.01 ≤ P < 0.05)
let power_at_p1 = power_bin_score(n, p0, p1)
    (
        power_bin_score(n, p0, p0),
        power_at_p1,
        power_bin_score(n, p0, p1; alphamax=0.005) / power_at_p1,
        power_bin_score(n, p0, p1; alphamin=0.01) / power_at_p1,
    )
end

(n, p0) = (10000, 0.3) = (10000, 0.3)
p1 = find_p1(n, p0) = 0.3129543657640997


(0.04953745059457239, 0.8000000000000755, 0.6140458253493996, 0.2684503371436248)

In [7]:
@show n, p0 = 10000, 0.03
@show p1 = find_p1(n, p0)
# (rejection rate at p1 = p0, power at p1,
#  share of that power with P < 0.005, share with 0.01 ≤ P < 0.05)
let power_at_p1 = power_bin_score(n, p0, p1)
    (
        power_bin_score(n, p0, p0),
        power_at_p1,
        power_bin_score(n, p0, p1; alphamax=0.005) / power_at_p1,
        power_bin_score(n, p0, p1; alphamin=0.01) / power_at_p1,
    )
end

(n, p0) = (10000, 0.03) = (10000, 0.03)
p1 = find_p1(n, p0) = 0.035103295759015204


(0.050821946946857655, 0.8000000000000089, 0.6081515398107427, 0.28383718030562205)

In [8]:
@show n, p0 = 10000, 0.003
@show p1 = find_p1(n, p0)
# (rejection rate at p1 = p0, power at p1,
#  share of that power with P < 0.005, share with 0.01 ≤ P < 0.05)
let power_at_p1 = power_bin_score(n, p0, p1)
    (
        power_bin_score(n, p0, p0),
        power_at_p1,
        power_bin_score(n, p0, p1; alphamax=0.005) / power_at_p1,
        power_bin_score(n, p0, p1; alphamin=0.01) / power_at_p1,
    )
end

(n, p0) = (10000, 0.003) = (10000, 0.003)
p1 = find_p1(n, p0) = 0.004838379254309776


(0.06887322917787866, 0.7999999999999938, 0.5337580144635746, 0.32331205827294096)

In [9]:
@show n, p0 = 10000, 0.0003
@show p1 = find_p1(n, p0)
# (rejection rate at p1 = p0, power at p1,
#  share of that power with P < 0.005, share with 0.01 ≤ P < 0.05)
let power_at_p1 = power_bin_score(n, p0, p1)
    (
        power_bin_score(n, p0, p0),
        power_at_p1,
        power_bin_score(n, p0, p1; alphamax=0.005) / power_at_p1,
        power_bin_score(n, p0, p1; alphamin=0.01) / power_at_p1,
    )
end

(n, p0) = (10000, 0.0003) = (10000, 0.0003)
p1 = find_p1(n, p0) = 0.0011376080285137112


(0.20290037857217139, 0.7999999999999976, 0.3182241268463505, 0.41803583730065064)

In [10]:
"""
    pvalue_ztest(xbar, n)

Two-sided P-value of `xbar` with respect to `Normal(0, 1/sqrt(n))`:
twice the upper-tail probability at `abs(xbar)`.
"""
function pvalue_ztest(xbar, n)
    null_dist = Normal(0, 1 / sqrt(n))
    return 2 * ccdf(null_dist, abs(xbar))
end

"""
    power_ztest(n, mu1; alphamin=0.0, alphamax=0.05)

Probability, under `Normal(mu1, 1/sqrt(n))`, of falling in the two regions cut
out by the `alphamin/2`, `alphamax/2`, `1 - alphamax/2` and `1 - alphamin/2`
quantiles of `Normal(0, 1/sqrt(n))` — the lower region between the first two
quantiles and the upper region between the last two.
"""
function power_ztest(n, mu1; alphamin=0.0, alphamax=0.05)
    se = 1 / sqrt(n)
    null_dist = Normal(0, se)
    alt_dist = Normal(mu1, se)
    lower_outer = quantile(null_dist, alphamin / 2)
    lower_inner = quantile(null_dist, alphamax / 2)
    upper_inner = quantile(null_dist, 1 - alphamax / 2)
    upper_outer = quantile(null_dist, 1 - alphamin / 2)
    # Keep the terms in this order so floating-point rounding is unchanged.
    return cdf(alt_dist, lower_inner) - cdf(alt_dist, lower_outer) +
           ccdf(alt_dist, upper_inner) - ccdf(alt_dist, upper_outer)
end

"""
    find_mu1(n; power=0.8)

Search the bracket `(0.0, 10.0)` with `find_zero` for the `mu1` at which
`power_ztest(n, mu1)` equals `power`.
"""
function find_mu1(n; power=0.8)
    power_gap = mu1 -> power_ztest(n, mu1) - power
    bracket = (0.0, 10.0)
    return find_zero(power_gap, bracket)
end

find_mu1 (generic function with 1 method)

In [11]:
@show n = 100
@show mu1 = find_mu1(n)
# (rejection rate at mu1 = 0, power at mu1,
#  share of that power with P < 0.005, share with 0.01 ≤ P < 0.05)
let power_at_mu1 = power_ztest(n, mu1)
    (
        power_ztest(n, 0.0),
        power_at_mu1,
        power_ztest(n, mu1; alphamax=0.005) / power_at_mu1,
        power_ztest(n, mu1; alphamin=0.01) / power_at_mu1,
    )
end

n = 100 = 100
mu1 = find_mu1(n) = 0.28015817870135823


(0.0499999999999995, 0.8, 0.6222812438850653, 0.26337116258354565)

In [12]:
@show n = 1000
@show mu1 = find_mu1(n)
# (rejection rate at mu1 = 0, power at mu1,
#  share of that power with P < 0.005, share with 0.01 ≤ P < 0.05)
let power_at_mu1 = power_ztest(n, mu1)
    (
        power_ztest(n, 0.0),
        power_at_mu1,
        power_ztest(n, mu1; alphamax=0.005) / power_at_mu1,
        power_ztest(n, mu1; alphamin=0.01) / power_at_mu1,
    )
end

n = 1000 = 1000
mu1 = find_mu1(n) = 0.08859379498207656


(0.0499999999999995, 0.8, 0.6222812438850652, 0.2633711625835458)

In [13]:
@show n = 10000
@show mu1 = find_mu1(n)
# (rejection rate at mu1 = 0, power at mu1,
#  share of that power with P < 0.005, share with 0.01 ≤ P < 0.05)
let power_at_mu1 = power_ztest(n, mu1)
    (
        power_ztest(n, 0.0),
        power_at_mu1,
        power_ztest(n, mu1; alphamax=0.005) / power_at_mu1,
        power_ztest(n, mu1; alphamin=0.01) / power_at_mu1,
    )
end

n = 10000 = 10000
mu1 = find_mu1(n) = 0.028015817870135826


(0.04999999999999948, 0.8, 0.6222812438850656, 0.26337116258354554)