In [1]:
import numpy as np
from scipy.stats import norm, t, kstwo


In [2]:
# def ovl_at_sn(sn):
#     """Overlap coefficient given an S/N ratio."""
#     return 2 * norm.cdf(-0.5 * sn)

# def sn_from_p_ks(p, n):
#     """S/N ratio from a KS test p-value and sample size n."""
#     ks_stat = kstwo.isf(p, n)
#     sn = 2 * norm.ppf((ks_stat + 1) / 2)
#     return sn, ks_stat

# def sn_from_p_t(p, n):
#     """S/N ratio from a t-test p-value and sample size n."""
#     df = 2*n - 2
#     t_stat = t.isf(p/2, df)  # two-sided
#     sn = t_stat * np.sqrt(2/n)
#     return sn, t_stat

# # --- Example usage ---

# p = 0.01
# n = 30

# sn_ks, ks_stat = sn_from_p_ks(p, n)
# sn_t,  t_stat  = sn_from_p_t(p, n)

# print(f"KS: stat = {ks_stat:.3f},  S/N from KS = {sn_ks:.3f}")
# print(f"t:  stat = {t_stat:.3f},  S/N from t  = {sn_t:.3f}")

# # Overlap at different S/N ratios
# sn_values = [0.5, 1, 2, 3]
# for sn in sn_values:
#     ovl = ovl_at_sn(sn)
#     print(f"S/N = {sn:.1f},  Overlap = {ovl:.3f}")


In [3]:
from scipy.stats import norm, t, kstwo
import numpy as np

def ovl_at_sn(sn):
    """Overlap coefficient of two equal-variance normals separated by ``sn``.

    For two normal densities with the same standard deviation whose means
    are ``sn`` standard deviations apart, the shared area under both curves
    is ``2 * Phi(-|sn| / 2)``, where ``Phi`` is the standard normal CDF.

    Parameters
    ----------
    sn : float or array-like
        Signal-to-noise ratio (mean separation in units of sigma). Only the
        magnitude matters; the sign is ignored.

    Returns
    -------
    float or numpy.ndarray
        Overlap in [0, 1]: exactly 1 at ``sn == 0`` and decreasing towards 0
        as ``|sn|`` grows.
    """
    # Use the magnitude of the separation: without it a negative ``sn``
    # evaluates to 2 * Phi(+|sn| / 2), which exceeds 1 and is not an overlap.
    return 2 * norm.cdf(-0.5 * np.abs(sn))

def sn_from_p_ks(p, n1, n2=None):
    """S/N ratio implied by a KS test p-value for sample sizes n1 and n2.

    The two sample sizes are combined into an effective size
    ``n_eff = n1 * n2 / (n1 + n2)`` (rounded to the nearest integer), the
    critical KS statistic ``D`` is read from ``kstwo.isf(p, n_eff)``, and
    ``D`` is converted to a mean separation in sigma units through
    ``D = 2 * Phi(sn / 2) - 1``.

    Parameters
    ----------
    p : float
        KS test p-value; expected to lie strictly between 0 and 1.
    n1 : int
        Size of the first sample (>= 1).
    n2 : int, optional
        Size of the second sample (>= 1). If None, ``n2 = n1`` is used, i.e.
        two equal-size samples, which gives ``n_eff = n1 / 2``.

    Returns
    -------
    tuple
        ``(sn, ks_stat)``: the implied S/N ratio and the critical KS statistic.

    Raises
    ------
    ValueError
        If a sample size is below 1, or if the effective sample size rounds
        to 0 (e.g. ``n1 = n2 = 1``), where the KS distribution is undefined.
    """
    if n2 is None:
        n2 = n1
    if n1 < 1 or n2 < 1:
        raise ValueError(f"sample sizes must be >= 1, got n1={n1}, n2={n2}")

    n_eff = int(round((n1 * n2) / (n1 + n2)))
    if n_eff < 1:
        # kstwo requires n >= 1; passing 0 would silently yield NaN.
        raise ValueError(
            f"effective sample size rounds to 0 for n1={n1}, n2={n2}"
        )

    ks_stat = kstwo.isf(p, n_eff)
    # Invert D = 2 * Phi(sn / 2) - 1 for sn.
    sn = 2 * norm.ppf((ks_stat + 1) / 2)
    return sn, ks_stat

def sn_from_p_t(p, n1, n2=None):
    """S/N ratio implied by a two-sided t-test p-value for sizes n1 and n2.

    The critical t statistic is taken from Student's t with
    ``df = n1 + n2 - 2`` degrees of freedom at upper-tail probability
    ``p / 2``, then scaled by ``sqrt(1/n1 + 1/n2)`` to give the mean
    separation in sigma units.

    Parameters
    ----------
    p : float
        Two-sided t-test p-value; expected to lie strictly between 0 and 1.
    n1 : int
        Size of the first sample (>= 1).
    n2 : int, optional
        Size of the second sample (>= 1). If None, ``n2 = n1`` is used, i.e.
        two equal-size samples with ``df = 2 * n1 - 2``.

    Returns
    -------
    tuple
        ``(sn, t_stat)``: the implied S/N ratio and the critical t statistic.

    Raises
    ------
    ValueError
        If a sample size is below 1, or if ``n1 + n2 - 2`` is below 1
        (no degrees of freedom left, which would silently yield NaN).
    """
    if n2 is None:
        n2 = n1
    if n1 < 1 or n2 < 1:
        raise ValueError(f"sample sizes must be >= 1, got n1={n1}, n2={n2}")

    df = n1 + n2 - 2
    if df < 1:
        raise ValueError(
            f"need n1 + n2 >= 3 for a t-test, got n1={n1}, n2={n2}"
        )

    t_stat = t.isf(p / 2, df)  # two-sided: split p across both tails
    sn = t_stat * np.sqrt(1 / n1 + 1 / n2)
    return sn, t_stat

# --- Example usage ---

p, n1 = 0.01, 30

# Invert the same p-value through both tests; n2 is omitted, so n2 = n1.
sn_ks, ks_stat = sn_from_p_ks(p, n1)
sn_t, t_stat = sn_from_p_t(p, n1)

print(f"KS: stat = {ks_stat:.3f},  S/N from KS = {sn_ks:.3f}")
print(f"t:  stat = {t_stat:.3f},  S/N from t  = {sn_t:.3f}")

# Tabulate the overlap coefficient for a few S/N ratios.
sn_values = [0.5, 1, 2, 3]
for sn in sn_values:
    ovl = ovl_at_sn(sn)
    print(f"S/N = {sn:.1f},  Overlap = {ovl:.3f}")


KS: stat = 0.404,  S/N from KS = 1.061
t:  stat = 2.663,  S/N from t  = 0.688
S/N = 0.5,  Overlap = 0.803
S/N = 1.0,  Overlap = 0.617
S/N = 2.0,  Overlap = 0.317
S/N = 3.0,  Overlap = 0.134


In [4]:
# Spot-check the overlap coefficient at S/N = 1 and S/N = 2.
ovl_at_sn(1), ovl_at_sn(2)

(0.6170750774519738, 0.31731050786291415)

In [5]:
# Critical KS statistic (kstwo inverse survival function) at p = 0.01 for n = 30 and n = 20.
kstwo.isf(0.01, 30),kstwo.isf(0.01, 20)

(0.2898639065419554, 0.3524108916388937)

In [6]:
# One minus the critical KS statistic at p = 0.01 for n = 30 and n = 20.
1 - kstwo.isf(0.01, 30), 1 - kstwo.isf(0.01, 20)

(0.7101360934580446, 0.6475891083611063)

In [15]:
import pandas as pd

p = 0.01
n1_values = [30, 50]
sample_sizes = np.arange(20, 80, 10)

# Results are nested as {n1: {n2: S/N}} for each test.
sn_ks_closed_all = {}
sn_ttest_closed_all = {}

for n1 in n1_values:
    sn_ks_closed, sn_ttest_closed = {}, {}

    for n2 in sample_sizes:
        print(f'{n1=}, {n2=}')
        sn_ks, ks_stat = sn_from_p_ks(p, n1, n2)
        sn_t, t_stat = sn_from_p_t(p, n1, n2)
        sn_ks_closed[n2] = sn_ks
        sn_ttest_closed[n2] = sn_t

    sn_ks_closed_all[n1] = sn_ks_closed
    sn_ttest_closed_all[n1] = sn_ttest_closed

# One column per n1, one row per n2.
df_sn_ks = pd.DataFrame(sn_ks_closed_all)
df_sn_ttest = pd.DataFrame(sn_ttest_closed_all)

n1=30, n2=20
n1=30, n2=30
n1=30, n2=40
n1=30, n2=50
n1=30, n2=60
n1=30, n2=70
n1=50, n2=20
n1=50, n2=30
n1=50, n2=40
n1=50, n2=50
n1=50, n2=60
n1=50, n2=70


In [16]:
# Show the KS-derived S/N values, nested as {n1: {n2: S/N}}.
sn_ks_closed_all

{30: {20: 1.1926603688262172,
  30: 1.0608983639168763,
  40: 0.9941457784449532,
  50: 0.9386710981340697,
  60: 0.9142279990530949,
  70: 0.8916136546519501},
 50: {20: 1.099811036355417,
  30: 0.9386710981340697,
  40: 0.8706098795732118,
  50: 0.8155661625918895,
  60: 0.784218310427664,
  70: 0.7562478894171306}}

In [17]:
# Show the t-test-derived S/N values, nested as {n1: {n2: S/N}}.
sn_ttest_closed_all

{30: {20: 0.7742856084906022,
  30: 0.6876577344815223,
  40: 0.6400554417902719,
  50: 0.6097604074324199,
  60: 0.5887249549142771,
  70: 0.5732433615112794},
 50: {20: 0.7011456070449334,
  30: 0.6097604074324199,
  40: 0.5585135318727165,
  50: 0.5253862191512747,
  60: 0.502097765160818,
  70: 0.4847847557806956}}

In [9]:
# Show the t-test S/N table (one column per n1, one row per n2).
# NOTE(review): the recorded output has rows 20, 25, 30, 40, 50, 80, which does not
# match the np.arange(20, 80, 10) grid used where df_sn_ttest is built, and this cell's
# execution count is out of order -- the output looks stale; re-run top to bottom.
df_sn_ttest

Unnamed: 0,30,50
20,0.774286,0.701146
25,0.723533,0.647858
30,0.687658,0.60976
40,0.640055,0.558514
50,0.60976,0.525386
80,0.561362,0.471387


In [10]:
# Alternative grid: np.concatenate([np.arange(10, 45, 5), np.arange(50, 110, 10), np.arange(120, 220, 20)])
sample_sizes = [20, 25, 30, 40, 50, 80]
p = 0.01
n1 = 30

# S/N implied by each test, keyed by the second sample size n2.
sn_ks_closed, sn_ttest_closed = {}, {}

for n2 in sample_sizes:
    print(f'{n2=}')
    sn_ks, ks_stat = sn_from_p_ks(p, n1, n2)
    sn_t, t_stat = sn_from_p_t(p, n1, n2)
    sn_ks_closed[n2] = sn_ks
    sn_ttest_closed[n2] = sn_t

n2=20
n2=25
n2=30
n2=40
n2=50
n2=80


In [11]:
# Show the KS-derived S/N values keyed by n2 (n1 fixed).
sn_ks_closed

{20: 1.1926603688262172,
 25: 1.099811036355417,
 30: 1.0608983639168763,
 40: 0.9941457784449532,
 50: 0.9386710981340697,
 80: 0.8706098795732118}

In [12]:
# Show the t-test-derived S/N values keyed by n2 (n1 fixed).
sn_ttest_closed

{20: 0.7742856084906022,
 25: 0.7235329902399202,
 30: 0.6876577344815223,
 40: 0.6400554417902719,
 50: 0.6097604074324199,
 80: 0.5613623671251574}

In [13]:
# Hard-coded S/N values keyed by sample size.
# NOTE(review): presumably pasted from another run -- provenance is not shown here; confirm.
sn_ks_best = {
    10: 1.7499705673550827,
    15: 1.45726269707422,
    20: 1.2247774799615319,
    25: 1.1714279247068289,
    30: 1.0309835143148263,
    40: 0.8613144994358745,
    50: 0.7662080894437514,
    80: 0.7406348131558262,
}
sn_ks_best

{10: 1.7499705673550827,
 15: 1.45726269707422,
 20: 1.2247774799615319,
 25: 1.1714279247068289,
 30: 1.0309835143148263,
 40: 0.8613144994358745,
 50: 0.7662080894437514,
 80: 0.7406348131558262}

In [8]:
# One minus the critical KS statistic for p-value `p` and sample size `n`.
# NOTE(review): `n` is not assigned in any active cell visible in this notebook (only in a
# commented-out cell), so this would raise NameError on a fresh kernel -- define `n` here or confirm its source.
1 - kstwo.isf(p, n)

0.8858444377173919

In [9]:
from scipy.stats import norm

# Standard normal parameters.
mu = 0
sigma = 1

# CDF evaluated at -2 and +2 for N(mu, sigma).
low = norm.cdf(-2, loc=mu, scale=sigma)
high = norm.cdf(2, loc=mu, scale=sigma)


In [10]:
prob = high - low   # ≈ 0.9545
prob

0.9544997361036416

In [11]:
from scipy.stats import norm

# Standard-normal probability mass between -sn and +sn.
sn = 1
low = norm.cdf(-sn)
high = norm.cdf(sn)
prob = high - low
prob

0.6826894921370859