# Sampler Validation: Primitive Distributions

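This notebook validates the statistical correctness of Talyn's primitive distributions. For each distribution we compare empirical moments (mean, variance and, for the normal, skewness) against their theoretical values and run a goodness-of-fit test: chi-square for the discrete Bernoulli case, Kolmogorov–Smirnov for the continuous ones. We then plot an empirical histogram against the theoretical PDF, show how the empirical mean converges as the sample size grows, and check degenerate edge-case parameters. Note that the cells below draw their samples from `scipy.stats`, so as written they exercise the SciPy reference samplers.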


## 1. Bernoulli Distribution


In [None]:
import numpy as np
from scipy.stats import bernoulli, chisquare
# Bernoulli(p): compare sample moments with their closed-form values and run a
# chi-square goodness-of-fit test on the observed 0/1 counts.
p = 0.7
n_samples = 1000
seed = 1  # fixed seed so Restart & Run All reproduces the printed numbers
samples = bernoulli.rvs(p, size=n_samples, random_state=np.random.default_rng(seed))
emp_mean = np.mean(samples)
# ddof=1 yields the unbiased sample variance, the estimator whose expectation is p*(1-p).
emp_var = np.var(samples, ddof=1)
theo_mean = p
theo_var = p*(1-p)
print('Empirical mean:', emp_mean, 'Theoretical mean:', theo_mean)
print('Empirical var:', emp_var, 'Theoretical var:', theo_var)
# Chi-square test
# minlength=2 keeps both bins present even if one outcome never occurs.
counts = np.bincount(samples, minlength=2)
# Expected counts are derived from n_samples so they always match the draw size.
expected = [n_samples*(1-p), n_samples*p]
chi2, pval = chisquare(counts, f_exp=expected)
print('Chi-square p-value:', pval)


## 2. Uniform Distribution


In [None]:
from scipy.stats import uniform, kstest
# Uniform(0, 1): compare sample moments with 1/2 and 1/12, then run a
# Kolmogorov-Smirnov test against the standard uniform CDF.
n_samples = 1000
seed = 2  # fixed seed so Restart & Run All reproduces the printed numbers
samples = uniform.rvs(size=n_samples, random_state=np.random.default_rng(seed))
emp_mean = np.mean(samples)
# ddof=1 yields the unbiased sample variance, comparable directly with 1/12.
emp_var = np.var(samples, ddof=1)
theo_mean = 0.5
theo_var = 1/12
print('Empirical mean:', emp_mean, 'Theoretical mean:', theo_mean)
print('Empirical var:', emp_var, 'Theoretical var:', theo_var)
# KS test
ks, pval = kstest(samples, 'uniform')
print('KS p-value:', pval)


## 3. Normal Distribution


In [None]:
from scipy.stats import norm, skew
# Normal(0, 1): compare sample mean, variance and skewness with 0, 1 and 0,
# then run a Kolmogorov-Smirnov test against the standard normal CDF.
n_samples = 1000
seed = 3  # fixed seed so Restart & Run All reproduces the printed numbers
samples = norm.rvs(loc=0, scale=1, size=n_samples, random_state=np.random.default_rng(seed))
emp_mean = np.mean(samples)
# ddof=1 yields the unbiased sample variance, comparable directly with 1.
emp_var = np.var(samples, ddof=1)
emp_skew = skew(samples)
print('Empirical mean:', emp_mean, 'Theoretical mean: 0')
print('Empirical var:', emp_var, 'Theoretical var: 1')
print('Empirical skew:', emp_skew, 'Theoretical skew: 0')
ks, pval = kstest(samples, 'norm')
print('KS p-value:', pval)


## 4. Beta Distribution


In [None]:
from scipy.stats import beta
# Beta(a, b): compare sample moments with the closed-form mean a/(a+b) and
# variance ab/((a+b)^2 (a+b+1)), then run a Kolmogorov-Smirnov test.
a, b = 2, 5
n_samples = 1000
seed = 4  # fixed seed so Restart & Run All reproduces the printed numbers
samples = beta.rvs(a, b, size=n_samples, random_state=np.random.default_rng(seed))
emp_mean = np.mean(samples)
# ddof=1 yields the unbiased sample variance, comparable directly with theo_var.
emp_var = np.var(samples, ddof=1)
theo_mean = a/(a+b)
theo_var = a*b/((a+b)**2*(a+b+1))
print('Empirical mean:', emp_mean, 'Theoretical mean:', theo_mean)
print('Empirical var:', emp_var, 'Theoretical var:', theo_var)
# The shape parameters are forwarded to the reference CDF through args.
ks, pval = kstest(samples, 'beta', args=(a, b))
print('KS p-value:', pval)


## 5. Exponential Distribution


In [None]:
from scipy.stats import expon
# Exponential(scale=1): compare sample moments with mean 1 and variance 1,
# then run a Kolmogorov-Smirnov test against the unit exponential CDF.
n_samples = 1000
seed = 5  # fixed seed so Restart & Run All reproduces the printed numbers
samples = expon.rvs(scale=1, size=n_samples, random_state=np.random.default_rng(seed))
emp_mean = np.mean(samples)
# ddof=1 yields the unbiased sample variance, comparable directly with 1.
emp_var = np.var(samples, ddof=1)
theo_mean = 1
theo_var = 1
print('Empirical mean:', emp_mean, 'Theoretical mean:', theo_mean)
print('Empirical var:', emp_var, 'Theoretical var:', theo_var)
ks, pval = kstest(samples, 'expon')
print('KS p-value:', pval)


## 6. Histograms and PDFs


In [None]:
import matplotlib.pyplot as plt
# Overlay a density-normalised histogram of standard-normal draws on the
# analytic standard-normal PDF.
n_samples = 1000
seed = 6  # fixed seed so the figure is identical on every Restart & Run All
x = np.linspace(-3, 3, 100)
fig, ax = plt.subplots()
# density=True rescales bar heights so the histogram is comparable with the PDF curve.
ax.hist(
    norm.rvs(size=n_samples, random_state=np.random.default_rng(seed)),
    bins=30,
    density=True,
    alpha=0.5,
    label='Empirical',
)
ax.plot(x, norm.pdf(x), label='Theoretical PDF')
ax.set_xlabel('x')
ax.set_ylabel('Density')
ax.set_title('Normal Distribution: Empirical vs Theoretical')
ax.legend()
plt.show()


## 7. Vary Sample Size to Show Convergence


In [None]:
# Plot the empirical mean of standard-normal draws at increasing sample sizes
# against the theoretical mean of 0.
sizes = [10, 100, 1000, 10000]
seed = 7  # fixed seed so the curve is identical on every Restart & Run All
# A single generator is shared across sizes: each size gets fresh draws from the
# same stream instead of re-seeding (which would reuse the same leading values).
rng = np.random.default_rng(seed)
means = [np.mean(norm.rvs(size=s, random_state=rng)) for s in sizes]
fig, ax = plt.subplots()
ax.plot(sizes, means, 'o-')
ax.axhline(0, color='red', linestyle='--', label='Theoretical mean')
# Sample sizes span several orders of magnitude, hence the log x-axis.
ax.set_xscale('log')
ax.set_xlabel('Sample size')
ax.set_ylabel('Empirical mean')
ax.set_title('Convergence of Empirical Mean')
ax.legend()
plt.show()


## 8. Pathological Inputs


In [None]:
# Degenerate parameters: both samplers should collapse to a point mass, so the
# draws are deterministic and can be asserted exactly rather than only printed.
n_draws = 10
# Normal with σ=0
samples = norm.rvs(loc=5, scale=0, size=n_draws)
print('Normal(5,0) samples:', samples)
assert samples.shape == (n_draws,) and np.all(samples == 5), 'Normal(5, 0) must return loc for every draw'
# Bernoulli with p=1
samples = bernoulli.rvs(1, size=n_draws)
print('Bernoulli(1) samples:', samples)
assert samples.shape == (n_draws,) and np.all(samples == 1), 'Bernoulli(1) must return 1 for every draw'
