In [1]:
import math

import numpy as np
import polars as pl
import powerlawrs

In [2]:
# Relative path to the reference dataset.
file = "../reference_data/blackouts.txt"

# polars and pandas do not reliably auto-detect header rows, so declare explicitly
# that this file has none instead of relying on detection.
df = pl.read_csv(file, has_header=False)
# to_series() with no argument returns the first column of the frame.
data = df.to_series()

# API
## Fitting procedure

In [3]:
# 1. Generate an alpha parameter via MLE for every x_min in the data.
#    The call returns the candidate x_min values together with the alpha fitted for each.
x_mins, alphas = powerlawrs.estimation.find_alphas_fast(data)
# Print the lengths so the number of candidates can be compared with the sample size.
print(f"n: {len(data)}, n_x_mins: {len(x_mins)}, n_alphas: {len(alphas)}")

n: 211, n_x_mins: 210, n_alphas: 210


In [4]:
# 2. Find the pair with the lowest KS statistic. This is the estimated best fit.
#    The printed result exposes x_min, alpha, D (presumably the KS distance) and len_tail.
best_fit = powerlawrs.gof.gof(data, alphas=alphas, x_mins=x_mins)
print(f"{best_fit}")

Fitment(x_min=230000, alpha=1.2726372198302858, D=0.06067379629443781, len_tail=59)


Steps 1 and 2 above are abstracted away via ```powerlawrs.fit()```

## Numerical stability 
Comparison of ```find_alphas_exhaustive()``` and ```find_alphas_fast()```. The former is technically more accurate than the latter, but depending on the context the loss of precision is negligible compared with the significant performance improvement.

In [5]:
# 1. Generate an alpha parameter via MLE for every x_min in the data via find_alphas_exhaustive()
x_mins_ex, alphas_ex = powerlawrs.estimation.find_alphas_exhaustive(data)

# 2. Find the pair with the lowest KS statistic. This is the estimated best fit.
best_fit_ex = powerlawrs.gof.gof(data, alphas=alphas_ex, x_mins=x_mins_ex)

# Compare with best_fit from the find_alphas_fast() run above.
# The reported difference is (fast alpha) - (exhaustive alpha).
print(f"find_alphas_exhaustive() alpha: {best_fit_ex.alpha}")
print(f"find_alphas_fast() alpha:\t{best_fit.alpha}")
print(f"Difference: {best_fit.alpha - best_fit_ex.alpha}")

find_alphas_exhaustive() alpha: 1.2726372198302882
find_alphas_fast() alpha:	1.2726372198302858
Difference: -2.4424906541753444e-15


## Parameter uncertainty

In [6]:
# Estimate the uncertainty of the fitted parameters as sample standard deviations.
# NOTE(review): m=1000 is presumably the number of bootstrap resamples - confirm against the powerlawrs docs.
xm_std, a_std = powerlawrs.estimation.param_est(data, m=1000)
print(f"stdev (sample) x_min: {xm_std}, stdev (sample) alpha: {a_std}")

stdev (sample) x_min: 83619.16637398613, stdev (sample) alpha: 0.2538391903889558


## Hypothesis test 

In [7]:
# Run the experiment
# Set the minimum precision of the p-value of the KS test.
precision = 0.01 # p-value should be accurate to within 0.01
# The test is run on the best fit found above (alpha, x_min and its KS statistic D).
H0 = powerlawrs.hypothesis.hypothesis_test(data, precision, best_fit.alpha, best_fit.x_min, best_fit.D)

Generating M = 2500 simulated datasets of length n = 211 with tail size 59 and probability of the tail P(tail|data) = 0.2796208530805687


In [8]:
# hypothesis_test() calls powerlawrs.util.sim.calculate_sim_params() to determine the number of
# simulated datasets required given the desired precision. Call it directly here to inspect the result.
simparams_dict = powerlawrs.util.sim.calculate_sim_params(precision, data, best_fit.x_min)

In [9]:
# Which will require 2500 synthetic datasets of length 211. 59 of the 211 samples will be drawn
# from a Pareto Type I with the parameters found above.
simparams_dict

{'num_sims_m': 2500,
 'sim_len_n': 211,
 'n_tail': 59,
 'p_tail': 0.2796208530805687}

## Stats module

In [10]:
# Arithmetic mean of the sample.
powerlawrs.stats.descriptive.mean(data)

253868.68246445496

In [11]:
# Variance of the sample.
# NOTE(review): the second argument (1) is presumably the delta degrees of freedom,
# i.e. the sample variance - confirm against the powerlawrs docs.
powerlawrs.stats.descriptive.variance(data, 1)

372476564023.59814

In [12]:
# Draw 3 values at random from the data. No seed is set here, so expect different output per run.
# NOTE(review): whether sampling is with replacement is not visible from this call - confirm.
powerlawrs.stats.random.random_choice(data, 3)

[500000.0, 10000.0, 272000.0]

In [13]:
# Draw 3 random values; the recorded output lies between 0 and 1, consistent with U(0, 1).
powerlawrs.stats.random.random_uniform(3)

[0.19728301803368797, 0.8671134911884262, 0.2837687125001269]

In [14]:
# Define a standard normal CDF in Python.
# Written as a named function rather than a lambda bound to a name, so it carries a
# docstring and a real __name__ in tracebacks. `math` is imported in the import cell.
def norm_cdf(x: float) -> float:
    """Return the standard normal CDF at ``x``: 0.5 * (1 + erf(x / sqrt(2)))."""
    return 0.5 * (1 + math.erf(x / math.sqrt(2.0)))

# Small sample listed in ascending order.
# NOTE(review): the name ks_1sam_sorted suggests the input must already be sorted - confirm.
sorted_data = [-1.1, -0.5, 0.1, 0.2, 1.5]

# Call the Rust function, passing the Python CDF as an argument.
# The result unpacks into the three one-sample KS statistics printed below.
(d_plus, d_minus, d_max) = powerlawrs.stats.ks.ks_1sam_sorted(sorted_data, norm_cdf)

print(f"D+: {d_plus}")
print(f"D-: {d_minus}")
print(f"D max: {d_max}")

D+: 0.22074029056089706
D-: 0.13982783727702897
D max: 0.22074029056089706


## Util module

In [15]:
# 5 evenly spaced values from 0 to 10; the recorded output includes both endpoints.
powerlawrs.util.linspace(0,10,5)

[0.0, 2.5, 5.0, 7.5, 10.0]

In [16]:
# Compute the simulation parameters from the precision and fitted x_min defined earlier,
# rather than repeating them as literals (0.01 and 230000) that go stale if the fit changes.
simparams_dict = powerlawrs.util.sim.calculate_sim_params(precision, data, best_fit.x_min)
simparams_dict

{'num_sims_m': 2500,
 'sim_len_n': 211,
 'n_tail': 59,
 'p_tail': 0.2796208530805687}

In [17]:
# Convert the simparams dict to the Rust struct; the dict keys are passed as keyword arguments.
simparams_struct = powerlawrs.util.sim.PySimParams(**simparams_dict)

# Use the struct as an argument. The fitted x_min and alpha are passed directly instead of
# hand-copied literals (230000 and the rounded 1.27), so the simulation uses the exact estimate.
sim_data = powerlawrs.util.sim.generate_synthetic_datasets(
    data, best_fit.x_min, simparams_struct, best_fit.alpha
)

In [18]:
# Note: the library does not yet implement the zeta distribution for discrete data.
# The recorded frame has 211 columns, matching sim_len_n, so each row is presumably one simulated dataset.
pl.from_numpy(np.array(sim_data))

column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20,column_21,column_22,column_23,column_24,column_25,column_26,column_27,column_28,column_29,column_30,column_31,column_32,column_33,column_34,column_35,column_36,…,column_174,column_175,column_176,column_177,column_178,column_179,column_180,column_181,column_182,column_183,column_184,column_185,column_186,column_187,column_188,column_189,column_190,column_191,column_192,column_193,column_194,column_195,column_196,column_197,column_198,column_199,column_200,column_201,column_202,column_203,column_204,column_205,column_206,column_207,column_208,column_209,column_210
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
58000.0,146000.0,29000.0,472498.002572,147000.0,1.0913e6,269480.532943,988059.531398,1.3567e6,146000.0,290654.425902,246933.02961,29000.0,60000.0,15000.0,91000.0,7500.0,94285.0,1.5255e6,33000.0,18819.0,173000.0,2000.0,18000.0,18000.0,95000.0,14273.0,88000.0,252246.143124,92000.0,160000.0,94285.0,50000.0,160000.0,11529.0,164500.0,25000.0,…,29900.0,164500.0,261635.723312,82500.0,50000.0,56000.0,10000.0,80000.0,60000.0,4150.0,965915.901299,327739.866317,66005.0,545321.344718,95630.0,303037.45329,163000.0,166000.0,1.5180e6,50000.0,158000.0,43000.0,858511.12725,1.3236e7,160000.0,65000.0,60000.0,51000.0,271202.723833,881523.02601,20000.0,536884.093815,114500.0,146000.0,415409.037563,148000.0,53000.0
95000.0,791190.923494,56000.0,81000.0,29000.0,59000.0,246407.220088,70000.0,45000.0,29000.0,203000.0,66005.0,25000.0,636436.835205,29900.0,63500.0,364910.091142,160000.0,51000.0,29000.0,298587.492896,206000.0,260263.742898,115000.0,142000.0,4.3180e6,2.2741e6,50000.0,95630.0,15000.0,325804.318677,298134.569214,11000.0,56000.0,30001.0,349069.221769,158000.0,…,75000.0,20000.0,32000.0,63500.0,4150.0,45000.0,306959.338264,128000.0,95630.0,164500.0,100000.0,29900.0,24506.0,43696.0,80000.0,200000.0,50000.0,114500.0,18000.0,236092.044161,74000.0,92000.0,252151.363281,219000.0,312716.558648,160000.0,26334.0,48000.0,2900.0,30000.0,115000.0,92000.0,62000.0,35000.0,60000.0,114500.0,50000.0
62000.0,71000.0,25000.0,80000.0,158000.0,702342.684584,425899.136519,593437.498032,173000.0,488293.658238,506164.477604,1800.0,24506.0,323166.43243,114000.0,60000.0,922744.793778,50000.0,319715.935934,365554.348023,18000.0,30001.0,264841.55618,19000.0,29900.0,173000.0,56000.0,70000.0,429497.351278,407040.900786,120000.0,499992.456277,294256.767573,50000.0,55000.0,50000.0,191000.0,…,43000.0,56000.0,276401.329778,32000.0,29000.0,122000.0,18351.0,1.0778e6,147000.0,8000.0,100000.0,296730.611654,50000.0,145000.0,33000.0,145000.0,29000.0,75000.0,2000.0,233444.500778,25000.0,38500.0,60000.0,55000.0,92000.0,115000.0,4150.0,51000.0,92000.0,706568.707031,25000.0,59000.0,236376.46859,106850.0,51000.0,17000.0,70000.0
15000.0,94285.0,1646.0,55000.0,40000.0,30000.0,30001.0,63500.0,366966.011567,1800.0,145000.0,25000.0,70000.0,25000.0,15000.0,60000.0,3.1294e6,26334.0,388810.069386,14273.0,160000.0,112000.0,443986.249157,872219.236091,163000.0,29900.0,11529.0,8000.0,59000.0,14273.0,124000.0,71000.0,145000.0,50000.0,337751.236655,166000.0,1.0332e6,…,2.7969e6,2900.0,92000.0,1646.0,4.3125e6,130000.0,164500.0,25000.0,7500.0,40911.0,38500.0,94285.0,2000.0,408900.648516,24000.0,50000.0,447278.149537,40000.0,160000.0,1.0588e6,36073.0,1.4220e6,219000.0,100000.0,70000.0,25000.0,250955.461732,203000.0,160000.0,14273.0,1.3435e6,9000.0,120000.0,30000.0,114500.0,1.9587e6,25000.0
128000.0,80000.0,210882.0,25000.0,17000.0,8000.0,888684.267119,354475.868304,100000.0,429950.615983,40000.0,70000.0,32000.0,40911.0,120000.0,56000.0,45000.0,60000.0,65000.0,62000.0,3.1859e6,106850.0,62000.0,24506.0,20000.0,238112.100993,60000.0,82500.0,326921.529391,333687.125719,393377.613205,791061.318704,58000.0,75000.0,287480.105324,381796.776624,250613.588628,…,160000.0,315951.674154,142000.0,142000.0,146000.0,287752.50202,62000.0,50000.0,2900.0,148000.0,727286.81005,39500.0,160000.0,126000.0,43000.0,334272.829248,1.4675e6,2.4425e6,166000.0,58000.0,148000.0,100000.0,50000.0,94285.0,80000.0,147000.0,259004.108518,795385.238304,24000.0,650034.099682,130000.0,60000.0,30500.0,18819.0,60000.0,50000.0,71000.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
46000.0,126000.0,9000.0,509549.327803,50000.0,100000.0,18000.0,70000.0,173000.0,75000.0,277584.333633,17000.0,877917.011007,50000.0,1.1680e6,357505.695907,387440.293233,316211.536534,8000.0,251035.485978,92000.0,112000.0,18000.0,95000.0,55000.0,24506.0,190000.0,164500.0,120000.0,438853.602629,94285.0,126000.0,963336.692641,542496.636794,18000.0,371145.517015,147000.0,…,236307.729928,11000.0,112000.0,92000.0,66005.0,45000.0,75000.0,46000.0,281344.429489,353840.660864,1646.0,266059.909123,46000.0,112000.0,166000.0,273481.347105,40000.0,286335.172848,295884.379059,30000.0,55000.0,30000.0,50000.0,88000.0,146000.0,112000.0,343274.572129,39500.0,19000.0,51000.0,50462.0,190000.0,5300.0,18000.0,91000.0,265478.466074,442853.879817
60000.0,50000.0,60000.0,368731.16645,147000.0,124000.0,50000.0,30000.0,251400.577133,336536.399195,50000.0,20000.0,247498.972211,35000.0,20000.0,2000.0,12000.0,20000.0,147000.0,29000.0,14273.0,39500.0,10000.0,71000.0,50000.0,29900.0,1800.0,50462.0,65000.0,819722.036046,24506.0,53000.0,120000.0,11529.0,43696.0,354197.250707,15000.0,…,66005.0,36073.0,114500.0,124000.0,18000.0,60000.0,130000.0,11529.0,55000.0,115000.0,92000.0,19000.0,50000.0,88000.0,51000.0,65000.0,1.3389e6,95630.0,40000.0,50000.0,113200.0,51000.0,51000.0,60000.0,230347.8647,50000.0,91000.0,453407.548979,39500.0,262980.106711,350406.198251,18351.0,80000.0,25000.0,4150.0,82500.0,65000.0
50000.0,60000.0,19000.0,24506.0,50000.0,480305.240466,641381.066417,11000.0,130000.0,29900.0,4.0288e6,75000.0,40000.0,30001.0,160000.0,133000.0,277978.593971,300257.614419,158000.0,82500.0,286519.495804,25000.0,63500.0,278810.349529,260578.930525,14273.0,32000.0,200000.0,25000.0,507310.221122,18000.0,56000.0,690645.055765,1646.0,713783.457661,62000.0,1.0278e6,…,30500.0,142000.0,2900.0,50000.0,160000.0,133000.0,163000.0,122000.0,750867.420246,191000.0,455283.783,282879.313435,30000.0,2000.0,114500.0,768071.064925,25000.0,24000.0,200000.0,24506.0,568172.711225,63500.0,50000.0,284680.441466,92000.0,95000.0,727671.723832,19000.0,40000.0,234385.641613,50000.0,51000.0,1.3515e6,4150.0,4150.0,163000.0,94285.0
7500.0,376142.565241,40911.0,15000.0,30000.0,71000.0,60000.0,30500.0,18000.0,403099.582299,5300.0,320306.32722,370752.472595,92000.0,113200.0,33000.0,314477.721852,114000.0,50000.0,236301.429613,146000.0,124000.0,38500.0,302079.88474,81000.0,284022.413245,241967.704962,359793.380983,53000.0,363987.728034,721184.523142,207200.0,75000.0,707366.097188,50462.0,51000.0,244562.162011,…,71000.0,271585.001928,43000.0,25000.0,328505.817379,1.1245e6,26334.0,902562.700241,100000.0,71000.0,2.2863e6,26334.0,322189.305625,46000.0,95630.0,322253.398846,18819.0,126000.0,71000.0,1800.0,255704.967984,48000.0,605384.281635,1.4504e6,281878.669278,414731.391369,65000.0,130000.0,29000.0,773343.748056,342631.751974,25000.0,15000.0,50000.0,35000.0,250269.97977,490125.841515


# Distributions
## Generic Power-Law

In [19]:
# Instantiate the class.
# NOTE(review): the arguments are presumably (alpha, x_min) - confirm. The exponent 2.2726 is
# 1 + the 1.2726 used for the Pareto Type I section below, and the recorded pdf/cdf/ccdf
# outputs of the two sections agree to about 15 significant digits.
pl_class = powerlawrs.dist.powerlaw.Powerlaw(2.2726, 230000)

In [20]:
# pdf: probability density evaluated at x = 500000
pl_class.pdf(500000)

9.47430869971139e-07

In [21]:
# cdf: cumulative distribution function evaluated at x = 500000
pl_class.cdf(500000)

0.627757791147596

In [22]:
# ccdf: complementary cdf at x = 500000 (the recorded cdf and ccdf outputs sum to 1)
pl_class.ccdf(500000)

0.372242208852404

In [23]:
# rv: turn a uniform draw into a random variate of this distribution
# (presumably by inverse-transform sampling - confirm).
# Generate a random U(0,1) draw; no seed is set, so the result changes on every run.
u = np.random.rand()
pl_class.rv(u)

454936.6539476187

## Pareto Type I

In [24]:
# Instantiate the class.
# NOTE(review): the arguments are presumably (alpha, x_min) - confirm. The recorded cdf(500000)
# output matches 1 - (230000 / 500000) ** 1.2726, which is consistent with that reading.
pareto_class = powerlawrs.dist.pareto.Pareto(1.2726, 230000)

In [25]:
# pdf: probability density evaluated at x = 500000
pareto_class.pdf(500000)

9.474308699711417e-07

In [26]:
# cdf: cumulative distribution function evaluated at x = 500000
pareto_class.cdf(500000)

0.6277577911475959

In [27]:
# ccdf: complementary cdf at x = 500000 (the recorded cdf and ccdf outputs sum to 1)
pareto_class.ccdf(500000)

0.3722422088524041

In [28]:
# rv: turn a uniform draw into a random variate of this distribution
# (presumably by inverse-transform sampling - confirm).
# Generate a random U(0,1) draw; no seed is set, so the result changes on every run.
u = np.random.rand()
pareto_class.rv(u)

544030.5117932835

## Exponential

In [29]:
# Instantiate the class.
# The recorded pdf(2) output matches 1.5 * exp(-1.5 * 2), which is consistent with 1.5 being
# the rate parameter (lambda).
expo_class = powerlawrs.dist.exponential.Exponential(1.5)

In [30]:
# pdf: probability density evaluated at x = 2
expo_class.pdf(2)

0.07468060255179593

In [31]:
# cdf: cumulative distribution function evaluated at x = 2
expo_class.cdf(2)

0.950212931632136

In [32]:
# ccdf: complementary cdf at x = 2 (the recorded cdf and ccdf outputs sum to 1)
expo_class.ccdf(2)

0.04978706836786395

In [33]:
# rv: turn a uniform draw into a random variate of this distribution
# (presumably by inverse-transform sampling - confirm).
# Generate a random U(0,1) draw; no seed is set, so the result changes on every run.
u = np.random.rand()
expo_class.rv(u)

0.4247431129619941