In [1]:
import powerlawrs
import polars as pl
import numpy as np
import pandas as pd
import math

In [2]:
# Relative path to the blackout dataset.
file = "../reference_data/blackouts.txt"

# Neither polars nor pandas detects headers reliably, so do not rely on
# auto-detection: has_header=False makes the first line be read as data.
df = pl.read_csv(file, has_header=False)
# to_series() with no index returns the frame's first column as a Series.
data = df.to_series()

# Fit Pareto Type I
1. Estimate the alpha parameter for every candidate x_min in the data

In [82]:
# Unpack the two results: the candidate x_min values and the alpha estimated
# for each one (presumably index-aligned — confirm against the powerlawrs docs).
(x_mins, alphas) = powerlawrs.dist.pareto.estimation.find_alphas_fast(data)

2. Run a goodness-of-fit test on each (x_min, alpha) pair to find the best fit

In [83]:
# Score the (x_min, alpha) candidates against the data and keep the best one.
# The result exposes .alpha, .x_min, .D and .len_tail, which the following
# cell prints.
best_fit = powerlawrs.dist.pareto.gof.gof(data, x_mins, alphas)

In [86]:
# Report the selected fit: alpha, x_min, the KS statistic (D) and the number
# of observations in the tail.
print(f"alpha: {best_fit.alpha}\nx_min: {best_fit.x_min}\nKS stat: {best_fit.D}\ntail length: {best_fit.len_tail}")

alpha: 1.2726372198302858
x_min: 230000.0
KS stat: 0.06067379629443781
tail length: 59


## Stats module

In [3]:
# Mean of the blackout observations held in data.
powerlawrs.descriptive.mean(data)

253868.68246445496

In [4]:
# Variance of data.
# NOTE(review): the second argument (1) is presumably the delta degrees of
# freedom, i.e. the sample variance — confirm against the powerlawrs docs.
powerlawrs.descriptive.variance(data, 1)

372476564023.59814

In [5]:
# Draw 3 values from data; the result is displayed as a 3-element list.
# NOTE(review): no seed is passed here, so the values presumably change
# between runs; whether sampling is with replacement is not visible — confirm.
powerlawrs.random.random_choice(data, 3)

[71000.0, 48000.0, 18351.0]

In [6]:
# Draw 3 random floats; the sample output suggests they lie in [0, 1) —
# presumably U(0, 1) draws, confirm against the powerlawrs docs.
powerlawrs.random.random_uniform(3)

[0.4828754356136413, 0.12486885733461772, 0.5970884716323652]

In [7]:
# Define a standard normal CDF in Python
def norm_cdf(x):
    """Return the standard normal CDF, Phi(x) = 0.5 * (1 + erf(x / sqrt(2))).

    Defined with ``def`` rather than as a lambda bound to a name (PEP 8), so
    tracebacks and reprs show ``norm_cdf``. It is passed to the Rust KS
    routine as the reference CDF callback.

    Parameters
    ----------
    x : float
        Point at which to evaluate the CDF.

    Returns
    -------
    float
        P(Z <= x) for a standard normal Z.
    """
    return 0.5 * (1 + math.erf(x / math.sqrt(2.0)))

# Five observations listed in ascending order (presumably required by
# ks_1sam_sorted, going by its name).
sorted_data = [-1.1, -0.5, 0.1, 0.2, 1.5]

# Call the Rust routine, handing it the Python CDF as a callback; it returns
# three statistics that are unpacked in order.
d_plus, d_minus, d_max = powerlawrs.ks.ks_1sam_sorted(sorted_data, norm_cdf)

# Emit the three statistics, one per line.
print(f"D+: {d_plus}", f"D-: {d_minus}", f"D max: {d_max}", sep="\n")

D+: 0.22074029056089706
D-: 0.13982783727702897
D max: 0.22074029056089706


## Util module

In [8]:
# 5 evenly spaced points from 0 to 10; the output shows that both endpoints
# are included.
powerlawrs.util.linspace(0,10,5)

[0.0, 2.5, 5.0, 7.5, 10.0]

In [9]:
# Derive the simulation settings for data. 230000 is the x_min reported by the
# Pareto fit, hand-copied here as a literal.
# NOTE(review): 0.01 is presumably the target precision of the simulation,
# which would drive the number of simulations returned — confirm against the
# powerlawrs docs.
simparams_dict = powerlawrs.sim.calculate_sim_params(0.01, data, 230000)
# Display the resulting dict (keys: num_sims_m, sim_len_n, n_tail, p_tail).
simparams_dict

{'num_sims_m': 2500,
 'sim_len_n': 211,
 'n_tail': 59,
 'p_tail': 0.2796208530805687}

In [10]:
# Convert the simparams dict into the Rust-backed struct; ** unpacking passes
# each dict key as a keyword argument to PySimParams.
simparams_struct = powerlawrs.sim.PySimParams(**simparams_dict)

# Use the struct as an argument. 230000 is the fitted x_min; 1.27 is
# presumably alpha, hand-rounded from the fitted 1.2726.
# TODO(review): confirm the argument order, and consider passing
# best_fit.x_min / best_fit.alpha instead of copied literals.
sim_data = powerlawrs.sim.generate_synthetic_datasets(data, 230000, simparams_struct, 1.27)

In [11]:
# Note: the library does not yet implement the zeta distribution for discrete data.
# View the simulated datasets as a polars DataFrame, going through a NumPy array.
pl.from_numpy(np.array(sim_data))

column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20,column_21,column_22,column_23,column_24,column_25,column_26,column_27,column_28,column_29,column_30,column_31,column_32,column_33,column_34,column_35,column_36,…,column_174,column_175,column_176,column_177,column_178,column_179,column_180,column_181,column_182,column_183,column_184,column_185,column_186,column_187,column_188,column_189,column_190,column_191,column_192,column_193,column_194,column_195,column_196,column_197,column_198,column_199,column_200,column_201,column_202,column_203,column_204,column_205,column_206,column_207,column_208,column_209,column_210
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
133000.0,257456.790006,15000.0,66005.0,32000.0,240604.043763,15000.0,230864.239767,25000.0,190000.0,24000.0,48000.0,74000.0,18819.0,100000.0,32000.0,589264.44456,207200.0,29000.0,66005.0,25000.0,420931.443751,94285.0,51000.0,145000.0,33000.0,51000.0,18819.0,145000.0,1.0205e6,66005.0,200000.0,35000.0,14273.0,24000.0,71000.0,51000.0,…,458577.733005,115000.0,742627.746527,53000.0,504305.035575,32000.0,92000.0,92000.0,466067.885594,59000.0,88000.0,50000.0,50462.0,115000.0,88000.0,2900.0,203000.0,29900.0,448732.488432,258182.494833,513739.249307,826706.847825,25000.0,70000.0,40000.0,75000.0,55000.0,46000.0,276975.148347,20000.0,15000.0,60000.0,760271.658238,18000.0,100000.0,1.1913e6,18000.0
2000.0,74000.0,40911.0,190000.0,8000.0,17000.0,252190.728996,4.7776e6,501493.643581,9000.0,203000.0,71000.0,1.3327e6,63500.0,55000.0,50000.0,14273.0,128000.0,32000.0,259225.036172,8000.0,70000.0,32000.0,160000.0,18351.0,82500.0,38500.0,20000.0,90000.0,40000.0,50000.0,10000.0,200000.0,726456.470236,80000.0,32000.0,25000.0,…,70000.0,50000.0,120000.0,91000.0,60000.0,50000.0,122000.0,80000.0,627889.278374,200000.0,233782.293137,70000.0,158000.0,148000.0,50000.0,2900.0,760709.959684,60000.0,190000.0,24506.0,728762.252259,247826.621358,146000.0,100000.0,130000.0,259153.227869,414478.854209,210882.0,56000.0,235600.583662,160000.0,60000.0,50462.0,50000.0,306247.229706,25000.0,327216.488141
24506.0,122000.0,39500.0,440908.261065,70000.0,206000.0,2000.0,50000.0,56000.0,147000.0,43696.0,128000.0,70000.0,59000.0,945905.440722,315297.322941,56000.0,2900.0,7.1404e6,234138.435037,32000.0,1.1772e6,30000.0,332424.846917,973649.732339,304128.467049,50000.0,25000.0,80000.0,15000.0,145000.0,235447.338764,24000.0,4.6914e6,32000.0,95630.0,478629.974388,…,429142.550992,19000.0,324571.157901,400876.936215,8000.0,142000.0,248716.780317,24506.0,81000.0,18819.0,30000.0,63500.0,63500.0,191000.0,275661.432854,294478.005841,289733.220631,40911.0,313916.559989,114000.0,166000.0,50462.0,55000.0,258270.490249,43696.0,45000.0,115000.0,173000.0,20000.0,60000.0,60000.0,40000.0,10000.0,287462.824899,145000.0,59000.0,163000.0
361068.303403,100000.0,130000.0,70000.0,71000.0,20000.0,10000.0,50000.0,257316.844139,350660.819443,60000.0,2000.0,113200.0,190000.0,430831.869555,11000.0,163000.0,40000.0,53000.0,203000.0,18000.0,377660.32543,40000.0,75000.0,1.1811e6,7500.0,40911.0,413577.403968,50000.0,80000.0,33000.0,4150.0,200000.0,353442.297129,17000.0,37000.0,32000.0,…,4.1130e6,776759.382725,206000.0,75000.0,130000.0,100000.0,32000.0,74000.0,200000.0,39500.0,8000.0,120000.0,56000.0,17000.0,60000.0,114000.0,70000.0,81000.0,424765.431139,120000.0,417833.40968,18000.0,1800.0,50000.0,303318.529793,173000.0,24506.0,145000.0,341965.694204,1646.0,4150.0,248171.625706,4150.0,164500.0,275896.289856,142000.0,353170.37679
1.9782e6,160000.0,242839.016632,71000.0,66005.0,106850.0,25000.0,540682.597401,232572.474124,75000.0,332567.318729,449809.911513,92000.0,29000.0,234195.55712,100000.0,56000.0,115000.0,36073.0,330281.10826,115000.0,148000.0,233572.91427,50000.0,62000.0,15000.0,113200.0,51000.0,210882.0,145000.0,18000.0,366789.592647,14273.0,305670.070066,245127.739098,56000.0,60000.0,…,71000.0,268323.959407,10000.0,40000.0,70000.0,236135.081254,75000.0,43000.0,372383.113461,17000.0,370129.0781,94285.0,50000.0,2.4947e6,43696.0,239537.457929,346908.782237,45000.0,100000.0,114500.0,243632.122922,92000.0,18819.0,35000.0,80000.0,1.1043e6,33000.0,322110.914944,1.8769e6,106850.0,903439.95176,66005.0,43000.0,37000.0,352122.942579,7.7402e6,130000.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
4150.0,50000.0,452814.287555,95630.0,25000.0,50000.0,19000.0,243035.79106,60000.0,239041.909081,50000.0,100000.0,26334.0,2900.0,60000.0,203000.0,5300.0,100000.0,43696.0,26334.0,60000.0,14273.0,29000.0,148000.0,1.5213e6,249047.998565,250652.26124,75000.0,43000.0,270919.511857,40000.0,145000.0,469516.037188,32000.0,290180.948457,10000.0,1.4021e6,…,40000.0,65000.0,59000.0,46000.0,80000.0,50000.0,56000.0,145000.0,15000.0,51000.0,498740.573469,19000.0,62000.0,1.9428e6,113200.0,1.8919e6,15000.0,1000.0,70000.0,164500.0,146000.0,15000.0,133000.0,63500.0,50000.0,260701.31573,274228.096929,587400.32572,50000.0,10000.0,482187.180773,427851.670147,71000.0,309119.443646,50000.0,24000.0,65000.0
94285.0,173000.0,92000.0,5300.0,1.1026e6,262752.717493,100000.0,158000.0,100000.0,608785.561626,160000.0,2.4255e6,575554.672298,30500.0,74000.0,51000.0,20000.0,32000.0,15000.0,272713.762726,283053.18477,40000.0,431220.714893,71000.0,66005.0,50000.0,133000.0,91000.0,92000.0,1800.0,158000.0,50000.0,58000.0,33000.0,1.6671e6,71000.0,25000.0,…,55000.0,56000.0,173000.0,147000.0,310674.022697,80000.0,875262.193893,396087.367247,1.9519e6,24506.0,43696.0,363584.778877,32000.0,15000.0,29900.0,230492.609357,71000.0,251371.163416,30001.0,344090.526777,10000.0,50000.0,53000.0,315594.352513,25000.0,112000.0,371212.015312,18819.0,40911.0,59000.0,164500.0,312278.489377,15000.0,115000.0,75000.0,29000.0,70000.0
71000.0,12000.0,43696.0,173000.0,5300.0,240409.428687,15000.0,50000.0,32000.0,323036.855396,206000.0,63500.0,460086.486626,51000.0,1.6645e6,30001.0,8000.0,206000.0,8000.0,50000.0,120000.0,25000.0,1800.0,50000.0,71000.0,879686.66064,25000.0,559738.920296,272833.381579,382383.537725,749305.31165,100000.0,56000.0,37000.0,333137.387214,133000.0,130000.0,…,25000.0,246933.097958,160000.0,361790.060356,43696.0,704712.943699,219000.0,2.8171e6,397721.481422,33000.0,200000.0,270609.645029,122000.0,14273.0,95000.0,92000.0,70000.0,43000.0,130000.0,1.5418e6,4150.0,25000.0,160000.0,660735.41966,43696.0,9000.0,247487.297425,219000.0,126000.0,638238.422369,1.7358e6,100000.0,20000.0,75000.0,30500.0,160000.0,537796.706645
62000.0,206000.0,381981.050343,120000.0,5.9591e6,479712.674213,1800.0,55000.0,55000.0,113200.0,46000.0,75000.0,10000.0,164500.0,120000.0,262450.962248,232635.76257,559901.151229,50000.0,33000.0,11529.0,11000.0,53000.0,90000.0,60000.0,190000.0,1646.0,535068.921198,163000.0,114500.0,2.8654e6,51000.0,70000.0,572778.850044,75000.0,8000.0,51000.0,…,1.6826e6,19000.0,70000.0,145000.0,145000.0,1.1714e6,200000.0,798397.707646,133000.0,114500.0,3.2603e6,273448.179723,207200.0,81000.0,173000.0,126000.0,63500.0,95630.0,456294.940515,29000.0,784207.750385,100000.0,323521.880828,25000.0,63500.0,264389.847052,88000.0,207200.0,106850.0,146000.0,203000.0,400380.527507,10000.0,43000.0,11000.0,328496.203693,35000.0


# Distributions
## Generic Power-Law

In [16]:
# Instantiate the generic power-law distribution. The arguments are presumably
# (alpha, x_min) — confirm against the powerlawrs docs. alpha = 2.2726 is
# 1 + the Pareto alpha (1.2726) used in the Pareto Type I section, and the
# pdf/cdf/ccdf outputs of the two classes agree at the same x.
pl_class = powerlawrs.dist.powerlaw.Powerlaw(2.2726, 230000)

In [17]:
# pdf: probability density of the power law evaluated at x = 500000
pl_class.pdf(500000)

9.47430869971139e-07

In [18]:
# cdf: cumulative distribution function of the power law at x = 500000
pl_class.cdf(500000)

0.627757791147596

In [19]:
# ccdf: complementary CDF at x = 500000; per the displayed outputs it equals
# 1 - cdf(500000)
pl_class.ccdf(500000)

0.372242208852404

In [57]:
# rv: turn a uniform draw into a random variate of the power law
# (presumably by inverse-transform sampling — confirm against the powerlawrs docs).
# Generate a random U(0,1) value: np.random.rand() with no arguments returns a
# single float in [0, 1). No seed is set in this cell, so the result differs per run.
u = np.random.rand()
pl_class.rv(u)

254491.25057865496

## Pareto Type I

In [59]:
# Instantiate the Pareto Type I distribution with the fitted values copied in
# as literals: alpha = 1.2726 (rounded from the fit) and x_min = 230000,
# presumably in (alpha, x_min) order — confirm against the powerlawrs docs.
pareto_class = powerlawrs.dist.pareto.Pareto(1.2726, 230000)

In [60]:
# pdf: probability density of the Pareto Type I distribution at x = 500000
pareto_class.pdf(500000)

9.474308699711417e-07

In [61]:
# cdf: cumulative distribution function of the Pareto Type I distribution at x = 500000
pareto_class.cdf(500000)

0.6277577911475959

In [62]:
# ccdf: complementary CDF at x = 500000; per the displayed outputs it equals
# 1 - cdf(500000)
pareto_class.ccdf(500000)

0.3722422088524041

In [63]:
# rv: turn a uniform draw into a random variate of the Pareto distribution
# (presumably by inverse-transform sampling — confirm against the powerlawrs docs).
# Generate a random U(0,1) value: np.random.rand() with no arguments returns a
# single float in [0, 1). No seed is set in this cell, so the result differs per run.
u = np.random.rand()
pareto_class.rv(u)

3385213.6112842234

## Exponential

In [67]:
# Instantiate the exponential distribution. The displayed pdf(2) output equals
# 1.5 * exp(-1.5 * 2), so the argument 1.5 acts as the rate parameter lambda.
expo_class = powerlawrs.dist.exponential.Exponential(1.5)

In [69]:
# pdf: probability density of the exponential distribution at x = 2
expo_class.pdf(2)

0.07468060255179593

In [71]:
# cdf: cumulative distribution function of the exponential distribution at x = 2
expo_class.cdf(2)

0.950212931632136

In [72]:
# ccdf: complementary CDF at x = 2; per the displayed outputs it equals
# 1 - cdf(2)
expo_class.ccdf(2)

0.04978706836786395

In [77]:
# rv: turn a uniform draw into a random variate of the exponential distribution
# (presumably by inverse-transform sampling — confirm against the powerlawrs docs).
# Generate a random U(0,1) value: np.random.rand() with no arguments returns a
# single float in [0, 1). No seed is set in this cell, so the result differs per run.
u = np.random.rand()
expo_class.rv(u)

0.4273739170832393