In [1]:
import powerlawrs
import polars as pl
import numpy as np
import pandas as pd
import math

In [2]:
# Relative path to the blackout dataset used throughout this notebook.
file = "../reference_data/blackouts.txt"

# Do not rely on polars/pandas to work out whether a header row exists;
# state it explicitly. has_header=False makes the first line a data row.
df = pl.read_csv(file, has_header=False)
# Pull one column out of the frame as a Series (to_series() with no index
# returns the first column in polars).
# NOTE(review): assumes the file holds a single column of values — confirm.
data = df.to_series()

## Stats module

In [8]:
# Mean of the loaded series; the bare expression is displayed as the cell output.
powerlawrs.descriptive.mean(data)

253868.68246445496

In [9]:
# Variance of the loaded series.
# NOTE(review): the second argument (1) is presumably the delta degrees of
# freedom, i.e. sample variance — confirm against the powerlawrs docs.
powerlawrs.descriptive.variance(data, 1)

372476564023.59814

In [12]:
# Draw 3 values from the loaded series.
# NOTE(review): no seed is set in the visible cells, so the recorded output is
# presumably not reproducible on re-run — confirm whether powerlawrs can be seeded.
powerlawrs.random.random_choice(data, 3)

[650000.0, 95000.0, 24000.0]

In [14]:
# Draw 3 uniform random numbers (the recorded values all lie between 0 and 1).
# NOTE(review): no seed is set in the visible cells, so the recorded output is
# presumably not reproducible on re-run — confirm whether powerlawrs can be seeded.
powerlawrs.random.random_uniform(3)

[0.5292972006652001, 0.6841804486495846, 0.3210233891608596]

In [31]:
def norm_cdf(x):
    """Return the standard normal CDF evaluated at ``x``.

    Uses the identity Phi(x) = 0.5 * (1 + erf(x / sqrt(2))).

    Written as a ``def`` rather than a lambda bound to a name (PEP 8) so the
    callable carries a real ``__name__`` in tracebacks and reprs.

    Args:
        x: Real number at which to evaluate the CDF.

    Returns:
        A float in [0, 1].
    """
    return 0.5 * (1 + math.erf(x / math.sqrt(2.0)))


# Small hand-written sample, listed in ascending order.
sorted_data = [-1.1, -0.5, 0.1, 0.2, 1.5]

# Hand the sample and the Python CDF callable to the Rust-backed KS routine;
# it returns a 3-tuple, unpacked here as (D+, D-, D max).
d_plus, d_minus, d_max = powerlawrs.ks.ks_1sam_sorted(sorted_data, norm_cdf)

# One print call; sep="\n" puts each statistic on its own line.
print(f"D+: {d_plus}", f"D-: {d_minus}", f"D max: {d_max}", sep="\n")

D+: 0.22074029056089706
D-: 0.13982783727702897
D max: 0.22074029056089706


In [47]:
# Generate an alpha parameter for every candidate x_min of the data.
# Reuses the `data` series extracted when the file was loaded, rather than
# calling df.to_series() again, so every cell works on the same object.
x_min, alphas = powerlawrs.find_alphas_fast(data)

In [48]:
# Goodness-of-fit step over the candidate x_min values and their alphas.
# The result exposes alpha, x_min, D and len_tail, which the following cells
# inspect. Reuses the `data` series from the load cell instead of
# re-extracting it with df.to_series().
fit = powerlawrs.gof(data, x_min, alphas)

In [36]:
# Alpha of the selected fit.
# NOTE(review): presumably the power-law scaling exponent — confirm.
fit.alpha

1.2726372198302858

In [37]:
# x_min of the selected fit; later passed to powerlawrs.hypothesis_test.
fit.x_min

230000.0

In [38]:
# D of the selected fit; later passed to powerlawrs.hypothesis_test.
# NOTE(review): presumably the KS distance between data and fitted model — confirm.
fit.D

0.06067379629443781

In [39]:
# Tail length of the fit; the hypothesis_test log line reports the same
# number as "tail size".
# NOTE(review): presumably the count of observations >= x_min — confirm.
fit.len_tail

59

In [41]:
# Test the fitted model against simulated datasets, using the alpha, x_min
# and D taken from `fit`. Reuses the `data` series from the load cell instead
# of re-extracting it with df.to_series().
# NOTE(review): 0.01 is presumably the target precision of the p-value; the
# logged M = 2500 is consistent with 1 / (4 * 0.01**2) — confirm against the
# powerlawrs docs.
h0 = powerlawrs.hypothesis_test(data, 0.01, fit.alpha, fit.x_min, fit.D)

Generating M = 2500 simulated datasets of length n = 211 with tail size 59 and probability of the tail P(tail|data) = 0.2796208530805687


In [42]:
# NOTE(review): presumably the number of simulated datasets whose distance
# exceeded the observed fit.D — confirm. In the recorded run gt / total
# equals pval.
h0.gt

1937

In [43]:
# p-value of the test; in the recorded run this equals h0.gt / h0.total.
h0.pval

0.7748

In [44]:
# Total number of simulated datasets; matches M in the log line printed by
# hypothesis_test.
h0.total

2500