In [1]:
%load_ext autoreload

In [2]:
autoreload 2

In [3]:
%matplotlib inline

In [4]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

import pyreto

<h2> Forest fires </h2>

In [123]:
fire_size = pd.read_csv("http://tuvalu.santafe.edu/~aaronc/powerlaws/data/fires.txt", names=['acres'])

In [124]:
fire_size.describe()

Unnamed: 0,acres
count,203785.0
mean,89.563111
std,2098.732181
min,0.1
25%,0.1
50%,0.2
75%,2.0
max,412050.0


In [125]:
# check that I get same estimate for alpha given reported xmin...
desired_alpha, desired_xmin = 2.2, 6324
result1 = pyreto.distributions.Pareto.fit(fire_size.acres, xmin=desired_xmin)

In [127]:
np.testing.assert_almost_equal(result1.params['alpha'], desired_alpha, decimal=1)

In [131]:
# check that I get the same estimates for both alpha and xmin using brute force minimization
result2 = pyreto.distributions.Pareto.fit(fire_size.acres, xmin=None, quantile=0.999, method='brute')

In [132]:
np.testing.assert_almost_equal(result2.params['alpha'], desired_alpha, decimal=1)

In [133]:
np.testing.assert_almost_equal(result2.xmin, desired_xmin, decimal=1)

In [134]:
# check that I get the same estimates for both alpha and xmin using bounded minimization
result3 = pyreto.distributions.Pareto.fit(fire_size.acres, xmin=None, quantile=0.999, method='bounded')

In [135]:
np.testing.assert_almost_equal(result3.params['alpha'], desired_alpha, decimal=1)

In [138]:
np.testing.assert_almost_equal(result3.xmin, desired_xmin, decimal=1)

AssertionError: 
Arrays are not almost equal to 1 decimals
 ACTUAL: 7149.9999509662648
 DESIRED: 6324

<h2> Weblinks </h2>

In [117]:
weblinks = pd.read_csv('http://tuvalu.santafe.edu/~aaronc/powerlaws/data/weblinks.hist', sep='\t')

In [119]:
weblinks.describe()

Unnamed: 0,degree,frequency
count,14480.0,14480.0
mean,15499.9,19101.43
std,36571.73,1022556.0
min,0.0,1.0
25%,3619.75,1.0
50%,7468.5,2.0
75%,14750.5,11.0
max,1199466.0,106649800.0


In [120]:
# check that I get same estimate for alpha given reported xmin...
desired_alpha, desired_xmin = 2.336, 3684
result1 = pyreto.distributions.Pareto.fit(weblinks.frequency, xmin=desired_xmin)

In [121]:
np.testing.assert_almost_equal(result1.params['alpha'], desired_alpha, decimal=3)

AssertionError: 
Arrays are not almost equal to 3 decimals
 ACTUAL: 1.4634359666927714
 DESIRED: 2.336

<h2> Cities </h2>

In [5]:
cities = pd.read_csv('http://tuvalu.santafe.edu/~aaronc/powerlaws/data/cities.txt', names=['population'])
cities.population /= 1e3  # CSN units are in thousands of persons

In [6]:
cities.describe()

Unnamed: 0,population
count,19447.0
mean,9.002051
std,77.825051
min,0.001
25%,0.3695
50%,1.089
75%,4.1355
max,8008.654


In [93]:
# check that I get same estimate for alpha given reported xmin...
desired_alpha, desired_xmin = 2.37, 52.46
result1 = pyreto.distributions.Pareto.fit(cities.population, xmin=desired_xmin)

In [None]:
np.testing.assert_almost_equal(result1.params['alpha'], desired_alpha, decimal=2)

In [106]:
# check that I get the same estimates for both alpha and xmin using brute force minimization
result2 = pyreto.distributions.Pareto.fit(cities.population, xmin=None, quantile=0.99, method='brute')

In [107]:
np.testing.assert_almost_equal(result2.params['alpha'], desired_alpha, decimal=2)

AssertionError: 
Arrays are not almost equal to 2 decimals
 ACTUAL: 2.3639368738287363
 DESIRED: 2.37

In [108]:
np.testing.assert_almost_equal(result2.xmin, desired_xmin, decimal=2)

AssertionError: 
Arrays are not almost equal to 2 decimals
 ACTUAL: 51.442999999999998
 DESIRED: 52.46

In [109]:
# check that I get the same estimates for both alpha and xmin using bounded minimization
result3 = pyreto.distributions.Pareto.fit(cities.population, xmin=None, quantile=0.99, method='bounded')

In [110]:
np.testing.assert_almost_equal(result3.params['alpha'], desired_alpha, decimal=2)

In [111]:
np.testing.assert_almost_equal(result3.xmin, desired_xmin, decimal=2)

AssertionError: 
Arrays are not almost equal to 2 decimals
 ACTUAL: 70.075391716136934
 DESIRED: 52.46