# Notebook for the fitting of scaling laws

- For details see: 

-- "Is this scaling non-linear?", by Jorge C. Leitão, José M. Miotto, Martin Gerlach, and Eduardo G. Altmann,
DOI: 10.1098/rsos.150649.
http://rsos.royalsocietypublishing.org/content/3/7/150649

- Delete the stored results in the folder "_results" to force every computation to be redone from scratch (otherwise previously stored results are read back instead of being recomputed)

In [3]:
import sys
import numpy as np

# Add the project's source and data directories to the module search path.
sys.path.append("../src/")
sys.path.append("../data/")


# Project-local analysis module (presumably found through the paths added above).
import analysis as a
# NOTE(review): MIN_VALUE is not referenced in the cells visible here — confirm it is still needed.
MIN_VALUE = 10**-8

# Load IPython's autoreload extension; mode 2 reloads all modules before each cell is executed.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
def summary(func):
    """Print a short report for one fitted model.

    Reads the ``description``, ``dataset``, ``beta``, ``p_value`` and ``bic``
    attributes of *func* and prints them on four labelled lines; the last
    line is followed by an empty line. Returns ``None``.
    """
    print(func.description, '\t Dataset:', func.dataset)
    # Each attribute is looked up only when its own line is printed.
    for label, attr in (('Beta +- error:\t', 'beta'), ('P-value:\t', 'p_value')):
        print(label, getattr(func, attr))
    print('BIC:\t\t', func.bic, '\n')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Try to build an Analysis for every dataset listed in a.DATABASES and report
# which ones can be used.
for d in a.DATABASES:
    try:
        a.Analysis(d)
    except Exception as err:
        # Catch Exception rather than everything, so that interrupting the
        # kernel (KeyboardInterrupt) stops the loop instead of being reported
        # as a failed dataset. The error is printed so the cause is visible.
        print("-", d, "failed:", repr(err))
    else:
        print("+", d, "worked")

+ brazil_aids_2010 worked
+ brazil_gdp_2010 worked
+ brazil_externalCauses_2010 worked
- ocde_gdp failed
- ocde_patents failed
- usa_gdp failed
+ usa_miles worked
- eurostat_cinema_seats failed
- eurostat_cinema_attendance failed
- eurostat_museum_visitors failed
- eurostat_theaters failed
- eurostat_libraries failed
- uk_patents failed
- uk_income failed
- uk_train failed
- new_dataset failed
- new_dataset2 failed


## Choose the dataset 

- by setting the variable dataset="name"

- Pick one of the available datasets (write 'help' as the name of the dataset for a list)

- Or, add your csv dataset to the appropriate folder:

-- New dataset with three columns (name,x,y) = (city-name, population, observable) to folder new_dataset/

-- New dataset with two columns (x,y) = (population, observable) to folder new_dataset2/

In [5]:
# Name of the dataset to analyse; any unknown name (e.g. 'help') lists the
# available ones instead.
dataset="usa_miles"

try:
    # Load the dataset once and take both columns from the same object.
    xy_data = a.xy(dataset)
    x = xy_data.x
    y = xy_data.y
except Exception:
    # Exception (not a bare except) so that a kernel interrupt is not
    # mistaken for an invalid dataset name.
    print("Invalid Dataset Chosen. Available datasets are:\n")
    for i in a.DATABASES:
        print(i)

# Delete all stored calculations for previously loaded "new_dataset*" data,
# so results are recomputed for whatever csv file is in the folder now.
if dataset.startswith("new_dataset"):
    import glob
    import os

    # Pure-Python replacement for the `%rm` shell alias: works on every
    # platform and stays silent when there is nothing to delete.
    for stale_result in glob.glob("_results/*new_dataset*"):
        if os.path.isfile(stale_result):
            os.remove(stale_result)

# Performing Analysis

 - Each line is one of the models, with specific constraints. Further constraints can be fixed in each model.
 
 - This can take some time (at least a couple of minutes)

 - Increase the "required_successes=" parameter to increase precision (and computational time) 

## Log-Normal

In [None]:
# Log-normal model fit for the chosen dataset; the result is reported by summary(ln) further down.
# required_successes=8: raise it for more precision at the cost of computation time.
ln = a.LogNormalAnalysis(dataset, required_successes=8)

In [None]:
# Log-normal model with beta held fixed (per the class name; the variable name suggests beta = 1 — confirm in analysis.py).
ln_beta1=a.LogNormalFixedBetaAnalysis(dataset,required_successes=8)

In [None]:
# Log-normal model with the "D" parameter held fixed (per the class name — confirm its meaning in analysis.py).
ln_deltaf=a.LogNormalFixedDAnalysis(dataset,required_successes=8)

##  Gaussian

In [None]:
# Gaussian-section model, fitted with the ConstrainedD analysis class (the exact constraint is not visible here — see analysis.py).
Gauss=a.ConstrainedDAnalysis(dataset,required_successes=8)

In [None]:
# Same Gaussian-section model with beta held fixed (per the class name; presumably beta = 1 — confirm in analysis.py).
Gauss_beta1=a.ConstrainedDFixedBetaAnalysis(dataset,required_successes=8)

## Population

In [None]:
# Population model fit for the chosen dataset; the result is reported by summary(Pop) further down.
Pop=a.PopulationAnalysis(dataset,required_successes=8)

In [None]:
# Population model with gamma held fixed (per the class name — confirm the fixed value in analysis.py).
Pop2=a.PopulationFixedGammaAnalysis(dataset,required_successes=8)

# Outcome of the Analysis 

### Printing the outcomes

In [None]:
# Report every fitted model, in the same order the fits were run.
for fitted in (ln, ln_beta1, ln_deltaf, Gauss, Gauss_beta1, Pop, Pop2):
    summary(fitted)

# For comparison: a.mls_best_fit applied to the log-transformed x and y.
print(
    "\n Least Square fitting:",
    a.mls_best_fit(np.log(x), np.log(y)),
)

### Plotting data 

In [None]:
# Scatter plot of the raw (x, y) data on log-log axes.
# NOTE: the star import puts the pyplot functions (figure, plot, ...) directly
# into the notebook namespace; the calls below rely on it.
from matplotlib.pyplot import *
figure(figsize=(20,10))
# Logarithmic scale on both axes.
xscale("log")
yscale("log")
# Axis limits run from 0.9 times the smallest to 1.1 times the largest value.
xlim(0.9*min(x),1.1*max(x))
ylim(0.9*min(y),1.1*max(y))
# Data points as small black circles, labelled "data" for the legend.
plot(x, y, "o", ms=2,color="black",label="data")
xlabel("Population")
ylabel("Observable")
# loc=0 asks matplotlib to pick the "best" legend position.
legend(loc=0)
show()