# Fit abundance curves to different models

In [9]:
%matplotlib inline

import os
os.chdir("/Volumes/WorkDrive/gimmeSAD")
import matplotlib.pyplot as plt
import macroeco as meco
import pandas as pd
import numpy as np
import implicit_BI
import gimmeSAD
## For literal_eval
import sys
import ast

# Alternate SIMOUT_DIR path, currently disabled.
#SIMOUT_DIR = "/Users/iovercast/Desktop/lab-notebooks/gimmeSAD/simout"
# Directory that is listed for per-simulation output subdirectories.
SIMOUT_DIR = "/Volumes/WorkDrive/gimmeSAD/simout"
# Name of the output file looked up inside each simulation subdirectory.
OUTFILE = "gimmeSAD.out"
# Make the simulation output directory the current working directory.
os.chdir(SIMOUT_DIR)
# NOTE(review): col_rates is not referenced in the visible cells; presumably
# colonization rates of the simulations -- confirm it is still needed.
col_rates = [0.01, 0.001]


In [6]:
## Sort records by their first element (formerly a hand-rolled quicksort)
def qsort(arr):
    """Return the items of ``arr`` ordered by their first element.

    The sort is stable: items whose first elements compare equal keep
    their original relative order.  The built-in ``sorted`` is used
    instead of a recursive quicksort so that long or already-sorted
    inputs cannot exhaust the recursion limit.

    arr: sequence of indexable items (e.g. tuples); only ``item[0]`` is
        compared.
    Returns ``arr`` itself when it has at most one item, otherwise a new
    list.
    """
    if len(arr) <= 1:
        # Nothing to order; hand the input back unchanged.
        return arr
    return sorted(arr, key=lambda item: item[0])

In [2]:
def unpack_abundances(my_abund):
    """Expand packed abundance tuples into a flat list of raw abundances.

    Every entry of ``my_abund`` is indexed as ``(value, count)``; the
    value is repeated ``count`` times in the result, preserving the order
    of the entries.
    """
    return [entry[0] for entry in my_abund for _ in range(entry[1])]


In [16]:
def plot_models(abundances):
    """Plot the empirical rank-abundance distribution against fitted models.

    The log-series, Poisson lognormal and lognormal models are fitted with
    their ``fit_mle`` methods; a "broken stick" curve is taken from the
    zero-truncated negative binomial using the mean abundance and a
    second parameter of 1.  All curves are drawn on a log-scaled y axis
    in a new figure, which is then shown.

    abundances: sequence of raw abundances (presumably one entry per
        species -- confirm against the caller).
    Returns None.
    """
    plt.figure()
    sad_df = abundances
    n_species = len(sad_df)

    p = meco.models.logser.fit_mle(sad_df)
    logser_rad = meco.models.logser.rank(n_species, p)

    broken_stick_rad = meco.models.nbinom_ztrunc.rank(n_species, np.mean(sad_df), 1)

    mu, s = meco.models.plnorm_ztrunc.fit_mle(sad_df)
    plnorm_rad = meco.models.plnorm_ztrunc.rank(n_species, mu, s)

    # The lognormal parameters must be ranked with the lognormal model
    # itself; ranking them with plnorm_ztrunc drew a second Poisson
    # lognormal curve under the "Lognormal RAD" label.
    mu, s = meco.models.lognorm.fit_mle(sad_df)
    lognorm_rad = meco.models.lognorm.rank(n_species, mu, s)

    ranks = np.arange(1, n_species + 1)
    # Diagnostic output of the plotted ranks and data.
    print(len(ranks))
    print(ranks)
    print(n_species)
    print(sad_df)

    # Most abundant first, so the scatter points sit on the empirical line.
    empirical_rad = np.sort(sad_df)[::-1]
    plt.scatter(ranks, empirical_rad)
    plt.semilogy(ranks, empirical_rad, label="Empirical RAD")
    # Model curves are reversed as well (presumably rank() returns values
    # in ascending order -- confirm against the macroeco documentation).
    plt.semilogy(ranks, logser_rad[::-1], label="Logseries RAD")
    plt.semilogy(ranks, broken_stick_rad[::-1], label="Broken Stick RAD")
    plt.semilogy(ranks, lognorm_rad[::-1], label="Lognormal RAD")
    plt.semilogy(ranks, plnorm_rad[::-1], label="Poisson Lognormal RAD")
    plt.xlabel("Rank")
    plt.ylabel("Log(Abundance)")
    plt.legend()
    # show must be called; the bare attribute reference did nothing.
    plt.show()

In [4]:
# Candidate abundance models compared by best_fit().
models = [meco.models.logser, meco.models.lognorm, meco.models.nbinom_ztrunc, meco.models.plnorm_ztrunc]


def best_fit(abundances):
    """Fit every candidate model to the abundances and print its AIC.

    For each distribution in ``models`` the parameters are estimated with
    ``fit_mle``, the distribution is instantiated with those parameters
    and the value of ``meco.compare.AIC`` is printed after the model
    object itself.

    abundances: sequence of abundances to fit.
    Returns None; the results are only printed.
    """
    for m in models:
        print(m)
        mle_params = m.fit_mle(abundances)
        # Unpack however many parameters the model has, rather than
        # special-casing one versus two, so no fitted parameter is dropped.
        model_aic = meco.compare.AIC(abundances, m(*mle_params))

        print(model_aic)

In [88]:
## Test widgets
# Smoke-test best_fit on a small hand-made abundance list.
best_fit([1,1,1,2,3,4,4,4,5,5,6])
# Expand two packed entries: ten copies of 2 followed by a single 500.
print(unpack_abundances([(2,10), (500,1)]))

<macroeco.models._distributions.logser_gen object at 0x11abbef50>
50.081103715
<macroeco.models._distributions.lognorm_gen object at 0x11a57c810>
49.7912729904
<macroeco.models._distributions.nbinom_ztrunc_gen object at 0x11ace08d0>
47.55256415
<macroeco.models._distributions.plnorm_ztrunc_gen object at 0x11abbe4d0>
47.5582657028
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 500]


In [None]:
## Fit and plot the abundances recorded in every simulation output directory
dirs = os.listdir(SIMOUT_DIR)
# Uncomment to restrict the run to a single simulation directory.
#dirs = ["K_5000-C_0.01-1"]
for d in dirs:
    # Only entries whose name contains "K" are considered.
    if "K" not in d:
        continue
    print(d)
    # Anchor every path at SIMOUT_DIR so the scan does not depend on the
    # current working directory.
    sim_dir = os.path.join(SIMOUT_DIR, d)
    if not os.path.isdir(sim_dir):
        print("founda file")
        continue
    outfile_path = os.path.join(sim_dir, OUTFILE)
    if not os.path.exists(outfile_path):
        continue
    with open(outfile_path, 'r') as out:
        for line in out:
            if "Raw abundance" in line:
                # The text after "OrderedDict" is parsed as a Python
                # literal, then sorted by first element and expanded to
                # raw abundances before fitting and plotting.
                abund = ast.literal_eval(line.split("OrderedDict")[1])
                abund = qsort(abund)
                abund = unpack_abundances(abund)
                best_fit(abund)
                print("\n")
                plot_models(abund)


K_1000-C_0.001
<macroeco.models._distributions.logser_gen object at 0x1190bb9d0>
71.3453422877
<macroeco.models._distributions.lognorm_gen object at 0x1191fec10>
78.8309293004
<macroeco.models._distributions.nbinom_ztrunc_gen object at 0x1190bb390>
75.856290333
<macroeco.models._distributions.plnorm_ztrunc_gen object at 0x1191fe390>
78.269063712


6
[1 2 3 4 5 6]
6
[1, 2, 40, 219, 341, 397]
K_1000-C_0.001_x10
<macroeco.models._distributions.logser_gen object at 0x1190bb9d0>
1554.11169589
<macroeco.models._distributions.lognorm_gen object at 0x1191fec10>
1488.23423831
<macroeco.models._distributions.nbinom_ztrunc_gen object at 0x1190bb390>
1483.26717152
<macroeco.models._distributions.plnorm_ztrunc_gen object at 0x1191fe390>
1486.99313632




In [None]:
1

1