In [None]:
from scipy.optimize import fmin
from scipy.stats import beta
from scipy.special import gamma as gammaf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Read the 2016.csv file (under ml_sealice) into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/root/bryton/github/cv_research/bryton/ml_sealice/2016.csv',
)

In [None]:
# Keep only the 'avgAdultFemaleLice' values strictly below 0.5.
num = df['avgAdultFemaleLice'].loc[lambda col: col < 0.5]

def betaNLL(param,*args):
    '''Negative log likelihood function for a standard beta on [0, 1].

    <param>: sequence (a, b) of shape parameters to be fitted.
    <args>: 1-element tuple; args[0] holds the sample data.

    Return <nll>: negative log-likelihood to be minimized. np.inf is
    returned when a or b is not strictly positive, so that a minimizer
    never prefers an invalid parameter pair.
    '''

    a, b = param
    data = args[0]

    # For a <= 0 or b <= 0 (or NaN) the density is undefined: every term
    # would come back as NaN, be masked out below, and the sum would be a
    # spurious 0 that looks like an excellent fit to the optimizer.
    if not (a > 0 and b > 0):
        return np.inf

    # Evaluate the log-density directly instead of log(pdf): this keeps
    # terms whose pdf would underflow to 0 from being silently dropped.
    lg = beta.logpdf(data,a,b,loc=0,scale=1)

    # Ignore non-finite terms (e.g. samples sitting exactly on 0 or 1,
    # where the log-density can be +/-inf).
    mask = np.isfinite(lg)
    nll = -lg[mask].sum()
    return nll

#-------------------Sample data-------------------
data=num
#----------------Normalize to [0,1]----------------
# Left disabled: the fits below run on the values as they are.
#data=(data-np.min(data))/(np.max(data)-np.min(data))

#----------------Fit using moments----------------
# Method-of-moments estimates from the sample mean and the
# unbiased (ddof=1) sample variance.
mean=np.mean(data)
var=np.var(data,ddof=1)
alpha1=mean**2*(1-mean)/var-mean
beta1=alpha1*(1-mean)/mean

#------------------Fit using mle------------------
# Downhill-simplex minimisation of betaNLL, started from a=b=1.
result=fmin(betaNLL,[1,1],args=(data,))
alpha2,beta2=result

#----------------Fit using beta.fit----------------
# beta.fit returns (a, b, loc, scale); loc and scale land in xx and yy.
alpha3,beta3,xx,yy=beta.fit(data)

print('\n# alpha,beta from moments:',alpha1,beta1)
print('# alpha,beta from mle:',alpha2,beta2)
print('# alpha,beta from beta.fit:',alpha3,beta3)

#-----------------------Plot-----------------------
# `density=True` replaces the `normed` keyword, which Matplotlib removed.
plt.hist(data,bins=30,density=True)

def fitted(x,a,b,cap=20):
    '''Beta(a, b) density on [0, 1] evaluated at x, clipped from above.

    <x>: point(s) at which to evaluate; a scalar, list/tuple or array.
    <a>, <b>: shape parameters of the beta distribution.
    <cap>: upper limit applied to the returned values (default 20), so
           that an unbounded density near 0 or 1 stays plottable.

    Return: the capped density values. They are also printed.
    '''
    # Plain Python scalars and sequences carry no dtype; convert them so
    # the element-wise power and the capping below work for them too.
    if not hasattr(x, 'dtype'):
        x = np.asarray(x, dtype=float)

    norm = gammaf(a+b)/gammaf(a)/gammaf(b)
    # With a < 1 or b < 1 the density is infinite at an endpoint; that is
    # expected and handled by the cap, so silence the divide warning.
    with np.errstate(divide='ignore'):
        array = norm*x**(a-1)*(1-x)**(b-1) #pdf of beta

    # Element-wise cap; NaN entries are left as NaN.
    array = np.minimum(array, cap)
    print(array)
    return array

# Evaluation grid from 0 up to the largest sample, one point per sample.
# (The module is imported as `np`; the bare name `numpy` is undefined.)
xx=np.linspace(0,max(data),len(data))

fit1 = fitted(xx,alpha1,beta1)
print(xx.shape)
print(fit1.shape)

# Green: curve from the method-of-moments parameters.
plt.plot(xx, fit1, 'g')
#plt.plot(xx,fitted(xx,alpha2,beta2),'b')
# Red: curve from the beta.fit shape parameters.
# NOTE(review): beta.fit also estimated loc and scale, which this curve
# ignores -- it is only comparable if those came out close to 0 and 1.
plt.plot(xx,fitted(xx,alpha3,beta3),'r')

plt.xlabel('Sea lice count')
plt.ylabel('Density')

plt.show()