In [2]:
## Load required libraries
import numpy as np
import scipy as sp
import scipy.optimize as sp_opt
import pandas as pd

In [16]:
## Load data from INEGI's ENIGH survey
print("\nINEGI's ENIGH 2012 survey data:")

# The sheet is read without a header row, so columns are addressed by position:
# column 0 holds the observations, column 1 their expansion factors.
data = pd.read_excel("~/Desktop/enigh_income_data_households.xls", header=None)

print("\nFirst 10 rows of data:")
print(data.head(10))
data = data.to_numpy()

# mydata = [observations, expansion factors, expanded number of observations]
income_obs = data[:, 0]
expansion_factors = data[:, 1]
mydata = [income_obs, expansion_factors, np.sum(expansion_factors)]

print("\nExpanded number of observations: " + str(mydata[2]))

# Expansion-factor-weighted mean of the observations.
weighted_income_total = np.sum(mydata[0] * mydata[1])
print("\nAverage quarterly household income: " + str(1 / mydata[2] * weighted_income_total))


INEGI's ENIGH 2012 survey data:

First 10 rows of data:
               0     1
0   31548.900391  1537
1   34186.148438  1537
2  148123.890625  1537
3   45438.937500  1537
4   55309.453125  1537
5    7405.419922  1215
6    9164.080078  1215
7   23326.609375  1215
8   29059.210938  1215
9   45155.070312  1215

Expanded number of observations: 31559379.0

Average quarterly household income: 37999.64190447456


In [80]:
## Perform Maximum Likelihood Estimation with constraints using BFGS in Scipy
# (No `method` is passed to scipy.optimize.minimize below, so scipy selects the
#  solver itself: BFGS for the unconstrained fit, SLSQP once constraints are given.)

# Constraint targets
# Mean the fitted distribution is forced to match. An earlier formula is kept
# for reference: 2.44 * (1/mydata[2]*np.sum(mydata[0]*mydata[1]))
constrained_mean = 92733.62
# Threshold above which the upper-tail probability is constrained
top_threshold = 10.41e6
# Target upper-tail probability above top_threshold
constrained_density = 1.0e-6

# Initial guess, in the order the model functions read it: [a, d, p]
start_param_scipy = [2.0**15, 0.1, 0.5]

def objective_GeneralizedGamma(params, mydata):
    """Weighted negative log-likelihood of a generalized gamma distribution.

    The density being fitted is
        f(y) = (p / a**d) / Gamma(d/p) * y**(d-1) * exp(-(y/a)**p),
    so the weighted log-likelihood is
        n*log((p / a**d) / Gamma(d/p)) + (d-1)*sum(w*log(y)) - sum(w*(y/a)**p).

    Parameters
    ----------
    params : sequence of float
        ``[a, d, p]``; only the first three entries are read. The likelihood
        is defined for a > 0, d > 0, p > 0 (non-positive values yield nan).
    mydata : sequence
        ``[Y, Weights, n]``: observations, their weights, and the total
        weight ``n`` that multiplies the normalising constant.

    Returns
    -------
    float
        The negative log-likelihood (the quantity to minimise).
    """
    # Retrieve parameters
    a = params[0]
    d = params[1]
    p = params[2]

    # Retrieve observations
    Y = mydata[0]
    Weights = mydata[1]
    n = mydata[2]

    # Log of the normalising constant, evaluated entirely in log space.
    # Gamma(d/p) overflows to inf once d/p exceeds ~171, which would turn
    # log(... / Gamma(d/p)) into log(0) = -inf; gammaln stays finite.
    log_norm = np.log(p) - d*np.log(a) - sp.special.gammaln(d/p)

    # Compute negative log-likelihood
    ll = -(n*log_norm + (d-1)*np.sum(Weights*np.log(Y)) - np.sum(Weights*(Y/a)**p))

    return ll

def Mean_GeneralizedGamma(params):
    a = params[0]
    d = params[1]
    p = params[2]
    
    mean = a * sp.special.gamma((d+1)/p) / sp.special.gamma(d/p)
    return mean

def constrained_mean_fun(params):
    """Residual of the mean constraint for parameters ``params``.

    Returns the mean of the distribution (via ``Mean_GeneralizedGamma``) minus
    the module-level target ``constrained_mean``; the value is zero exactly
    when the constraint is met.
    """
    return Mean_GeneralizedGamma(params) - constrained_mean

def Right_CDF_GeneralizedGamma(params):
    """Upper-tail value of the generalized gamma at the module-level ``top_threshold``.

    ``params`` is ``[a, d, p]``. The result is scipy's regularized upper
    incomplete gamma function evaluated at shape ``d/p`` and argument
    ``(top_threshold/a)**p``.
    """
    scale = params[0]
    shape_d = params[1]
    shape_p = params[2]

    tail_shape = shape_d/shape_p
    tail_arg = (top_threshold/scale)**shape_p
    return sp.special.gammaincc(tail_shape, tail_arg)

def constrained_CDF_fun(params):
    """Residual of the upper-tail constraint for parameters ``params``.

    Returns ``Right_CDF_GeneralizedGamma(params)`` minus the module-level
    target ``constrained_density``; the value is zero exactly when the
    constraint is met.
    """
    return Right_CDF_GeneralizedGamma(params) - constrained_density

# Firstly, test unconstrained optimization.
# No method, bounds or constraints are given, so scipy chooses the solver.
myoptions = {'disp' : True, 'maxiter' : 2000}
# `args` is passed as an explicit one-element tuple so the objective is called
# as objective(params, mydata); this no longer relies on scipy wrapping a
# non-tuple `args` on our behalf.
unc_results = sp_opt.minimize(objective_GeneralizedGamma, start_param_scipy, args = (mydata,), options = myoptions)
# A retry loop guarded by `while False:` (doubling the initial d and refitting)
# was unreachable and has been removed.

print("\nUnconstrained solution:")
print(unc_results.x)
print("\nUnconstrained mean of the fitted distribution:")
unc_fitted_mean = Mean_GeneralizedGamma(unc_results.x)
print(unc_fitted_mean)
print("\n")

# Then, perform optimization over the constrained mean,
# warm-started from the unconstrained solution.
con_start_param_scipy = unc_results.x
# One equality constraint: fitted mean == constrained_mean.
# The trailing comma makes this a real one-element tuple; `({...})` without it
# is just the dict itself.
myconstraints = ({'type': 'eq', 'fun' : constrained_mean_fun},)
# `args` is an explicit one-element tuple so the objective receives `mydata`
# as a single extra argument.
con_results = sp_opt.minimize(objective_GeneralizedGamma, con_start_param_scipy, args = (mydata,), constraints = myconstraints, options = myoptions)
print("\nConstrained solution:")
print(con_results.x)
print("\nConstrained mean of the fitted distribution:")
con_fitted_mean = Mean_GeneralizedGamma(con_results.x)
print(con_fitted_mean)
print("\nDensity above threshold of " + str(top_threshold) +":")
print(Right_CDF_GeneralizedGamma(con_results.x))
print("\n")

# Finally, perform optimization over the constrained mean and the constrained cdf,
# warm-started from the mean-constrained solution.
con_start_param_scipy = con_results.x
# Two equality constraints: fitted mean == constrained_mean and
# upper tail above top_threshold == constrained_density.
myconstraints = ({'type': 'eq', 'fun' : constrained_mean_fun}, {'type': 'eq', 'fun' : constrained_CDF_fun})
# `args` is an explicit one-element tuple so the objective receives `mydata`
# as a single extra argument.
# NOTE(review): in the recorded run this stage stopped at the iteration limit
# (exit mode 9), so the values printed below are the last iterate rather than
# a converged optimum; check con_results.success before relying on them.
con_results = sp_opt.minimize(objective_GeneralizedGamma, con_start_param_scipy, args = (mydata,), constraints = myconstraints, options = myoptions)
print("\nConstrained solution:")
print(con_results.x)
print("\nConstrained mean of the fitted distribution:")
con_fitted_mean = Mean_GeneralizedGamma(con_results.x)
print(con_fitted_mean)
print("\nDensity above threshold of " + str(top_threshold) +":")
print(Right_CDF_GeneralizedGamma(con_results.x))
print("\n")


  df = (f(*((xk + d,) + args)) - f0) / d[k]


         Current function value: 360564215.991250
         Iterations: 376
         Function evaluations: 3128
         Gradient evaluations: 623

Unconstrained solution:
[9.50185826e-06 7.89926207e+00 1.74880449e-01]

Unconstrained mean of the fitted distribution:
36771.025634849204






Optimization terminated successfully.    (Exit mode 0)
            Current function value: 367501023.43674386
            Iterations: 146
            Function evaluations: 1314
            Gradient evaluations: 146

Constrained solution:
[1.10598971e-08 4.75361930e+00 1.25137028e-01]

Constrained mean of the fitted distribution:
92733.62000032829

Density above threshold of 10410000.0:
9.95488158305633e-07






Iteration limit exceeded    (Exit mode 9)
            Current function value: 367500977.71336627
            Iterations: 2001
            Function evaluations: 30004
            Gradient evaluations: 2001

Constrained solution:
[1.10570708e-08 4.75094351e+00 1.25122520e-01]

Constrained mean of the fitted distribution:
92733.56112381208

Density above threshold of 10410000.0:
1.0014430677684413e-06


