In [1]:
## Load required libraries
import numpy as np
import scipy as sp
import scipy.optimize as sp_opt
import pandas as pd

In [2]:
## Read the INEGI ENIGH survey extract and print a short summary
print("\nINEGI's ENIGH 2012 survey data:")

# The spreadsheet has no header row, so pandas labels the columns 0 and 1.
income_frame = pd.read_excel("~/Desktop/enigh_income_data_labor.xls", header = None)

print("\nFirst 10 rows of data:")
print(income_frame.head(10))
data = income_frame.to_numpy()

# Column 0 holds the observations, column 1 their expansion factors;
# the third entry of `mydata` is the expanded number of observations.
observations, expansion_factors = data[:,0], data[:,1]
expanded_total = np.sum(expansion_factors)
mydata = [observations, expansion_factors, expanded_total]

print("\nExpanded number of observations: " + str(expanded_total))

# Expansion-factor-weighted average of the observations
weighted_average = 1/expanded_total*np.sum(observations*expansion_factors)
print("\nAverage quarterly household income: " + str(weighted_average) )


INEGI's ENIGH 2012 survey data:

First 10 rows of data:
              0     1
0   5869.560059  1537
1  25679.339844  1537
2  21277.160156  1537
3   7092.379883  1537
4  54293.468750  1537
5  30091.289062  1537
6  32624.990234  1537
7  27880.429688  1537
8  26657.599609  1537
9   5282.600098  1215

Expanded number of observations: 38294419.0

Average quarterly household income: 16318.844812730109


In [41]:
## Perform Maximum Likelihood Estimation with SciPy: BFGS for the unconstrained fit, SLSQP for the constrained fits

# Constraints
# The target mean is the survey's weighted average income (computed from
# `mydata` rather than pasted from printed output) scaled by a fixed factor.
# NOTE(review): the origin of the 2.44 factor is not visible here -- confirm its source.
mean_adjustment_factor = 2.44
survey_weighted_mean = np.sum(mydata[0]*mydata[1])/mydata[2]
constrained_mean = mean_adjustment_factor*survey_weighted_mean
# Income level above which the tail constraints are evaluated
top_threshold = 100000
# Target probability mass above `top_threshold`
constrained_density = 0.1
# Target mean of the distribution restricted to values above `top_threshold`
constrained_top_mean = 150000

# Initial guess for (mu, sigma, v)
start_param_scipy = [1.0e4, 8e-1, -2.0e-1]

def objective_GeneralizedGamma(params, mydata):
    """Weighted negative log-likelihood of a generalized gamma distribution.

    With theta = 1/(sigma^2 v^2) and z = (y/mu)^v, the log-density used is
        log|v| + theta*log(theta) - log(Gamma(theta)) + theta*log(z) - theta*z - log(y).

    Parameters
    ----------
    params : sequence of 3 floats
        (mu, sigma, v).
    mydata : sequence
        [observations, weights, sum of weights].  The logarithm of the
        observations is taken, so they must be strictly positive.

    Returns
    -------
    float
        The negative of the weighted log-likelihood (to be minimized).
    """
    # Retrieve parameters
    mu, sigma, v = params[0], params[1], params[2]

    # Retrieve observations
    Y = mydata[0]
    Weights = mydata[1]
    n = mydata[2]

    theta = 1./(sigma**2*v**2)
    z = (Y/mu)**v
    # gammaln evaluates log(Gamma(theta)) directly; taking np.log of
    # gamma(theta) overflows to inf once theta exceeds roughly 171, which
    # happens whenever sigma*|v| is small.
    log_norm = np.log(np.absolute(v)) + theta*np.log(theta) - sp.special.gammaln(theta)
    # Compute negative log-likelihood
    ll = -n*log_norm \
        -theta*np.sum(Weights*np.log(z))+ theta*np.sum(Weights*z) + np.sum(Weights*np.log(Y))

    return ll

def Mean_GeneralizedGamma(params):
    """Mean of the generalized gamma distribution with parameters (mu, sigma, v).

    Computes mu * Gamma(theta + 1/v) / (theta**(1/v) * Gamma(theta)) with
    theta = 1/(sigma^2 v^2).

    Parameters
    ----------
    params : sequence of 3 floats
        (mu, sigma, v).

    Returns
    -------
    float
        The value of the expression above.
    """
    mu, sigma, v = params[0], params[1], params[2]

    theta = 1./(sigma**2*v**2)
    shifted = theta + 1/v
    # Work in log space: evaluating the two gamma functions separately gives
    # inf/inf = nan once theta exceeds roughly 171.  gammasgn restores the
    # sign of Gamma(shifted), which can be negative when v < 0 and shifted < 0.
    log_ratio = sp.special.gammaln(shifted) - sp.special.gammaln(theta) - np.log(theta)/v
    mean = mu * sp.special.gammasgn(shifted) * np.exp(log_ratio)
    return mean

def constrained_mean_fun(params):
    """Equality-constraint residual for the mean.

    Returns the fitted mean at `params` minus the module-level target
    `constrained_mean`; the constraint holds where this is zero.
    """
    return Mean_GeneralizedGamma(params) - constrained_mean

def Right_CDF_GeneralizedGamma(params, threshold=None):
    """Probability mass of the generalized gamma distribution above a threshold.

    With theta = 1/(sigma^2 v^2) and z = (Y/mu)^v, theta*z follows a standard
    gamma distribution with shape theta.  For v > 0, z increases with Y, so
    P(Y > t) is the upper regularized incomplete gamma function; for v < 0,
    z decreases with Y, so P(Y > t) is the lower one.

    Parameters
    ----------
    params : sequence of 3 floats
        (mu, sigma, v).
    threshold : float, optional
        Level t above which the mass is measured.  Defaults to the
        module-level `top_threshold`.

    Returns
    -------
    float
        P(Y > threshold).
    """
    if threshold is None:
        threshold = top_threshold

    mu, sigma, v = params[0], params[1], params[2]

    theta = 1./(sigma**2*v**2)
    scaled_cut = theta*(threshold/mu)**v
    if v < 0:
        # Decreasing transform: the right tail of Y is the left tail of z.
        right_cdf = sp.special.gammainc(theta, scaled_cut)
    else:
        right_cdf = sp.special.gammaincc(theta, scaled_cut)
    return right_cdf

def constrained_CDF_fun(params):
    """Equality-constraint residual for the mass above the threshold.

    Returns the value of `Right_CDF_GeneralizedGamma` at `params` minus the
    module-level target `constrained_density`; the constraint holds where
    this is zero.
    """
    return Right_CDF_GeneralizedGamma(params) - constrained_density

def Restricted_Mean_GeneralizedGamma(params, threshold=None):
    """Mean of the generalized gamma distribution restricted to Y > threshold.

    Computed as E[Y * 1{Y > t}] / P(Y > t).  With theta = 1/(sigma^2 v^2) and
    x = theta*(t/mu)^v, the numerator equals the unrestricted mean times a
    regularized incomplete gamma function of shape theta + 1/v evaluated at x,
    and the denominator is the same function with shape theta.  The upper
    incomplete gamma applies for v > 0; for v < 0 the transform from Y to
    (Y/mu)^v is decreasing, so the lower incomplete gamma applies to both.

    Parameters
    ----------
    params : sequence of 3 floats
        (mu, sigma, v).
    threshold : float, optional
        Level t above which the mean is taken.  Defaults to the module-level
        `top_threshold`.

    Returns
    -------
    float
        E[Y | Y > threshold].
    """
    if threshold is None:
        threshold = top_threshold

    mu, sigma, v = params[0], params[1], params[2]

    theta = 1./(sigma**2*v**2)
    theta_new = theta + 1./v
    scaled_cut = theta*(threshold/mu)**v

    # Pick the tail once and use it for numerator and denominator alike, so
    # the ratio is consistent regardless of the sign of v.
    tail = sp.special.gammainc if v < 0 else sp.special.gammaincc

    res_mean = Mean_GeneralizedGamma(params)*tail(theta_new, scaled_cut)/tail(theta, scaled_cut)
    return res_mean

def constrained_top_mean_fun(params):
    """Equality-constraint residual for the mean above the threshold.

    Returns the value of `Restricted_Mean_GeneralizedGamma` at `params` minus
    the module-level target `constrained_top_mean`; the constraint holds
    where this is zero.
    """
    return Restricted_Mean_GeneralizedGamma(params) - constrained_top_mean


def _report_fit(results, fit_label, density_label = "Density", top_mean_label = "Mean"):
    """Print the summary of one fit and return the fitted mean.

    Parameters
    ----------
    results : scipy.optimize.OptimizeResult
        Result returned by `sp_opt.minimize`.
    fit_label : str
        Prefix for the solution / fitted-mean headings.
    density_label, top_mean_label : str
        Prefixes for the two tail headings (marks which one was constrained).
    """
    if not results.success:
        # Surface non-convergence instead of silently reporting the last iterate.
        print("\nWARNING: optimizer did not converge: " + str(results.message))
    fitted_mean = Mean_GeneralizedGamma(results.x)
    print("\n" + fit_label + " solution:")
    print(results.x)
    print("\n" + fit_label + " mean of the fitted distribution:")
    print(fitted_mean)
    print("\n" + density_label + " above threshold of " + str(top_threshold) +":")
    print(Right_CDF_GeneralizedGamma(results.x))
    print("\n" + top_mean_label + " above threshold of " + str(top_threshold) +":")
    print( Restricted_Mean_GeneralizedGamma(results.x) )
    print("\n")
    return fitted_mean


# Options shared by both solvers; each tolerance is passed only to the solver
# that understands it ('gtol' for BFGS, 'ftol' for SLSQP), which avoids the
# "Unknown solver options" warnings.
myoptions = {'disp' : True, 'maxiter' : 10000}
bfgs_options = dict(myoptions, gtol = 1.0e-8)
slsqp_options = dict(myoptions, ftol = 1.0e-8)

mean_constraint = {'type': 'eq', 'fun' : constrained_mean_fun}
cdf_constraint = {'type': 'eq', 'fun' : constrained_CDF_fun}
top_mean_constraint = {'type': 'eq', 'fun' : constrained_top_mean_fun}

# Firstly, test unconstrained optimization
unc_results = sp_opt.minimize(objective_GeneralizedGamma, start_param_scipy, args = (mydata,),
                              method = 'BFGS', options = bfgs_options)
unc_fitted_mean = _report_fit(unc_results, "Unconstrained")

# Then, perform optimization over the constrained mean,
# warm-started from the unconstrained solution
con_start_param_scipy = unc_results.x
myconstraints = (mean_constraint,)
con_results = sp_opt.minimize(objective_GeneralizedGamma, con_start_param_scipy, args = (mydata,),
                              method = 'SLSQP', constraints = myconstraints, options = slsqp_options)
con_fitted_mean = _report_fit(con_results, "Constrained")

# Finally, perform optimization over the constrained mean and the constrained cdf,
# warm-started from the previous constrained solution
con_start_param_scipy = con_results.x
myconstraints = (mean_constraint, cdf_constraint)
con_results = sp_opt.minimize(objective_GeneralizedGamma, con_start_param_scipy, args = (mydata,),
                              method = 'SLSQP', constraints = myconstraints, options = slsqp_options)
con_fitted_mean = _report_fit(con_results, "Constrained", density_label = "Constrained density")

# Optionally, perform optimization over the constrained mean and the constrained top mean,
# warm-started from the previous constrained solution
con_start_param_scipy = con_results.x
myconstraints = (mean_constraint, top_mean_constraint)
con_results = sp_opt.minimize(objective_GeneralizedGamma, con_start_param_scipy, args = (mydata,),
                              method = 'SLSQP', constraints = myconstraints, options = slsqp_options)
con_fitted_mean = _report_fit(con_results, "Constrained", top_mean_label = "Constrained mean")




Optimization terminated successfully.
         Current function value: 409516796.268954
         Iterations: 14
         Function evaluations: 90
         Gradient evaluations: 18

Unconstrained solution:
[1.49556056e+04 1.08764101e+00 8.27228312e-01]

Unconstrained mean of the fitted distribution:
16291.163190574945

Density above threshold of 100000:
0.004520535067636733

Mean above threshold of 100000:
121631.11948955


Optimization terminated successfully.    (Exit mode 0)
            Current function value: 414890203.3670205
            Iterations: 56
            Function evaluations: 371
            Gradient evaluations: 55

Constrained solution:
[1.46738643e+04 1.61533463e+00 1.48164681e-01]

Constrained mean of the fitted distribution:
39817.98131199304

Density above threshold of 100000:
0.09258013987115232

Mean above threshold of 100000:
238931.27192139803


Optimization terminated successfully.    (Exit mode 0)
            Current function value: 415346939.8877118
         



Optimization terminated successfully.    (Exit mode 0)
            Current function value: 418700005.9608245
            Iterations: 22
            Function evaluations: 173
            Gradient evaluations: 22

Constrained solution:
[3.91134966e+04 1.15698062e+00 9.66879735e-01]

Constrained mean of the fitted distribution:
39817.981311999974

Density above threshold of 100000:
0.09611272482555759

Constrained mean above threshold of 100000:
150000.00000000163


