In [1]:
## Load required libraries
import numpy as np
import scipy as sp
import scipy.optimize as sp_opt
import pandas as pd

In [4]:
## Load data from INEGI's ENIGH survey
print("\nINEGI's ENIGH 2012 survey data:")

# The workbook is read without a header row, so columns are addressed by
# position: column 0 is treated as the observations and column 1 as the
# expansion factors in the code below.
data = pd.read_excel("~/Desktop/enigh_income_data_households.xls", header=None)

print("\nFirst 10 rows of data:", data.head(10), sep="\n")

# From here on `data` is a plain NumPy array rather than a DataFrame.
data = data.to_numpy()

# mydata = [observations, expansion factors, expanded number of observations].
# It is deliberately a list (not a tuple): it is later handed to
# scipy.optimize.minimize through `args`, which wraps a non-tuple into a
# 1-tuple so the objective receives the whole list as a single argument.
mydata = [data[:, 0], data[:, 1], data[:, 1].sum()]

print("\nExpanded number of observations: ", mydata[2], sep="")

# Weighted mean: sum(observation * factor) scaled by 1 / sum(factors).
print(
    "\nAverage quarterly household income: ",
    np.sum(mydata[0] * mydata[1]) * (1 / mydata[2]),
    sep="",
)



INEGI's ENIGH 2012 survey data:

First 10 rows of data:
               0     1
0   31548.900391  1537
1   34186.148438  1537
2  148123.890625  1537
3   45438.937500  1537
4   55309.453125  1537
5    7405.419922  1215
6    9164.080078  1215
7   23326.609375  1215
8   29059.210938  1215
9   45155.070312  1215

Expanded number of observations: 31559379.0

Average quarterly household income: 37999.64190447456


In [7]:
## Perform Maximum Likelihood Estimation (unconstrained, then constrained) with SciPy
# NOTE: no `method` is passed to scipy.optimize.minimize in this cell, so SciPy
# selects its default for each problem (BFGS when there are no constraints,
# SLSQP once constraints are supplied).

# Targets used by the equality constraints
constrained_mean = 92733.62      # required mean of the fitted distribution
top_threshold = 1e9              # threshold at which the upper-tail mass is evaluated
constrained_density = 1e-6       # required probability mass above top_threshold

# Starting point for the optimizer, ordered as (mu, sigma, v)
start_param_scipy = [24000.0, 0.8, -0.2]

def objective_GeneralizedGamma(params, mydata):
    """Weighted negative log-likelihood of the generalized gamma model.

    With ``theta = 1 / (sigma**2 * v**2)`` and ``z = (y / mu)**v`` the
    per-observation log-density used here is::

        log|v| + theta*log(theta) - log(Gamma(theta))
            + theta*log(z) - theta*z - log(y)

    Parameters
    ----------
    params : sequence of three numbers
        ``(mu, sigma, v)``.
    mydata : sequence of three items
        ``(Y, Weights, n)``: observations, their weights, and the factor ``n``
        that multiplies the observation-independent part of the likelihood
        (the caller passes the sum of the weights).

    Returns
    -------
    float
        The negative log-likelihood, suitable for minimization.
    """
    # Retrieve parameters
    mu = params[0]
    sigma = params[1]
    v = params[2]

    # Retrieve observations
    Y = mydata[0]
    Weights = mydata[1]
    n = mydata[2]

    theta = 1./(sigma**2*v**2)
    z = (Y/mu)**v
    # log(z) is evaluated analytically so that it stays finite even when the
    # power (Y/mu)**v underflows to zero.
    log_z = v*np.log(Y/mu)

    # gammaln evaluates log(Gamma(theta)) directly; log(gamma(theta)) would
    # overflow to inf once theta exceeds roughly 171.
    per_obs_const = np.log(np.absolute(v)) + theta*np.log(theta) - sp.special.gammaln(theta)

    # Compute negative log-likelihood
    ll = -n*per_obs_const \
        - theta*np.sum(Weights*log_z) + theta*np.sum(Weights*z) + np.sum(Weights*np.log(Y))

    return ll

def Mean_GeneralizedGamma(params):
    """Mean of the generalized gamma distribution for ``params = (mu, sigma, v)``.

    Evaluates ``mu * Gamma(theta + 1/v) / (theta**(1/v) * Gamma(theta))`` with
    ``theta = 1 / (sigma**2 * v**2)``.

    The gamma ratio is computed in log space so it does not turn into
    ``inf / inf`` when theta is large. ``gammaln`` returns the log of the
    absolute value, so the sign of ``Gamma(theta + 1/v)`` is restored with
    ``gammasgn``; this keeps the result equal to the direct formula when
    ``theta + 1/v`` is negative (Gamma(theta) itself is positive for theta > 0).

    Parameters
    ----------
    params : sequence of three numbers
        ``(mu, sigma, v)``.

    Returns
    -------
    float
        The value of the mean formula above.
    """
    mu = params[0]
    sigma = params[1]
    v = params[2]

    theta = 1./(sigma**2*v**2)
    shifted = theta + 1/v

    # log of |Gamma(theta + 1/v)| / (theta**(1/v) * Gamma(theta))
    log_abs_ratio = sp.special.gammaln(shifted) - sp.special.gammaln(theta) - np.log(theta)/v

    mean = mu * sp.special.gammasgn(shifted) * np.exp(log_abs_ratio)
    return mean

def constrained_mean_fun(params):
    """Residual of the mean constraint: fitted mean minus ``constrained_mean``.

    Evaluates to zero when the distribution described by ``params`` has exactly
    the target mean, which is the form an equality constraint needs.
    """
    return Mean_GeneralizedGamma(params) - constrained_mean

def Right_CDF_GeneralizedGamma(params, threshold=None):
    """Probability mass of the generalized gamma distribution above a threshold.

    With ``theta = 1 / (sigma**2 * v**2)`` and ``z = (Y / mu)**v``, the product
    ``theta * z`` follows a standard gamma distribution with shape ``theta``.
    The map from Y to z is increasing for ``v > 0`` and decreasing for
    ``v < 0``, so the event ``Y > threshold`` is the upper gamma tail in the
    first case and the lower gamma tail in the second. The regularized
    incomplete gamma functions are called directly instead of forming
    ``1 - gammaincc(...)``, which loses precision when the tail is tiny.

    Parameters
    ----------
    params : sequence of three numbers
        ``(mu, sigma, v)``.
    threshold : float, optional
        Point above which the mass is measured. Defaults to the module-level
        ``top_threshold``.

    Returns
    -------
    float
        ``P(Y > threshold)``.
    """
    mu = params[0]
    sigma = params[1]
    v = params[2]

    if threshold is None:
        threshold = top_threshold

    theta = 1./(sigma**2*v**2)
    x = theta*(threshold/mu)**v

    if v < 0:
        # Decreasing transform: Y > threshold  <=>  theta*z < x
        right_cdf = sp.special.gammainc(theta, x)
    else:
        # Increasing transform: Y > threshold  <=>  theta*z > x
        right_cdf = sp.special.gammaincc(theta, x)
    return right_cdf

def constrained_CDF_fun(params):
    """Residual of the tail constraint: mass above the threshold minus ``constrained_density``.

    Evaluates to zero when the value returned by ``Right_CDF_GeneralizedGamma``
    for ``params`` equals the target, which is the form an equality constraint
    needs.
    """
    return Right_CDF_GeneralizedGamma(params) - constrained_density

# Stage 1: unconstrained fit, used as a sanity check and as a warm start.
# `mydata` is a list, so minimize wraps it into a 1-tuple and the objective
# receives it as its single extra argument.
myoptions = dict(disp=True, maxiter=2000)
unc_results = sp_opt.minimize(
    objective_GeneralizedGamma,
    start_param_scipy,
    args=mydata,
    options=myoptions,
    tol=1e-12,
)
print("\nUnconstrained solution:", unc_results.x, sep="\n")
print("\nUnconstrained mean of the fitted distribution:")
unc_fitted_mean = Mean_GeneralizedGamma(unc_results.x)
print(unc_fitted_mean)
print("\n")

# Stage 2: impose the mean constraint, starting from the unconstrained optimum.
con_start_param_scipy = unc_results.x
myconstraints = {'type': 'eq', 'fun': constrained_mean_fun}
con_results = sp_opt.minimize(
    objective_GeneralizedGamma,
    con_start_param_scipy,
    args=mydata,
    constraints=myconstraints,
    options=myoptions,
)
print("\nConstrained solution:", con_results.x, sep="\n")
print("\nConstrained mean of the fitted distribution:")
con_fitted_mean = Mean_GeneralizedGamma(con_results.x)
print(con_fitted_mean)
# Reported for information only: the tail mass is not constrained at this stage.
print(f"\nDensity above threshold of {top_threshold!s}:")
print(Right_CDF_GeneralizedGamma(con_results.x))
print("\n")

# Stage 3: impose both the mean and the tail-mass constraints, starting from
# the stage-2 solution. The stage-2 results are overwritten.
con_start_param_scipy = con_results.x
myconstraints = (
    {'type': 'eq', 'fun': constrained_mean_fun},
    {'type': 'eq', 'fun': constrained_CDF_fun},
)
con_results = sp_opt.minimize(
    objective_GeneralizedGamma,
    con_start_param_scipy,
    args=mydata,
    constraints=myconstraints,
    options=myoptions,
)
print("\nConstrained solution:", con_results.x, sep="\n")
print("\nConstrained mean of the fitted distribution:")
con_fitted_mean = Mean_GeneralizedGamma(con_results.x)
print(con_fitted_mean)
print(f"\nConstrained density above threshold of {top_threshold!s}:")
print(Right_CDF_GeneralizedGamma(con_results.x))
print("\n")


         Current function value: 360372513.509367
         Iterations: 13
         Function evaluations: 437
         Gradient evaluations: 85

Unconstrained solution:
[ 2.45221380e+04  8.44129639e-01 -1.43244160e-01]

Unconstrained mean of the fitted distribution:
37434.935872292604


Optimization terminated successfully.    (Exit mode 0)
            Current function value: 362352689.9943639
            Iterations: 35
            Function evaluations: 235
            Gradient evaluations: 35

Constrained solution:
[ 1.97970955e+04  9.39951323e-01 -8.74216674e-01]

Constrained mean of the fitted distribution:
92733.62000000273

Density above threshold of 1000000000.0:
1.1096372575236657e-06




  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Optimization terminated successfully.    (Exit mode 0)
            Current function value: 362361924.9733937
            Iterations: 32
            Function evaluations: 212
            Gradient evaluations: 32

Constrained solution:
[ 2.03412971e+04  9.50347352e-01 -8.43623293e-01]

Constrained mean of the fitted distribution:
92733.61999999873

Constrained density above threshold of 1000000000.0:
1.000000000139778e-06


