In [1]:
## Load required libraries
import numpy as np
import scipy as sp
import scipy.optimize as sp_opt
import pandas as pd

In [3]:
## Load data from INEGI's ENIGH survey
print("\nINEGI's ENIGH 2012 survey data:")

data = pd.read_excel("~/Desktop/enigh_income_data_labor.xls", header = None)

print("\nFirst 10 rows of data:")
print(data.head(10))
data = data.to_numpy()

# Observations, expansion factors, expanded number of observations
mydata = [data[:,0], data[:,1], np.sum(data[:,1])]

print("\nExpanded number of observations: " + str(mydata[2]))

print("\nAverage quarterly household income: " + str(1/mydata[2]*np.sum(mydata[0]*mydata[1])) )

# Normalize data
mydata[0] = mydata[0]/1000.
mydata[1] = mydata[1]/mydata[2]
mydata[2] = 1.



INEGI's ENIGH 2012 survey data:

First 10 rows of data:
              0     1
0   5869.560059  1537
1  25679.339844  1537
2  21277.160156  1537
3   7092.379883  1537
4  54293.468750  1537
5  30091.289062  1537
6  32624.990234  1537
7  27880.429688  1537
8  26657.599609  1537
9   5282.600098  1215

Expanded number of observations: 38294419.0

Average quarterly household income: 16318.844812730109


In [50]:
## Perform Maximum Likelihood Estimation with constraints using Scipy's optimizer

# Constraints
constrained_mean = 2.44*16318.8448/1000
top_threshold = 2.0e2
constrained_density = 1.0e-3
constrained_top_mean = 2.3e2

# Initial guess
start_param_scipy = [1.6e1, 4e0, 2e-1, 7e-1]

def objective_GeneralizedBeta(params, mydata):
    # Retrieve parameters
    mu = params[0]
    sigma = params[1]
    v = params[2]
    tau = params[3]
    
    # Retrieve observations
    Y = mydata[0]
    Weights = mydata[1]
    n = mydata[2]
    
    
    aux = 1. + (Y/mu)**sigma
    
    # Compute negative log-likelihood
    ll = -n*( np.log(np.absolute(sigma)) - sigma*v*np.log(mu) - np.log(sp.special.beta(v, tau)) ) \
        -(sigma*v-1)*np.sum(Weights*np.log(Y)) + (v+tau)*np.sum(Weights*np.log(aux))

    return ll

def Mean_GeneralizedBeta(params):
    mu = params[0]
    sigma = params[1]
    v = params[2]
    tau = params[3]
    
    mean = mu * sp.special.beta(v + 1./sigma, tau - 1./sigma) / sp.special.beta(v, tau)

    return mean

def constrained_mean_fun(params):
    cons_mean = Mean_GeneralizedBeta(params) - constrained_mean
    return cons_mean

def Right_CDF_GeneralizedBeta(params):
    mu = params[0]
    sigma = params[1]
    v = params[2]
    tau = params[3]
    
    c = 1./(1. + (top_threshold/mu)**(-1.*sigma))
    right_cdf = 1. - sp.special.betainc(v, tau, c)
    return right_cdf

def constrained_CDF_fun(params):
    cons_CDF = Right_CDF_GeneralizedBeta(params) - constrained_density
    return cons_CDF

def Restricted_Mean_GeneralizedBeta(params):
    mu = params[0]
    sigma = params[1]
    v = params[2]
    tau = params[3]
    
    c = 1./(1. + (top_threshold/mu)**(-1.*sigma))
    right_cdf_new = 1. - sp.special.betainc(v + 1./sigma, tau - 1./sigma, c)
    
    res_mean = Mean_GeneralizedBeta(params)*right_cdf_new/Right_CDF_GeneralizedBeta(params)
    return res_mean

def constrained_top_mean_fun(params):
    cons_top_mean = Restricted_Mean_GeneralizedBeta(params) - constrained_top_mean
    return cons_top_mean


# Firstly, test unconstrained optimization
myoptions = {'disp' : True, 'maxiter' : 10000, 'gtol' : 1.0e-8, 'ftol' : 1.0e-8}
unc_results = sp_opt.minimize(objective_GeneralizedBeta, start_param_scipy, args = mydata, options = myoptions, bounds = [(0, None), (0, None), (0, None), (0, None)])
print("\nUnconstrained solution:")
print(unc_results.x)
print("\nUnconstrained mean of the fitted distribution:")
unc_fitted_mean = Mean_GeneralizedBeta(unc_results.x)
print(unc_fitted_mean)
print("\nDensity above threshold of " + str(top_threshold) +":")
print(Right_CDF_GeneralizedBeta(unc_results.x))
print("\nMean above threshold of " + str(top_threshold) +":")
print( Restricted_Mean_GeneralizedBeta(unc_results.x) )
print("\n")

# Then, perform optimization over the constrained mean
con_start_param_scipy = start_param_scipy
myconstraints = ({'type': 'eq', 'fun' : constrained_mean_fun})
con_results = sp_opt.minimize(objective_GeneralizedBeta, con_start_param_scipy, args = mydata, constraints = myconstraints, options = myoptions, bounds = [(0, None), (0, None), (0, None), (0, None)])
print("\nConstrained solution:")
print(con_results.x)
print("\nConstrained mean of the fitted distribution:")
con_fitted_mean = Mean_GeneralizedBeta(con_results.x)
print(con_fitted_mean)
print("\nDensity above threshold of " + str(top_threshold) +":")
print(Right_CDF_GeneralizedBeta(con_results.x))
print("\nMean above threshold of " + str(top_threshold) +":")
print( Restricted_Mean_GeneralizedBeta(con_results.x) )
print("\n")

# Finally, perform optimization over the constrained mean and the constrained cdf
con_start_param_scipy = start_param_scipy
myconstraints = ({'type': 'eq', 'fun' : constrained_mean_fun}, {'type': 'eq', 'fun' : constrained_CDF_fun})
con_results = sp_opt.minimize(objective_GeneralizedBeta, con_start_param_scipy, args = mydata, constraints = myconstraints, options = myoptions, bounds = [(0, None), (0, None), (0, None), (0, None)])
print("\nConstrained solution:")
print(con_results.x)
print("\nConstrained mean of the fitted distribution:")
con_fitted_mean = Mean_GeneralizedBeta(con_results.x)
print(con_fitted_mean)
print("\nConstrained density above threshold of " + str(top_threshold) +":")
print(Right_CDF_GeneralizedBeta(con_results.x))
print("\nMean above threshold of " + str(top_threshold) +":")
print( Restricted_Mean_GeneralizedBeta(con_results.x) )
print("\n")

# Optionally, perform optimization over the constrained mean and the constrained top mean
con_start_param_scipy = start_param_scipy
myconstraints = ({'type': 'eq', 'fun' : constrained_mean_fun}, {'type': 'eq', 'fun' : constrained_top_mean_fun})
con_results = sp_opt.minimize(objective_GeneralizedBeta, con_start_param_scipy, args = mydata, constraints = myconstraints, options = myoptions, bounds = [(0, None), (0, None), (0, None), (0, None)])
print("\nConstrained solution:")
print(con_results.x)
print("\nConstrained mean of the fitted distribution:")
con_fitted_mean = Mean_GeneralizedBeta(con_results.x)
print(con_fitted_mean)
print("\nDensity above threshold of " + str(top_threshold) +":")
print(Right_CDF_GeneralizedBeta(con_results.x))
print("\nConstrained mean above threshold of " + str(top_threshold) +":")
print( Restricted_Mean_GeneralizedBeta(con_results.x) )
print("\n")



Unconstrained solution:
[16.72824432  8.90758171  0.09294565  0.20042276]

Unconstrained mean of the fitted distribution:
17.237677891398977

Density above threshold of 200.0:
0.0038728130211088674

Mean above threshold of 200.0:
454.6855276931572






Optimization terminated successfully.    (Exit mode 0)
            Current function value: 3.763803793124903
            Iterations: 5096
            Function evaluations: 41005
            Gradient evaluations: 5096

Constrained solution:
[1.40796915e+01 2.03904170e+02 4.03973300e-03 5.83642999e-03]

Constrained mean of the fitted distribution:
39.817981312027634

Density above threshold of 200.0:
0.0

Mean above threshold of 200.0:
nan






Optimization terminated successfully.    (Exit mode 0)
            Current function value: 4.128108917889572
            Iterations: 317
            Function evaluations: 2397
            Gradient evaluations: 317

Constrained solution:
[9.96350404e+02 2.25623528e+00 2.77729297e-01 1.71664343e+02]

Constrained mean of the fitted distribution:
39.81798131088899

Constrained density above threshold of 200.0:
0.001000000000110135

Mean above threshold of 200.0:
216.2900374158684






Optimization terminated successfully.    (Exit mode 0)
            Current function value: 4.080418653241588
            Iterations: 86
            Function evaluations: 602
            Gradient evaluations: 86

Constrained solution:
[2.46242484e+03 1.57360448e+00 4.07849944e-01 1.87819428e+02]

Constrained mean of the fitted distribution:
39.81798131200705

Density above threshold of 200.0:
0.005336270849704339

Constrained mean above threshold of 200.0:
229.9999999995461


