In [1]:
## Load required libraries
import numpy as np
import scipy as sp
import scipy.optimize as sp_opt
import pandas as pd

In [2]:
## Load data from INEGI's ENIGH survey
print("\nINEGI's ENIGH 2012 survey data:")

data = pd.read_excel("~/Desktop/enigh_income_data_households.xls", header = None)

print("\nFirst 10 rows of data:")
print(data.head(10))
data = data.to_numpy()

# Observations, expansion factors, expanded number of observations
mydata = [data[:,0], data[:,1], np.sum(data[:,1])]

print("\nExpanded number of observations: " + str(mydata[2]))

print("\nAverage quarterly household income: " + str(1/mydata[2]*np.sum(mydata[0]*mydata[1])) )




INEGI's ENIGH 2012 survey data:

First 10 rows of data:
               0     1
0   31548.900391  1537
1   34186.148438  1537
2  148123.890625  1537
3   45438.937500  1537
4   55309.453125  1537
5    7405.419922  1215
6    9164.080078  1215
7   23326.609375  1215
8   29059.210938  1215
9   45155.070312  1215

Expanded number of observations: 31559379.0

Average quarterly household income: 37999.64190447456


In [10]:
## Perform Maximum Likelihood Estimation with constraints using Scipy's optimizer

# Constraints
constrained_mean = 92733.62
top_threshold = 1.0e6
constrained_density = 1.0e-6
constrained_top_mean = 1.5e6

# Initial guess
start_param_scipy = [1.7e4, 3.2e0, 7.9e-1, 3.6e-1]

def objective_GeneralizedBeta(params, mydata):
    # Retrieve parameters
    mu = params[0]
    sigma = params[1]
    v = params[2]
    tau = params[3]
    
    # Retrieve observations
    Y = mydata[0]
    Weights = mydata[1]
    n = mydata[2]
    
    
    aux = 1. + (Y/mu)**sigma
    
    # Compute negative log-likelihood
    ll = -n*( np.log(np.absolute(sigma)) - sigma*v*np.log(mu) - np.log(sp.special.beta(v, tau)) ) \
        -(sigma*v-1)*np.sum(Weights*np.log(Y)) + (v+tau)*np.sum(Weights*np.log(aux))

    return ll

def Mean_GeneralizedBeta(params):
    mu = params[0]
    sigma = params[1]
    v = params[2]
    tau = params[3]
    
    mean = mu * sp.special.beta(v + 1./sigma, tau - 1./sigma) / sp.special.beta(v, tau)

    return mean

def constrained_mean_fun(params):
    cons_mean = Mean_GeneralizedBeta(params) - constrained_mean
    return cons_mean

def Right_CDF_GeneralizedBeta(params):
    mu = params[0]
    sigma = params[1]
    v = params[2]
    tau = params[3]
    
    c = 1./(1. + (top_threshold/mu)**(-1.*sigma))
    right_cdf = 1. - sp.special.betainc(v, tau, c)
    return right_cdf

def constrained_CDF_fun(params):
    cons_CDF = Right_CDF_GeneralizedBeta(params) - constrained_density
    return cons_CDF

def Restricted_Mean_GeneralizedBeta(params):
    mu = params[0]
    sigma = params[1]
    v = params[2]
    tau = params[3]
    
    c = 1./(1. + (top_threshold/mu)**(-1.*sigma))
    right_cdf_new = 1. - sp.special.betainc(v + 1./sigma, tau - 1./sigma, c)
    
    res_mean = Mean_GeneralizedBeta(params)*right_cdf_new/Right_CDF_GeneralizedBeta(params)
    return res_mean

def constrained_top_mean_fun(params):
    cons_top_mean = Restricted_Mean_GeneralizedBeta(params) - constrained_top_mean
    return cons_top_mean


# Firstly, test unconstrained optimization
myoptions = {'disp' : True, 'maxiter' : 2000}#, 'gtol' : 1.0e-8, 'ftol' : 1.0e-8}
unc_results = sp_opt.minimize(objective_GeneralizedBeta, start_param_scipy, args = mydata, options = myoptions)#, bounds = [(0, None), (0, None), (0, None), (0, None)])
print("\nUnconstrained solution:")
print(unc_results.x)
print("\nUnconstrained mean of the fitted distribution:")
unc_fitted_mean = Mean_GeneralizedBeta(unc_results.x)
print(unc_fitted_mean)
print("\nDensity above threshold of " + str(top_threshold) +":")
print(Right_CDF_GeneralizedBeta(unc_results.x))
print("\nMean above threshold of " + str(top_threshold) +":")
print( Restricted_Mean_GeneralizedBeta(unc_results.x) )
print("\n")

# Then, perform optimization over the constrained mean
con_start_param_scipy = unc_results.x
myconstraints = ({'type': 'eq', 'fun' : constrained_mean_fun})
con_results = sp_opt.minimize(objective_GeneralizedBeta, con_start_param_scipy, args = mydata, constraints = myconstraints, options = myoptions, bounds = [(0, None), (0, None), (0, None), (0, None)])
print("\nConstrained solution:")
print(con_results.x)
print("\nConstrained mean of the fitted distribution:")
con_fitted_mean = Mean_GeneralizedBeta(con_results.x)
print(con_fitted_mean)
print("\nDensity above threshold of " + str(top_threshold) +":")
print(Right_CDF_GeneralizedBeta(con_results.x))
print("\nMean above threshold of " + str(top_threshold) +":")
print( Restricted_Mean_GeneralizedBeta(con_results.x) )
print("\n")

# Finally, perform optimization over the constrained mean and the constrained cdf
con_start_param_scipy = con_results.x
myconstraints = ({'type': 'eq', 'fun' : constrained_mean_fun}, {'type': 'eq', 'fun' : constrained_CDF_fun})
con_results = sp_opt.minimize(objective_GeneralizedBeta, con_start_param_scipy, args = mydata, constraints = myconstraints, options = myoptions, bounds = [(0, None), (0, None), (0, None), (0, None)])
print("\nConstrained solution:")
print(con_results.x)
print("\nConstrained mean of the fitted distribution:")
con_fitted_mean = Mean_GeneralizedBeta(con_results.x)
print(con_fitted_mean)
print("\nConstrained density above threshold of " + str(top_threshold) +":")
print(Right_CDF_GeneralizedBeta(con_results.x))
print("\nMean above threshold of " + str(top_threshold) +":")
print( Restricted_Mean_GeneralizedBeta(con_results.x) )
print("\n")

# Optionally, perform optimization over the constrained mean and the constrained top mean
con_start_param_scipy = con_results.x
myconstraints = ({'type': 'eq', 'fun' : constrained_mean_fun}, {'type': 'eq', 'fun' : constrained_top_mean_fun})
con_results = sp_opt.minimize(objective_GeneralizedBeta, con_start_param_scipy, args = mydata, constraints = myconstraints, options = myoptions, bounds = [(0, None), (0, None), (0, None), (0, None)])
print("\nConstrained solution:")
print(con_results.x)
print("\nConstrained mean of the fitted distribution:")
con_fitted_mean = Mean_GeneralizedBeta(con_results.x)
print(con_fitted_mean)
print("\nDensity above threshold of " + str(top_threshold) +":")
print(Right_CDF_GeneralizedBeta(con_results.x))
print("\nConstrained mean above threshold of " + str(top_threshold) +":")
print( Restricted_Mean_GeneralizedBeta(con_results.x) )
print("\n")


         Current function value: 360323176.055492
         Iterations: 57
         Function evaluations: 749
         Gradient evaluations: 123

Unconstrained solution:
[2.33781817e+04 1.08284377e+00 2.98041605e+00 2.70774139e+00]

Unconstrained mean of the fitted distribution:
37809.10592665326

Density above threshold of 1000000.0:
0.00013222775941967857

Mean above threshold of 1000000.0:
1530870.5334565225


Optimization terminated successfully.    (Exit mode 0)




            Current function value: 364961550.94797206
            Iterations: 60
            Function evaluations: 436
            Gradient evaluations: 60

Constrained solution:
[2.33780260e+04 7.59931843e-01 3.44387876e+00 2.44206090e+00]

Constrained mean of the fitted distribution:
92733.62000024407

Density above threshold of 1000000.0:
0.007549874646123

Mean above threshold of 1000000.0:
2304845.56336687


Positive directional derivative for linesearch    (Exit mode 8)
            Current function value: 364961550.94797206
            Iterations: 5
            Function evaluations: 6
            Gradient evaluations: 1

Constrained solution:
[2.33780260e+04 7.59931843e-01 3.44387876e+00 2.44206090e+00]

Constrained mean of the fitted distribution:
92733.62000024407

Constrained density above threshold of 1000000.0:
0.007549874646123

Mean above threshold of 1000000.0:
2304845.56336687






Optimization terminated successfully.    (Exit mode 0)
            Current function value: 367845418.0218754
            Iterations: 251
            Function evaluations: 1912
            Gradient evaluations: 251

Constrained solution:
[2.38816321e+04 1.11966340e-01 1.36323191e+02 1.25491756e+02]

Constrained mean of the fitted distribution:
92733.61999994935

Density above threshold of 1000000.0:
0.003528632814484789

Constrained mean above threshold of 1000000.0:
1500000.00000021


