In [1]:
# Load required libraries
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import pandas as pd
import time

#tf.debugging.set_log_device_placement(True);

In [30]:
# Load data
data = pd.read_excel("Desktop/datos_enigh.xls", header = None)
print("\nFirst 10 rows of data:")
print(data.head(10))
data = data.to_numpy()
# Transform to monthly income in thousands
data = data / np.array([3000,1])

# Cast into Tensorflow objects
Y = tf.constant(data[:,0], dtype = tf.float32)
Weights = tf.constant(data[:,1], dtype = tf.float32)
expanded_n = tf.math.reduce_sum(Weights)

# Print tensors
print("\nFirst 10 elements of Y tensor:")
tf.print(Y[0:9])
print("\nFirst 10 elements of Weights tensor:")
tf.print(Weights[0:9])
print("\nExpanded number of observations in data:")
tf.print(expanded_n)


First 10 rows of data:
              0    1
0  14609.870117  915
1  22263.830078  915
2   2393.419922  915
3  18200.000000  915
4   5983.560059  915
5  12465.750000  915
6  13213.700195  968
7  12553.009766  968
8     24.930000  968
9  41884.921875  968

First 10 elements of Y tensor:
[4.86995649 7.42127657 0.797806621 ... 4.40456676 4.18433666 0.00831]

First 10 elements of Weights tensor:
[915 915 915 ... 968 968 968]

Expanded number of observations in data:
73871328


In [33]:
# Test param_vec
test_param = tf.constant([3, 1., 0.7], dtype = tf.float32)

# Define target negative log-likelihood function without constraints
@tf.function
def target_GeneralizedGamma(param_vec, Y = Y, Weights = Weights, n = expanded_n):
    # Retrieve distribution parameters
    a, d, p = tf.split(param_vec, 3, axis = 0)
    
    # Normalize weights
    #weights = weights / n
    #tf.print(weights[0:9])
    
    # Compute negative log-likelihood
    ll = -(n*tf.math.log((p/a**d)/tf.math.exp(tf.math.lgamma(d/p))) + (d-1)*tf.math.reduce_sum(Weights*tf.math.log(Y)) - tf.math.reduce_sum(Weights*(Y/a)**p))
    
    return tf.squeeze(ll)

# Return target function and its gradient
@tf.function
def target_grad_GeneralizedGamma(param_vec):
    return tfp.math.value_and_gradient(target_GeneralizedGamma, param_vec)

print( target_grad_GeneralizedGamma(test_param) )

(<tf.Tensor: shape=(), dtype=float32, numpy=198604540.0>, <tf.Tensor: shape=(3,), dtype=float32, numpy=array([  1282176. ,  -2394799.8, -21291136. ], dtype=float32)>)


In [62]:
# Minimize negative log-likelihood via BFGS
start_param = test_param

optim_results = tfp.optimizer.bfgs_minimize(target_grad_GeneralizedGamma, start_param, tolerance = 1e-10)
print("\nOutput of optimizer:")
tf.print(optim_results)
est_params = optim_results.position.numpy()

a_fitted = tf.constant(est_params[0], dtype = tf.float64)
d_fitted = tf.constant(est_params[1], dtype = tf.float64)
p_fitted = tf.constant(est_params[2], dtype = tf.float64)

print("\nEstimated parameters:")
print(est_params)

# Compute fitted mean
mu_fitted = a_fitted * tf.math.exp(tf.math.lgamma((d_fitted+1)/p_fitted)) / tf.math.exp(tf.math.lgamma(d_fitted/p_fitted))
print("\nMean of fitted distribution:")
tf.print(mu_fitted)


Output of optimizer:
BfgsOptimizerResults(converged=1, failed=0, num_iterations=9, num_objective_evaluations=69, position=[2.88551736 1.144014 0.772850096], objective_value=197576048, objective_gradient=[-37150 -133019.375 289104], inverse_hessian_estimate=[[0.988262296 -0.0351199284 0.10210707]
 [-0.0351199284 0.899715602 0.312423259]
 [0.10210707 0.312423259 0.121726513]])

Estimated parameters:
[2.8855174 1.144014  0.7728501]

Mean of fitted distribution:
5.3449636313359


In [331]:
# Minimize negative log-likelihood via gradient descent
@tf.function
def SGDfunction(a, d, p, lambda1, Y = Y, Weights = Weights, n = expanded_n):
    target = -(n*tf.math.log((p/a**d)/tf.math.exp(tf.math.lgamma(d/p))) + (d-1)*tf.math.reduce_sum(Weights*tf.math.log(Y)) - tf.math.reduce_sum(Weights*(Y/a)**p)) \
        + lambda1 * ((a * tf.math.exp(tf.math.lgamma((d+1)/p)) / tf.math.exp(tf.math.lgamma(d/p))) - 1)
    
    return tf.squeeze(target)

@tf.function
def SGDfunction_minimize(Y = Y, Weights = Weights, n = expanded_n):
    target = -(n*tf.math.log((p/a**d)/tf.math.exp(tf.math.lgamma(d/p))) + (d-1)*tf.math.reduce_sum(Weights*tf.math.log(Y)) - tf.math.reduce_sum(Weights*(Y/a)**p)) \
        + lambda1 * ((a * tf.math.exp(tf.math.lgamma((d+1)/p)) / tf.math.exp(tf.math.lgamma(d/p))) - 1)
    
    return tf.squeeze(target)

# Generate tensors with initial values
def SGD_initial_values():
    a = tf.Variable(0.60) 
    d = tf.Variable(0.92)
    p = tf.Variable(1.01) 
    lambda1 = tf.Variable(1e5*1.0) 
    return a, d, p, lambda1

a, d, p, lambda1 = SGD_initial_values()

opt = tf.keras.optimizers.SGD(learning_rate = 3e-9)
tic = time.time()
for i in range(10001):
    if i%10000 == 0:
        print("\nGradient at iteration " + str(i))
        with tf.GradientTape() as tape:
            function = SGDfunction(a, d, p, lambda1)
        tf.print(tape.gradient(function, [a, d, p, lambda1]))
        print("Parameter values at iteration " + str(i))
        tf.print(a)
        tf.print(d)
        tf.print(p)
        tf.print(lambda1)
    opt.minimize(SGDfunction_minimize, var_list = [a, d, p, lambda1])
print("\nGradient Descent took "+str(time.time()-tic)+" seconds")

# Compute fitted mean
mu_fitted_2 = a * tf.math.exp(tf.math.lgamma((d+1)/p)) / tf.math.exp(tf.math.lgamma(d/p))
print("\nMean of fitted distribution:")
tf.print(mu_fitted_2)


Gradient at iteration 0
[1.62176e+06, -1401715.5, 1282472, -0.455411255]
Parameter values at iteration 0
0.6
0.92
1.01
100000

Gradient at iteration 10000
[575184, -431868.594, 435816, -0.463336]
Parameter values at iteration 10000
0.389582664
1.05692768
0.852889895
100000

Gradient Descent took 22.085002422332764 seconds

Mean of fitted distribution:
0.536570907


In [92]:
## MLE with constraints
start_param_constrained = tf.constant([1.5, .5], dtype = tf.float32)

constrained_mean = 10.1
top_threshold = 200.


# Define target negative log-likelihood function with constraints
@tf.function
def target_GeneralizedGamma_Constrained(param_vec_constrained, Y = Y, Weights = Weights, n = expanded_n):
    # Retrieve distribution parameters
    d, p = tf.split(param_vec_constrained, 2, axis = 0)
    
    a = constrained_mean / (tf.math.exp(tf.math.lgamma((d+1)/p)) / tf.math.exp(tf.math.lgamma(d/p)))
    
    # Compute negative log-likelihood
    ll = -(n*tf.math.log((p/a**d)/tf.math.exp(tf.math.lgamma(d/p))) + (d-1)*tf.math.reduce_sum(Weights*tf.math.log(Y)) - tf.math.reduce_sum(Weights*(Y/a)**p))
    
    return tf.squeeze(ll)

# Return target function and its gradient
@tf.function
def target_grad_GeneralizedGamma_Constrained(param_vec_constrained):
    return tfp.math.value_and_gradient(target_GeneralizedGamma_Constrained, param_vec_constrained)

# Minimize negative log-likelihood via BFGS
optim_results_constrained = tfp.optimizer.bfgs_minimize(target_grad_GeneralizedGamma_Constrained, start_param_constrained, tolerance = 1e-10)
print("\nOutput of optimizer:")
tf.print(optim_results_constrained)
est_params_constrained = optim_results_constrained.position.numpy()

d_fitted_constrained = tf.constant(est_params_constrained[0], dtype = tf.float32)
p_fitted_constrained = tf.constant(est_params_constrained[1], dtype = tf.float32)
a_fitted_constrained = constrained_mean / (tf.math.exp(tf.math.lgamma((d_fitted_constrained+1)/p_fitted_constrained)) / tf.math.exp(tf.math.lgamma(d_fitted_constrained/p_fitted_constrained)))

print("\nEstimated parameters:")
tf.print(a_fitted_constrained)
tf.print(d_fitted_constrained)
tf.print(p_fitted_constrained)

print("\nConstraint is satisfied. Mean equals:")
mu_fitted_constrained = a_fitted_constrained * tf.math.exp(tf.math.lgamma((d_fitted_constrained+1)/p_fitted_constrained)) / tf.math.exp(tf.math.lgamma(d_fitted_constrained/p_fitted_constrained))
tf.print(mu_fitted_constrained)

# Verify second constraint
@tf.function
def CDF_GeneralizedGamma(x, a, p, d):
    # Using the inverse gamma distribution this way yields the same as the CDF for the generalized gamma distribution
    return tf.math.igammac( (d/p), ((x/a)**p) )

print("\nMass above threshold of " + str(top_threshold) +":")
tf.print( CDF_GeneralizedGamma(top_threshold, a_fitted_constrained, p_fitted_constrained, d_fitted_constrained) )


Output of optimizer:
BfgsOptimizerResults(converged=1, failed=0, num_iterations=11, num_objective_evaluations=66, position=[1.75941372 0.285252124], objective_value=204857120, objective_gradient=[104 1152], inverse_hessian_estimate=[[2.23848156e-06 -4.07764787e-07]
 [-4.07764787e-07 7.66302435e-08]])

Estimated parameters:
0.00924701244
1.75941372
0.285252124

Constraint is satisfied. Mean equals:
10.1

Mass above threshold of 200.0:
0.000691972731
