In [50]:
import pdb

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as sts
import statsmodels.api as sm
from matplotlib import rc
from scipy import integrate
from scipy.optimize import minimize
rc('text', usetex=True)

  from pandas.core import datetools


# GMM Framework

In [2]:
class GMM:
    """Base class for a small generalized-method-of-moments estimator.

    Subclasses are expected to set ``self.data_moment`` (the empirical
    moments) and ``self.N`` (the sample size) and to override
    ``model_moment``.
    """

    # Weighting schemes understood by ``estimate``.
    _METHODS = ("identity", "two-step", "iterate")

    def __init__(self):
        pass

    def model_moment(self, param):
        """Return the model-implied moments for ``param`` (overridden by subclasses)."""
        pass

    def _moment_error(self, param):
        """Return the percent-deviation moment errors, (model - data) / data."""
        return (self.model_moment(param) - self.data_moment)/self.data_moment

    def error_func(self, param, W = None):
        """Return the GMM criterion ``e' W e`` for ``param``.

        ``W`` defaults to the identity matrix sized by the number of data moments.
        """
        if W is None:
            W = np.eye(len(self.data_moment))
        e = self._moment_error(param)
        return e.T @ W @ e

    def vcv(self, param):
        """Return the outer product of the moment errors divided by ``self.N``."""
        e = self._moment_error(param)
        return e @ e.T / self.N

    def estimate(self, init, method = "identity", bounds = None, max_iter = 100, tol = 1e-7):
        """Minimise the GMM criterion with L-BFGS-B and return the optimiser result.

        Parameters
        ----------
        init : initial parameter guess handed to ``scipy.optimize.minimize``.
        method : "identity" (single pass with W = I), "two-step" (one
            re-estimation with W = inv(vcv)), or "iterate" (re-estimate until
            the weighting matrix stops changing).
        bounds : optional parameter bounds forwarded to ``minimize``.
        max_iter : cap on re-weighting rounds for "iterate", so the loop
            cannot spin forever when W never settles.
        tol : "iterate" stops once the summed absolute change in W is not
            greater than this value.

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported schemes.
        """
        # Reject a mistyped method up front instead of silently returning None
        # after paying for a full optimisation.
        if method not in self._METHODS:
            raise ValueError(
                "unknown method %r; expected one of %r" % (method, self._METHODS)
            )

        results = minimize(self.error_func, init, method='L-BFGS-B', bounds = bounds)
        if method == "identity":
            return results

        # NOTE(review): when the moment error is a column vector, vcv() is an
        # outer product and therefore rank one for more than one moment, so the
        # inverses below may be ill-conditioned or raise LinAlgError -- confirm
        # the intended variance estimator.
        if method == "two-step":
            W = np.linalg.inv(self.vcv(results.x))
            W = W/np.sum(W) # rescale the W in case of overflow
            return minimize(self.error_func, results.x, args = (W,), method='L-BFGS-B', bounds = bounds)

        # method == "iterate"
        # The weighting matrix is (#moments x #moments); it must not be sized
        # by the number of parameters.
        W = np.eye(len(self.data_moment))
        for _ in range(max_iter):
            new_W = np.linalg.inv(self.vcv(results.x))
            if np.sum(np.abs(new_W - W)) > tol:
                W = new_W
                results = minimize(self.error_func, results.x, args = (W,), method='L-BFGS-B', bounds = bounds)
            else:
                break
        return results

## Instances of GMM

In [10]:
class Log_Normal(GMM):
    """GMM estimator that matches the mean and standard deviation of a log-normal.

    The parameter vector is ``(mu, sigma2)``: the mean and the variance of
    the underlying normal distribution.
    """

    def __init__(self, data):
        """Store the sample and its two empirical moments as a (2, 1) column."""
        self.data = data
        self.N = len(data)
        sample_mean = np.mean(self.data)
        sample_std = np.std(self.data)
        self.data_moment = np.array((sample_mean, sample_std)).reshape((2,1))

    def model_moment(self, param):
        """Return the model mean and standard deviation as a (2, 1) column."""
        # The log-normal moments have closed forms, so they are evaluated
        # analytically rather than by numerical integration; this avoids
        # integration error and additional overhead.
        theta = np.array(param).reshape(2, 1)
        mean_loading = np.array((1, 0.5)).reshape((1,2))   # mu + sigma2 / 2
        scale_loading = np.array((2, 1)).reshape((1,2))    # 2 * mu + sigma2
        mean = np.exp(mean_loading @ theta)
        std = np.sqrt(np.exp(scale_loading @ theta) * (np.exp(theta[1,:]) - 1))
        return np.concatenate((mean, std), axis = 0)

    def pdf(self, x, param):
        """Evaluate the log-normal density at ``x`` for ``param = (mu, sigma2)``."""
        mu, sigma2 = param
        sigma = np.sqrt(sigma2)
        normaliser = 1/x/sigma/np.sqrt(2*np.pi)
        return normaliser * np.exp(-(np.log(x)-mu)**2/2/sigma/sigma)
    
class Linear_Regression(GMM):
    """Linear regression estimated by minimising ``e' W e`` over the raw residuals.

    ``data`` is a pandas DataFrame (it is accessed through ``.values`` and
    ``.iloc``): column 0 is the response, the remaining columns are the
    regressors. An intercept column is prepended automatically.
    """

    def __init__(self, data):
        """Cache the raw array, the sample size, the response and the design matrix."""
        self.data = data.values
        self.N = self.data.shape[0]
        self.data_moment = data.iloc[:,0].values
        # The design matrix does not depend on the parameters, so build it
        # once here instead of on every objective evaluation.
        self._design = np.concatenate((np.ones((self.N,1)), self.data[:,1:]), axis = 1)

    def error_func(self, param, W = None):
        """Return ``e' W e`` with ``e`` the unscaled residual (fitted - observed).

        Unlike the base class, the error is not divided by the data moment.
        ``W`` defaults to the identity sized by the number of observations.
        """
        if W is None:
            W = np.eye(len(self.data_moment))
        e = (self.model_moment(param) - self.data_moment)
        return e.T @ W @ e

    def model_moment(self, param):
        """Return the fitted values ``X @ param`` (intercept coefficient first)."""
        return self._design @ np.array(param)

In [11]:
# Read both input files from the working directory: `dt` becomes a NumPy
# array, `sick` a pandas DataFrame.
dt = np.loadtxt(fname="incomes.txt")
sick = pd.read_csv(filepath_or_buffer="sick.txt")

In [12]:
# Identity-weighted GMM fit of the log-normal model to `dt`.
# Starting guess is (11, 0.2); the first parameter is unbounded and the
# second is bounded below by 1e-10.
ln = Log_Normal(data=dt)
results = ln.estimate(
    init=(11, 0.2),
    bounds=((None, None), (1e-10, None)),
)

In [13]:
# Identity-weighted GMM fit of the linear model on the `sick` frame,
# starting every one of the four coefficients at 1.
lr = Linear_Regression(data=sick)
res = lr.estimate(init=(1, 1, 1, 1))

# MLE Framework

In [47]:
class Random_Variable:
    """Base class for a parametric random variable fitted by maximum likelihood.

    Subclasses override ``pdf`` (the density) and ``param_array`` (the current
    parameters as an array, used as the optimiser's starting point).
    """

    def log_lik(self, dt, param = None):
        """Return the sum of log-densities of the observations ``dt``."""
        return np.sum(np.log(self.pdf(dt, param)))

    def param_array(self):
        """Return the current parameters as an array (overridden by subclasses)."""
        pass

    def pdf(self, x, param = None):
        """Return the density at ``x`` (overridden by subclasses)."""
        pass

    def MLE(self, dt, bounds = None):
        """Maximise the log-likelihood of ``dt`` with L-BFGS-B.

        The search starts from ``self.param_array()``; ``bounds`` is forwarded
        to ``scipy.optimize.minimize``. Returns the optimiser result.
        """
        def crit(param):
            # minimize() minimises, so hand it the negative log-likelihood.
            return -self.log_lik(dt, param)
        return minimize(crit, self.param_array(),  method='L-BFGS-B', bounds = bounds)

    def cdf_integral(self, x, param = None):
        """Integrate the density numerically from 0 to ``x``.

        ``param`` is passed through to ``pdf``; ``None`` lets ``pdf`` fall back
        to its own default. Returns what ``scipy.integrate.quad`` returns: a
        tuple whose first item is the integral and whose second item is an
        estimate of the absolute error.
        """
        # The integration variable gets its own name so it does not shadow the
        # upper limit ``x``.
        return integrate.quad(lambda t: self.pdf(t, param), 0, x)
        

class Log_Normal(Random_Variable):
    """Log-normal random variable parameterised by ``mu`` and ``sigma``.

    ``sigma`` is a standard deviation here (it is squared inside ``pdf``).
    """

    def __init__(self, mu, sigma):
        self.mu  = mu
        self.sigma = sigma

    def param_array(self):
        """Return the stored parameters as ``array([mu, sigma])``."""
        return np.array([self.mu, self.sigma])

    def _mu_sigma(self, param):
        """Return ``(mu, sigma)`` from ``param``, or the stored values when it is None."""
        if param is None:
            return self.mu, self.sigma
        mu, sigma = param
        return mu, sigma

    def pdf(self, x, param = None):
        """Evaluate the log-normal density at ``x``."""
        mu, sigma = self._mu_sigma(param)
        sigma2 = sigma ** 2
        scale = 1/x/sigma/np.sqrt(2*np.pi)
        return scale * np.exp(-(np.log(x)-mu)**2/2/sigma2)

    def cdf(self, x, param = None):
        """Evaluate the CDF at ``x`` as the standard normal CDF of ``(log(x) - mu) / sigma``."""
        mu, sigma = self._mu_sigma(param)
        z = (np.log(x) - mu)/sigma
        return sts.norm.cdf(z)
    
class Linear_Regression:
    """Gaussian linear regression fitted by maximum likelihood.

    Parameters
    ----------
    y : 1-D array of responses.
    X : 2-D array of regressors with one row per observation.
    constant : when truthy, a column of ones is prepended to ``X``.

    Attributes
    ----------
    N : number of observations.
    m : number of columns of the design matrix (including the constant).
    """

    def __init__(self, y, X, constant = True):
        self.y = y
        self.X = X
        self.N = len(y)
        # Plain truthiness, so flags such as np.True_ or 1 also add the intercept.
        if constant:
            self.X = np.concatenate((np.ones((self.N, 1)),self.X), axis = 1)
        self.m = self.X.shape[1]

    def log_lik(self, param):
        """Return the Gaussian log-likelihood.

        ``param`` holds the ``m`` regression coefficients followed by sigma;
        sigma is squared, so its sign does not matter.
        """
        beta = param[0:self.m]
        sigma2 = param[self.m] *  param[self.m]
        resid = self.y - self.X @ beta
        return -self.N/2*np.log(2*np.pi*sigma2) - 1/2/sigma2*(resid.T @ resid)

    def MLE(self, init = None):
        """Maximise the log-likelihood with BFGS and return the optimiser result.

        ``init`` is the starting vector of length ``m + 1``. The default is 0
        for the first coefficient and 1 for everything else, which equals the
        previous fixed start ``[0, 1, 1, 1, 1]`` when ``m == 4`` but now also
        has the right length for any other number of regressors.
        """
        if init is None:
            init = np.concatenate(([0.0], np.ones(self.m)))
        return minimize(lambda x: -self.log_lik(x), init, method='BFGS')

    def analy_sol(self):
        """Return the closed-form least-squares coefficients.

        Solves the normal equations ``(X'X) b = X'y`` directly rather than
        forming the explicit inverse of ``X'X``.
        """
        X = self.X
        y = self.y
        return np.linalg.solve(X.T @ X, X.T @ y)

In [48]:
# Fit the regression by maximum likelihood: the "sick" column is the response
# and every column from position 1 onwards is a regressor.
lr = Linear_Regression(y=sick.loc[:, "sick"].values, X=sick.iloc[:, 1:].values)
mle_res = lr.MLE()

# BFGS's approximation to the inverse Hessian of the negative log-likelihood.
Hinv = mle_res.hess_inv
# Sign mask: +1 on the diagonal, -1 everywhere else.
OffDiagNeg = 2*np.eye(Hinv.shape[0]) - np.ones(Hinv.shape)
# Element-wise product, i.e. Hinv with its off-diagonal entries negated.
VCV = Hinv * OffDiagNeg

In [49]:
# Maximum-likelihood fit of the log-normal to `dt`, starting from
# mu = 11, sigma = 0.3 and searching mu in [10, 12], sigma in [0.01, 1].
lognormal = Log_Normal(mu=11, sigma=0.3)
lognormal.MLE(dt, bounds=((10, 12), (1e-2, 1)))

      fun: 2239.5347439980069
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ -4.54747351e-05,  -4.54747351e-05])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 48
      nit: 7
   status: 0
  success: True
        x: array([ 11.33144032,   0.21167458])

In [51]:
# Alias SciPy's log-normal distribution object (scipy.stats is imported as `sts`).
# NOTE: this rebinds `ln`, which earlier in the notebook held the GMM
# Log_Normal instance.
ln = sts.lognorm

In [53]:
# Fit SciPy's log-normal to the income data; the recorded output is a 3-tuple.
# NOTE(review): presumably (shape, loc, scale) with loc left free, in which
# case it is not directly comparable to the (mu, sigma) estimate from the
# custom MLE above -- confirm, and consider fixing loc at 0 for a like-for-like check.
ln.fit(dt)

(8.6927662804320995, 47628.560636118294, 0.19762737066002253)