In [None]:
import numpy as np
import scipy.stats as spst
from scipy.optimize import leastsq
import pandas as pd
from scipy import optimize

In [None]:
# Load the data set; the path is resolved relative to the current working directory.
mizon57= pd.read_csv("Mizon57.csv")

In [None]:

# Derived labour input (LF - U) * H, stored scaled down by 100 ("L") and unscaled ("L100").
# NOTE(review): LF, U and H look like labour force, unemployment and hours worked --
# confirm against the data source.
labour_input = (mizon57["LF"] - mizon57["U"]) * mizon57["H"]
mizon57["L"] = labour_input / 100
mizon57["L100"] = labour_input

In [None]:
# Display the data frame, which at this point also holds the derived "L" and "L100" columns.
mizon57

In [None]:
def error_ces(params, X_data,Y_data):
    r"""Residuals of the CES production function, one per observation.

    The fitted model is

        Q = \gamma [\delta K^{-\rho} + (1-\delta) L^{-\rho}]^{-\frac{\nu}{\rho}}

    and the returned error is ``Q - Q_est`` (observed minus fitted). This is
    the vector-valued form that ``scipy.optimize.leastsq`` expects.

    Parameters
    ----------
    params : sequence of 4 numbers
        ``(gamma, delta, rho, nu)`` in that order. ``rho`` must be non-zero
        because the exponent ``-nu/rho`` is evaluated directly.
    X_data : 2-D array-like
        Column 0 is K, column 1 is L.
    Y_data : 1-D or 2-D array-like
        Observed Q. For a 2-D input only column 0 is used.

    Returns
    -------
    numpy.ndarray
        ``Q - Q_est`` for every row of the data.
    """
    X_data = np.asarray(X_data)
    Q = np.asarray(Y_data)
    # Accept both a column vector (n, 1) and a plain 1-D vector (n,).
    if Q.ndim > 1:
        Q = Q[:, 0]
    K = X_data[:, 0]
    L = X_data[:, 1]
    gamma = params[0]
    delta = params[1]
    rho = params[2]
    nu = params[3]
    Q_est = gamma*(delta* K**(-rho) + (1-delta)* L**(-rho))**(-nu/rho)
    return np.array(Q - Q_est)
    


In [None]:
## http://www.scipy-lectures.org/intro/summary-exercises/optimize-fit.html

# Starting values in the order error_ces unpacks them: gamma, delta, rho, nu.
p0 = np.array([0.1, 0.1, -0.1, 1.0])

# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
estimates = leastsq(error_ces, p0, 
                    args = ( np.array(mizon57.loc[:,["K","L"]]), 
                            np.array(mizon57.loc[:,["Q"]]) ), full_output=1)

estimates


In [None]:
p0 = np.array([0.1, 0.1, -0.1, 1.0])

## alternative using minimization routine.
def sse_ces(params, X_data,Y_data):
    r"""Sum of squared CES residuals (a single scalar).

    Model:
        Q = \gamma [\delta K^{-\rho} + (1-\delta) L^{-\rho}]^{-\frac{\nu}{\rho}}
    with
        sse = sum(error**2)
    where ``error`` is the residual vector returned by ``error_ces``.

    Because the result is a scalar, this is an objective for
    ``optimize.minimize``; ``leastsq`` needs the residual vector itself.

    Parameters
    ----------
    params : sequence of 4 numbers
        ``(gamma, delta, rho, nu)``, passed through to ``error_ces``.
    X_data, Y_data : array-like
        Passed through unchanged to ``error_ces``.
    """
    return np.sum(error_ces(params, X_data,Y_data)**2)

# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
res = optimize.minimize(sse_ces, p0, method='L-BFGS-B', 
                        args=( np.array(mizon57.loc[:,["K","L"]]), 
                              np.array(mizon57.loc[:,["Q"]]) ))
print( res.x)
print( res.nfev)

In [None]:
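## curve fit NOT working: curve_fit calls the model as ces(X_data, gamma, delta, rho, nu),
## i.e. with one positional argument per parameter, so the packed `params` signature below
## cannot be used as is; curve_fit also expects ydata as a 1-D array of length M.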
# p0 = np.array([0.1, 0.1, -0.1, 1.0])

# def ces(X_data, params):
#     """ces
#     Q = \gamma[\delta K^{-\rho} +(1-\delta) L^{-\rho}  ]^{-\frac{\nu}{\rho}
#     error = Q_est - Q
#     """    
#     K = X_data[:,0]
#     L = X_data[:,1]    
#     gamma = params[0]
#     delta = params[1]
#     rho = params[2]
#     nu = params[3]        
#     return gamma*(delta* K**(-rho) + (1-delta)* L**(-rho))**(-nu/rho)

# popt, pcov = optimize.curve_fit(ces,
#                                 np.array(mizon57.ix[:,["K","L"]]),
#                                 np.array(mizon57.ix[:,["Q"]]),
#                                 p0=p0
#                                )
# print(popt)

In [None]:

# leastsq needs the per-observation residual vector (error_ces), not the scalar sse_ces.
func = error_ces
name = "CES production function" 

#p0= {"gamma": 0.04746368, "delta":0.43092672,"rho": -0.34408368, "nu": 1.020762290}
#p0= {"gamma": 0.18, "delta":0.1,"rho": -0.1, "nu": 1.01}



# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
xdata= np.array(mizon57.loc[:,["K","L100"]])
ydata= np.array(mizon57.loc[:,["Q"]])
# p0 is a NumPy array in this notebook and has no .values(); the dict form shown in the
# commented-out lines above is still accepted.
inits = list(p0.values()) if isinstance(p0, dict) else list(p0)

#https://stackoverflow.com/questions/9219094/order-of-values-when-extracting-python-dict-to-list

In [None]:
# Parameter labels paired with their starting values, so the two stay in the same order.
#p0 =np.array([0.04746368, 0.43092672,-0.34408368,  1.020762290])
start_values = (("gamma", 0.1), ("delta", 0.1), ("rho", -0.1), ("nu", 1.1))
params = [label for label, _ in start_values]
p0 = np.array([value for _, value in start_values])

In [None]:
# Run the model
# leastsq squares and sums the residuals itself, so it must be given the residual
# vector from error_ces. The scalar from sse_ces yields fewer outputs than parameters,
# which leastsq rejects with a TypeError.
mod1 = leastsq(error_ces, p0, args = (xdata, ydata), full_output=1)


In [None]:
# Raw leastsq result; with full_output=1 it is the tuple (x, cov_x, infodict, mesg, ier).
mod1

## Generate output report like R  

In [None]:
# Get the parameters (rounded for display only; the statistics below use mod1[0] directly)
parmEsts = np.round( mod1[0], 4 )

# Number of observations and parameters, read from the fit itself:
# 'fvec' holds one residual per observation, mod1[0] one entry per parameter.
nobs = len( mod1[2]['fvec'] )
nparm = len( mod1[0] )

# Get the Error variance and standard deviation
RSS = np.sum( mod1[2]['fvec']**2 )
df = nobs - nparm
MSE = RSS / df
RMSE = np.sqrt( MSE )

# Get the covariance matrix (leastsq returns None for cov_x when it cannot estimate it)
if mod1[1] is None:
    raise ValueError("leastsq returned no covariance matrix (cov_x is None); "
                     "standard errors cannot be computed")
cov = MSE * mod1[1]

# Get parameter standard errors: take the diagonal first, so the square root is never
# applied to the (possibly negative) off-diagonal covariances.
parmSE = np.sqrt( np.diag( cov ) )

# Calculate the t-values from the unrounded estimates
tvals = mod1[0]/parmSE

# Get two-sided p-values; the survival function avoids the cancellation in 1 - cdf
pvals = 2 * spst.t.sf( np.abs(tvals), df )

# Get biased variance (MLE) and calculate log-likehood
s2b = RSS / nobs
logLik = -nobs/2 * np.log(2*np.pi) - nobs/2 * np.log(s2b) - 1/(2*s2b) * RSS

#del(mod1)
#del(s2b)
#del(inits)

# Get AIC. Add 1 to the df to account for estimation of standard error
def AIC(self=None, k=2):
    """Return -2*logLik + k*(nparm + 1) from the notebook-level fit.

    ``self`` is unused and only kept (now optional) so existing positional
    calls keep working; ``k`` is the penalty per estimated parameter.
    """
    return -2*logLik + k*(nparm + 1)



In [None]:
# Print the summary
def summary():
    """Print a coefficient table for the fit held in the notebook globals.

    Reads ``name``, ``params``, ``parmEsts``, ``parmSE``, ``tvals``, ``pvals``,
    ``RMSE`` and ``df``; returns nothing.
    """
    print()
    print( 'Non-linear least squares')
    print( 'Model: ' + name)
    print( 'Parameters:')
    print( " Estimate Std. Error t-value P(>|t|)")
    # The parameter labels live in `params`; `parmNames` is never defined at notebook level.
    for row in zip(params, parmEsts, parmSE, tvals, pvals):
        print( "% -5s % 5.4f % 5.4f % 5.4f % 5.4f" % row)
    print()
    print( 'Residual Standard Error: % 5.4f' % RMSE)
    print( 'Df: %i' % df)
summary()

## The estimates are very sensitive to the initial values

In [None]:
# modified to fit python3.5
# # #  https://www.r-bloggers.com/r-vs-python-practical-data-analysis-nonlinear-regression/


class NLS:
    ''' This provides a wrapper for scipy.optimize.leastsq to get the relevant output for nonlinear least squares.
    Although scipy provides curve_fit for that reason, curve_fit only returns parameter estimates and covariances. 
    This wrapper returns numerous statistics and diagnostics.

    The fit is run in the constructor. Afterwards the instance exposes
    parmEsts (rounded to 4 decimals), parmSE, tvals, pvals, RSS, MSE, RMSE, df,
    cov, s2b and logLik, plus the raw leastsq result in mod1.
    Relies on the module-level imports of numpy (np), scipy.stats (spst) and leastsq.
    '''

    def __init__(self,func,name, params ,p0, xdata, ydata):
        """
        func: residual function func(p, xdata, ydata) returning one error (y^ - y) per
              observation; a scalar sum of squares is rejected by leastsq
        name: model name
        params: names of parameters, list (left unmodified)
        p0: starting value for params(the same order), np.array
        xdata: np.array 
        ydata: np.array

        Raises ValueError when xdata and ydata differ in length, or when leastsq
        cannot provide a covariance matrix.
        """
        # Check the data
        if len(xdata) != len(ydata):
            msg = 'The number of observations does not match the number of rows for the predictors'
            raise ValueError(msg)

        self.name = name 
        self.func = func

        self.xdata = xdata
        self.ydata = ydata

        self.nobs = len( ydata )

        self.inits = p0 
        self.nparm= len( self.inits )

        # Clean parameters' names: labels longer than 5 characters are cut to their first 4.
        # A new list is built so the caller's `params` list is not altered.
        self.parmNames = [label[0:4] if len(label) > 5 else label for label in params]

        # Run the model
        self.mod1 = leastsq(self.func, np.array(self.inits), args = (self.xdata, self.ydata), full_output=1)
        estimates, cov_x, infodict = self.mod1[0], self.mod1[1], self.mod1[2]

        # Get the parameters (rounded for display; statistics use the unrounded values)
        self.parmEsts = np.round( estimates, 4 )

        # Get the Error variance and standard deviation
        self.RSS = np.sum( infodict['fvec']**2 )
        self.df = self.nobs - self.nparm
        self.MSE = self.RSS / self.df
        self.RMSE = np.sqrt( self.MSE )

        # Get the covariance matrix (leastsq returns None when it cannot estimate it)
        if cov_x is None:
            raise ValueError("leastsq returned no covariance matrix (cov_x is None); "
                             "standard errors cannot be computed")
        self.cov = self.MSE * cov_x

        # Get parameter standard errors: diagonal first, so the square root is never
        # taken of the (possibly negative) off-diagonal covariances.
        self.parmSE = np.sqrt( np.diag( self.cov ) )

        # Calculate the t-values from the unrounded estimates
        self.tvals = estimates/self.parmSE

        # Get two-sided p-values; the survival function avoids the cancellation in 1 - cdf
        self.pvals = 2 * spst.t.sf( np.abs(self.tvals), self.df )

        # Get biased variance (MLE) and calculate log-likehood
        self.s2b = self.RSS / self.nobs
        self.logLik = -self.nobs/2 * np.log(2*np.pi) - self.nobs/2 * np.log(self.s2b) - 1/(2*self.s2b) * self.RSS

    # Get AIC. Add 1 to the df to account for estimation of standard error
    def AIC(self, k=2):
        """Return -2*logLik + k*(nparm + 1); k is the penalty per estimated parameter."""
        return -2*self.logLik + k*(self.nparm + 1)

    # Print the summary
    def summary(self):
        """Print the coefficient table, residual standard error and degrees of freedom."""
        print()
        print( 'Non-linear least squares')
        print( 'Model: ' + self.name)
        print( 'Parameters:')
        print( " Estimate Std. Error t-value P(>|t|)")
        for row in zip(self.parmNames, self.parmEsts, self.parmSE, self.tvals, self.pvals):
            print( " % -5s % 5.4f % 5.4f % 5.4f % 5.4f" % row)
        print()
        print( 'Residual Standard Error: % 5.4f' % self.RMSE)
        print( 'Df: %i' % self.df)


In [None]:
params = ["gamma", "delta","rho", "nu"]
#p0 =np.array([0.04746368, 0.43092672,-0.34408368,  1.020762290])
p0 =np.array([0.1, 0.1,-0.1,  1.1])

# NLS hands `func` to leastsq, which needs the per-observation residuals from error_ces;
# the scalar sse_ces is rejected there. DataFrame.ix was removed in pandas 1.0, hence .loc.
nls_mizon57 = NLS(func = error_ces,name = "CES production function", params = params,
                  p0=p0, xdata= np.array(mizon57.loc[:,["K","L100"]]), ydata= np.array(mizon57.loc[:,["Q"]]))

In [None]:
# Print the parameter table, residual standard error and degrees of freedom of the fit.
nls_mizon57.summary()

It turns out there are many ways to do NLS in Python, since scipy offers several routines for minimization/optimization.

ref:
### scipy.optimize.least_squares

Solve a nonlinear least-squares problem with bounds on the variables.

Given the residuals $f(x)$ (an $m$-dimensional real function of $n$ real variables) and the loss function $\rho(s)$ (a scalar function), `least_squares` finds a local minimum of the cost function $F(x)$:

minimize $F(x) = \frac{1}{2} \sum_{i=0}^{m-1} \rho\left(f_i(x)^2\right)$

subject to $lb \le x \le ub$

The purpose of the loss function $\rho(s)$ is to reduce the influence of outliers on the solution.

https://docs.scipy.org/doc/scipy-0.19.1/reference/generated/scipy.optimize.least_squares.html

### Optimization (scipy.optimize)


https://docs.scipy.org/doc/scipy/reference/tutorial/optimize.html

http://blog.mmast.net/least-squares-fitting-numpy-scipy

In [None]:
import numpy as np
from scipy import optimize
import matplotlib.pyplot as plt

In [None]:
# Ground-truth quadratic -5x^2 + x + 3 used to simulate the noisy observations.
# It gets its own name because `f` is defined as the model function further down.
f_true = np.poly1d([-5, 1, 3])
# Fixed seed so that Restart & Run All reproduces the same sample and the same fit.
rng = np.random.RandomState(0)
x = np.linspace(0, 2, 20)
y = f_true(x) + 1.5*rng.normal(size=len(x))
xn = np.linspace(0, 2, 200)

plt.plot(x, y, 'or')
plt.show()

In [None]:
def f(x, a, b, c):
    """Quadratic model a*x**2 + b*x + c, evaluated element-wise for array x."""
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c

def residual(p, x, y):
    """Observed minus fitted values for the coefficient sequence p = (a, b, c)."""
    fitted = f(x, *p)
    return y - fitted

# Initial guess for the coefficients (a, b, c).
p0 = [1., 1., 1.]

# With full_output=True leastsq returns five items: the solution, a covariance
# estimate, an info dict, a status message and an integer flag.
# (Without full_output only the solution and the integer flag come back.)
fit_result = optimize.leastsq(residual, p0, args=(x, y), full_output=True)
popt, pcov, info, mesg, ler = fit_result

print(popt)

We should use non-linear least squares if the dimensionality of the output vector is larger than the number of parameters to optimize. Here, we can see the number of function evaluations used by our last estimation of the coefficients:

In [None]:
# 'nfev' in the leastsq info dict is the number of function calls made during the fit.
print( info['nfev'])

In [None]:
# Evaluate the fitted quadratic on the dense grid and overlay it on the noisy samples.
yn = f(xn, *popt)

fig, ax = plt.subplots()
ax.plot(x, y, 'or')
ax.plot(xn, yn)
plt.show()

As an example, an L-BFGS-B minimization of the same sum of squared residuals reaches the minimum only after more cost-function evaluations:




In [None]:
def min_residual(p, x, y):
    """Scalar sum of squared residuals, the objective handed to optimize.minimize."""
    # np.sum reduces the whole array in one vectorised call and always yields a scalar;
    # the builtin sum iterates element by element in Python. sse_ces uses np.sum too.
    return np.sum(residual(p, x, y)**2)

res = optimize.minimize(min_residual, p0, method='L-BFGS-B', args=(x, y))
print( res.x)
print( res.nfev)

In [None]:
# curve_fit takes the model f(x, a, b, c) itself rather than a residual function.
popt, pcov = optimize.curve_fit(f, x, y, p0=p0)
print( popt)

# Overlay the fitted curve, evaluated on the dense grid, on the noisy samples.
fig, ax = plt.subplots()
ax.plot(x, y, 'or')
ax.plot(xn, f(xn, *popt))
plt.show()

In [None]:
# stnd    = input(r'P ($\%$) and $\theta$ of pol. standard? (as tuple)')
# p       = stnd[0]/100.
# ang     = np.radians(stnd[1])  
# x,y     = sympy.symbols('x y')  
# stndqu  = sympy.solve([sympy.sqrt(x**2+y**2)-p,(0.5*sympy.atan(y/x))-ang],[x,y])[1] 

# stndqun = np.array([sympy.N(i) for i in stndqu],dtype=float) 




