In [1]:
import scipy.stats as stats
import numpy as np
import scipy.optimize as optimize
import math
import matplotlib.pyplot as plt
import pandas as pd

import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 200

log_plot = True

In [34]:
%%time
# Create synthetic data assuming the following values for theta
Theta_Guess = np.array([0,0,-7,1])

Constants = np.array([[-200,-100,-170,15],
                      [-1,-1,-6.5,0.7],
                      [0,0,11,0.6],
                      [-10,-10,-6.5,0.7],
                      [1,0,-0.5,-1],
                      [0,0.5,1.5,1]])

##New Cell

# Evaluate model and add noise based on assumed theta values
# This generates experimental data points
exp_data_doc = "Input_CSVs/Exp_Data/n=5.csv"
exp_data = pd.read_csv(exp_data_doc, header=0,sep=",",index_col=0)
Num_params  = len(exp_data.T)
Xexp = np.array([exp_data.iloc[:,0:Num_params-1]])[0,:]
Yexp = np.array([exp_data.iloc[:,-1]])[0,:]

[-0.558 -0.05   0.623 -0.82   0.22 ]
CPU times: user 4.3 ms, sys: 877 µs, total: 5.18 ms
Wall time: 4.22 ms


In [40]:
##New Cell

# Evaluate model based on the assumed experimental values
#Create Meshgrid for X1 and X2 and evaluate Y
len_data = 20
X1 = np.linspace(np.min(Xexp[:,0]),np.max(Xexp[:,0]),len_data)
X2 = np.linspace(np.min(Xexp[:,1]),np.max(Xexp[:,1]),len_data)
X_mesh = np.meshgrid(X1,X2)

# #Creates an array for Y that will be filled with the for loop
# #Initialize y_sim
Y = [] #len_data x len_data

# #Iterates over evey combination of X1/X2 to find the SSE for each combination
for i in range(len_data):
    #Set dig out values of a from train_p
    #Set constants to change the a row to the index of the first loop
    A, a, b, c, x0, y0 = Constants
#         print(a)

    #Calculate y_sim
    X1, X2 = X_mesh[0][i], X_mesh[1][i]
    Term1 = a*(X1 - x0)**2
    Term2 = b*(X1 - x0)*(X2 - y0)
    Term3 = c*(X2 - y0)**2
    Y.append( np.sum( A*np.exp(Term1 + Term2 + Term3) ) )

print(Y)

# # Compare the experiments to the true model
# plt.plot(X,Y,'b-',linewidth=2,label="True Model")
# plt.plot(Xexp,Yexp,'r.',markersize=10,label="Experiments")
# plt.title("Plotting True Model and Synthetic Data")
# plt.xlabel('X',fontsize=14)
# plt.ylabel('Y',fontsize=14)
# plt.legend()
# plt.show()


ValueError: operands could not be broadcast together with shapes (20,) (4,) 

In [4]:
##New Cell

## define function that includes nonlinear model
def model(a_guess, Constants, x):
    '''
    Toy model
    Arguments:
        theta: parameter vector
        x: independent variable vector
    Returns:
        y_model: dependent variable prediction
    '''
        """
    Creates y_data (training data) based on the function theta_1*x + theta_2*x**2 +x**3
    Parameters
    ----------
        param_space: (nx3) ndarray or tensor, parameter space over which the GP will be run
        Constants: ndarray, The array containing the true values of Muller constants
    Returns
    -------
        y_model: ndarray, The simulated y training data
    """
    #Assert statements check that the types defined in the doctring are satisfied
    
    #Converts parameters to numpy arrays if they are tensors
    if torch.is_tensor(param_space)==True:
        a_guess = a_guess.numpy()
        
    if isinstance(a_guess, pd.DataFrame):
        a_guess = a_guess.to_numpy()
    
    try:
        len_x = x.shape[1]
    except:
        len_x = 1
    
        
    #For the case where more than 1 point is geing generated
    #Creates an array for train_sse that will be filled with the for loop
    #Initialize y_sim
    y_model = np.zeros(len_data) #1 x n_train^2

    #Iterates over evey combination of theta to find the SSE for each combination
    for i in range(len_data):
        #Set dig out values of a from train_p
        #Set constants to change the a row to the index of the first loop
        A, a, b, c, x0, y0 = Constants
        len_a = a.shape[0]
        a = a_guess
#         print(a)

        #Calculate y_sim
        X1, X2 = x[0,i], x[1,i]
        Term1 = a*(X1 - x0)**2
        Term2 = b*(X1 - x0)*(X2 - y0)
        Term3 = c*(X2 - y0)**2
        y_model[i] = np.sum(A*np.exp(Term1 + Term2 + Term3) )
    return y_model

print(model(Theta_Guess,Xexp))

##New Cell

# Create a function to optimize, in this case, least squares fitting
def regression_func(theta, x, y):
    '''
    Function to define regression function for least-squares fitting
    Arguments:
        theta: parameter vector
        x: independent variable vector
        y: dependent variable vector (measurements)
    Returns:
        e: residual vector
    '''
    
    error = y - model(theta,x); #NOTE: Least squares will calculate sse based off this to minimize
    
    return error


#Create a function to define the SSE for any Theta vector on a heat map.
def sse_func(xx, yy, x, y):
    '''
    Function to define define sum of squared error function for heat map
    Arguments:
        xx: An N X D array of all Theta1 values
            
        yy: An D X N array of all Theta2 values
        theta: parameter vector
        x: independent variable vector (predicted x values including noise)
        y: dependent variable vector (predicted y values on Heat Map)
    Returns:
        sse: N x N sum of squared error matrix of all generated combination of xx and yy
    '''
    sse = np.zeros([len(xx),len(yy)])
    
    for i in range(len(xx)):
        for j in range(len(yy)):
            theta = np.array([xx[i][j],yy[i][j]])
            sse[i][j] = sum((y - model(theta,x))**2) 
    
    return sse

[-1.73741112e-01 -1.25000000e-04  2.41804367e-01 -5.51368000e-01
  1.06480000e-02]


In [5]:

## specify initial guess
theta0 = Theta_Guess

## specify bounds
# first array: lower bounds
# second array: upper bounds
bounds = ([-np.inf, -np.inf,-np.inf, -np.inf], [np.inf, np.inf, np.inf, np.inf])

## use least squares optimizer in scipy
# argument 1: function that takes theta as input, returns residual
# argument 2: initial guess for theta
# optional arguments 'bounds': bounds for theta
# optional arugment 'args': additional arguments to pass to residual function
# optional argument 'method': select the numerical method
#   if you want to consider bounds, choose 'trf'
#   if you do not want to consider bounds, try either 'lm' or 'trf'
Solution = optimize.least_squares(regression_func, theta0,bounds=bounds, method='trf',args=(Xexp, Yexp))

theta = Solution.x
print("theta = ",theta)


ValueError: Inconsistent shapes between bounds and `x0`.

In [None]:
#New Cell

# generate predictions
X_pred = np.linspace(np.min(Xexp),np.max(Xexp),20)
Y_pred = model(theta, X_pred)

Theta1_Map = np.linspace(-2,2,100)
Theta2_Map = np.linspace(-2,2,100)

x = Theta1_Map
y = Theta2_Map

# full coorindate arrays
xx, yy = np.meshgrid(x, y)
zz = sse_func(xx,yy,X_pred,Y_pred)

if log_plot == True:
    zz = np.log(zz)

plt.contourf(x, y, zz, cmap = "autumn", levels = 20)
plt.colorbar()
plt.axis('scaled')
plt.scatter(Theta_Guess[0],Theta_Guess[1], color="blue", s=100, label = "True Optimal Value", marker = (5,1))
plt.scatter(theta[0],theta[1], color="white",s=50, marker = ".",label = "NLR Optimal Value")
# plt.grid()
plt.legend(loc = 'best')
plt.xlabel('$\Theta_1$',weight='bold')
plt.ylabel('$\Theta_2$',weight='bold')
plt.xlim((np.amin(xx), np.amax(xx)))
plt.ylim((np.amin(yy),np.amax(yy)))

if log_plot == True:
    plt.title('NLR ln(SSE)', weight='bold',fontsize = 16)
    plt.savefig("Figures/NLR_ln(SSE).png",dpi=300)
else:
    plt.title('Non-Linear Regression SSE', weight='bold',fontsize = 16)
    plt.savefig("Figures/NLR_SSE.png",dpi=300)
    

plt.show()

In [None]:
#New Cell

# create plot and compare predictions and experiments
plt.plot(Xexp,Yexp,'.b',markersize=20,label='Experimental Data')
plt.plot(X_pred,Y_pred,'--g',linewidth=4,label='$y_{sim}$')
plt.plot(X,Y,'r-',linewidth=3,label='$y_{true}$')
plt.title("Predictions vs Syntehic Data")
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.legend()

plt.savefig("Figures/sim_true_comp.png",dpi=300)
plt.show()


##New Cell

#Plot error
Y_pred2 = model(theta,Xexp)
error = (Yexp - Y_pred2)
print("SSE = ", np.sum(error**2))
plt.plot(Y_pred2,error,"b.",markersize=20, label = "Error")
plt.title("Residuals")
plt.xlabel('Predicted Y')
plt.ylabel('Residuals vs. Predicted Value')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
#Jacobian and Uncertainty Analysis
print("Jacobian =\n")
print(Solution.jac)
sigre = (error.T @ error)/(len(error) - 2)
Sigma_theta2 = sigre * np.linalg.inv(Solution.jac.T @ Solution.jac)
print("Covariance matrix:\n",Sigma_theta2)