In [1]:
import scipy.stats as stats
import numpy as np
import scipy.optimize as optimize
import math
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import pdist, squareform
import itertools
from itertools import combinations_with_replacement
from itertools import combinations
from itertools import permutations

import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 200

log_plot = True

import bo_methods_lib
from bo_methods_lib.bo_methods_lib.GPBO_Classes_New import * #Fix this later
from bo_methods_lib.bo_methods_lib.GPBO_Class_fxns import * #Fix this later
from bo_methods_lib.bo_methods_lib.GPBO_Classes_plotters import * #Fix this later

# Project / plotting setup used by the heat-map cell further down.
# NOTE(review): `signac`, `General_Analysis` and `Plotters` are not imported by name in this cell;
# presumably they are provided by the bo_methods_lib star-imports above -- confirm.
criteria_dict = {"cs_name_val" : 12}
project = signac.get_project("GPBO_Fix")
# Output switches forwarded to the analyzer / plotter constructors below
# (presumably they control whether CSVs / figures are written -- confirm in bo_methods_lib).
save_csv = False
save_figs = False
analyzer = General_Analysis(criteria_dict, project, mode = "act", save_csv = save_csv)
# `plotters.plot_nlr_heat_maps` is called later to draw the SSE heat maps.
plotters = Plotters(analyzer, save_figs)

def grid_sampling(num_points, bounds):
    """
    Build a full-factorial (grid) sample and scale it to the given bounds.

    Parameters
    ----------
    num_points: int, number of evenly spaced levels used along every dimension
    bounds: ndarray, shape (2, n_dims); row 0 holds the lower bounds, row 1 the upper bounds

    Returns:
    ----------
    grid_data: ndarray, shape (num_points**n_dims, n_dims), grid nodes scaled to the bounds
               (the last dimension varies fastest)
    """
    n_dims = bounds.shape[1]
    # Evenly spaced levels on the unit interval, shared by all dimensions
    unit_levels = np.linspace(0, 1, num_points)
    # Cartesian product of the levels = every node of the unit hyper-grid
    unit_nodes = itertools.product(unit_levels, repeat=n_dims)
    unit_grid = pd.DataFrame(list(unit_nodes)).drop_duplicates().to_numpy()
    # Affine map from the unit cube onto [lower, upper]
    lo, hi = bounds[0], bounds[1]
    span = hi - lo
    return unit_grid * span + lo

#Create a function to define the SSE for any Theta vector on a heat map.
def sse_func(xx, yy, x, y, args):
    '''
    Evaluate the sum of squared errors on a two-parameter meshgrid (heat-map data).
    Arguments:
        xx: 2D array, meshgrid of Theta1 values
        yy: 2D array, meshgrid of Theta2 values
        x: independent variable vector passed to the model
        y: dependent variable vector the model output is compared against
        args: dict of fixed model constants forwarded to uniquac_model
    Returns:
        sse: len(xx) x len(yy) array; entry [i][j] is the SSE at theta = (xx[i][j], yy[i][j])
    '''
    n_rows, n_cols = len(xx), len(yy)
    sse = np.zeros([n_rows, n_cols])

    # Visit every grid node in row-major order
    for i, j in np.ndindex(n_rows, n_cols):
        theta_ij = np.array([xx[i][j], yy[i][j]])
        residual = y - uniquac_model(theta_ij, x, args)
        sse[i][j] = sum(residual**2)

    return sse

# Create a function to optimize, in this case, least squares fitting
def regression_func(theta_guess, x, y, args=None):
    '''
    Residual function for least-squares fitting.
    Arguments:
        theta_guess: ndarray, trial parameter vector
        x: ndarray, experimental X data (independent variable)
        y: ndarray, experimental Y data (dependent variable)
        args: dict or None, fixed model constants forwarded to calc_mm_model
    Returns:
        residual vector y - model(theta_guess, x)
    '''
    prediction = calc_mm_model(theta_guess, x, args)
    # The least-squares solver builds the SSE from these residuals itself
    return y - prediction

def calc_gamma_exp(Xexp, P, y1, theta_ref, args):
    """
    Compute "experimental" activity coefficients of component 1 from VLE data.

    For x1 != 0 the coefficient follows from the measurements as
    gamma_1 = P * y1 / (psat_1 * x1). At x1 == 0 that expression is 0/0, so the
    UNIQUAC infinite-dilution value evaluated at theta_ref is used instead.
    At x1 == 1 the measured expression is well defined and is used directly
    (gamma_1 of the pure component; the infinite-dilution value of component 2
    is not a gamma_1 and is therefore never returned).

    Parameters
    ----------
        Xexp: array_like, liquid mole fractions x1 of component 1
        P: array_like, total pressures, same length as Xexp (same units as the Antoine psat)
        y1: array_like, vapour mole fractions of component 1, same length as Xexp
        theta_ref: array_like of length 2, reference UNIQUAC interaction parameters
            (element 0 -> tau_12, element 1 -> tau_21); only used at x1 == 0
        args: dict with keys "r", "q" (length-2 UNIQUAC size parameters), "R", "T",
            "A", "B", "C" (length-2 Antoine constants, T converted with T - 273.15)
            and optionally "z" (coordination number, default 10)

    Returns
    -------
        ndarray with one gamma_1 value per entry of Xexp
    """
    # Extract parameters
    r = np.asarray(args["r"], dtype=float)
    q = np.asarray(args["q"], dtype=float)
    z = args.get("z", 10)
    R = args["R"]
    T = args["T"]
    A, B, C = np.asarray(args["A"]), np.asarray(args["B"]), np.asarray(args["C"])

    x1 = np.asarray(Xexp, dtype=float)
    pressure = np.asarray(P, dtype=float)
    y_vap = np.asarray(y1, dtype=float)

    l = (z / 2) * (r - q) - (r - 1)
    tau = np.exp(-np.asarray(theta_ref, dtype=float) / (R * T))
    psat = 10 ** (A - (B / (C + (T - 273.15))))

    # ln(gamma_1) in the limit x1 -> 0; z/2 (not a literal 5) keeps this limit
    # consistent with the definition of l when a non-default z is supplied
    ln_gamma_inf1 = (
        np.log(r[0] / r[1])
        + q[0] * ((z / 2) * np.log((q[0] * r[1]) / (q[1] * r[0])) - np.log(tau[1]) + 1 - tau[0])
        + l[0] - (r[0] / r[1]) * l[1]
    )
    gamma_inf1 = np.exp(ln_gamma_inf1)

    at_zero = x1 == 0
    # Replace zeros by a dummy value so the division below never sees 0/0;
    # those entries are overwritten by the infinite-dilution value anyway
    safe_x1 = np.where(at_zero, 1.0, x1)
    measured = pressure * y_vap / (psat[0] * safe_x1)
    return np.where(at_zero, gamma_inf1, measured)

def uniquac_model(unknown_params, xP, args):
    """
    Compute the activity coefficient of component 1 with the UNIQUAC model for a binary mixture.

    Parameters:
    unknown_params : array_like, length 2
        Interaction energy parameters; tau = exp(-unknown_params / (R*T)), where
        element 0 is used as tau_12 and element 1 as tau_21.
    xP : array_like or float
        Mole fraction(s) x1 of component 1 (x2 = 1 - x1 is inferred).
    args : dict
        A dictionary containing necessary additional parameters:
        - "r": length-2 sequence, volume parameters for the components
        - "q": length-2 sequence, surface area parameters for the components
        - "R": float, gas constant
        - "T": float, temperature
        - "z": float, coordination number (optional, default 10)
        Other keys (e.g. Antoine constants) are ignored; no pressure is computed here.

    Returns:
    np.ndarray, shape (N,)
        gamma_1 for each of the N mole fractions (N = 1 for scalar input).
        For 0 < x1 < 1 the full UNIQUAC expression is used, at x1 == 0 the
        infinite-dilution limit, and at x1 == 1 (or x1 outside [0, 1]) the value 1.
    """
    # Extract parameters
    r = np.asarray(args["r"], dtype=float)
    q = np.asarray(args["q"], dtype=float)
    z = args.get("z", 10)
    R = args["R"]
    T = args["T"]

    # Precompute constants
    l = (z / 2) * (r - q) - (r - 1)
    tau = np.exp(-np.asarray(unknown_params, dtype=float) / (R * T))

    # Column vector of x1; the float cast keeps integer inputs (e.g. x1 = 0)
    # from producing an integer gamma array that would truncate the results
    x1 = np.atleast_2d(np.asarray(xP, dtype=float)).reshape(-1, 1)
    x2 = 1 - x1
    x = np.hstack([x1, x2])

    # Default of 1 covers the pure-component reference state
    gamma = np.ones_like(x)

    # Rows strictly inside the composition range: the logs below are finite there
    interior = (x1.ravel() > 0) & (x2.ravel() > 0)

    if np.any(interior):
        x_in = x[interior, :]  # Shape (M, 2) where M is the number of interior rows

        sum_xq = np.dot(x_in, q)
        sum_xr = np.dot(x_in, r)

        area_frac = (x_in * q) / sum_xq[:, None]
        vol_frac = (x_in * r) / sum_xr[:, None]

        # Combinatorial part; [:, ::-1] / [::-1] swap to "the other component"
        lngC = (
            np.log(vol_frac / x_in)
            + (z / 2) * q * np.log(area_frac / vol_frac)
            + vol_frac[:, ::-1] * (l - r * l[::-1] / r[::-1])
        )

        # Residual part
        lngR = (
            -q * np.log(area_frac + area_frac[:, ::-1] * tau[::-1])
            + area_frac[:, ::-1] * q * (
                tau[::-1] / (area_frac + area_frac[:, ::-1] * tau[::-1])
                - tau / (area_frac[:, ::-1] + area_frac * tau)
            )
        )

        gamma[interior, :] = np.exp(lngC + lngR)

    # Infinite-dilution limits, applied row by row with boolean masks so that
    # every x1 == 0 (or x2 == 0) point is handled wherever it sits in the input
    at_x1_zero = x1.ravel() == 0
    at_x2_zero = x2.ravel() == 0
    if np.any(at_x1_zero) or np.any(at_x2_zero):
        half_z = z / 2
        ln_gamma_inf1 = (
            np.log(r[0] / r[1])
            + q[0] * (half_z * np.log((q[0] * r[1]) / (q[1] * r[0])) - np.log(tau[1]) + 1 - tau[0])
            + l[0] - (r[0] / r[1]) * l[1]
        )
        ln_gamma_inf2 = (
            np.log(r[1] / r[0])
            + q[1] * (half_z * np.log((q[1] * r[0]) / (q[0] * r[1])) - np.log(tau[0]) + 1 - tau[1])
            + l[1] - (r[1] / r[0]) * l[0]
        )
        gamma[at_x1_zero, 0] = np.exp(ln_gamma_inf1)  # component 1 infinitely dilute
        gamma[at_x2_zero, 1] = np.exp(ln_gamma_inf2)  # component 2 infinitely dilute

    return gamma[:, 0]

def calc_mm_model(model_coefficients, x, args = None):
    """
    Evaluate the currently selected regression model at x.

    The active model is uniquac_model; the alternative test models are listed
    below as commented-out expressions in terms of theta and x.

    Parameters
    ----------
        model_coefficients: ndarray, parameter vector (theta) of the model
        x: ndarray, values of the independent variable
        args: dict or None, fixed constants forwarded to the model

    Returns:
    --------
        the value returned by uniquac_model(model_coefficients, x, args)
    """
    theta = model_coefficients
    # Alternative models (return one of these expressions instead to switch model):
    #   MC Example:                   (theta[0] * x**3 - theta[1] * x**2 + 2*x - 1)**2 + (theta[0] - theta[1])**2 + (x**2 - 1)**2
    #   Choline Chloride:             theta[0]*(1 - theta[1]*np.exp(-theta[2]*x))
    #   Ethyl Acrylate:               -(theta[3]+theta[4])*np.exp(-theta[0]*x) + theta[3]*np.exp(-theta[1]*x) + theta[4]*np.exp(-theta[2]*x)
    #   Ratkowsky Model:              (theta[0]*(x-theta[1])*(1-np.exp(theta[2]*(x-theta[3]))))**2
    #   Cardinal Temperature Model:   theta[0]*(1-((x-theta[1])**2/((x-theta[1])**2+x*(theta[3]+theta[2]-x)-(theta[2]*theta[3]))))
    #   Rod and Hancil Nonlinear Fit: theta[0]+1/(x - theta[1])
    #   Kowalik Problem:              theta[0]*((x**2 + theta[1]*x)/(x**2+theta[2]*x + theta[3]))
    #   Model C (Bates and Watts):    theta[0] + (0.49-theta[0])*(np.exp(-theta[1]*(x-8)))
    #   Model G (Bates and Watts):    theta[0]*np.exp(-np.exp(theta[1]-theta[2]*x))
    #   Model I (Bates and Watts):    theta[0]*(1-theta[1]*np.exp(-theta[2]*x))
    #   Model O (Bates and Watts):    (theta[1]*theta[2]+theta[0]*x**theta[3])/(theta[2]+x**theta[3])
    #   Pharmacokinetic model:        theta[0]*np.exp(-theta[3]*x) + theta[1]*np.exp(-theta[4]*x) + theta[2]*np.exp(-theta[5]*x)
    #   Pharmacokinetic model:        np.exp(theta[0]-theta[3]*x) + np.exp(theta[1]-theta[4]*x) - 2*np.exp(theta[2]-theta[5]*x)
    return uniquac_model(theta, x, args)


2025-02-10 20:40:34.370244: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-10 20:40:34.370301: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-10 20:40:34.372307: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-10 20:40:34.382183: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [None]:
#Choline Chloride (ILL POSED)
# param_name_str = "t1t2t3"
# indeces_to_consider = [0,1,2]
# bounds_x = np.array([[3], [8]])
# bounds_theta_l = [35.0,0.4, 0.01]
# bounds_theta_u = [45.0, 1.0, 0.5]
# theta_ref = np.array([39.09, 0.828, 0.159]) 
# theta_names = ['theta_1', 'theta_2', 'theta_3']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#Ethyl Acrylate (ILL POSED + Only 1 Min)
# param_name_str = "t1t2t3t4t5"
# indeces_to_consider = [0,1,2,3,4]
# bounds_x = np.array([[0], [4]])
# bounds_theta_l = [1,0.1,0.01, 0.1, 0.001]
# bounds_theta_u = [10, 1, 0.1, 1, 0.01]
# theta_ref = np.array([3.025, 0.481, 0.0258, 0.310, 0.0011]) 
# theta_names = ['theta_1', 'theta_2', 'theta_3', 'theta_4', 'theta_5']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#MC Example (Well Posed, 2 min)
# param_name_str = "t1t2"
# indeces_to_consider = [0,1]
# bounds_x = np.array([[-2], [1.5]])
# bounds_theta_l = [-2,-2]
# bounds_theta_u = [2,2]
# theta_ref = np.array([-1.5, 0.5 ]) 
# theta_names = ['theta_1', 'theta_2']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#The Ratkowsky Model (Ill posed, 2 min) (see Set-membership nonlinear regression approach to parameter estimation)
# D. Ratkowsky, R. Lowry, T. McMeekin, A. Stokes, R. Chandler, A model for
#bacterial culture growth rate throughout the entire biokinetic temperature
#range, J. Bacteriol. 154 (1983) 1222–1226.)
# param_name_str = "t1t2t3t4"
# indeces_to_consider = [0,1,2,3]
# bounds_x = np.array([[294], [320]])
# bounds_theta_l = [0,245,0,310]
# bounds_theta_u = [0.1,290,1,325]
# theta_ref = np.array([0.0325, 273.54, 0.336, 321.37]) 
# theta_names = ["b", "Tmin", "c", "Tmax"]
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#cardinal temperature model (Well posed, 2 local min)
# J. Lobry, L. Rosso, J. Flandrois, A fortran subroutine for the determination of
#parameter confidence limits in non-linear models, Binary 3 (1991) 86–93.
# param_name_str = "t1t2t3t4"
# indeces_to_consider = [0,1,2,3]
# bounds_x = np.array([[294], [320]])
# bounds_theta_l = [1.0,308,280,319]
# bounds_theta_u = [2.0,318,296,325]
# theta_ref = np.array([1.396, 313.25, 289.40, 320.23]) 
# theta_names = ["mu_opt", "Topt", "Tmin", "Tmax"]
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#Rod and Hancil Nonlinear Fit (Well posed, 2 min)
#Rod, V.; Hancil, V. Iterative Estimation of Model Parameters when Measurements of 
# #All Variables are Subject to Error. Comput. Chem. Eng. 1980, 4, 33.
# param_name_str = "t1t2"
# indeces_to_consider = [0,1]
# bounds_x = np.array([[0], [5]])
# bounds_theta_l = [1,1]
# bounds_theta_u = [10,10]
# theta_ref = np.array([2.00,6.00]) 
# theta_names = ['theta_1', 'theta_2']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#Kowalik Problem (see Global Optimization in Parameter Estimation of Nonlinear
#Algebraic Models via the Error-in-Variables Approach) (Ill posed, 2 local min)
#Moore, R., Hansen, E., & Leclerc, A. (1992). Rigorous Methods for Global Optimization. 
# #In C. A. Floudas & P. M. Pardalos (Eds.), Recent Advances in Global Optimization (pp. 321–342). 
# #Princeton University Press. http://www.jstor.org/stable/j.ctt7ztwft.19
# param_name_str = "t1t2t3t4"
# indeces_to_consider = [0,1,2,3]
# bounds_x = np.array([[1/4], [2.50]])
# bounds_theta_l = [0,-0.2892, -0.2892,-0.2892]
# bounds_theta_u = [0.2892,0.2892, 0.35,0.2892]
# theta_ref = np.array([0.19283, 0.19088, 0.12314, 0.13578]) 
# theta_names = ['theta_1', 'theta_2', 'theta_3', 'theta_4']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#Model C (Bates and Watts) (Well posed, 1 min)
# param_name_str = "t1t2"
# indeces_to_consider = [0,1]
# bounds_x = np.array([[8], [42]])
# bounds_theta_l = [0.380,0.075]
# bounds_theta_u = [0.400, 0.128]
# theta_ref = np.array([0.3901,0.1016]) 
# theta_names = ['theta_1', 'theta_2']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#Gompertz Model (Bates and Watts Model H) (Ill posed, 1 min)
# param_name_str = "t1t2t3"
# indeces_to_consider = [0,1,2]
# bounds_x = np.array([[118], [1582]])
# bounds_theta_l = [130, 1, 0.001]
# bounds_theta_u = [200, 5, 0.003]
# theta_ref = np.array([172, 2.18,0.0016]) 
# theta_names = ['theta_1', 'theta_2', 'theta_3']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#Bates and Watts Model I (Ill Posed 1 min)
# param_name_str = "t1t2t3"
# indeces_to_consider = [0,1,2]
# bounds_x = np.array([[118], [1582]])
# bounds_theta_l = [450, 0.1, 10**-4]
# bounds_theta_u = [500, 0.5, 5*10**-4]
# theta_ref = np.array([478, 0.357, 2.64*10**-4]) 
# theta_names = ['theta_1', 'theta_2', 'theta_3']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#Bates and Watts Model O (see 10.1063/1.4940872) (Well posed, 1 local min)
# param_name_str = "t1t2t3t4"
# indeces_to_consider = [0,1,2,3]
# bounds_x = np.array([[0], [10]])
# bounds_theta_l = [0.5, 1,1,1]
# bounds_theta_u = [2, 15, 10,5]
# theta_ref = np.array([1, 12.371, 6.318,3.46]) 
# theta_names = ['theta_1', 'theta_2', 'theta_3', 'theta_4']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#Pharmacokinetic model
# William R. Esposito, Christodoulos A. Floudas, Parameter estimation in nonlinear algebraic models via global optimization, 
# Computers & Chemical Engineering, Volume 22, Supplement 1, 1998, Pages S213-S220, ISSN 0098-1354, https://doi.org/10.1016/S0098-1354(98)00217-8. (Well posed, 2 local min)
# param_name_str = "t1t2t3t4t5t6"
# indeces_to_consider = [0,1,2,3,4,5]
# bounds_x = np.array([[7.5], [120]])
# bounds_theta_l = [-2.302, 0,0,0,0,0 ]
# bounds_theta_u = [0, 2.302, 2.302, 0.5,0.5,0.5]
# theta_ref = np.array([-1.0345, 0.6966, 1.5200, 0.01491, 0.1102, 0.2847]) 
# # bounds_theta_l = [-10,-10,-10,0,0,0 ]
# # bounds_theta_u = [10,10,10, 5,5,5]
# # theta_ref = np.array([0.3554,2.007,-4.572, 0.01491, 0.1102, 0.2847]) 
# theta_names = ['theta_1', 'theta_2', 'theta_3', 'theta_4', 'theta_5', 'theta_6']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )

#VLE DMSO + EG
# param_name_str = "t1t2"
# indeces_to_consider = [0,1]
# bounds_x = np.array([[0], [1]])
# bounds_theta_l = [-2e3,-2e3 ]
# bounds_theta_u = [2e3,2e3]
# theta_ref = np.array([757.6653, -759.0704]) 
# theta_names = ['theta_1', 'theta_2']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )
# Xexp = np.array([0,.149,.2100,.283,.4010,.5050,.5970,.7060,.7910,.899,1])
# Yexp_org = np.array([15.80, 16.30,16.80,17.70,17.80,19.60,22.40,27.10,29.80,34.40,38.80])
# argsvals = {"r" :[2.8266, 2.4088], #DMSO, EG
#         "q" :[2.4720, 2.2480],
#         "T" : 373.15, #K
#         "R" : 1.98721 , #cal/molK
#         "A": [6.88076,8.09083],
#         "B": [1541.520,2088.936],
#         "C": [191.797,203.454]
#         } 

#VLE Methanol + Water (https://www.degruyter.com/document/doi/10.1515/zpch-1927-13002/html / DECHEMA)
# Active case study: UNIQUAC fit of methanol (1) + water (2) activity coefficients.
param_name_str = "t1t2"
# Positions of theta_ref that are treated as unknowns in the regression
indeces_to_consider = [0,1]
# Row 0 = lower bound, row 1 = upper bound of the independent variable x1
bounds_x = np.array([[0], [1]])
# Search box for the fitted parameters (lower / upper), one entry per theta
bounds_theta_l = [-1e3,-1e3 ]
bounds_theta_u = [1e3,1e3]
# Reference parameter values; also used to draw the "true" model curve
theta_ref = np.array([-99.5814, 99.4288]) 
theta_names = ['theta_1', 'theta_2']
theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )
# Measured total pressure for each point
# NOTE(review): units presumably match the Antoine vapour pressures below (mmHg?) -- confirm against the data source
P = np.array([119.50, 157.00, 169.70, 196.00, 217.70, 236.60, 
                   283.00, 306.40, 324.10, 348.40, 373.50, 391.10])
# Measured vapour mole fraction of component 1
y1 = np.array([0.2741, 0.4741, 0.5220, 0.6294, 0.7106, 0.7580, 
               0.8203, 0.8654, 0.9007, 0.9406, 0.9627, 0.9736])
# Measured liquid mole fraction of component 1 (the independent variable)
Xexp = np.array([0.0486, 0.1218, 0.1478, 0.2131, 0.2693, 0.3252, 
               0.5143, 0.6279, 0.7083, 0.8037, 0.9007, 0.9461])
# Fixed model constants: UNIQUAC r/q, temperature, gas constant and Antoine A/B/C per component
argsvals = {"r" :[1.4311,0.92], #MeOH, H2O
        "q" :[1.432,1.4],
        "T" : 49.76+273.15, #K
        "R" : 1.98721 , #cal/molK
        "A": [8.08097,8.07131],
        "B": [1582.271,1730.63],
        "C": [239.726,233.426]
        } 
# Regression targets: activity coefficients of component 1 derived from P, y1 and Xexp
Yexp_org = calc_gamma_exp(Xexp, P, y1, theta_ref, argsvals)


#VLE Methanol + Water (DECHEMA 50C) (Only 1 Minimum)
# param_name_str = "t1t2"
# indeces_to_consider = [0,1]
# bounds_x = np.array([[0], [1]])
# bounds_theta_l = [-1e3,-1e3 ]
# bounds_theta_u = [1e3,1e3]
# theta_ref = np.array([349.2925, -246.6378]) 
# theta_names = ['theta_1', 'theta_2']
# theta_true = np.array([theta_ref[i] for i in indeces_to_consider] )
# theta_true_names = np.array([theta_names[i] for i in indeces_to_consider] )
# P = np.array([92.50,143.00,196.50, 244.50,286.00,333.00,373.00,406.00])
# y1 = np.array([0.0, 0.3783,0.5883,0.7076, 0.7807,0.8655,0.9349,1.000])
# Xexp = np.array([0.0,0.0873,0.1900,0.3417,0.4943,0.6919,0.8492,1.000])
# argsvals = {"r" :[1.4311,0.92], #MeOH, H2O
#         "q" :[1.432,1.4],
#         "T" : 50+273.15, #K
#         "R" : 1.98721 , #cal/molK
#         "A": [8.08097,8.07131],
#         "B": [1582.271,1730.63],
#         "C": [239.726,233.426]
#         } 
# Yexp_org = calc_gamma_exp(Xexp, P, y1, theta_ref, argsvals)
# Show the activity coefficients that are used as regression targets below
print(Yexp_org)

[1.63480222 1.48233672 1.45379261 1.40418363 1.39338723 1.33769762
 1.09488163 1.02432958 0.99969265 0.98903964 0.96833574 0.9762384 ]


In [None]:
num_params = len(theta_ref)
# Fixed seed: the noise draw here and the random multistart guesses later are reproducible
np.random.seed(1)
# Evaluate model and add noise based on assumed theta values
# This generates experimental data points
num_points = 10 #len(Xexp)
# Xexp = grid_sampling(num_points, bounds_x)
# # Xexp = np.array([7.5,11.5,15.5,22,45,75,90,120])
# Yexp_org = np.array([ calc_mm_model(theta_ref, Xexp[i], argsvals)  for i in range(len(Xexp)) ]).flatten()
# std = 0 means no noise is added, i.e. Yexp equals Yexp_org
std = 0#abs(np.mean(Yexp_org))*0.05
noise = np.random.normal(size=len(Yexp_org), loc = 0, scale = std)
Yexp = Yexp_org + noise

# Evaluate the model at theta_ref on a dense grid spanning the data range
X = np.linspace(np.min(Xexp),np.max(Xexp),100).reshape(-1,1)
Y = calc_mm_model(theta_ref, X, argsvals)
# Compare the data to the reference model; distinct labels keep the legend readable
plt.plot(X,Y,'b-',linewidth=2,label=r"$f(\theta_{ref})$")
plt.plot(Xexp,Yexp,'r.',markersize=10,label=r"$y_{exp}$")
plt.title("Plotting True Model and Synthetic Data")
plt.xlabel(r"$x$",fontsize=14)
plt.ylabel(r'$y$',fontsize=14)
plt.legend()
plt.show()

In [None]:
##New Cell

## define function that includes nonlinear model
def model(theta_guess, theta_ref, x, indeces_to_consider, args=None):
    '''
    Evaluate the regression model point by point for a trial set of parameters.

    Parameters
    ----------
        theta_guess: ndarray, trial values for the parameters being fitted
        theta_ref: ndarray, full reference parameter vector; entries not listed in
            indeces_to_consider keep these values (the input is not modified)
        x: sequence/ndarray, independent variable data (exp or pred); iterated along its first axis
        indeces_to_consider: list of int, positions of theta_ref replaced by theta_guess
        args: dict or None, fixed constants forwarded to calc_mm_model
    Returns
    -------
        y_model: ndarray, flattened model output for all points in x
    '''
    # Build the full coefficient vector once: it does not depend on the data point.
    # The float copy protects the caller's theta_ref and avoids silent truncation
    # of theta_guess when theta_ref happens to be an integer array.
    model_coefficients = np.array(theta_ref, dtype=float)
    model_coefficients[indeces_to_consider] = theta_guess

    # Evaluate one point at a time, as calc_mm_model is called per data point
    y_data = [calc_mm_model(model_coefficients, x_i, args) for x_i in x]

    # Convert list to array and flatten array
    return np.array(y_data).flatten()

# Smoke test: model output at the reference parameters for the experimental x values
print(model(theta_true, theta_true, Xexp, indeces_to_consider,argsvals))

##New Cell

# Create a function to optimize, in this case, least squares fitting
def regression_func(theta_guess, theta_ref, x, indeces_to_consider, y, args=None):
    '''
    Residual function for least-squares fitting.
    Arguments:
        theta_guess: ndarray, trial values for the fitted parameters
        theta_ref: ndarray, full reference parameter vector
        x: ndarray, experimental X data (independent variable)
        indeces_to_consider: list of int, positions of theta_ref replaced by theta_guess
        y: ndarray, experimental Y data (dependent variable)
        args: dict or None, fixed constants forwarded to the model
    Returns:
        residual vector y - model(theta_guess, ...)
    '''
    y_model = model(theta_guess, theta_ref, x, indeces_to_consider, args)
    # The least-squares solver builds its objective from these residuals
    return y - y_model

# Smoke test: residuals of the data with respect to the model at the reference parameters
print(regression_func(theta_true, theta_true, Xexp, indeces_to_consider, Yexp, argsvals))

#Create a function to define the SSE for any Theta vector on a heat map.
def sse_func(theta_guesses, theta_ref, indeces_to_consider, Xexp, Yexp, args):
    '''
    Sum of squared errors for a list of parameter sets, arranged as a heat-map grid.
    Arguments:
        theta_guesses: sequence of parameter vectors to evaluate
        theta_ref: ndarray, full reference parameter vector
        indeces_to_consider: list of int, positions of theta_ref replaced by each guess
        Xexp: independent variable data
        Yexp: dependent variable data the model output is compared against
        args: dict of fixed constants forwarded to the model
    Returns:
        sse: 2D array; the flat SSE vector reshaped to int(sqrt(len(theta_guesses))) rows
             and then transposed
    '''
    # One SSE value per parameter set, in input order
    sse_flat = np.array(
        [
            np.sum((model(guess, theta_ref, Xexp, indeces_to_consider, args) - Yexp)**2)
            for guess in theta_guesses
        ],
        dtype=float,
    )

    # Fold the flat vector into a grid whose row count is the integer square root of its length
    n_side = int(np.sqrt(len(theta_guesses)))
    return sse_flat.reshape(n_side, -1).T

# Smoke test: SSE at the reference parameters (a single guess gives a 1x1 grid)
print(sse_func([theta_true], theta_true, indeces_to_consider, Xexp, Yexp, argsvals))

In [None]:
#New Cell
# NOTE(review): Theta_Guess is not used by the multistart below (starting points are drawn at random)
Theta_Guess = np.array([1,1])

# print(calc_cs1_polynomial(Theta_Guess,Xexp))
# Results collected over all restarts
sse_list = []
opt_list = []
theta_list = []

## specify bounds
lower = np.array([bounds_theta_l[i] for i in indeces_to_consider] )
upper = np.array([bounds_theta_u[i] for i in indeces_to_consider] )
bounds = (lower, upper)

# Multistart settings
n_restarts = 1000       # number of random starting points
optimality_tol = 1e-4   # restarts with a larger first-order optimality are discarded

for i in range(n_restarts):
    # Random starting point drawn uniformly inside the bounds
    theta_guess = np.random.uniform(low=lower, high=upper, size=len(lower) )
    # print(theta_guess)
    Solution = optimize.least_squares(regression_func, theta_guess, bounds=bounds, method='trf',
                                        args=(theta_true, Xexp.reshape(-1,1), indeces_to_consider, Yexp, argsvals),verbose=0)

    theta = Solution.x
    # least_squares reports cost = 0.5 * sum(residuals**2); convert to the actual SSE
    sse_list.append(2 * Solution.cost)
    theta_list.append(theta)
    opt_list.append(Solution.optimality)
    # print("theta = ",theta)

all_sets = pd.DataFrame({'Theta': theta_list, 'SSE': sse_list, 'Optimality': opt_list})

# print(all_sets)
#Organize all_sets by SSE, lowest to highest
all_sets = all_sets.sort_values(by="SSE", ascending=True)

# Drop restarts that ended with exactly the same SSE
all_sets = all_sets.drop_duplicates(
    subset="SSE", keep="first")
# Drop solutions whose optimality measure is not below the tolerance
all_sets = all_sets[all_sets["Optimality"] < optimality_tol]

print(len(all_sets))

In [None]:
# Inspect the converged multistart solutions (sorted by SSE in the previous cell)
print(all_sets)

In [None]:
#Scale values between 0 and 1 with minmax scaler
# Fit on the bounds of the fitted parameters (lower/upper) so the number of features
# matches the solution vectors even when only a subset of theta is regressed
scaler = MinMaxScaler()
scaler.fit([lower, upper])
all_param_sets = np.array(list(map(np.array, all_sets["Theta"].values)))
all_param_sets_scaled = scaler.transform(all_param_sets)
#Calculate the scaled euclidean distance between each pair of scaled points
dist = pdist(all_param_sets_scaled)/np.sqrt(all_param_sets.shape[1])
#Convert the condensed distance matrix to square form
dist_sq = squareform(dist)

#Initialize a boolean array to keep track of unique sets
unique_mask = np.ones(all_param_sets.shape[0], dtype=bool)
duplicate_counts = np.zeros(all_param_sets.shape[0], dtype=int)

# Solutions closer than this scaled distance are treated as the same local minimum
dist_threshold = 0.01

# Walk through the solutions in SSE order; the first member of each cluster is kept
for i in range(all_param_sets.shape[0]):
    # If the current set is already marked as non-unique, skip it
    if not unique_mask[i]:
        continue
    # Mark sets within the threshold distance as non-unique
    within_threshold = dist_sq[i] <= dist_threshold
    # The count includes the kept set itself (its distance to itself is 0)
    duplicate_counts[i] = np.sum(within_threshold)
    unique_mask[within_threshold] = False
    unique_mask[i] = True  # Keep the current set

# Filter out the unique sets from the pandas df
local_min_sets = all_sets[unique_mask]
local_min_counts = duplicate_counts[unique_mask]

print("Num local min count", local_min_counts)

print("Num local min", len(local_min_sets))
print(local_min_sets)

# NOTE(review): this selects the SECOND-ranked local minimum when one exists and falls back
# to the lowest-SSE one otherwise, although it is printed as "Best Theta" -- confirm intent
try:
    nlr_theta = local_min_sets.iloc[1]['Theta']
except IndexError:
    nlr_theta = local_min_sets.iloc[0]['Theta']
nlr_thetas = np.vstack(local_min_sets['Theta'])

print("Best Theta = ", nlr_theta)
print("theta_ref", theta_true)
Y_nlr_exp = model(nlr_theta, theta_true, Xexp, indeces_to_consider, argsvals)
error = (Yexp - Y_nlr_exp)
print("SSE = ", np.sum(error**2))

In [8]:
#Create heat map data
# For every pair of parameters, build an (n_points**2, num_params) array of parameter sets in
# which the two selected parameters sweep a grid while all others stay at theta_ref.
heat_map_data_dict = {}

#Create a linspace for the number of dimensions and define number of points
# NOTE: dim_list holds the parameter indices as floats (0.0, 1.0, ...)
dim_theta = num_params
dim_list = np.linspace(0, dim_theta-1, dim_theta)

#Create a list of all combinations (without repeats e.g no (1,1), (2,2)) of dimensions of theta
# Cast to int so that each row can be used directly as an index pair
mesh_combos = np.array(list(combinations(dim_list, 2)), dtype = int)
n_points = 20

#Meshgrid set always defined by n_points**2
# Every row starts as a copy of theta_ref
theta_set = np.tile(np.array(theta_ref), (n_points**2, 1))

#Set x_vals
# NOTE(review): norm_x_vals is not referenced again in the visible cells
norm_x_vals = Xexp.reshape(-1,1)

#Loop over all possible theta combinations of 2
for i in range(len(mesh_combos)):
    #Create a copy of the true values to change the meshgrid values on
    theta_set_copy = np.copy(theta_set)
    #Set the indices of theta_set for evaluation as each row of mesh_combos
    idcs = mesh_combos[i]
    #define name of parameter set as tuple ("param_1,param_2")
    data_set_name = (theta_true_names[idcs[0]], theta_true_names[idcs[1]])

    #Create a meshgrid of values of the 2 selected values of theta and reshape to the correct shape
    #Assume that theta1 and theta2 have equal number of points on the meshgrid
    theta1 = np.linspace(lower[idcs[0]], upper[idcs[0]], n_points)
    theta2 = np.linspace(lower[idcs[1]], upper[idcs[1]], n_points)
    theta12_mesh = np.array(np.meshgrid(theta1, theta2))
    # (2, n, n) -> transpose -> (n, n, 2) -> one (theta1, theta2) pair per row
    theta12_vals = np.array(theta12_mesh).T.reshape(-1,2)

    #Set initial values for evaluation (true values) to meshgrid values
    theta_set_copy[:,idcs] = theta12_vals
    
    #Append data set to dictionary with name
    heat_map_data_dict[data_set_name] = theta_set_copy
    
# Keys in insertion order, i.e. the same order as mesh_combos
hm_data_keys = list(heat_map_data_dict.keys())

In [None]:
# Parameter vectors of all distinct local minima (one row per minimum)
print(nlr_thetas)

In [None]:
#New Cell
# If True, the SSE surface is log-transformed in this cell before plotting
log_data = False
# save_figure = True
save_figure = False

#Get Number of pairs
combos = list(combinations(dim_list, 2))
pairs = len(combos)

#For each pair
for pair in range(pairs):
    # dim_list holds floats, so convert the pair to integer indices before indexing with it
    idcs_to_plot = [int(idx) for idx in combos[pair]]
    #Make a meshgrid for each parameter
    theta_data = heat_map_data_dict[hm_data_keys[pair]].reshape(n_points, n_points, -1).T
    # np.take requires integer indices, hence idcs_to_plot rather than the raw float pair
    theta_mesh = np.take(theta_data, idcs_to_plot, axis=0)

    # SSE of the model against the data at every node of the parameter grid
    sse_sim = sse_func(heat_map_data_dict[hm_data_keys[pair]], theta_ref, indeces_to_consider, Xexp.reshape(-1,1), Yexp, argsvals)
    param_names = theta_true_names[idcs_to_plot]

    z = np.array([sse_sim])
    # print(np.amin(z), np.amax(z))
    if log_data:
        z_titles = ["ln("+ r"$\mathbf{e(\theta)_{sim}}$" + ")"]
        z = np.log(z)
    else:
        z_titles = [r"$\mathbf{e(\theta)_{sim}}$"]

#     z_save_names = ["sse_sim", "sse_nlr"]
#     path_end = '-'.join(z_save_names) 
    levels = [100]

    param_info_dict = {"true":theta_true, "min_sse":nlr_thetas, "names":param_names, "idcs":idcs_to_plot}
    # log_data=False here because any log transform has already been applied to z above
    # NOTE(review): confirm this matches what plot_nlr_heat_maps expects
    plotters.plot_nlr_heat_maps(theta_mesh, z, z_titles, levels, param_info_dict, log_data = False)

In [None]:
# create plot and compare predictions and experiments
print(nlr_theta)
# Dense x grid spanning the full bounds of the independent variable
X_pred = np.linspace(bounds_x[0], bounds_x[1]).reshape(-1,1)
Y_pred = model(nlr_theta, theta_true, X_pred, indeces_to_consider, argsvals).flatten()
print(Y_pred)
plt.figure(figsize = (9,6))
# Data, reference-parameter curve (X, Y from the data-generation cell) and fitted curve
plt.plot(Xexp,Yexp,'.g',markersize=20,label=r'$y$')
plt.plot(X,Y,'r-',linewidth=3,label=r'$f(\mathbf{\theta_{true}})$')
plt.plot(X_pred,Y_pred,'--b',linewidth=4,label=r'$f(\mathbf{\theta})$')
# plt.title("Predictions with $\\theta = [0.994,-1.00]$ vs Synthetic Data")
# plt.title("Predictions with $\\theta = [0.802,-0.757]$ vs Synthetic Data")
plt.legend(loc = "best", fontsize=30) #(bbox_to_anchor=(1.04, 1), borderaxespad=0
plt.xlabel(r'$x$',fontsize=30,fontweight='bold')
plt.ylabel(r'$y$',fontsize=30,fontweight='bold')

plt.locator_params(axis='y', nbins=5)
plt.locator_params(axis='x', nbins=5)
plt.minorticks_on() # turn on minor ticks
plt.tick_params(which="minor",direction="in",top=True, right=True)
# plt.grid(True)

# plt.savefig("Figures/sim_true_comp_poster.png", dpi=300, bbox_inches='tight')
plt.show()


##New Cell

#Plot error
# Residuals of the fitted model at the experimental points, against the predicted values
print("SSE = ", np.sum(error**2))
plt.plot(Y_nlr_exp,error,"b.",markersize=20, label = "Error")
plt.title("Residuals vs. Predicted Value")
plt.xlabel('Predicted Y')
plt.ylabel('Residuals')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Assumed measurement variance, floored at 0.01 so the FIM stays finite when std = 0
sigre = np.maximum(std**2, 0.01)
# Residual degrees of freedom = number of data points - number of fitted parameters
n_fit_params = len(nlr_theta)
MSE = (error.T @ error)/(len(error) - n_fit_params)
# Evaluate the Jacobian at the reported estimate. `Solution` from the multistart loop belongs
# to the LAST random restart, which is generally not the minimum stored in nlr_theta, so the
# fit is restarted from nlr_theta (it should converge immediately) and its Jacobian is used.
fit_at_nlr = optimize.least_squares(regression_func, nlr_theta, bounds=bounds, method='trf',
                                    args=(theta_true, Xexp.reshape(-1,1), indeces_to_consider, Yexp, argsvals),verbose=0)
# Gauss-Newton approximation of the Hessian of the least-squares objective
Hess = fit_at_nlr.jac.T @ fit_at_nlr.jac
Covar = sigre * np.linalg.inv(Hess)
FIM = (1/sigre)*Hess
# FIM is symmetric, so eigh applies: real eigenvalues, returned in ascending order
eigvals, eigvecs = np.linalg.eigh(FIM)
k = np.max(eigvals)/np.min(eigvals)
print("MSE = ", MSE)
print("Experimental Variance = ", sigre)
print("Parameter Prediction Standard Deviation: \n", np.sqrt(np.diag(Covar)))
print("Covariance matrix:\n",Covar)
print("Det(FIM) = ", np.linalg.det(FIM))
print("Eigen Values (FIM):\n", eigvals)
print("Eigen Vectors (FIM)\n", eigvecs)
print("Condition Number (FIM): ", k)
print("Degree of precision loss (log10(k) of FIM): ", math.log10(k))