In [2]:
# %pip install openpyxl
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import scipy
import numpy as np
from scipy.integrate import solve_ivp
import glob
from deap import base, creator, tools, algorithms
import os

In [3]:
def import_dfs():
    # Path to the folder containing CSV files
    folder_path = '../dados/'

    # Get a list of all CSV files in the folder
    csv_files = glob.glob(os.path.join(folder_path, '5*.csv'))
    # Initialize an empty list to hold dataframes
    data_frames = []

    # Read the first CSV file to establish the schema
    schema = ['ester_mm', 'amox_mm', 'apa_mm', 'aoh_mm','apa_t']


    # Load each remaining CSV file, reorder columns, and append to the list
    for file in csv_files:
        df = pd.read_csv(file)  
        print(file)
        df = df[schema]  # Reorder columns to match the schema
        data_frames.append(df)
    
    
    # HARDCODED
    data_frames.append(pd.read_csv('../dados/nh60ab80.csv'))
    return data_frames
data_frames = import_dfs()
print(data_frames[0].columns)
x_names = ['ester_mm', 'amox_mm', 'apa_mm', 'aoh_mm']

../dados\5.100nh40ab80.csv
../dados\5.102nh20ab40.csv
../dados\5.103nh40ab30.csv
../dados\5.106nh100ab40.csv
../dados\5.107nh55ab55.csv
../dados\5.109nh5ab55.csv
../dados\5.110nh10ab55.csv
../dados\5.112nh78ab35.csv
../dados\5.48nh20ab80.csv
../dados\5.80nh12ab40.csv
../dados\5.81nh5ab30.csv
Index(['ester_mm', 'amox_mm', 'apa_mm', 'aoh_mm', 'apa_t'], dtype='object')


In [4]:
def enzymic_amox(t, y, kcat1, kcat2, Km1, Km2, Tmax, Ken, kAB, kAN, kAOH, kNH):
    FAB = 0
    FNH = 0 

    CAB = y[0]
    CAN = y[1]
    CNH = y[2]
    CAOH = y[3]

    Cez = 1  # Assuming a constant value for Cez if not provided

    # Consumo de ester
    VAB = (kcat1 * CAB * Cez) / ((Km1 * (1 + (CAN/kAN) + (CAOH/kAOH))) + CAB)
    
    # Hidrolise de amoxicilina
    VAN = (kcat2 * CAN * Cez) / ((Km2 * (1 + (CAB/kAB) + (CNH/kNH) + (CAOH/kAOH))) + CAN)
    
    # Enzima saturada com 6-apa
    X = CNH / (Ken + CNH)
    
    # Sintese enzimatica
    VS = VAB * Tmax * X

    # Hidrolise de ester
    Vh1 = (VAB - VS) 

    dy = np.zeros(4)

    # C. ester
    dy[0] = (-(VS - VAN) - (Vh1 + VAN)) + FAB 
    
    # C. amox
    dy[1] = VS - VAN                        
    
    # C. 6-apa
    dy[2] = -(VS - VAN) + FNH                
    
    # C. POHPG
    dy[3] = Vh1 + VAN
    
    return dy


In [5]:
kcat1        = 0.181
kcat2        = 0.395
Km1          = 5.449
Km2          = 1.694
Tmax         = 0.824
Ken          = 7.947
kAB          = 0.682
kAN          = 1.989
kAOH         = 9.856
kNH          = 9.763

P = np.zeros(10)
P[0]   = kcat1    
P[1]   = kcat2    
P[2]   = Km1      
P[3]   = Km2      
P[4]   = Tmax     
P[5]   = Ken      
P[6]   = kAB      
P[7]   = kAN      
P[8]   = kAOH     
P[9]  = kNH 
Np = len(P)



In [6]:
def ode15s_amox(P, CI, t):
    try:
        sol = solve_ivp(
            enzymic_amox, 
            t_span=(t[0], t[-1]), 
            t_eval=t, 
            y0=CI, 
            method='BDF', 
            args=P, 
            #atol=1e-8, 
            #tol=1e-6
        )
        if sol.status != 0:
            raise ValueError("ODE solver failed to converge")
        return sol.y
    except Exception as e:
        print(f"Solver failed with error: {e}")
        # Handle solver failure (e.g., return NaNs or retry with different parameters)
        return np.full((len(CI), len(t)), np.nan)
    

In [7]:
def calculate_error_mass_reg(model_output, experimental_data,params):

    exp_mass = np.sum(model_output, axis=1)    
    
    model_mass = np.sum(experimental_data, axis=1)


    mass_conservation_penalty = np.sum((exp_mass - model_mass)**2)

    total_error = np.sum((model_output - experimental_data) ** 2) + mass_conservation_penalty
    return total_error

In [8]:
def weighted_least_squares_with_covariance(observations, predictions):
    """
    Compute the weighted least squares cost using the covariance matrix from residuals.

    Parameters:
    observations : numpy array
        Observed data of shape (n_samples, n_features).
    predictions : numpy array
        Predicted data of shape (n_samples, n_features).

    Returns:
    total_cost : float
        Total weighted least squares cost.
    covariance_matrix : numpy array
        Covariance matrix of the residuals.
    """
    # Calculate residuals
    residuals = observations - predictions

    # Calculate the covariance matrix from the residuals
    covariance_matrix = np.cov(residuals, rowvar=False)

    # Calculate the inverse of the covariance matrix to get the weight matrix
    W = np.linalg.inv(covariance_matrix)

    # Compute the weighted least squares cost
    total_cost = 0
    for res in residuals:
        total_cost += np.dot(np.dot(res.T, W), res)
    
    return total_cost

In [9]:
def objective_function(params):
    total_error = 0
    for df in data_frames:
        x_target = df.loc[:,x_names].to_numpy()
        t_target = df['apa_t'].to_numpy()
        ic = x_target[0]
        sol = ode15s_amox(params,ic,t_target)
        sol = sol.T
        if np.isnan(sol).any():
            return (np.inf,)  # Return a large cost if the solver fails
        total_error += weighted_least_squares_with_covariance(sol,x_target)
    print(total_error,end="\r")
    return (total_error,)

In [10]:
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)
labels = ['kcat1','kcat2','Km1','Km2',  'Tmax', 'Ken',  'kAB',  'kAN',  'kAOH', 'kNH']
eps = 0.001


param_bounds = [(eps,4), # Kcat1
              (eps,4), # Kcat2
              (eps,30), # Km1
              (eps,30), # Km2
              (eps,1), # Tmax
              (eps,50), # Ken
              (eps,5000), # kAB
              (eps,500), # kAN
              (eps,500), # kAOH
              (eps,5000), # kNH
              ] 
def create_individual():
    return creator.Individual([np.random.uniform(low, high) for low, high in param_bounds])

toolbox = base.Toolbox()
toolbox.register("individual", create_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("mate", tools.cxBlend, alpha=0.7)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.5, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", objective_function)

In [11]:
population = toolbox.population(n=100)
n_generations = 200

# Statistics to keep track of the optimization process
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("min", np.min)
stats.register("max", np.max)

In [12]:
population, logbook = algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.2, ngen=n_generations, 
                                            stats=stats, verbose=True)

gen	nevals	avg    	min    	max    
0  	100   	1346.36	1235.03	1411.03
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
1  	66    	inf    	1230.78	inf    
Solver failed with error: ODE solver failed to converge

  return lu_factor(A, overwrite_a=True)


Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
12 	59    	inf    	1171.17	inf    
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
13 	65    	inf    	1168.97	inf    
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solver failed to converge
Solver failed with error: ODE solv

In [26]:
best_individual = tools.selBest(population, k=1)[0]
print("Best individual is: %s\nwith fitness: %s" % (best_individual, best_individual.fitness.values))
all_par =pd.DataFrame(np.array(population)).loc[pd.DataFrame(np.array(population)).iloc[:,4] < 1,:].to_numpy()
#pd.DataFrame(np.array(population))
#p = [4.72190691e-01, 9.91495733e-02, 4.15003632e+01, 2.26537541e+01,
#       1.29787111e+00, 3.75569963e+01, 1.77107182e+03, 2.31904940e+00,
#       1.91229927e+01, 5.17354504e+02]
all_par.shape

Best individual is: [0.18091324144347565, 0.6487804502653451, 14.517140241628688, -29.772290459747346, -0.7369871937865347, 45.45381695499116, 2811.6697995160357, 190.1928700411113, 80.8706161503031, 4737.2859651561275]
with fitness: (1149.819282580532,)


(100, 10)

In [27]:
P_new = np.array(best_individual)

In [30]:
labels = ['kcat1',
'kcat2',
'Km1',
'Km2',  
'Tmax', 
'Ken',  
'kAB',  
'kAN',  
'kAOH', 
'kNH']
for i in range(10):
    print(labels[i],': ',P_new[i])

kcat1 :  0.18091324144347565
kcat2 :  0.6487804502653451
Km1 :  14.517140241628688
Km2 :  -29.772290459747346
Tmax :  -0.7369871937865347
Ken :  45.45381695499116
kAB :  2811.6697995160357
kAN :  190.1928700411113
kAOH :  80.8706161503031
kNH :  4737.2859651561275


In [31]:
def model_eval(x_model, t_model, val_x, val_t):
    fig, axis = plt.subplots(2, 2, figsize=[6, 6])
    labels = ['POH-PGME','Amoxicillin','6-APA','POHPG']
    mlabels = ['POH-PGME model','Amoxicillin model','6-APA model','POHPG model']
    style = ['.r','.g','.b','.k']
    mstyle = ['-k','-k','-k','-k']    
    for idx,ax in enumerate(axis.flatten()):
        ax.plot(val_t, val_x[:, idx], style[idx], label=labels[idx],linewidth=0.5,markersize=5)
        ax.plot(t_model, x_model[:, idx], mstyle[idx], label=mlabels[idx],linewidth=0.9,markersize=3)
        ax.set_title(f'{labels[idx]}')
        ax.set_ylim([val_x[:, idx].min()-5, val_x[:, idx].max()+5])
        ax.set_xlim([0, val_t[-1]+5])
        ax.legend()

In [32]:
def mass_deviation(model_output, experimental_data):
    
    exp_mass = np.sum(model_output, axis=1)    
    model_mass = np.sum(experimental_data, axis=1)

    mass_deviation = np.sum(np.abs(exp_mass - model_mass))
    return mass_deviation

In [18]:
from sklearn.metrics import root_mean_squared_error,r2_score
import seaborn as sns
def eval_all(P_new):
    total_error = 0
    surface_error = np.zeros((len(data_frames),3)) # columns = mse error, ester init, apa init 
    total_mass_error = 0
    for idx,df in enumerate(data_frames):
        x_target = df.loc[:,x_names].to_numpy()
        t_target = df['apa_t'].to_numpy()
        
        ic = x_target[0]
        surface_error[idx,1] = ic[0] # ester
        surface_error[idx,2] = ic[2] # apa
        
        t_model = np.linspace(t_target[0],t_target[-1],40)
        
        sol = ode15s_amox(P_new,ic,t_model)
        sol_val = ode15s_amox(P_new,ic,t_target).T

        error = root_mean_squared_error(x_target, sol_val)
        mass_error = mass_deviation(sol_val,x_target)
        r2 = r2_score(x_target,sol_val)

        surface_error[idx,0] = error

        total_error += error
        total_mass_error += mass_error
        print(ic)
        print(f'Error {ic}: {error}\tmass: {mass_error}\tr2: {r2}')

        x_model = sol.T
        #model_eval(x_model,t_model,x_target,t_target)
    surfacedf = pd.DataFrame(surface_error,columns=['RMSE','Ester','APA'])
    
    #plt.figure(figsize=(6,6))
    #sns.scatterplot(data=surfacedf, x='Ester', y='APA', size='RMSE', hue='RMSE', sizes=(20, 200),legend='full')
    #plt.legend(title='Error Value', loc='upper right', bbox_to_anchor=(-0.15, 1))
    #sns.heatmap(surfacedf,annot=True,fmt=".3f")
    print(f'Total error: {total_error}\tmass error:{total_mass_error}')
    return total_error,total_mass_error
#eval_all(P_new)

In [19]:
eval_all(best_individual)

[80.  0. 40.  0.]
Error [80.  0. 40.  0.]: 79.08921509701742	mass: 1047.996761074141	r2: -238.73032474328156
[40.5         0.         21.75        3.37262013]
Error [40.5         0.         21.75        3.37262013]: 53.14487233294213	mass: 613.5734178426054	r2: -1426.3834115474717
[30.4   0.   43.    2.55]
Error [30.4   0.   43.    2.55]: 56.65449244959389	mass: 686.6652963439358	r2: -1091.5087502540423
[4.00389864e+01 6.40149834e-02 1.00045181e+02 1.42108547e+00]
Error [4.00389864e+01 6.40149834e-02 1.00045181e+02 1.42108547e+00]: 39.36245859271387	mass: 369.13849293324745	r2: -261.9362919389021
[55.        0.       55.        0.623377]
Error [55.        0.       55.        0.623377]: 3.3027894921308247	mass: 27.431657912840393	r2: -10.907379454647893
[55.  0.  5.  0.]
Error [55.  0.  5.  0.]: 2.419093240284645	mass: 6.633675536713724	r2: -4.368743566746588
[55.  0. 10.  0.]
Error [55.  0. 10.  0.]: 2.2913226193624676	mass: 22.379280302006862	r2: -2.8692184646620427
[32.5  0.  78.   2

(421.5639914311987, 4655.272240496401)

In [20]:
# %pip install openpyxl
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import scipy
import numpy as np
from scipy.integrate import solve_ivp
from scipy.optimize import minimize
from sklearn.metrics import r2_score 
from sklearn.metrics import root_mean_squared_error
from scipy.optimize import differential_evolution, minimize
import glob
import os

In [None]:
all_par_df = pd.DataFrame(np.zeros((len(all_par),len(all_par[0])+2)),columns=[*labels,'RMSE','Mass_error'])
print(all_par_df)
for idx,par in enumerate(all_par):
    method = 'Nelder-Mead'
    options = {'maxiter':2000}
    record = []
    all_error = []

    # Maybe it will not be the best optimization, some parameters values must be high
    result = minimize(objective_function, par, method=method,bounds=param_bounds,options=options)

    # Optimized parameters
    optimized_parameters = result.x
    print("Optimized Parameters:", optimized_parameters)
    P_new = optimized_parameters
    total_error, total_mass = eval_all(P_new)
    print(np.hstack((np.array(P_new),np.array([total_error,total_mass]))))
    all_par_df.iloc[idx,:] = np.hstack((np.array(P_new),np.array([total_error,total_mass])))


In [3]:
all_par_df.sort_values('RMSE')

NameError: name 'all_par_df' is not defined

In [23]:
all_par_df.to_csv('./all_par_ga_mut.csv')

In [1]:
print('a')

a
