In [1]:
# ########################################################## #
#                                                            #
# Name: KEV:Constant Evaluator                               #
# Author: GGamov                                             #
# Date: 2019                                                 #
#                                                            #
# ########################################################## #

# import libraries -------------------------------------------

import pandas as pd
from openpyxl import load_workbook
import re
import io
import os

# basic input ------------------------------------------------
 
def _kev_input_dir(_subdir):
    """Return the relative input directory for ``_subdir``, with a trailing slash."""
    if _subdir != '':
        _subdir = '/' + _subdir
    return '../../input' + _subdir + '/'


def _kev_first_match(pattern, names, where):
    """Return the first entry of ``names`` that ``pattern`` matches (``re.search``).

    Raises ``IndexError`` when nothing matches: the same exception type the
    former bare ``[0]`` lookup produced, but with a message naming the pattern
    and the place that was searched.
    """
    r = re.compile(pattern)
    for name in names:
        if r.search(name):
            return name
    raise IndexError('nothing matching pattern {!r} found in {} (available: {})'
                     .format(pattern, where, list(names)))


def eq_scripts_load(_sep, _subdir, _file):
    """Load the raw input tables either from one XLSX workbook or from text files.

    Parameters
    ----------
    _sep : str
        Column separator for the plain text files. Not used when an XLSX
        file is given.
    _subdir : str
        Sub-directory of ``../../input`` holding the data; ``''`` means
        ``../../input`` itself.
    _file : str
        Name of the XLSX workbook inside that directory. An empty string
        switches to the plain text mode, where every table is a separate
        file found by its name.

    Returns
    -------
    tuple
        ``(st_coeff_data, lg_k_data, con_data, type_con, heats_data, targets,
        delta_H)``. All items are DataFrames except ``type_con``, which is the
        first row of the concentrations table as a Series; ``con_data`` takes
        its header from the second row of the same table.

    Raises
    ------
    IndexError
        If a required sheet (XLSX mode) or file (text mode) is missing.
    """

    input_dir = _kev_input_dir(_subdir)

    # if specific file selected it should be XLSX one
    if _file != "":

        path = input_dir + _file

        # open excel file only to learn the sheet names, then release it
        with open(path, "rb") as f:
            inmemory_file = io.BytesIO(f.read())
        wb = load_workbook(inmemory_file, read_only = True)
        try:
            sheet_names = list(wb.sheetnames)
        finally:
            wb.close()

        def sheet(pattern):
            return _kev_first_match(pattern, sheet_names, 'workbook ' + path)

        # read data
        st_coeff_data = pd.read_excel(
            path, sheet_name = sheet(r'^(input\_)*stoich(iometric)*\_coefficients*$'))

        # the concentrations sheet is read twice: the body (header on the
        # second row) and the first row alone
        con_sheet = sheet(r'^(input\_)*concentrations*$')
        con_data = pd.read_excel(path, sheet_name = con_sheet, header = 1)
        type_con = pd.read_excel(path, sheet_name = con_sheet
                                 , header = None, nrows = 1).iloc[0,:]

        lg_k_data = pd.read_excel(path, sheet_name = sheet(r'^(input\_)*k\_constants\_log10$'))
        heats_data = pd.read_excel(path, sheet_name = sheet(r'heats'))
        targets = pd.read_excel(path, sheet_name = sheet(r'targets'), header = None)
        delta_H = pd.read_excel(path, sheet_name = sheet(r'enthalpies'))

    # use a bunch of plain text files instead
    else:

        # sorted so that the chosen file does not depend on the arbitrary
        # order in which os.listdir reports directory entries
        file_names = sorted(os.listdir(path = input_dir))

        def text_file(pattern):
            return input_dir + _kev_first_match(pattern, file_names, 'directory ' + input_dir)

        st_coeff_data = pd.read_csv(
            text_file(r'^(input\_)*stoich(iometric)*\_coefficients*'), sep = _sep)

        # the numeric tables below are parsed with a decimal comma
        lg_k_data = pd.read_csv(
            text_file(r'^(input\_)*k\_constants\_log10'), sep = _sep, decimal = ",")

        con_file = text_file(r'^(input\_)*concentrations*')
        con_data = pd.read_csv(con_file, sep = _sep, decimal = ",", header = 1)
        type_con = pd.read_csv(con_file, sep = _sep, header = None, nrows = 1).iloc[0,:]

        heats_data = pd.read_csv(text_file(r'heats'), sep = _sep, decimal = ",")
        targets = pd.read_csv(text_file(r'targets'), sep = _sep, header = None)
        delta_H = pd.read_csv(text_file(r'enthalpies'), sep = _sep)

    return st_coeff_data, lg_k_data, con_data, type_con, heats_data, targets, delta_H 

In [2]:
# ########################################################## #
#                                                            #
# Name: KEV:Constant Evaluator                               #
# Author: GGamov                                             #
# Date: 2019                                                 #
#                                                            #
# ########################################################## #

# import libraries -------------------------------------------

import numpy as np
import re

# basic preprocessing ----------------------------------------
    
def eq_preproc(st_coeff_data, lg_k_data, con_data, type_con, heats_data, targets, delta_H):
    """Convert the raw input tables into the arrays used further on.

    Parameters
    ----------
    st_coeff_data : DataFrame
        Stoichiometric coefficients, one row per reaction product, with the
        product label in the ``'name'`` column.
    lg_k_data : DataFrame
        Values stacked (as floats) under the zeros of the formal reactions.
    con_data : DataFrame
        Concentrations per component, optionally with a ``'series'`` column.
        NOTE: when that column is absent it is added to the caller's frame
        in place (filled with ``''``).
    type_con : Series
        Concentration type labels; entries equal to ``'eq'`` are reported in
        ``ign_indices``. NOTE: extended in place together with ``con_data``.
    heats_data : DataFrame
        Rows are used by position after dropping the ``'data'`` column:
        row 0 -> volumes, row 1 minus row 2 -> heats, row 3 -> deviations.
    targets : DataFrame
        First row, from the second cell on, lists the names to find.
    delta_H : DataFrame
        Has a ``'Reaction'`` column; blank (missing) cells in the remaining
        columns mark the enthalpies to find.

    Returns
    -------
    tuple
        ``(ser_num, st_coeff_matrix, prod_names, lg_k, prod_names_con,
        con_matrix, ign_indices, ser_counts, ser_info, type_con, volumes,
        heats, devs, tar_num, dH_ind)``.
        ``ser_num``, ``ser_counts`` and ``ser_info`` are not further used yet.
    """

    # checking if there are several series: without a 'series' column all
    # rows are put into one unnamed series. The new type_con entry is
    # labelled with the column count of st_coeff_data ('name' included).
    if 'series' not in con_data.columns:
        con_data['series'], type_con[np.shape(st_coeff_data)[1]] = '', ''

    # series variables
    ser_info = con_data['series'].to_numpy()
    ser_num = np.shape(np.unique(ser_info))[0]

    # matrix of stoich coeff with formal reactions added: an identity block
    # (one row per base component) is stacked on top of the given rows
    st_coeff_matrix = st_coeff_data.drop('name', axis = 1).to_numpy()
    comp_num = np.shape(st_coeff_matrix)[1]
    st_coeff_matrix = np.vstack((np.eye(comp_num, dtype = int), st_coeff_matrix))

    # product names lists : full and base components only
    prod_names_con = list(con_data.drop('series', axis = 1))
    prod_names = prod_names_con + st_coeff_data['name'].tolist()

    # creating the vector of equilibrium constants including the formal reactions
    lg_k = np.vstack((np.zeros((comp_num, 1)), lg_k_data.to_numpy().astype(float)))

    # checking the consistency of reagent names in different sheets
    if prod_names_con != list(st_coeff_data.drop('name', axis = 1)):
        print('Check the consistency of reagent names!')

    # split concentrations matrix: one float array per series
    series_groups = con_data.groupby('series')
    con_matrix = [group.drop('series', axis = 1).to_numpy().astype(float)
                  for _, group in series_groups]
    ser_counts = series_groups.size().tolist()

    # creating vector of indices of components with predetermined concentrations
    ign_indices = np.array(type_con.index[type_con == 'eq'])

    # experimental data, converted once and then sliced by row
    heats_table = heats_data.drop('data', axis = 1).to_numpy()
    volumes = heats_table[0]
    heats = heats_table[1] - heats_table[2]
    devs = heats_table[3]

    # number of constant to find
    tar_names = set(targets.to_numpy()[0][1:])
    tar_num = [index for index, item in enumerate(prod_names) if item in tar_names]

    # number of enthalpy to find: rows with a blank enthalpy cell, shifted
    # past the formal reactions. The earlier `== []` comparison could never
    # be true, so this index was always empty.
    dH_missing = delta_H.drop('Reaction', axis = 1).isna().to_numpy()
    dH_ind = np.where(dH_missing)[0] + len(prod_names_con)

    return ser_num, st_coeff_matrix, prod_names, lg_k, prod_names_con, con_matrix, ign_indices, ser_counts, ser_info, type_con, volumes, heats, devs, tar_num, dH_ind  

In [3]:
# input for xlsx file: <_subdir>/<_file> under ../../input
# (_sep is only consulted when plain text files are read)
_subdir, _sep, _file = "calorimetry", ";", "test_1.xlsx"

# raw tables as read from disk
(st_coeff_data, lg_k_data, con_data, type_con,
 heats_data, targets, delta_H) = eq_scripts_load(_sep, _subdir, _file)

# preprocessed arrays
(ser_num, st_coeff_matrix, prod_names, lg_k, prod_names_con,
 con_matrix, ign_indices, ser_counts, ser_info, type_con,
 volumes, heats, devs, tar_num, dH_ind) = eq_preproc(
    st_coeff_data, lg_k_data, con_data, type_con, heats_data, targets, delta_H)

# show every raw table under its own heading
print('\nStoich coeff data', st_coeff_data, sep = '\n')

print('\nlg K', lg_k_data, sep = '\n')

print('\nconcentrations', con_data, sep = '\n')

print('\ntype con', type_con, sep = '\n')

print('\nexperimental data', heats_data, sep = '\n')

print('\ntargets', targets, sep = '\n')

print('\ndelta_H', delta_H, sep = '\n')


Stoich coeff data
   PLP  T3H  name
0    1    1  Comp

lg K
   lg_k
0  5.13

concentrations
         PLP           T3H series
0   0.000797  1.000000e-18       
1   0.000796  9.912290e-05       
2   0.000794  1.979165e-04       
3   0.000793  2.963825e-04       
4   0.000792  3.945225e-04       
5   0.000790  4.923381e-04       
6   0.000789  5.898309e-04       
7   0.000788  6.870025e-04       
8   0.000787  7.838545e-04       
9   0.000785  8.803885e-04       
10  0.000784  9.766060e-04       
11  0.000783  1.072509e-03       
12  0.000781  1.168098e-03       
13  0.000780  1.263375e-03       
14  0.000779  1.358342e-03       
15  0.000778  1.453001e-03       

type con
0    tot
1    tot
3       
Name: 0, dtype: object

experimental data
          data          1          2          3          4          5  \
0      volumes  15.000000  15.024997  15.049994  15.074991  15.099988   
1  observation   0.069951   0.069513   0.069854   0.071474   0.067108   
2     dilution  -0.001954   0.0

In [4]:
# dump every preprocessed value, grouped under the same headings as before

print('\nSeries', ser_num, ser_counts, ser_info, sep = '\n')

print('\nSt coeff matr', st_coeff_matrix, sep = '\n')

print('\nprod names', prod_names, prod_names_con, sep = '\n')

print('\nconst', lg_k, sep = '\n')

# ign_indices is printed right after the matrices, without a blank line
print('\nconcentrations', con_matrix, ign_indices, sep = '\n')

print('\ntype con', type_con, sep = '\n')

print('\nvolumes', volumes, sep = '\n')

print('\nheats', heats, sep = '\n')

print('\ndeviations', devs, sep = '\n')

print('\nwhat to find', tar_num, dH_ind, sep = '\n')


Series
1
[16]
['' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '']

St coeff matr
[[1 0]
 [0 1]
 [1 1]]

prod names
['PLP', 'T3H', 'Comp']
['PLP', 'T3H']

const
[[0.  ]
 [0.  ]
 [5.13]]

concentrations
[array([[7.97022000e-04, 1.00000000e-18],
       [7.95695999e-04, 9.91228990e-05],
       [7.94374403e-04, 1.97916525e-04],
       [7.93057190e-04, 2.96382517e-04],
       [7.91744338e-04, 3.94522502e-04],
       [7.90435825e-04, 4.92338095e-04],
       [7.89131631e-04, 5.89830903e-04],
       [7.87831733e-04, 6.87002520e-04],
       [7.86536110e-04, 7.83854532e-04],
       [7.85244742e-04, 8.80388513e-04],
       [7.83957608e-04, 9.76606026e-04],
       [7.82674686e-04, 1.07250863e-03],
       [7.81395956e-04, 1.16809785e-03],
       [7.80121398e-04, 1.26337525e-03],
       [7.78850991e-04, 1.35834233e-03],
       [7.77584715e-04, 1.45300060e-03]])]
[]

type con
0    tot
1    tot
3       
Name: 0, dtype: object

volumes
[15.       15.024997 15.049994 15.074991 15.099988 15.124985 15.14998