# Consolidating the NIST Keq data 

The Keqs of the NIST CSV file are condensed into a JSON file. The Keq values are complemented in the JSON with the experimental temperatures and pH values that will guide user selection of the appropriate Keq value to describe the system. The values are respectively listed with the references to facilitate verification of the reference source for each provided datum. The JSON may readily be imported to a Python scripted and used in calculations or model development.

In [8]:
#import libraries
import pandas
import numpy
import math
import json
import re

#import the final CSV file
final_csv = pandas.read_csv('2021-05-06_vetted & reorganized NIST database_01.csv')

#acquire a list of all enzymes
enzyme_list = []
empty_cell = ['nan', 'NaN', 'none', 'not given', '', ' ', None, numpy.nan]
for index, row in final_csv.iterrows():
    if final_csv.at[index, 'Enzyme:'] not in enzyme_list and final_csv.at[index, 'Enzyme:'] not in empty_cell:
        enzyme_list.append(final_csv.at[index, 'Enzyme:'])      
        
enzymes = []
for original_enzyme in enzyme_list:
    enzyme_name = re.search('(\w.*)',original_enzyme)
    enzymes.append(enzyme_name.group())

data_per_enzyme = {}
for enzyme in enzymes:
    # lists of the database varialbes
    keq_values_per_enzyme = []
    km_values_per_enzyme = []
    enthalpy_values_per_enzyme = []
    temperatures_per_enzyme = []
    phs_per_enzyme = []
    
    # lists of identifying whether the reference contains the identified variable 
    references_of_an_enzyme = []
    reaction_of_an_enzyme = []
    km_in_the_reference = []
    enthalpy_in_the_reference = []
    keqs_in_a_reference = []
    for index, row in final_csv.iterrows():
        iteration = 0
        if final_csv.at[index, 'Enzyme:'] == ' %s' %(enzyme):
            reaction_of_an_enzyme.append(final_csv.at[index, 'Reaction:'])
            if final_csv.at[index, 'Reference:'] not in empty_cell:
                references_of_an_enzyme.append(final_csv.at[index, 'Reference:'])
            elif final_csv.at[index, 'Reference:'] in empty_cell:
                references_of_an_enzyme.append('Ibid')
            
            # clean keqs are added to a list
            if final_csv.at[index, 'Keq'] not in empty_cell:
                #print(final_csv.at[index, 'Keq'])
                cleaned_keq = re.search('(\-?\d+\.?\d*)', '%s' %(final_csv.at[index, 'Keq']))
                #print(cleaned_keq.group())
                keq_values_per_enzyme.append(float(cleaned_keq.group())) 
                keqs_in_a_reference.append('True')
                temperatures_per_enzyme.append(final_csv.at[index, 'T [K]'])
                if final_csv.at[index, 'pH '] not in empty_cell:
                    phs_per_enzyme.append(final_csv.at[index, 'pH '])
                elif final_csv.at[index, 'pH '] in empty_cell:
                    phs_per_enzyme.append('nan')
            
            elif final_csv.at[index, 'Keq'] in empty_cell:
                keqs_in_a_reference.append('False')    
                
            # clean kms are added to a list
            if final_csv.at[index, 'Km'] not in empty_cell:
                #print(final_csv.at[index, 'Km\'])
                cleaned_km = re.search('(\-?\d+\.?\d*)', '%s' %(final_csv.at[index, 'Km']))
                #print(cleaned_km.group())
                km_values_per_enzyme.append(float(cleaned_km.group())) 
                if final_csv.at[index, 'Keq'] in empty_cell:
                    temperatures_per_enzyme.append(final_csv.at[index, 'T [K]'])
                    if final_csv.at[index, 'pH '] not in empty_cell:
                        phs_per_enzyme.append(final_csv.at[index, 'pH '])
                    elif final_csv.at[index, 'pH '] in empty_cell:
                        phs_per_enzyme.append('nan')
                    
            elif final_csv.at[index, 'Km'] in empty_cell:
                km_in_the_reference.append('False')
                
            # clean enthalpy values are added to a list
            if final_csv.at[index, 'Enthalpy [kJ / mol]'] not in empty_cell:
                #print(final_csv.at[index, 'Km\'])
                cleaned_enthalpy = re.search('(\-?\d+\.?\d*)', '%s' %(final_csv.at[index, 'Enthalpy [kJ / mol]']))
                #print(cleaned_km.group())
                enthalpy_in_the_reference.append('True')
                enthalpy_values_per_enzyme.append(float(cleaned_enthalpy.group())) 
                if final_csv.at[index, 'Keq'] in empty_cell and final_csv.at[index, 'Km'] in empty_cell:
                    temperatures_per_enzyme.append(final_csv.at[index, 'T [K]'])
                    if final_csv.at[index, 'pH '] not in empty_cell:
                        phs_per_enzyme.append(final_csv.at[index, 'pH '])
                    elif final_csv.at[index, 'pH '] in empty_cell:
                        phs_per_enzyme.append('nan')
                    
            elif final_csv.at[index, 'Enthalpy [kJ / mol]'] in empty_cell:
                enthalpy_in_the_reference.append('False')
                
                
            #loop through the unlabeled rows of each enzyme
            while final_csv.at[index + iteration, 'Enzyme:'] in empty_cell:
                if final_csv.at[index, 'Keq'] not in empty_cell:
                    #clean keqs are added to a list
                    cleaned_keq = re.search('(\-?\d+\.?\d*)', '%s' %(final_csv.at[index, 'Keq']))
                    #print(cleaned_keq)
                    keq_values_per_enzyme.append(float(cleaned_keq.group())) 
                    keqs_in_a_reference.append('True')
                    temperatures_per_enzyme.append(final_csv.at[index, 'T [K]'])
                    if final_csv.at[index, 'pH '] not in empty_cell:
                        phs_per_enzyme.append(final_csv.at[index, 'pH '])
                    elif final_csv.at[index, 'pH '] in empty_cell:
                        phs_per_enzyme.append('nan')

                elif final_csv.at[index, 'Keq'] in empty_cell:
                    keqs_in_a_reference.append('False')  
                    
                #clean kms are added to a list
                if final_csv.at[index, 'Km'] not in empty_cell:
                    #print(final_csv.at[index, 'Km\'])
                    cleaned_km = re.search('(\-?\d+\.?\d*)', '%s' %(final_csv.at[index, 'Km']))
                    #print(cleaned_km.group())
                    km_in_a_reference.append('True')
                    km_values_per_enzyme.append(float(cleaned_km.group())) 
                    if final_csv.at[index, 'Keq'] in empty_cell:
                        temperatures_per_enzyme.append(final_csv.at[index, 'T [K]'])
                        if final_csv.at[index, 'pH '] not in empty_cell:
                            phs_per_enzyme.append(final_csv.at[index, 'pH '])
                        elif final_csv.at[index, 'pH '] in empty_cell:
                            phs_per_enzyme.append('nan')

                elif final_csv.at[index, 'Km'] in empty_cell:
                    km_in_the_reference.append('False')
                    
                # clean enthalpy values are added to a list
                if final_csv.at[index, 'Enthalpy [kJ / mol]'] not in empty_cell:
                    #print(final_csv.at[index, 'Km\'])
                    cleaned_ethalpy = re.search('(\-?\d+\.?\d*)', '%s' %(final_csv.at[index, 'Enthalpy [kJ / mol]']))
                    #print(cleaned_km.group())
                    enthalpy_in_the_reference.append('True')
                    enthalpy_values_per_enzyme.append(float(cleaned_enthalpy.group())) 
                    if final_csv.at[index, 'Keq'] in empty_cell and final_csv.at[index, 'Km'] in empty_cell:
                        temperatures_per_enzyme.append(final_csv.at[index, 'T [K]'])
                        if final_csv.at[index, 'pH '] not in empty_cell:
                            phs_per_enzyme.append(final_csv.at[index, 'pH '])
                        elif final_csv.at[index, 'pH '] in empty_cell:
                            phs_per_enzyme.append('nan')

                elif final_csv.at[index, 'Enthalpy [kJ / mol]'] in empty_cell:
                    enthalpy_in_the_reference.append('False')
                    
                    
                #proceed to the next loop
                if iteration + index < 3979:
                    iteration += 1

    #processing the average and standard deviation Keq values
    if len(keq_values_per_enzyme) != 0:
        average_keq_per_enzyme = sum(keq_values_per_enzyme) / len(keq_values_per_enzyme)
        standard_deviation_keq_per_enzyme = math.sqrt(sum([(x - average_keq_per_enzyme)**2 for x in keq_values_per_enzyme]) / len(keq_values_per_enzyme))

    elif len(keq_values_per_enzyme) == 0:
        average_keq_per_enzyme = 'nan'
        standard_deviation_keq_per_enzyme = 'nan'
        
    #processing the average and standard deviation Km values
    if len(km_values_per_enzyme) != 0:
        average_km_per_enzyme = sum(km_values_per_enzyme) / len(km_values_per_enzyme)
        standard_deviation_km_per_enzyme = math.sqrt(sum([(x - average_km_per_enzyme)**2 for x in km_values_per_enzyme]) / len(km_values_per_enzyme))

    elif len(km_values_per_enzyme) == 0:
        average_km_per_enzyme = 'nan'
        standard_deviation_km_per_enzyme = 'nan' 
        
    #processing the average and standard deviation enthalpy values
    if len(enthalpy_values_per_enzyme) != 0:
        average_enthalpy_per_enzyme = sum(enthalpy_values_per_enzyme) / len(enthalpy_values_per_enzyme)
        standard_deviation_enthalpy_per_enzyme = math.sqrt(sum([(x - average_enthalpy_per_enzyme)**2 for x in enthalpy_values_per_enzyme]) / len(enthalpy_values_per_enzyme))

    elif len(enthalpy_values_per_enzyme) == 0:
        average_enthalpy_per_enzyme = 'nan'
        standard_deviation_enthalpy_per_enzyme = 'nan' 
        
        
    #store the information into a nested dictionary structure
    data_per_enzyme[enzyme] = {'reaction':reaction_of_an_enzyme,
                               'experimental temperatures':temperatures_per_enzyme,
                               'experimental phs':phs_per_enzyme,
                               'keq reference':references_of_an_enzyme,
                               'Keq':{'keq values in the reference':keqs_in_a_reference,
                                       'keqs':keq_values_per_enzyme, 
                                       'keq quantity':len(keq_values_per_enzyme), 
                                       'keq average':average_keq_per_enzyme, 
                                       'keq standard deviation':standard_deviation_keq_per_enzyme},
                               'Km':{'km values in the reference':km_in_the_reference,
                                    'km values':km_values_per_enzyme,
                                    'km average':average_km_per_enzyme,
                                    'km standard deviation':standard_deviation_km_per_enzyme},
                               'Enthalpy':{'enthalpy values in the reference':enthalpy_in_the_reference,
                                         'enthalpy values':enthalpy_values_per_enzyme,
                                         'enthalpy average':average_enthalpy_per_enzyme,
                                         'enthalpy standard deviation':standard_deviation_enthalpy_per_enzyme}
                              }


#export the dictionary as a JSON file
with open('2021-05-06_NIST consolidated.json', 'w') as output:
    json.dump(data_per_enzyme, output, indent = 4)

In [42]:
import pandas
import numpy

old = pandas.read_csv('2021-03-21_vetted + reorganized NIST_1.csv')
new = pandas.read_csv('2021-05-06_vetted & reorganized NIST database_01.csv')

empty_cell = ['nan', 'NaN', 'none', 'not given', '', ' ', None, numpy.nan]

# match the indexes for the reference IDs
reference_ids = {}
old_ids = old['Reference ID:'].tolist()
new_ids = new['Reference ID:'].tolist()
for id in old_ids:
    if id in empty_cell:
        continue
    old_id = old_ids.index(id)
    new_id = new_ids.index(id)
    reference_ids[id] = {'Old index': old_id,
                         'New index': new_id}

for id in reference_ids:
    print('\n')
    valid = True
    old_index = reference_ids[id]['Old index']
    new_index = reference_ids[id]['New index']
    '''display(old.iloc[old_index])
    display(new.iloc[new_index])'''
    while valid:
        '''if old.at[old_index, 'Enzyme'] != new.at[new_index, 'Enzyme:']:
            if old.at[old_index, 'Enzyme'] not in empty_cell:
                print('Old enzyme, index {}:'.format(old_index), old.at[old_index, 'Enzyme'])
                print('New enzyme, index {}:'.format(new_index), new.at[new_index, 'Enzyme:'])
                valid = False
        
        if old.at[old_index, 'Reaction'] != new.at[new_index, 'Reaction:']:
            if old.at[old_index, 'Reaction'] not in empty_cell:
                print('Old reaction, index {}:'.format(old_index), old.at[old_index, 'Reaction'])
                print('New reaction, index {}:'.format(new_index), new.at[new_index, 'Reaction:'])
                valid = False'''
        
        if old.at[old_index, 'T [K]'] != new.at[new_index, 'T [K]']:
            if old.at[old_index, 'T [K]'] not in empty_cell:
                print('Old temperature, index {}:'.format(old_index), old.at[old_index, 'T [K]'])
                print('New temperature, index {}:'.format(new_index), new.at[new_index, 'T [K]'])
                valid = False
        
        if old.at[old_index, 'Keq'] != new.at[new_index, 'Keq']:
            if old.at[old_index, 'Keq'] not in empty_cell:
                print('Old Keq, index {}:'.format(old_index), old.at[old_index, 'Keq'])
                print('New Keq, index {}:'.format(new_index), new.at[new_index, 'Keq'])
                valid = False
        
        if old.at[old_index, 'Enthalpy [kJ / mol]'] != new.at[new_index, 'Enthalpy [kJ / mol]']:
            if old.at[old_index, 'Enthalpy [kJ / mol]'] not in empty_cell:
                print('Old enthalpy, index {}:'.format(old_index), old.at[old_index, 'Enthalpy [kJ / mol]'])
                print('New enthalpy, index {}:'.format(new_index), new.at[new_index, 'Enthalpy [kJ / mol]'])
                valid = False
        
        if old.at[old_index, 'Km'] != new.at[new_index, 'Km']:
            if old.at[old_index, 'Km'] not in empty_cell:
                print('Old Km, index {}:'.format(old_index), old.at[old_index, 'Km'])
                print('New Km, index {}:'.format(new_index), new.at[new_index, 'Km'])
                valid = False
            
        if old.at[old_index, 'pH '] != new.at[new_index, 'pH ']:
            if old.at[old_index, 'pH '] not in empty_cell:
                print('Old pH, index {}:'.format(old_index), old.at[old_index, 'pH '])
                print('New pH, index {}:'.format(new_index), new.at[new_index, 'pH '])
                valid = False
        
        old_index += 1
        new_index += 1
    
    try:
        if re.search('(\d\d\w+.?\w+)', old.at[old_index, 'Reference ID:']):   
            print('{} is equivalent.'.format(id))
            
        elif not re.search('(\d\d\w+.?\w+)', old.at[old_index, 'Reference ID:']):   
            print('{} is different.'.format(id))
    
    except:
        print('ERROR\n', '='*len('ERROR'))
        print('Old Reference ID {}, index {}:'.format(id, old_index),old.at[old_index, 'Reference ID:'])
        
    '''for index, row in old.iterrows():
        if id == old.at[index, row['Reference ID:']]: 
            for index2, row2 in new.iterrows():
                while old.at[index, row] == new.at[index2, row2]:'''            



Old temperature, index 18: 301.15
New temperature, index 18: 266.15
Old Keq, index 18: 2.3
New Keq, index 18: 0.00018
26QUA/WOO_1205 is equivalent.


Old temperature, index 18: 301.15
New temperature, index 18: 266.15
Old Keq, index 18: 2.3
New Keq, index 18: 0.00018
29WOO_1206 is equivalent.


Old temperature, index 18: 301.15
New temperature, index 18: 266.15
Old Keq, index 18: 2.3
New Keq, index 18: 0.00018
31BOR/SCH_1141 is equivalent.


Old temperature, index 18: 301.15
New temperature, index 18: 266.15
Old Keq, index 18: 2.3
New Keq, index 18: 0.00018
34JAC_1142 is equivalent.


Old temperature, index 18: 301.15
New temperature, index 18: 266.15
Old Keq, index 18: 2.3
New Keq, index 18: 0.00018
34LOH/MEY_1169 is equivalent.


Old temperature, index 18: 301.15
New temperature, index 18: 266.15
Old Keq, index 18: 2.3
New Keq, index 18: 0.00018
34MEY/LOH_1100 is equivalent.


Old pH, index 19: 7
New pH, index 24: 7.0
ERROR
 =====
Old Reference ID 35AKA_1170, index 20: nan


Old pH

New pH, index 1080: 7.0
ERROR
 =====
Old Reference ID 59TAL/LEV_183, index 1039: nan


Old Keq, index 1041: 1.88E-02
New Keq, index 1083: 0.0188
Old pH, index 1041: 7
New pH, index 1083: 7.0
ERROR
 =====
Old Reference ID 59TAL/LEV_184, index 1042: nan


Old Keq, index 1044: 9.90E-03
New Keq, index 1086: 0.0099
Old pH, index 1044: 7
New pH, index 1086: 7.0
ERROR
 =====
Old Reference ID 59TAL/LEV_185, index 1045: nan


Old Keq, index 1047: 1.22E-06
New Keq, index 1089: 1.22e-06
59VAG/EAR_1221 is equivalent.


Old pH, index 1051: 8
New pH, index 1093: 8.0
ERROR
 =====
Old Reference ID 60AGO/ARA_1336, index 1052: nan


Old pH, index 1051: 8
New pH, index 1093: 8.0
ERROR
 =====
Old Reference ID 60ASH/WAH_1368, index 1052: nan


Old pH, index 1053: 8
New pH, index 1095: 8.0
60ASH/WAH_1369 is equivalent.


Old Keq, index 1056: 1
New Keq, index 1098: 1.0
60BLA_349 is equivalent.


Old Keq, index 1056: 1
New Keq, index 1098: 1.0
60COM/ROS_1117 is equivalent.


Old Keq, index 1056: 1
New Keq, in

New Keq, index 1929: 27.0
ERROR
 =====
Old Reference ID 71KAT_220, index 1878: nan


Old Keq, index 1877: 27
New Keq, index 1929: 27.0
ERROR
 =====
Old Reference ID 71KUN/STA_1384, index 1878: nan


Old Keq, index 1877: 27
New Keq, index 1929: 27.0
ERROR
 =====
Old Reference ID 71KUN/STA_1422, index 1878: nan


Old Keq, index 1877: 27
New Keq, index 1929: 27.0
ERROR
 =====
Old Reference ID 71MCC/CHA_959, index 1878: nan


Old Keq, index 1877: 27
New Keq, index 1929: 27.0
ERROR
 =====
Old Reference ID 71NOJ/TAN_399, index 1878: nan


Old Keq, index 1877: 27
New Keq, index 1929: 27.0
ERROR
 =====
Old Reference ID 71RAJ/LUM_965, index 1878: nan


Old Keq, index 1877: 27
New Keq, index 1929: 27.0
ERROR
 =====
Old Reference ID 71ROB_710, index 1878: nan


Old Keq, index 1877: 27
New Keq, index 1929: 27.0
ERROR
 =====
Old Reference ID 71RUD/JOH_833, index 1878: nan


Old Keq, index 1877: 27
New Keq, index 1929: 27.0
ERROR
 =====
Old Reference ID 71RUD/JOH_834, index 1878: nan


Old Keq, inde

Old enthalpy, index 3255: 23.7
New enthalpy, index 3313: 24.2
Old pH, index 3255: 7.32
New pH, index 3313: 7.3
86GOL/GAJ_1213 is equivalent.


Old pH, index 3257: 7
New pH, index 3316: 7.0
86GRA/ELL_620 is equivalent.


Old pH, index 3257: 7
New pH, index 3316: 7.0
86HUB/HUR_930 is equivalent.


Old pH, index 3260: 8
New pH, index 3319: 8.0
86KIM/LEE_601 is equivalent.


Old pH, index 3260: 8
New pH, index 3319: 8.0
86KON/POL_121 is equivalent.


Old pH, index 3260: 8
New pH, index 3319: 8.0
86KUP/FER_661 is equivalent.


Old Keq, index 3261: 1.40E-05
New Keq, index 3320: 1.4e-05
Old pH, index 3261: 7
New pH, index 3320: 7.0
86MEI/GAD_111 is equivalent.


Old Keq, index 3262: 3.00E-06
New Keq, index 3321: 3e-06
Old pH, index 3262: 7
New pH, index 3321: 7.0
86MEI/GAD_302 is equivalent.


Old Keq, index 3263: 1.00E-06
New Keq, index 3322: 1e-06
Old pH, index 3263: 7
New pH, index 3322: 7.0
86MEI/GAD_306 is equivalent.


Old Keq, index 3264: 3.70E-07
New Keq, index 3323: 3.7e-07
Old pH, i

KeyError: 3980

In [71]:
import pandas
import numpy

old = pandas.read_csv('2021-03-21_vetted + reorganized NIST_1.csv')
old = old.fillna(' ')
#display(old)
new = pandas.read_csv('2021-05-06_vetted & reorganized NIST database_01.csv')
#display(new)

empty_cell = ['nan', 'NaN', 'none', 'not given', '', ' ', None, numpy.nan]

# match the indexes for the reference IDs
reference_ids = {}
old_ids = old['Reference ID:'].tolist()
new_ids = new['Reference ID:'].tolist()
for id in old_ids:
    if id in empty_cell:
        continue
        
    old_id = old_ids.index(id)
    new_id = new_ids.index(id)
    reference_ids[id] = {'Old index': old_id,
                         'New index': new_id}

for id in reference_ids:
    print('\n')
    problem = False
    valid = True
    old_index = reference_ids[id]['Old index']
    new_index = reference_ids[id]['New index']
    '''display(old.iloc[old_index])
    display(new.iloc[new_index])'''
    error_string = []
    while valid:
        '''if old.at[old_index, 'Enzyme'] != new.at[new_index, 'Enzyme:']:
            if old.at[old_index, 'Enzyme'] not in empty_cell:
                print('Old {}, index {}:'.format(old_index), old.at[old_index, 'Enzyme'])
                print('New {}, index {}:'.format(new_index), new.at[new_index, 'Enzyme:'])
                valid = False
        
        if old.at[old_index, 'Reaction'] != new.at[new_index, 'Reaction:']:
            if old.at[old_index, 'Reaction'] not in empty_cell:
                print('Old reaction, index {}:'.format(old_index), old.at[old_index, 'Reaction'])
                print('New reaction, index {}:'.format(new_index), new.at[new_index, 'Reaction:'])
                valid = False'''
        
        if old.at[old_index, 'T [K]'] != new.at[new_index, 'T [K]']:
            if old.at[old_index, 'T [K]'] not in empty_cell:
                error_string.extend(['Old temperature, index {}: {}'.format(old_index, old.at[old_index, 'T [K]']),
                                    'New temperature, index {}: {}'.format(new_index, new.at[new_index, 'T [K]'])]
                                 )
                
                #valid = False
                problem = True
        
        if old.at[old_index, 'Keq'] != new.at[new_index, 'Keq']:
            if old.at[old_index, 'Keq'] not in empty_cell:
                error_string.extend(['Old Keq, index {}: {}'.format(old_index, old.at[old_index, 'Keq']),
                                    'New Keq, index {}: {}'.format(new_index, new.at[new_index, 'Keq'])]
                                 )
                
                #valid = False
                problem = True
        
        if old.at[old_index, 'Enthalpy [kJ / mol]'] != new.at[new_index, 'Enthalpy [kJ / mol]']:
            if old.at[old_index, 'Enthalpy [kJ / mol]'] not in empty_cell:
                error_string.extend(['Old enthalpy, index {}: {}'.format(old_index, old.at[old_index, 'Enthalpy [kJ / mol]']),
                                    'New enthalpy, index {}: {}'.format(new_index, new.at[new_index, 'Enthalpy [kJ / mol]'])]
                                 )

                #valid = False
                problem = True
        
        if old.at[old_index, 'Km'] != new.at[new_index, 'Km']:
            if old.at[old_index, 'Km'] not in empty_cell:
                error_string.extend(['Old Km, index {}: {}'.format(old_index, old.at[old_index, 'Km']),
                                    'New Km, index {}: {}'.format(new_index, new.at[new_index, 'Km'])]
                                 )

                #valid = False
                problem = True
            
        if old.at[old_index, 'pH '] != new.at[new_index, 'pH ']:
            if old.at[old_index, 'pH '] not in empty_cell:
                error_string.extend(['Old pH, index {}: {}'.format(old_index, old.at[old_index, 'pH ']),
                                    'New pH, index {}: {}'.format(new_index, new.at[new_index, 'pH '])]
                                 )

                #valid = False
                problem = True
        
        if old.at[old_index, 'Reference ID:'] != (id or ' '):
            print('{} is equivalent.'.format(id))
            valid = False
        
        old_index += 1
        new_index += 1
        
    if problem:
        print('ERROR: {}\n'.format(id), '='*len('{} ERROR'.format(id)))
        print('\n'.join(error_string))
        
    '''for index, row in old.iterrows():
        if id == old.at[index, row['Reference ID:']]: 
            for index2, row2 in new.iterrows():
                while old.at[index, row] == new.at[index2, row2]:'''            



26QUA/WOO_1205 is equivalent.


29WOO_1206 is equivalent.


31BOR/SCH_1141 is equivalent.


34JAC_1142 is equivalent.


34LOH/MEY_1169 is equivalent.


34MEY/LOH_1100 is equivalent.


35AKA_1170 is equivalent.
ERROR: 35AKA_1170
Old pH, index 19: 7
New pH, index 24: 7.0


35JAC/TAP_1207 is equivalent.
ERROR: 35JAC/TAP_1207
Old pH, index 19: 7
New pH, index 24: 7.0
Old pH, index 20: 7
New pH, index 25: 7.0


35MEY/KIE_1385 is equivalent.


35MEY/KIE2_1386 is equivalent.
ERROR: 35MEY/KIE2_1386
Old Keq, index 25: 4
New Keq, index 26: 4.0
Old temperature, index 26: 293.15
New temperature, index 27: 273.15
Old enthalpy, index 26: 58
New enthalpy, index 27:  


35MEY/LOH_1102 is equivalent.


35MEY/SCH_797 is equivalent.
ERROR: 35MEY/SCH_797
Old enthalpy, index 28: -31
New enthalpy, index 35: -31.0


35MEY/SCH_798 is equivalent.


35MEY/SCH_802 is equivalent.


35MEY/SCH_805 is equivalent.


35MEY_1101 is equivalent.
ERROR: 35MEY_1101
Old Keq, index 33: 0.000071
New Keq, index 40: 7.1e-05




68LON/DAL_153 is equivalent.


68MAY/AND_1379 is equivalent.


68MIZ/WEE_1200 is equivalent.


68NIX/BLA_298 is equivalent.
ERROR: 68NIX/BLA_298
Old Keq, index 1641: 1.20E-03
New Keq, index 1683: 1.2E-3
Old pH, index 1641: 8
New pH, index 1683: 8.0


68POT/GLO_386 is equivalent.
ERROR: 68POT/GLO_386
Old pH, index 1642: 7
New pH, index 1684: 7.0
Old Keq, index 1643: 1.75E-04
New Keq, index 1685: 0.000175


68REE/MEN_713 is equivalent.
ERROR: 68REE/MEN_713
Old Keq, index 1643: 1.75E-04
New Keq, index 1685: 0.000175
Old Keq, index 1644: 9.35E-04
New Keq, index 1686: 0.000935


68SAL/NOR_1253 is equivalent.


68SAL/NOR_1259 is equivalent.


68SAL/NOR_1268 is equivalent.
ERROR: 68SAL/NOR_1268
Old pH, index 1657: 7
New pH, index 1699: 7.0


68SU/RUS_642 is equivalent.
ERROR: 68SU/RUS_642
Old pH, index 1657: 7
New pH, index 1699: 7.0
Old pH, index 1658: 7
New pH, index 1700: 7.0


68SU/RUS_660 is equivalent.
ERROR: 68SU/RUS_660
Old pH, index 1670: 7
New pH, index 1712: 7.0
Old pH, index 1671

In [65]:
list = ['q', 's', 'e']

string = ''

string = string.join(list)

print(string)

qse
