# SAMPL8-pKa Macrostate Analysis (pKa Submission Class Modification)
The goal of this script is to try to incorperate multi-protic pKa compounds into the macrostate analysis. This mainly involves modification of the pKa submission class.

In [1]:
import os
import glob
import io
import collections
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import scipy.stats
from scipy.special import logsumexp
from scipy.optimize import fsolve

In [2]:
pKa_SUBMISSIONS_DIR_PATH = '/Users/aakankschit/Desktop/Mobley Lab/scratches and workthroughs/SAMPL8/pKa_analysis/submissions'
EXPERIMENTAL_DATA_FILE_PATH = '/Users/aakankschit/Desktop/Mobley Lab/scratches and workthroughs/SAMPL8/pKa_analysis/pKa.csv'
USER_MAP_FILE_PATH = '/Users/aakankschit/Desktop/Mobley Lab/SAMPL8/physical_properties/pKa/multi_protic_test/SAMPL8-pKa-user-map.csv'

## Utility Classes
Used to output error/warning messages in the event of an exception.

In [3]:
class IgnoredSubmissionError(Exception):
    """Exception used to signal a submission that must be ignored."""
    pass


class BadFormatError(Exception):
    """Exception used to signal a submission with unexpected formatting."""
    pass

## SAMPL Submission Class
A generic SAMPL submission generator.
* Inputs -> file_path : str (Submission File)
* Exception -> If the submission ID is among the ignored submissions.

In [4]:
class SamplSubmission:
    """A generic SAMPL submission.
    Parameters
    ----------
    file_path : str
        The path to the submission file.
    Raises
    ------
    IgnoredSubmission
        If the submission ID is among the ignored submissions.
    """
    # Section of the submission file.
    SECTIONS = {}

    # Sections in CSV format with columns names.
    CSV_SECTIONS = {}

    def __init__(self, file_path, user_map):
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        file_data = file_name.split('-')

        # Load predictions.
        sections = self._load_sections(file_path)  # From parent-class.
        self.data = sections['Predictions']  # This is a list
        self.data = pd.DataFrame(data=self.data) # Now a DataFrame
        self.file_name = file_name
        self.method_name = sections['Name'][0]

        # Check if this is a reference submission
        self.reference_submission = False

        if "REF" in self.method_name or "NULL" in self.method_name:
            print("REF found: ", self.method_name)
            self.reference_submission = True
        #print(self.data)


    @classmethod
    def _read_lines(cls, file_path):
        """Generator to read the file and discard blank lines and comments."""
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            for line in f:
                # Strip whitespaces.
                line = line.strip()
                # Don't return blank lines and comments.
                if line != '' and line[0] != '#':
                    yield line

    @classmethod
    def _load_sections(cls, file_path):
        """Load the data in the file and separate it by sections."""
        #print(os.path.basename(file_path))
        sections = {}
        current_section = None
        for line in cls._read_lines(file_path):
            # Check if this is a new section.
            if line[:-1] in cls.SECTIONS:
                current_section = line[:-1]
            else:
                if current_section is None:
                    import pdb
                    pdb.set_trace()
                try:
                    sections[current_section].append(line)
                except KeyError:
                    sections[current_section] = [line]

        # Check that all the sections have been loaded.
        found_sections = set(sections.keys())
        if found_sections != cls.SECTIONS:
            raise BadFormatError('Missing sections: {}.'.format(found_sections - cls.SECTIONS))

        # Create a Pandas dataframe from the CSV format.
        for section_name in cls.CSV_SECTIONS:
            csv_str = io.StringIO('\n'.join(sections[section_name]))
            columns = cls.CSV_SECTIONS[section_name]
            id_column = columns[0]
            #print("trying", sections)
            section = pd.read_csv(csv_str, index_col=id_column, names=columns, skipinitialspace=True)
            #section = pd.read_csv(csv_str, names=columns, skipinitialspace=True)
            sections[section_name] = section
        return sections

    @classmethod
    def _create_comparison_dataframe(cls, column_name, submission_data, experimental_data):
        """Create a single dataframe with submission and experimental data."""
        # Filter only the systems IDs in this submissions.


        experimental_data = experimental_data[experimental_data.index.isin(submission_data.index)] # match by column index
        # Fix the names of the columns for labelling.
        submission_series = submission_data[column_name]
        submission_series.name += ' (calc)'
        experimental_series = experimental_data[column_name]
        experimental_series.name += ' (expt)'

        # Concatenate the two columns into a single dataframe.
        return pd.concat([experimental_series, submission_series], axis=1)

    @classmethod
    def _create_single_dataframe(cls, column_name, submission_data):
        """Create a single dataframe with submission and experimental data."""
        # Filter only the systems IDs in this submissions.


        #experimental_data = experimental_data[experimental_data.index.isin(submission_data.index)] # match by column index
        # Fix the names of the columns for labelling.
        submission_series = submission_data[column_name]
        #submission_series.name += ' (calc)'
        #experimental_series = experimental_data[column_name]
        #experimental_series.name += ' (expt)'

        # Concatenate the two columns into a single dataframe.
        return pd.DataFrame(submission_series)#, axis=1) #pd.concat([submission_series], axis=1)


## Classes for loading submissions
The first class here is the pKa submission class which is used to define the plots and create a dataframe from the experimental and theoretically calculated datasets.
### Dependencies:
* SampleSubmission Class
* *compute_bootstrap_statistics*
* *getQQdata*

In [5]:
def compute_bootstrap_statistics(samples, stats_funcs, percentile=0.95, n_bootstrap_samples=1000):
    """Compute bootstrap confidence interval for the given statistics functions."""
    # Handle case where only a single function is passed.
    #print("\nSAMPLES:\n", samples)

    try:
        len(stats_funcs)
    except TypeError:
        stats_funcs = [stats_funcs]

    # Compute mean statistics.
    statistics = [stats_func(samples) for stats_func in stats_funcs]

    # Generate bootstrap statistics.
    bootstrap_samples_statistics = np.zeros((len(statistics), n_bootstrap_samples))
    for bootstrap_sample_idx in range(n_bootstrap_samples):
        samples_indices = np.random.randint(low=0, high=len(samples), size=len(samples)) # picks X num of new samples to use
        for stats_func_idx, stats_func in enumerate(stats_funcs): #go through each stat
            bootstrap_samples_statistics[stats_func_idx][bootstrap_sample_idx] = stats_func(samples[samples_indices])#compute new stat


    # Compute confidence intervals.
    percentile_index = int(np.floor(n_bootstrap_samples * (1 - percentile) / 2)) - 1
    bootstrap_statistics = []
    for stats_func_idx, samples_statistics in enumerate(bootstrap_samples_statistics):
        samples_statistics.sort()
        stat_lower_percentile = samples_statistics[percentile_index]
        stat_higher_percentile = samples_statistics[-percentile_index+1]
        confidence_interval = (stat_lower_percentile, stat_higher_percentile)
        bootstrap_statistics.append([statistics[stats_func_idx], confidence_interval, samples_statistics])

    return bootstrap_statistics

#### QQPlot Function
**Dependencies**
* *compute_range_table*
* *fracfound_vs_error*
* *bootstrap_exptnoise*
* *fracfound_vs_error*

In [6]:
def normal(y):
    """Return unit normal distribution value at specified location."""
    return 1. / np.sqrt(2 * np.pi) * np.exp(-y ** 2 / 2.)

def compute_range_table(stepsize=0.001, maxextent=10):
    """Compute integrals of the unit normal distribution and return these tabulated.
    Returns:
    --------
    - range: NumPy array giving integration range (x) where integration range runs -x to +x
    - integral: NumPy arrange giving integrals over specified integration range.
    Arguments (optional):
    ---------------------
    - stepsize: Step size to advance integration range by each trial. Default: 0.001
    - maxextent: Maximum extent of integration range
"""
    # Calculate integration range
    x = np.arange(0, maxextent, stepsize)  # Symmetric, so no need to do negative values.

    # Calculate distribution at specified x values
    distrib = normal(x)

    integral = np.zeros(len(x), float)
    for idx in range(1, len(x)):
        integral[idx] = 2 * scipy.integrate.trapz(distrib[0:idx + 1], x[0:idx + 1])  # Factor of 2 handles symmetry

    return x, integral

In [7]:
def get_range(integral, rangetable, integraltable):
    """Use rangetable and integral table provided (i.e. from compute_range_table) to find the smallest range of integration for which the integral is greater than the specified value (integral). Return this range as a float."""

    idx = np.where(integraltable > integral)[0]
    return rangetable[idx[0]]

In [8]:
def fracfound_vs_error(calc, expt, dcalc, dexpt, integral_range, integral):
    """
    Takes in calculated and experimental values, their uncertainties as well as
    """
    # Fraction of Gaussian distribution we want to compute
    X = np.arange(0, 1.0, 0.01)
    Y = np.zeros(len(X))

    for (i, x) in enumerate(X):
        # Determine integration range which gives us this much probability
        rng = get_range(x, integral_range, integral)
        # print x, rng

        # Loop over samples and compute fraction of measurements found
        y = 0.
        # for n in range(0, len(DGcalc)):
        #    sigma_eff = sqrt( sigma_calc[n]**2 + sigma_expt[n]**2 )
        #    absdiff = abs( DGcalc[n] - DGexpt[n])
        #    #print absdiff, n, sigma_eff
        #    if absdiff < rng * sigma_eff: #If the difference falls within the specified range of sigma values, then this is within the range we're looking at; track it
        #        #print "Incrementing y for n=%s, x = %.2f" % (n, x)
        #        y += 1./len(DGcalc)
        # Rewrite for speed
        sigma_eff = np.sqrt(np.array(dcalc) ** 2 + np.array(dexpt) ** 2)
        absdiff = np.sqrt((np.array(calc) - np.array(expt)) ** 2)
        idx = np.where(absdiff < rng * sigma_eff)[0]
        Y[i] = len(idx) * 1. / len(calc)

    # print Y
    # raw_input()

    return X, Y

In [9]:
def bootstrap_exptnoise(calc1, expt1, exptunc1, returnunc=False):
    """Take two datasets (equal length) of calculated and experimental values. Construct new datasets of equal length by picking, with replacement, a set of indices to use from both sets. Return the two new datasets. To take into account experimental uncertainties, random noise is added to the experimental set, distributed according to gaussians with variance taken from the experimental uncertainties. Approach suggested by J. Chodera.
Optionally, 'returnunc = True', which returns a third value -- experimental uncertainties corresponding to the data points actually used."""

    # Make everything an array just in case
    calc = np.array(calc1)
    expt = np.array(expt1)
    exptunc = np.array(exptunc1)
    npoints = len(calc)

    # Pick random datapoint indices
    idx = np.random.randint(0, npoints,
                            npoints)  # Create an array consisting of npoints indices, where each index runs from 0 up to npoints.

    # Construct initial new datasets
    newcalc = calc[idx]
    newexpt = expt[idx]
    newuncExp = exptunc[idx]

    # Add noise to experimental set
    noise = np.random.normal(0.,
                             exptunc)  # For each data point, draw a random number from a normal distribution centered at 0, with standard devaitions given by exptunc
    newexpt += noise

    if not returnunc:
        return newcalc, newexpt
    else:
        return newcalc, newexpt, newuncExp

In [10]:
def getQQdata(calc, expt, dcalc, dexpt, boot_its):
    """
    Takes calculated and experimental values and their uncertainties
    Parameters
    ----------
    calc: predicted pKa value
    expt: experimental pKa value
    dcalc: predicted model uncertainty
    dexp: experimental pKa SEM
    Outputs
    -------
    X: array of x axis values for QQ-plot
    Y: array of y axis values for QQ-plot
    slope: Error Slope (ES) of line fit to QQ-plot
    slopes: Erros Slope (ES) of line fit to QQ-plot of bootstrapped datapoints
    """
    integral_range, integral = compute_range_table()
    #print("calc, expt, dcalc, dexpt, integral_range, integral\n",calc, expt, dcalc, dexpt, integral_range, integral)
    X, Y = fracfound_vs_error(calc, expt, dcalc, dexpt, integral_range, integral)
    xtemp = X[:, np.newaxis]
    coeff, _, _, _ = np.linalg.lstsq(xtemp, Y,rcond=-1)
    slope = coeff[0]
    slopes = []
    for it in range(boot_its):
        n_calc, n_expt, n_dexpt = bootstrap_exptnoise(calc, expt, dexpt, returnunc=True)
        nX, nY = fracfound_vs_error(n_calc, n_expt, dcalc, n_dexpt, integral_range, integral)
        a, _, _, _ = np.linalg.lstsq(xtemp, nY,rcond=-1)
        slopes.append(a[0])
    return X, Y, slope, np.array(slopes).std(), slopes

In [11]:
class pKaSubmission(SamplSubmission):
    """A submission for pKa challenge.
    Parameters
    ----------
    file_path : str
        The path to the submission file
    Raises
    ------
    IgnoredSubmission
        If the submission ID is among the ignored submissions.
    """

    # Section of the submission file.
    SECTIONS = {"Predictions",
                "Participant name",
                "Participant organization",
                "Name",
                "Compute time",
                "Computing and hardware",
                "Software",
                "Category",
                "Method",
                "Ranked"}

    # Sections in CSV format with columns names.
    #CSV_SECTIONS = {‘Predictions’: (“Molecule ID”, “pKa mean”, “pKa SEM”, “pKa model uncertainty”)}
    CSV_SECTIONS = {"Predictions": ("Molecule ID",
                                    "ID tag",
                                    "total charge",
                                    "pKa mean",
                                    "pKa SEM",
                                    "pKa model uncertainty")}


    def __init__(self, file_path, user_map):
        super().__init__(file_path, user_map)

        file_name = os.path.splitext(os.path.basename(file_path))[0]
        print("file_name: \n", file_name)
        file_data = file_name.split('-')

        # Load predictions.
        sections = self._load_sections(file_path)  # From parent-class.
        self.data = sections['Predictions']  # This is a pandas DataFrame.
        self.method_name = sections['Name'][0]
        self.category = sections['Category'][0]
        self.participant = sections['Participant name'][0].strip()
        self.organization = sections['Participant organization'][0].strip()
        self.ranked = sections['Ranked'][0].strip() =='True'

        # Check if this is a reference submission
        self.reference_submission = False
        if "REF" in self.method_name or "NULL" in self.method_name:
            self.reference_submission = True




    def compute_pKa_statistics(self, experimental_data, stats_funcs):
        data = self._create_comparison_dataframe('pKa mean', self.data, experimental_data)

        # Create lists of stats functions to pass to compute_bootstrap_statistics.
        stats_funcs_names, stats_funcs = zip(*stats_funcs.items())
        #bootstrap_statistics = compute_bootstrap_statistics(data.as_matrix(), stats_funcs, n_bootstrap_samples=10000) #10000

        bootstrap_statistics = compute_bootstrap_statistics(data.to_numpy(), stats_funcs, n_bootstrap_samples=10000)

        # Return statistics as dict preserving the order.
        return collections.OrderedDict((stats_funcs_names[i],
                                        bootstrap_statistics[i])
                                       for i in range(len(stats_funcs)))

    def compute_pKa_model_uncertainty_statistics(self,experimental_data):

        # Create a dataframe for data necessary for error slope analysis
        # Experimental
        expt_pKa_series = experimental_data["pKa mean"]
        expt_pKa_SEM_series = experimental_data["pKa SEM"]
        # Predictions
        pred_pKa_series = self.data["pKa mean"]
        pred_pKa_SEM_series = self.data["pKa SEM"]
        pred_pKa_mod_unc_series = self.data["pKa model uncertainty"]

        # Concatenate the columns into a single dataframe.
        data_exp =  pd.concat([expt_pKa_series, expt_pKa_SEM_series], axis=1)
        data_exp = data_exp.rename(index=str, columns={"pKa mean": "pKa mean (expt)", "pKa SEM": "pKa SEM (expt)"})

        data_mod_unc = pd.concat([data_exp, pred_pKa_series, pred_pKa_SEM_series, pred_pKa_mod_unc_series], axis=1)
        #print("\nBEFORE data_mod_unc:\n", data_mod_unc)
        data_mod_unc = data_mod_unc.rename(index=str, columns={"pKa mean": "pKa mean (calc)", "pKa SEM": "pKa SEM (calc)", "pKa model uncertainty": "pKa model uncertainty"})
        #print("\nAFTER data_mod_unc:\n", data_mod_unc)

        # Compute QQ-Plot Error Slope (ES)
        calc = data_mod_unc.loc[:, "pKa mean (calc)"].values
        expt = data_mod_unc.loc[:, "pKa mean (expt)"].values
        dcalc = data_mod_unc.loc[:, "pKa model uncertainty"].values
        dexpt = data_mod_unc.loc[:, "pKa SEM (expt)"].values
        n_bootstrap_samples = 1000 #1000

        X, Y, error_slope, error_slope_std, slopes = getQQdata(calc, expt, dcalc, dexpt, boot_its=n_bootstrap_samples)

        QQplot_data = [X, Y, error_slope]

        # Compute 95% confidence intervals of Error Slope
        percentile = 0.95
        percentile_index = int(np.floor(n_bootstrap_samples * (1 - percentile) / 2)) - 1

        #for stats_func_idx, samples_statistics in enumerate(bootstrap_samples_statistics):
        samples_statistics = np.asarray(slopes)
        samples_statistics.sort()
        stat_lower_percentile = samples_statistics[percentile_index]
        stat_higher_percentile = samples_statistics[-percentile_index + 1]
        confidence_interval = (stat_lower_percentile, stat_higher_percentile)

        model_uncertainty_statistics = [error_slope, confidence_interval, samples_statistics]


        return model_uncertainty_statistics, QQplot_data


Load Submissions is a utility function that collects all the .csv files that contain the submission and create pKasubmission objects from them.
### Dependencies:
* pKaSubmission Class

In [12]:
def load_submissions(directory_path, user_map):
    """Load submissions from a specified directory using a specified user map,
    correct unit and sign errors in submissions, and convert relative free energy
    calculations to macro pKa predictions.
    Returns: submissions (where each prediction is a macro pKa)
    """
    submissions = []
    for file_path in glob.glob(os.path.join(directory_path, '*.csv')):
        print(file_path)
        try:
            submission = pKaSubmission(file_path, user_map)
            print(submission)

        except IgnoredSubmissionError:
            continue
        submissions.append(submission)

    return submissions


## Converting Microstates to Macrostates:
This involves two parts-
* *submission_fix_and_convert function* : Used to standardize units for free energy calculations.
* *get_macropka function*: This is a titration code that is used to calculate the macro pKa from the microstates.

### Function to Calculate Macro pKa (*get_macropka*)
This function is used to calculate the macropKa and then store the results in a class called *Macro_pKa*.

In [13]:
class Macro_pKa:
    def __init__(self):
        self.molecule = ""
        self.transition_from = 0
        self.transition_to = 0
        self.pKa_bytitration = 0.0
        self.pKa_bydG = 0.0
        self.SEM = 0.0
        self.MU = 0.0

#### Dependencies for the *get_macropka* function
* *DeltaG*
* *pop_charge*
* *getG*

In [14]:
# Compute beta and other constants
kB = 1.381 * 6.02214 / 1000.0  # [kJ/(mol K)]
beta = 1. / (kB * 300)  # [mol/kJ]
beta = beta * 4.186
C_unit = 1 / beta * np.log(10)


# Compute free energy as a function of pH for states
# WITHIN-charge transitions have same pH dependence
def DeltaG(pH, state, state_details):
    #print("state_details",state_details)
    for item in state_details:
        if item[0] == state:
            # 0 serves as the reference state; all transitions are away from 0.
            if item[2] == -1:
                DeltaM = 1
            elif item[2] == 1:
                DeltaM = -1
            elif item[2] ==2:
                DeltaM=-2
            else:
                DeltaM = 0  # Hack to capture fact that pH dependence of states at same formal charge is same/cancels
            # Compute value
            return (item[1] - pH * DeltaM * C_unit)  # Gunner eq 3


# Compute populations for charge states (without normalization, due to laziness/since it'll drop out)
def pop_charge(pH, formal_charge, state_details):
    free_energies = []
    for item in state_details: #state_details [('SM42_micro001', 0.5304000000000001, -1, 0.0, 1.3872000000000002)
        if item[2] == formal_charge:
            free_energies.append(-beta * DeltaG(pH, item[0], state_details))
    if formal_charge == 0:
        free_energies.append(0 * pH)
    #print("free_energies",free_energies)
    return np.exp(logsumexp(free_energies))

# get G of each group
def getG(msgroup):
    Pi_raw = np.array([np.exp(-beta*ms[1]) for ms in msgroup])
    Pi_norm = Pi_raw/sum(Pi_raw)
    E = sum(np.array([ms[1] for ms in msgroup]) * Pi_norm)
    TS = -sum(Pi_norm * np.log(Pi_norm))/beta
    G = E - TS
    return G

In [15]:
def get_macropka(rfe_data):
    macropkas = []

    # Extract each molecule
    molecules = {}
    for index, row in rfe_data.iterrows():
        SM = index
        state = row["ID tag"]
        charge = row["total charge"]
        rfe = row["pKa mean"]
        sem = row["pKa SEM"]
        model_uncertainty = row["pKa model uncertainty"]

        if SM in molecules:
            molecules[SM].append((state, rfe, charge, sem, model_uncertainty))
        else:
            molecules[SM] = [(state, rfe, charge, sem, model_uncertainty)]



    # Loop over molecules, convert to state_details
    SM_names = [x for x in molecules.keys()]
    SM_names.sort()
    for sm_name in SM_names:
        state_details = molecules[sm_name]

        # Figure out what formal charges are present in states
        formal_charges = [info[2] for info in state_details]
        print(formal_charges)

        # group microstates into groups based their formal charge
        msgroup_p2 = [state for state in state_details if state[2] == 2]  # microstates with formal charge +2
        msgroup_p1 = [state for state in state_details if state[2] == 1]  # microstates with formal charge +1
        msgroup_p0 = [state for state in state_details if state[2] == 0]  # microstates with formal charge 0
        msgroup_p0.append(("reference state", 0, 0))  # add back reference state
        msgroup_n1 = [state for state in state_details if state[2] == -1]  # microstates with formal charge -1

        # for reaction A -> B
        # ΔGAB = (-1)(C_unit)(pH - pKaBA)
        # Therefore when pH = 0, we have pKaBA = ΔGAB/C_unit
        # TODO: How are these formal charges estimated?
        # Compute +2 to +1 transition
        if 2 in formal_charges:
            pka = Macro_pKa()
            pka.molecule = sm_name.split("_")[0]
            pka.transition_from = 2
            pka.transition_to = 1
            pka.SEM = state_details[0][3]
            pka.MU = state_details[0][4]

            # titration method given my David's group
            init_guess = -15
            func_2to1 = lambda pH : (pop_charge(pH, 2, state_details) - pop_charge(pH, 1, state_details))
            pH_solution_2to1, infodict, ier, mesg = fsolve(func_2to1, init_guess, factor = 0.1, full_output=True)
            # If message indicates poor convergence, change initial guess and try again
            if 'The iteration is not making good progress' in mesg:
                init_guess-=5
            pH_solution_2to1, infodict, ier, mesg = fsolve(func_2to1, init_guess, factor = 0.1, full_output=True)
            # If still poor convergence, print warning (MAY NEED BETTER SOLUTION TO THIS)
            if 'The iteration is not making good progress' in mesg:
                print("WARNING: Numerical problems encountered with fsolv")
            pka.pKa_bytitration = pH_solution_2to1

            # delta G method given by Junjun Mao
            dG = getG(msgroup_p1) - getG(msgroup_p2)
            pka.pKa_bydG = (dG / C_unit)

            macropkas.append(pka)

        # Compute +1 to 0 transition
        if 1 in formal_charges:
            pka = Macro_pKa()
            pka.molecule = sm_name.split("_")[0]
            pka.transition_from = 1
            pka.transition_to = 0
            pka.SEM = state_details[0][3]
            pka.MU = state_details[0][4]

            # titration method given by David's group
            init_guess = -5
            func_10 = lambda pH: (pop_charge(pH, 1, state_details) - pop_charge(pH, 0, state_details))
            pH_solution_1to0, infodict, ier, mesg = fsolve(func_10, init_guess, factor=0.1, full_output=True)
            if 'The iteration is not making good progress' in mesg:
                init_guess-=3
            pH_solution_1to0, infodict, ier, mesg = fsolve(func_10, init_guess, factor=0.1, full_output=True)
            # If still poor convergence, print warning (MAY NEED BETTER SOLUTION TO THIS)
            if 'The iteration is not making good progress' in mesg:
                print("WARNING: Numerical problems encountered with fsolv")
            pka.pKa_bytitration = pH_solution_1to0

            # delta G method given by Junjun Mao
            dG = getG(msgroup_p0) - getG(msgroup_p1)
            pka.pKa_bydG = (dG / C_unit)

            macropkas.append(pka)

        # Compute 0 to -1 transition
        if -1 in formal_charges:
            pka = Macro_pKa()
            pka.molecule = sm_name.split("_")[0]
            pka.transition_from = 0
            pka.transition_to = -1
            pka.SEM = state_details[0][3]
            pka.MU = state_details[0][4]

            # titration method given by David's group
            init_guess = 5
            func_0neg1 = lambda pH: (pop_charge(pH, -1, state_details) - pop_charge(pH, 0, state_details))
            pH_solution_0toneg1, infodict, ier, mesg = fsolve(func_0neg1, init_guess, factor=0.1, full_output=True)
            if 'The iteration is not making good progress' in mesg:
                init_guess+=3
            pH_solution_0toneg1, infodict, ier, mesg = fsolve(func_0neg1, init_guess, factor=0.1, full_output=True)
            # If still poor convergence, print warning (MAY NEED BETTER SOLUTION TO THIS)
            if 'The iteration is not making good progress' in mesg:
                print("WARNING: Numerical problems encountered with fsolv")
            pka.pKa_bytitration = pH_solution_0toneg1

            # delta G method given by Junjun Mao
            dG = getG(msgroup_n1) - getG(msgroup_p0)
            pka.pKa_bydG = (dG / C_unit)

            macropkas.append(pka)

    return macropkas


### *submission_fix_and_convert*
This function is used to standardize the units of the different submissions. Additionally converts the microstates from the submissions to macrostate pKa's so that they can be compared to the experimental pKa's.

In [16]:
def submission_fix_and_convert(submission_data):
    one_to_0transitions = []
    for submission in submission_data:
        sub = submission.data
        #reset the index so that each prediction can be accessed individually (without having to rearrange reference state and microstate)
        submission.data=submission.data.rename_axis('Molecule ID').reset_index()
        for mol_ID, series in submission.data.iterrows():
            pKa_mean_pred = submission.data.loc[mol_ID, "pKa mean"]
            pKa_SEM_pred = submission.data.loc[mol_ID, "pKa SEM"]
            pKa_model_uncertainty =  submission.data.loc[mol_ID, "pKa model uncertainty"]

            # Convert submissions to kcal/mol
            if submission.file_name in ["pKa-ECRISM-1", "pKa-VA-2-charge-correction", "pKa_RodriguezPaluch_SMD_1", "pKa_RodriguezPaluch_SMD_2", "pKa_RodriguezPaluch_SMD_3"]:
                pKa_mean_pred = pKa_mean_pred*C_unit #convert submission to kcal/mol
                pKa_SEM_pred = pKa_SEM_pred*C_unit
                pKa_model_uncertainty = pKa_model_uncertainty*C_unit

            # fix submission which seems to be in kJ/mol
            if submission.file_name in ["pka-nhlbi-1c"]:
                # correct free energies into kcal/mol
                # submission seemed to have used C_units = 5.69 for kJ/mol, so can divide by 4.186 to get kcal/mol
                pKa_mean_pred = pKa_mean_pred/4.186
                pKa_SEM_pred = pKa_SEM_pred/4.186
                pKa_model_uncertainty = pKa_model_uncertainty/4.186

            #If single transition states are opposite in sign from macro pKa, we assume they made a sign error
            if submission.file_name in [ "pKa-VA-2-charge-correction", "pka-nhlbi-1c", "pKa_RodriguezPaluch_SMD_1","pKa_RodriguezPaluch_SMD_2", "pKa_RodriguezPaluch_SMD_3"]:
                pKa_mean_pred = pKa_mean_pred*-1 #fix sign error
                submission.data.loc[mol_ID, "pKa mean"] = pKa_mean_pred

            submission.data.loc[mol_ID, "pKa mean"] = pKa_mean_pred
            submission.data.loc[mol_ID, "pKa SEM"] = pKa_SEM_pred
            submission.data.loc[mol_ID, "pKa model uncertainty"] = pKa_model_uncertainty

        cleared_sub = submission.data[0:0] #keep column names only
        del cleared_sub['ID tag']
        del cleared_sub['total charge']
        submission.data=submission.data.set_index('Molecule ID')


        # Convert relative free energies to macro pKa's
        if submission.reference_submission == False:
            macropkas = get_macropka(submission.data)
            for pka in macropkas:
                print('Macro pKa of',pka.molecule,'is',pka.pKa_bytitration)
                cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
                                                  "pKa mean": pka.pKa_bytitration[0],
                                                  "pKa SEM": pka.SEM,
                                                  "pKa model uncertainty":pka.MU},
                                                 ignore_index = True)
                submission.data = cleared_sub.set_index('Molecule ID')
        # Cleanup the reference calculations df column names
        else:
            del submission.data['pKa SEM']
            del submission.data['pKa model uncertainty']
            submission.data.rename(columns = {'ID tag':'pKa mean','total charge':'pKa SEM','pKa mean':'pKa model uncertainty'}, inplace = True)

        if submission.file_name in ["pKa_RodriguezPaluch_SMD_3", "pKa_RodriguezPaluch_SMD_2", "pKa_RodriguezPaluch_SMD_1", "pKa-RobertRaddi", "pKa_prediction_Iorga_Beckstein_1", "pKa-IEFPCMMST-1", "pKa-ECRISM-1", "pka-nhlbi-1c"]:
            one_to_0transitions.append(submission)

    #print(one_to_0transitions)

    return submission_data, one_to_0transitions

## Collection of pKa data
Collects all of the pKa data objects (containing both experimental and calculated pKa's) for generation of plots.

In [None]:
class pKaSubmissionCollection:
    """A collection of pKa submissions."""

    pKa_CORRELATION_PLOT_BY_METHOD_PATH_DIR = 'pKaCorrelationPlots'
    pKa_CORRELATION_PLOT_WITH_SEM_BY_METHOD_PATH_DIR = 'pKaCorrelationPlotsWithSEM'
    pKa_CORRELATION_PLOT_BY_pKa_PATH_DIR = 'error_for_each_pKa.pdf'
    ABSOLUTE_ERROR_VS_pKa_PLOT_PATH_DIR = 'AbsoluteErrorPlots'


    def __init__(self, submissions, experimental_data, output_directory_path, pKa_submission_collection_file_path,
                 ignore_refcalcs = True, ranked_only = True, allow_multiple = True):
        # Build collection dataframe from the beginning.
        # Build full pKa collection table.

        data = []

        # Participant names we've found so far; tracked to ensure no one has more than one
        # ranked submission
        self.participant_names_ranked = []

        # Submissions for pKa.
        for submission in submissions:
            if submission.reference_submission and ignore_refcalcs:
                continue

            if ranked_only and not submission.ranked:
                continue
            # Store names associated with ranked submission, skip if they submitted multiple (only if we need to check for duplicate authors)
            if submission.ranked and not allow_multiple:
                if not submission.participant in self.participant_names_ranked:
                    self.participant_names_ranked.append(submission.participant)
                else:
                    print(f"Error: {submission.participant} submitted multiple ranked submissions.")
                    continue

            for mol_ID, series in submission.data.iterrows():
                print("mol_ID",mol_ID)
                pKa_mean_exp = experimental_data.loc[mol_ID, 'pKa mean']
                pKa_SEM_exp = experimental_data.loc[mol_ID, 'pKa SEM']

                pKa_mean_pred = submission.data.loc[mol_ID, "pKa mean"]
                pKa_SEM_pred = submission.data.loc[mol_ID, "pKa SEM"]
                print("pKa_mean_pred \n",pKa_mean_pred)

                pKa_model_uncertainty =  submission.data.loc[mol_ID, "pKa model uncertainty"]
                ranked = submission.ranked

                data.append({
                    'method_name': submission.method_name,
                    'file name': submission.file_name,
                    'category': submission.category,
                    'Molecule ID': mol_ID,
                    'pKa (calc)': pKa_mean_pred,
                    'pKa SEM (calc)': pKa_SEM_pred,
                    'pKa (exp)': pKa_mean_exp,
                    'pKa SEM (exp)': pKa_SEM_exp,
                    '$\Delta$pKa error (calc - exp)': pKa_mean_pred - pKa_mean_exp,
                    'pKa model uncertainty': pKa_model_uncertainty
                })

        # Transform into Pandas DataFrame.
        self.data = pd.DataFrame(data=data)
        self.output_directory_path = output_directory_path

        print("\n SubmissionCollection: \n")
        print(self.data)

        # Create general output directory.
        os.makedirs(self.output_directory_path, exist_ok=True)

        # Save collection.data dataframe in a CSV file.
        self.data.to_csv(pKa_submission_collection_file_path)

    def generate_correlation_plots(self):

        # pKa correlation plots.
        output_dir_path = os.path.join(self.output_directory_path, self.pKa_CORRELATION_PLOT_BY_METHOD_PATH_DIR)

        os.makedirs(output_dir_path, exist_ok=True)

        for method_name in self.data.method_name.unique():

            # Skip NULL0 submission
            if "NULL" in method_name:
                continue

            data = self.data[self.data.method_name == method_name]

            title = '{}'.format(method_name)

            plt.close('all')
            plot_correlation(x='pKa (exp)', y='pKa (calc)',
                             data=data, title=title, kind='joint')
            plt.tight_layout()
            # plt.show()
            method_name = name_to_filename(method_name)
            output_path = os.path.join(output_dir_path, '{}.pdf'.format(method_name))
            plt.savefig(output_path)

    def generate_correlation_plots_with_SEM(self):

        # pKa correlation plots.
        output_dir_path = os.path.join(self.output_directory_path, self.pKa_CORRELATION_PLOT_WITH_SEM_BY_METHOD_PATH_DIR)
        os.makedirs(output_dir_path, exist_ok=True)
        for method_name in self.data.method_name.unique():

            # Skip NULL0 submission
            if "NULL" in method_name:
                continue

            data = self.data[self.data.method_name == method_name]
            title = '{}'.format(method_name)

            plt.close('all')
            plot_correlation_with_SEM(x_lab='pKa (exp)', y_lab='pKa (calc)',
                                      x_err_lab='pKa SEM (exp)', y_err_lab='pKa SEM (calc)',
                                      data=data, title=title)
            plt.tight_layout()
            # plt.show()
            method_name = name_to_filename(method_name)
            output_path = os.path.join(output_dir_path, '{}.pdf'.format(method_name))
            plt.savefig(output_path)

    def generate_molecules_plot(self):
        # Correlation plot by molecules.
        plt.close('all')
        data_ordered_by_mol_ID = self.data.sort_values(["Molecule ID"], ascending=["True"])
        sns.set(rc={'figure.figsize': (8.27,11.7)})
        sns.violinplot(y='Molecule ID', x='$\Delta$pKa error (calc - exp)', data=data_ordered_by_mol_ID,
                       inner='point', linewidth=1, width=1.2)
        plt.tight_layout()
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.pKa_CORRELATION_PLOT_BY_pKa_PATH_DIR))

    def generate_absolute_error_vs_molecule_ID_plot(self):
        """
        For each method a bar plot is generated so that absolute errors of each molecule can be compared.
        """
        # Setup output directory
        output_dir_path = os.path.join(self.output_directory_path,
                                       self.ABSOLUTE_ERROR_VS_pKa_PLOT_PATH_DIR)
        os.makedirs(output_dir_path, exist_ok=True)

        # Calculate absolute errors.
        self.data["absolute error"] = np.NaN
        self.data.loc[:, "absolute error"] = np.absolute(self.data.loc[:, "$\Delta$pKa error (calc - exp)"])

        # Create a separate plot for each submission.
        for method_name in self.data.method_name.unique():
            data = self.data[self.data.method_name == method_name]
            title = '{}'.format(method_name)

            plt.close('all')
            barplot(df=data, x_label="Molecule ID", y_label="absolute error", title=title)
            method_name = name_to_filename(method_name)
            output_path = os.path.join(output_dir_path, '{}.pdf'.format(method_name))
            plt.savefig(output_path)

## Read Experimental Data
Reads in experimental data as a dataframe for the analysis.

In [18]:
with open(EXPERIMENTAL_DATA_FILE_PATH, 'r') as f:
    names = ('Molecule ID', 'pKa mean', 'pKa SEM')
    experimental_data = pd.read_csv(f, names=names, skiprows=1)

# Convert numeric values to dtype float.
for col in experimental_data.columns[1:7]:
    experimental_data[col] = pd.to_numeric(experimental_data[col], errors='coerce')


experimental_data.set_index("Molecule ID", inplace=True)
experimental_data["Molecule ID"] = experimental_data.index
print("Experimental data: \n", experimental_data)

# Import user map.
with open(USER_MAP_FILE_PATH, 'r') as f:
    user_map = pd.read_csv(f)

Experimental data: 
              pKa mean  pKa SEM Molecule ID
Molecule ID                               
SAMPL8-1         2.54     0.32    SAMPL8-1
SAMPL8-1         5.01     0.19    SAMPL8-1
SAMPL8-2         4.41     0.02    SAMPL8-2
SAMPL8-3         4.00     0.31    SAMPL8-3
SAMPL8-4         5.77     0.17    SAMPL8-4
SAMPL8-5         3.92     0.16    SAMPL8-5
SAMPL8-6         4.17     0.09    SAMPL8-6
SAMPL8-7         6.63     0.04    SAMPL8-7
SAMPL8-8         2.78     0.08    SAMPL8-8
SAMPL8-9         6.08     0.10    SAMPL8-9
SAMPL8-10        7.71     0.15   SAMPL8-10
SAMPL8-12        6.98     0.06   SAMPL8-12
SAMPL8-14        7.27     0.08   SAMPL8-14
SAMPL8-15        2.54     0.03   SAMPL8-15
SAMPL8-16        5.10     0.10   SAMPL8-16
SAMPL8-17        6.58     0.05   SAMPL8-17
SAMPL8-18        2.72     0.09   SAMPL8-18
SAMPL8-19        4.93     0.24   SAMPL8-19
SAMPL8-19        6.99     0.22   SAMPL8-19
SAMPL8-20        2.44     0.10   SAMPL8-20
SAMPL8-20       11.44     0.16   

# Working Area for class modification
These blocks are attempts to fix bugs in the code when creating data frame of both experimental and calculated pKa values introduced by variations in the number of macro pKas being calculated.

In [None]:
submissions_RFE = load_submissions(pKa_SUBMISSIONS_DIR_PATH, user_map)
microstates_store = open("microstates.pickle","wb")
pickle.dump(submissions_RFE,microstates_store)
microstates_store.close()

In [None]:
# Getting the Number of experimental pKas for each compound
no_pKas = pd.DataFrame(columns=['Molecule ID','No of pKa'])
for mol_ID,row in experimental_data.iterrows():
    no = len(experimental_data.loc[mol_ID, 'pKa mean'])
    no_pkas_series =
    no_pKas = pd.concat(no_pKas,)


In [19]:
submissions_RFE = load_submissions(pKa_SUBMISSIONS_DIR_PATH, user_map)
submissions_pKa, one_to_0transitions = submission_fix_and_convert(submissions_RFE)

/Users/aakankschit/Desktop/Mobley Lab/scratches and workthroughs/SAMPL8/pKa_analysis/submissions/pKa-3DS-1.csv
file_name: 
 pKa-3DS-1
<__main__.pKaSubmission object at 0x16918d660>
/Users/aakankschit/Desktop/Mobley Lab/scratches and workthroughs/SAMPL8/pKa_analysis/submissions/pKa-RobertRaddi_AdaBoost.csv
file_name: 
 pKa-RobertRaddi_AdaBoost
<__main__.pKaSubmission object at 0x168fbe050>
/Users/aakankschit/Desktop/Mobley Lab/scratches and workthroughs/SAMPL8/pKa_analysis/submissions/pKa-3DS-2.csv
file_name: 
 pKa-3DS-2
<__main__.pKaSubmission object at 0x1691fe7a0>
/Users/aakankschit/Desktop/Mobley Lab/scratches and workthroughs/SAMPL8/pKa_analysis/submissions/pKa-3DS-3.csv
file_name: 
 pKa-3DS-3
<__main__.pKaSubmission object at 0x1691ffd30>
/Users/aakankschit/Desktop/Mobley Lab/scratches and workthroughs/SAMPL8/pKa_analysis/submissions/pKa_SabatinoRodriguezPaluch_uESE.csv
file_name: 
 pKa_SabatinoRodriguezPaluch_uESE
<__main__.pKaSubmission object at 0x168fbe320>
/Users/aakankschit/

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea

Macro pKa of SAMPL8-17 is [11.81378912]
Macro pKa of SAMPL8-18 is [5.25374826]
Macro pKa of SAMPL8-18 is [11.27951023]
Macro pKa of SAMPL8-19 is [3.43002472]
Macro pKa of SAMPL8-19 is [6.08619895]
Macro pKa of SAMPL8-19 is [6.74263373]
Macro pKa of SAMPL8-1 is [-0.80873812]
Macro pKa of SAMPL8-1 is [3.96612924]
Macro pKa of SAMPL8-20 is [0.27119818]
Macro pKa of SAMPL8-20 is [4.71423237]
Macro pKa of SAMPL8-20 is [10.15587409]
Macro pKa of SAMPL8-21 is [2.95502885]
Macro pKa of SAMPL8-21 is [7.81646087]
Macro pKa of SAMPL8-21 is [8.61039031]
Macro pKa of SAMPL8-22 is [-0.86839963]
Macro pKa of SAMPL8-22 is [5.51717064]
Macro pKa of SAMPL8-22 is [12.54003441]
Macro pKa of SAMPL8-23 is [4.47387971]
Macro pKa of SAMPL8-23 is [8.9259563]
Macro pKa of SAMPL8-2 is [4.27216139]
Macro pKa of SAMPL8-3 is [-1.04196089]
Macro pKa of SAMPL8-3 is [3.37357936]
Macro pKa of SAMPL8-4 is [2.48177678]
Macro pKa of SAMPL8-4 is [3.52071191]
Macro pKa of SAMPL8-5 is [-1.87491101]
Macro pKa of SAMPL8-5 is [

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea


[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0]
[-1, -1, -1, -1]
[-1, -1, 1, 1, 0]
[-1, -1, -1, 1, 1]
[-1]
[1, 1]
[-1]
Macro pKa of SAMPL8-10 is [5.34827604]
Macro pKa of SAMPL8-12 is [12.50000005]
Macro pKa of SAMPL8-14 is [8.18010091]
Macro pKa of SAMPL8-15 is [6.38632591]
Macro pKa of SAMPL8-16 is [7.23028383]
Macro pKa of SAMPL8-17 is [5.23897876]
Macro pKa of SAMPL8-18 is [3.98570435]
Macro pKa of SAMPL8-1 is [5.10032067]
Macro pKa of SAMPL8-21 is [2.93448911]
Macro pKa of SAMPL8-22 is [7.11928913]
Macro pKa of SAMPL8-23 is [3.66496882]
Macro pKa of SAMPL8-23 is [8.07287101]
Macro pKa of SAMPL8-2 is [7.24275356]
Macro pKa of SAMPL8-3 is [5.09854317]
Macro pKa of SAMPL8-4 is [3.59115035]
Macro pKa of SAMPL8-5 is [14.03147541]
Macro pKa of SAMPL8-5 is [0.30101275]
Macro pKa of SAMPL8-6 is [14.07019665]
Macro pKa of SAMPL8-6 is [-4.66299591e-06]
Macro pKa of SAMPL8-7 is [8.18271661]
Macro pKa of SAMPL8-8 is [5.78145761]
Macro pKa of SAMPL8-9 is [8.0442735]
[1, 0]
[2, 2, 2, 1, 1, 1, 

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea

Macro pKa of SAMPL8-20 is [9.96563218]
Macro pKa of SAMPL8-21 is [1.28792841]
Macro pKa of SAMPL8-21 is [7.15518624]
Macro pKa of SAMPL8-21 is [8.1117398]
Macro pKa of SAMPL8-22 is [-3.33089001]
Macro pKa of SAMPL8-22 is [4.35828344]
Macro pKa of SAMPL8-22 is [12.50000007]
Macro pKa of SAMPL8-23 is [3.11861097]
Macro pKa of SAMPL8-23 is [8.49603754]
Macro pKa of SAMPL8-2 is [1.93320143]
Macro pKa of SAMPL8-3 is [-3.54851505]
Macro pKa of SAMPL8-3 is [1.77060528]
Macro pKa of SAMPL8-4 is [0.77325536]
Macro pKa of SAMPL8-4 is [1.89311031]
Macro pKa of SAMPL8-5 is [-4.50532885]
Macro pKa of SAMPL8-5 is [1.88219619]
Macro pKa of SAMPL8-6 is [1.8364024]
Macro pKa of SAMPL8-6 is [1.99628587]
Macro pKa of SAMPL8-7 is [7.02009583]
Macro pKa of SAMPL8-7 is [12.48581011]
Macro pKa of SAMPL8-8 is [1.56430879]
Macro pKa of SAMPL8-9 is [6.74249947]
Macro pKa of SAMPL8-9 is [12.50083299]
[1, 0]
[2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, -1]
[2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),



[1, 1, 0, 0, 0, 0, -1, -1, -2]
[1, 1, 0, 0, -1]
[1, 1, 1, 0, 0, 0, -1]
[2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1]
[1, 1, 0, 0, 0, -1]
[2, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, -1]
[2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2]
[2, 1, 1, 1, 0, 0, 0, -1]
[1, 0, 0, 0, -1, -1]
[0, 0, -1]
[1, 1, 0, 0, 0, -1, -1, -2]
[1, 0, 0, 0, 0, 0, -1, -1, -1]
[1, 1, 0, 0, 0, -1]
[1, 0, 0, -1, -1]
[1, 1, 0, 0, -1]
[1, 1, 0]
[1, 1, 0, 0, -1]
Macro pKa of SAMPL8-10 is [9.14224759]
Macro pKa of SAMPL8-12 is [3.45485553]
Macro pKa of SAMPL8-12 is [8.8570101]
Macro pKa of SAMPL8-12 is [15.36219277]
Macro pKa of SAMPL8-14 is [4.20780645]
Macro pKa of SAMPL8-14 is [9.24956749]
Macro pKa of SAMPL8-14 is [10.34021741]
Macro pKa of SAMPL8-15 is [5.88071324]
Macro pKa of SAMPL8-16 is [7.11660623]
Macro pKa of SAMPL8-16 is [11.64450832]
Macro pKa of SAMPL8-17 is [7.72015774]
Macro pKa of SAMPL8-17 is [12.37595309]
Macro pKa o

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea

Macro pKa of SAMPL8-21 is [9.22292988]
Macro pKa of SAMPL8-22 is [-0.0760354]
Macro pKa of SAMPL8-22 is [6.19667443]
Macro pKa of SAMPL8-22 is [12.50000007]
Macro pKa of SAMPL8-23 is [5.16609147]
Macro pKa of SAMPL8-23 is [9.53073791]
Macro pKa of SAMPL8-2 is [4.27216139]
Macro pKa of SAMPL8-3 is [-0.23316038]
Macro pKa of SAMPL8-3 is [4.0803568]
Macro pKa of SAMPL8-4 is [3.21036961]
Macro pKa of SAMPL8-4 is [4.23467603]
Macro pKa of SAMPL8-5 is [-1.06611214]
Macro pKa of SAMPL8-5 is [4.22844439]
Macro pKa of SAMPL8-6 is [4.05068257]
Macro pKa of SAMPL8-6 is [4.34334195]
Macro pKa of SAMPL8-7 is [8.2707501]
Macro pKa of SAMPL8-7 is [12.50018323]
Macro pKa of SAMPL8-8 is [3.85226339]
Macro pKa of SAMPL8-9 is [8.05080782]
Macro pKa of SAMPL8-9 is [12.50000006]
[1]
[-1, 0, 0]
[-2, 0, -2, -1, -1, 0, -2, -1, -1, -1, -2, -1, 0, -3, -2, 0, 0, -2, -1, -2, -1, -1, 0, 0, -1, -2, -1, -1, -1, 0, -2, -1, 0, 0]
[0, 0, 1, 1]
[1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0]
[-1, 1, 1, 0, -1]
[0, 0, 1, 1, -1]
[1, 2, 

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),



[0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1]
[-2, -2, -1, -2, -2, -1, -1, 0, -1, -3, -1, -2, -2, -1, -2, -1, -2, -1, -1, -1, -2, -1, 0, -1, 0, -1, -1, 0, 0]
[-1, 0, 0]
[1, -1, 0, 0, -1, 1, 0]
[-1, 0, -1]
[-1, 0, -1, 0, -1, 0, -2, 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, -2, -1, -2]
[0, 1, -1, 0, -1, -1, 1, -1, -1, 0]
[0, -1, 1, 0, 0, 1, -1]
[-1, 1, 1, 0, 0, -1, 0, 0, -1]
[-1, 1, 1, 0]
[1, 1]
[-1, 1, 0, 1]
Macro pKa of SAMPL8-10 is [6.5796911]
Macro pKa of SAMPL8-12 is [8.]
Macro pKa of SAMPL8-14 is [13.60102037]
Macro pKa of SAMPL8-15 is [1.11499931]
Macro pKa of SAMPL8-16 is [3.69424517]
Macro pKa of SAMPL8-17 is [5.20254055]
Macro pKa of SAMPL8-17 is [18.09231025]
Macro pKa of SAMPL8-18 is [1.62490806]
Macro pKa of SAMPL8-18 is [13.60106287]
Macro pKa of SAMPL8-19 is [104.90874097]
Macro pKa of SAMPL8-19 is [3.15179999]
Macro pKa of SAMPL8-19 is [12.0094185]
Macro pKa of SAMPL8-1 is [7.61571483]
Macro pKa of SAMPL8-20 is [1.23172901]
Macro p

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea


[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, 0]
[1, 1]
[1, 1, 1, 1, 1, 1, 1, 0]
[-1, -1]
[-1]
[-1, -1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2]
[-1, 0]
[-1, -1, 1, 1, 0]
[-1, 0]
[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0]
[-1, -1, -1, -1]
[-1, 1, 1]
[-1, -1, 1, 1]
[-1]
[1, 1]
[-1]
Macro pKa of SAMPL8-10 is [9.21011024]
Macro pKa of SAMPL8-12 is [13.60102451]
Macro pKa of SAMPL8-14 is [7.33010748]
Macro pKa of SAMPL8-15 is [7.06233521]
Macro pKa of SAMPL8-16 is [8.67630441]
Macro pKa of SAMPL8-17 is [4.29173643]
Macro pKa of SAMPL8-18 is [5.02038446]
Macro pKa of SAMPL8-1 is [4.61029612]
Macro pKa of SAMPL8-21 is [6.30796306]
Macro pKa of SAMPL8-22 is [5.23546676]
Macro pKa of SAMPL8-23 is [4.17378815]
Macro pKa of SAMPL8-23 is [9.2247419]
Macro pKa of SAMPL8-2 is [11.39605422]
Macro pKa of SAMPL8-3 is [6.94375332]
Macro pKa of SAMPL8-4 is [2.43654049]
Macro pKa of SAMPL8-5 is [14.68225643]
Macro pKa of SAMPL8-5 is [4.4593

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea

[-1, -1, -1, -1, -1, 0, 0, -1, 0, 0]
[0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1]
[-2, -2, -1, -2, -2, -1, -1, 0, -1, -3, -1, -2, -2, -1, -2, -1, -2, -1, -1, -1, -2, -1, 0, -1, 0, -1, -1, 0, 0]
[-1, 0, 0]
[1, -1, 0, 0, -1, 1, 0]
[-1, 0, -1]
[-1, 0, -1, 0, -1, 0, -2, 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, -2, -1, -2]
[0, 1, -1, 0, -1, -1, 1, -1, -1, 0]
[0, -1, 1, 0, 0, 1, -1]
[-1, 1, 1, 0, 0, -1, 0, 0, -1]
[-1, 1, 1, 0]
[1, 1]
[-1, 1, 0, 1]
Macro pKa of SAMPL8-10 is [5.67616762]
Macro pKa of SAMPL8-12 is [19.19258762]
Macro pKa of SAMPL8-14 is [11.5417867]
Macro pKa of SAMPL8-15 is [4.3043793]
Macro pKa of SAMPL8-16 is [4.74349822]
Macro pKa of SAMPL8-17 is [4.58266733]
Macro pKa of SAMPL8-17 is [12.38746694]
Macro pKa of SAMPL8-18 is [4.24108954]
Macro pKa of SAMPL8-18 is [9.63314461]
Macro pKa of SAMPL8-19 is [-0.88379753]
Macro pKa of SAMPL8-19 is [0.78463988]
Macro pKa of SAMPL8-19 is [11.15572025]
Macro pKa of SAMPL8-1 is [4.15683879]
Mac

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea


[-1, -1, 1, 1, 0]
[-1, -1, 0]
[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0]
[-1, -1, -1, -1]
[-1, -1, 1, 1, 0]
[-1, -1, -1, 1, 1]
[-1]
[1, 1]
[-1]
Macro pKa of SAMPL8-10 is [6.4703939]
Macro pKa of SAMPL8-12 is [12.50000007]
Macro pKa of SAMPL8-14 is [4.13087472]
Macro pKa of SAMPL8-15 is [5.60102128]
Macro pKa of SAMPL8-16 is [6.28347616]
Macro pKa of SAMPL8-17 is [4.58319548]
Macro pKa of SAMPL8-18 is [4.56133624]
Macro pKa of SAMPL8-1 is [4.6762747]
Macro pKa of SAMPL8-21 is [2.45674035]
Macro pKa of SAMPL8-22 is [7.840812]
Macro pKa of SAMPL8-23 is [1.74052878]
Macro pKa of SAMPL8-23 is [5.53777131]
Macro pKa of SAMPL8-2 is [4.9038]
Macro pKa of SAMPL8-3 is [4.75148315]
Macro pKa of SAMPL8-4 is [3.38989176]
Macro pKa of SAMPL8-5 is [9.56486346]
Macro pKa of SAMPL8-5 is [0.30101766]
Macro pKa of SAMPL8-6 is [13.9536087]
Macro pKa of SAMPL8-6 is [-1.13349829e-05]
Macro pKa of SAMPL8-7 is [7.63623064]
Macro pKa of SAMPL8-8 is [4.06700984]
Macro pKa of SAMPL8-9 is [8.0078411]
[1]
[-

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea


[-1, 0, 0, 1, 1]
[-1, -1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
[-1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 1]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[-3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2]
[-1, 0, 0, 1, 1, 1, 2, 2]
[-1, -1, 0, 0, 0, 1, 1]
[-1, -1, 0]
[-2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 1]
[-1, -1, -1, -1, -1, 0, 0, 0, 1, 1]
[-1, -1, 0, 0, 0, 1, 1]
[-1, -1, -1, 0, 0, 0, 0, 1, 1]
[-1, 0, 1, 1]
[1, 1]
[-1, 0, 1, 1]
Macro pKa of SAMPL8-10 is [7.19904186]
Macro pKa of SAMPL8-12 is [-4.93476079]
Macro pKa of SAMPL8-12 is [4.96821752]
Macro pKa of SAMPL8-12 is [17.80086976]
Macro pKa of SAMPL8-14 is [-3.75053253]
Macro pKa of SAMPL8-14 is [5.45798019]
Macro pKa of SAMPL8-14 is [11.54904334]
Macro pKa of SAMPL8-15 is [1.45490249]
Macro pKa of SAMPL8-16 is [4.00756379]
Macro p

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea

Macro pKa of SAMPL8-21 is [-5.40579715]
Macro pKa of SAMPL8-21 is [3.81420166]
Macro pKa of SAMPL8-21 is [8.27744085]
Macro pKa of SAMPL8-22 is [-7.61323562]
Macro pKa of SAMPL8-22 is [2.17980584]
Macro pKa of SAMPL8-22 is [13.60102492]
Macro pKa of SAMPL8-23 is [2.06935992]
Macro pKa of SAMPL8-23 is [10.12820696]
Macro pKa of SAMPL8-2 is [5.3264166]
Macro pKa of SAMPL8-3 is [-11.65108091]
Macro pKa of SAMPL8-3 is [4.21280519]
Macro pKa of SAMPL8-4 is [0.19924093]
Macro pKa of SAMPL8-4 is [4.64609187]
Macro pKa of SAMPL8-5 is [-2.63709507]
Macro pKa of SAMPL8-5 is [5.12178462]
Macro pKa of SAMPL8-6 is [2.8706697]
Macro pKa of SAMPL8-6 is [4.80199334]
Macro pKa of SAMPL8-7 is [4.15327191]
Macro pKa of SAMPL8-7 is [13.60102679]
Macro pKa of SAMPL8-8 is [-0.32789158]
Macro pKa of SAMPL8-9 is [5.64688678]
Macro pKa of SAMPL8-9 is [14.33992769]
[1]
[-1, 0, 0]
[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, 0]
[1, 1, 0, 0]
[1, 1, 1, 1, 1, 1, 1, 0]
[-1, -1]
[-1]
[-1, -1, 1, 1

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea


[1, -1]
Macro pKa of SAMPL8-10 is [5.18068701]
Macro pKa of SAMPL8-12 is [5.22572768]
Macro pKa of SAMPL8-14 is [5.55610814]
Macro pKa of SAMPL8-14 is [8.05884646]
Macro pKa of SAMPL8-15 is [1.11483138]
Macro pKa of SAMPL8-16 is [3.67967221]
Macro pKa of SAMPL8-16 is [8.35759212]
Macro pKa of SAMPL8-17 is [5.53043803]
Macro pKa of SAMPL8-17 is [10.96615183]
Macro pKa of SAMPL8-18 is [1.10025842]
Macro pKa of SAMPL8-19 is [4.75251176]
Macro pKa of SAMPL8-19 is [6.95130156]
Macro pKa of SAMPL8-1 is [2.82715409]
Macro pKa of SAMPL8-20 is [0.96910179]
Macro pKa of SAMPL8-20 is [9.99705004]
Macro pKa of SAMPL8-21 is [0.88895051]
Macro pKa of SAMPL8-21 is [4.99123854]
Macro pKa of SAMPL8-21 is [6.87843676]
Macro pKa of SAMPL8-22 is [2.14222501]
Macro pKa of SAMPL8-23 is [2.52112195]
Macro pKa of SAMPL8-23 is [6.66712885]
Macro pKa of SAMPL8-2 is [2.54298139]
Macro pKa of SAMPL8-3 is [3.09675384]
Macro pKa of SAMPL8-4 is [0.74322092]
Macro pKa of SAMPL8-4 is [2.73242986]
Macro pKa of SAMPL8-

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea

[1]
[-1, 0, 0, 2, 1, 2, 1, 2, 1]
[-2, 0, -2, -1, -1, 0, -2, -1, -1, -1, -2, -1, 0, -3, -2, 0, 0, -2, -1, -2, -1, -1, 0, 0, -1, -2, -1, -1, -1, 0, -2, -1, 0, 0, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 1]
[0, 0, 1, 1]
[1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0]
[-1, 1, 1, 0, -1]
[0, 0, 1, 1, -1]
[1, 2, 2, 1, 1, 1, 0, 1, 1, -1, -1, 2, 0, 0, 0, 1, 2, 1, 1, 0, 2, 1, 1, 0, 0]
[-1, -1, -1, -1, -1, 0, 0, -1, 0, 0, 1]
[0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1]
[-2, -2, -1, -2, -2, -1, -1, 0, -1, -3, -1, -2, -2, -1, -2, -1, -2, -1, -1, -1, -2, -1, 0, -1, 0, -1, -1, 0, 0, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 1]
[-1, 0, 0, 2, 1, 2, 1, 1]
[1, -1, 0, 0, -1, 1, 0]
[-1, 0, -1]
[-1, 0, -1, 0, -1, 0, -2, 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, -2, -1, -2, 1]
[0, 1, -1, 0, -1, -1, 1, -1, -1, 0]
[0, -1, 1, 0, 0, 1, -1]
[-1, 1, 1, 0, 0, -1, 0, 0, -1]
[-1, 1, 1, 0]
[1, 1]
[-1, 1, 0, 1]
Macro pKa of SAMPL8-10 is [6.5796911]
Macro pKa of SAMPL8-12 is [104.90874097]
Macro pKa of SAMPL8-12 

  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = clea

Macro pKa of SAMPL8-6 is [5.89476201]
Macro pKa of SAMPL8-7 is [8.83121079]
Macro pKa of SAMPL8-7 is [22.6682406]
Macro pKa of SAMPL8-8 is [4.6123416]
Macro pKa of SAMPL8-9 is [5.93119271]
Macro pKa of SAMPL8-9 is [23.04713674]


  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
  cleared_sub = cleared_sub.append({"Molecule ID": str(pka.molecule),
