In [1]:
import numpy as np
import pandas as pd
import scipy.stats as ss


In [2]:
closest_collection = "typeIII_submission_collection_closest.csv"
hungarian_collection = "typeIII_submission_collection_hungarian.csv"

## How many predicted pKas are matched differently between closest and hungarian algorithms?

In [3]:
df_closest = pd.read_csv(closest_collection,index_col=0)
df_closest.head()

Unnamed: 0,$\Delta$pKa error (calc - exp),Molecule ID,name,pKa (calc),pKa (exp),pKa ID,pKa SEM (calc),pKa SEM (exp),participant,receipt_id
0,0.18,SM01,COSMOtherm_FINE17,9.71,9.53,SM01_pKa1,0.65,0.01,Chris Loschen,0hxtm
1,1.18,SM02,COSMOtherm_FINE17,6.21,5.03,SM02_pKa1,0.65,0.01,Chris Loschen,0hxtm
2,-6.39,SM03,COSMOtherm_FINE17,0.63,7.02,SM03_pKa1,0.65,0.01,Chris Loschen,0hxtm
3,-0.61,SM04,COSMOtherm_FINE17,5.41,6.02,SM04_pKa1,0.65,0.01,Chris Loschen,0hxtm
4,-0.06,SM05,COSMOtherm_FINE17,4.53,4.59,SM05_pKa1,0.65,0.01,Chris Loschen,0hxtm


In [4]:
df_hungarian = pd.read_csv(hungarian_collection, index_col=0)
df_hungarian.head()

Unnamed: 0,$\Delta$pKa error (calc - exp),Molecule ID,name,pKa (calc),pKa (exp),pKa ID,pKa SEM (calc),pKa SEM (exp),participant,receipt_id
0,0.18,SM01,COSMOtherm_FINE17,9.71,9.53,SM01_pKa1,0.65,0.01,Chris Loschen,0hxtm
1,1.18,SM02,COSMOtherm_FINE17,6.21,5.03,SM02_pKa1,0.65,0.01,Chris Loschen,0hxtm
2,-6.39,SM03,COSMOtherm_FINE17,0.63,7.02,SM03_pKa1,0.65,0.01,Chris Loschen,0hxtm
3,-0.61,SM04,COSMOtherm_FINE17,5.41,6.02,SM04_pKa1,0.65,0.01,Chris Loschen,0hxtm
4,-0.06,SM05,COSMOtherm_FINE17,4.53,4.59,SM05_pKa1,0.65,0.01,Chris Loschen,0hxtm


In [5]:
prediction_methods = set(df_closest["name"])
len(prediction_methods)

34

In [7]:
# Iterate through prediction methods and create a database that compares hunagarian and closest matching

matched_pKa_list = []

for method in prediction_methods:
#for method in ["Full quantum chemical calculation of free energies and fit to experimental pKa"]:
    
    submission_id = df_closest[df_closest["name"] == method]["receipt_id"].values[0]
    
    df_closest_1method = df_closest[df_closest["name"] == method].reset_index(drop=True)    
    df_hungarian_1method = df_hungarian[df_hungarian["name"] == method].reset_index(drop=True)
     
    pKa_IDs = list(df_closest_1method["pKa ID"])
  
    # Iterate through pKa_IDs to check if predicted pKas match
    for pKa_ID in pKa_IDs:
        pKa_exp = df_closest_1method[df_closest_1method["pKa ID"] == pKa_ID]["pKa (exp)"].values[0]
        pKa_pred_closest = df_closest_1method[df_closest_1method["pKa ID"] == pKa_ID]["pKa (calc)"].values[0]
        pKa_pred_hungarian = df_hungarian_1method[df_hungarian_1method["pKa ID"] == pKa_ID]["pKa (calc)"].values[0]
        closest_hungarian_diff = pKa_pred_closest - pKa_pred_hungarian
                
        matched_pKa_row = [pKa_ID, pKa_exp, pKa_pred_closest, pKa_pred_hungarian, closest_hungarian_diff, submission_id]
        matched_pKa_list.append(matched_pKa_row)
        #print(matched_pKa_row)

# Convert to pandas dataframe
df_compare_matching = pd.DataFrame(matched_pKa_list, columns = ['pKa ID', 'pKa (exp)', 
                                               'pKa (pred, closest)', 'pKa (pred, hungarian)',
                                              'closest - hungarian diff.', 'submission ID'])
df_compare_matching.head()

Unnamed: 0,pKa ID,pKa (exp),"pKa (pred, closest)","pKa (pred, hungarian)",closest - hungarian diff.,submission ID
0,SM01_pKa1,9.53,9.11,9.11,0.0,nb003
1,SM02_pKa1,5.03,3.88,3.88,0.0,nb003
2,SM03_pKa1,7.02,9.63,9.63,0.0,nb003
3,SM04_pKa1,6.02,4.53,4.53,0.0,nb003
4,SM05_pKa1,4.59,6.34,6.34,0.0,nb003


In [8]:
# Print out pKas that have different matching between hungarian and closest
df_difference_in_matching = df_compare_matching[df_compare_matching["closest - hungarian diff."] != 0]
df_difference_in_matching

Unnamed: 0,pKa ID,pKa (exp),"pKa (pred, closest)","pKa (pred, hungarian)",closest - hungarian diff.,submission ID
14,SM14_pKa1,2.58,3.94,0.14,3.8,nb003
15,SM14_pKa2,5.3,19.65,3.94,15.71,nb003
44,SM14_pKa1,2.58,3.38,-1.77,5.15,nb006
45,SM14_pKa2,5.3,24.63,3.38,21.25,nb006
52,SM18_pKa3,11.02,11.53,11.75,-0.22,nb006
144,SM18_pKa2,9.58,9.26,7.53,1.73,yqkga
166,SM14_pKa1,2.58,3.86,-0.9,4.76,nb005
167,SM14_pKa2,5.3,23.88,3.86,20.02,nb005
196,SM14_pKa1,2.58,3.74,-1.4,5.14,ds62k
197,SM14_pKa2,5.3,18.61,3.74,14.87,ds62k


In [9]:
# Why is nb006 SM14_pKa2 matched to a very different number?

# These are submitted SM14 predictions
# SM14, -1.77, 1.77
# SM14,  3.38, 1.77
# SM14, 24.63, 1.77


# SM14 Experimental values
# 2.58 ± 0.01
# 5.30 ± 0.01


df_nb006 = df_closest[df_closest['receipt_id']=='nb006']
df_nb006_SM14 = df_nb006[df_nb006['Molecule ID']=='SM14']
df_nb006_SM14

Unnamed: 0,$\Delta$pKa error (calc - exp),Molecule ID,name,pKa (calc),pKa (exp),pKa ID,pKa SEM (calc),pKa SEM (exp),participant,receipt_id
553,0.8,SM14,"EC-RISM/MP2/6-311+G(d,p)-P3NI-phi-all-1par",3.38,2.58,SM14_pKa1,1.77,0.01,Stefan Kast,nb006
554,19.33,SM14,"EC-RISM/MP2/6-311+G(d,p)-P3NI-phi-all-1par",24.63,5.3,SM14_pKa2,1.77,0.01,Stefan Kast,nb006


In [10]:
df_nb006 = df_hungarian[df_hungarian['receipt_id']=='nb006']
df_nb006_SM14 = df_nb006[df_nb006['Molecule ID']=='SM14']
df_nb006_SM14

Unnamed: 0,$\Delta$pKa error (calc - exp),Molecule ID,name,pKa (calc),pKa (exp),pKa ID,pKa SEM (calc),pKa SEM (exp),participant,receipt_id
568,-4.35,SM14,"EC-RISM/MP2/6-311+G(d,p)-P3NI-phi-all-1par",-1.77,2.58,SM14_pKa1,1.77,0.01,Stefan Kast,nb006
569,-1.92,SM14,"EC-RISM/MP2/6-311+G(d,p)-P3NI-phi-all-1par",3.38,5.3,SM14_pKa2,1.77,0.01,Stefan Kast,nb006


### Experimental pKas of molecules with pKas differently matched
SM06  
3.03 ± 0.04   
11.74 ± 0.01  

SM14  
2.58 ± 0.01   
5.30 ± 0.01  

SM18  
2.15 ± 0.02   
9.58 ± 0.03   
11.02 ± 0.04  

SM22  
2.40 ± 0.02  
7.43 ± 0.01  

### Experimental pKas of molecules with pKas equally matched even thought they have multiple pKas
SM15   
4.70 ± 0.01   
8.94 ± 0.01   

SM16  
5.37 ± 0.01   
10.65 ± 0.01  



## How many pKa predictions are matched without conserving the sequence with Hungarian method?

In [11]:
# Test for comparing rank orders - ORDERED MATCH
exp_pKas = np.array([2.4, 4.3, 7.0])
pred_pKas = np.array([2.5, 4.2, 7.2])

#exp_pKa_ranks = list(np.array([1, 3, 2]))
#pred_pKa_ranks = list(np.array([1, 3, 2]))

exp_pKa_ranks = ss.rankdata(exp_pKas)
print("exp ranks:", exp_pKa_ranks)
pred_pKa_ranks = ss.rankdata(pred_pKas)
print("pred ranks:", pred_pKa_ranks)

# Is rank order the same?
if list(exp_pKa_ranks) == list(pred_pKa_ranks):
    ordered_match = True
else:
    ordered_match = False
    
ordered_match

exp ranks: [ 1.  2.  3.]
pred ranks: [ 1.  2.  3.]


True

In [12]:
# Test for comparing rank orders - UNORDERED MATCH
exp_pKas = np.array([2.4, 4.3, 7.0])
pred_pKas = np.array([2.5, 7.0, 4.5])

#exp_pKa_ranks = list(np.array([1, 3, 2]))
#pred_pKa_ranks = list(np.array([1, 3, 2]))

exp_pKa_ranks = ss.rankdata(exp_pKas)
print("exp ranks:", exp_pKa_ranks)
pred_pKa_ranks = ss.rankdata(pred_pKas)
print("pred ranks:", pred_pKa_ranks)

# Is rank order the same?
if list(exp_pKa_ranks) == list(pred_pKa_ranks):
    ordered_match = True
else:
    ordered_match = False
    
ordered_match

exp ranks: [ 1.  2.  3.]
pred ranks: [ 1.  3.  2.]


False

In [13]:
df_hungarian.head()

Unnamed: 0,$\Delta$pKa error (calc - exp),Molecule ID,name,pKa (calc),pKa (exp),pKa ID,pKa SEM (calc),pKa SEM (exp),participant,receipt_id
0,0.18,SM01,COSMOtherm_FINE17,9.71,9.53,SM01_pKa1,0.65,0.01,Chris Loschen,0hxtm
1,1.18,SM02,COSMOtherm_FINE17,6.21,5.03,SM02_pKa1,0.65,0.01,Chris Loschen,0hxtm
2,-6.39,SM03,COSMOtherm_FINE17,0.63,7.02,SM03_pKa1,0.65,0.01,Chris Loschen,0hxtm
3,-0.61,SM04,COSMOtherm_FINE17,5.41,6.02,SM04_pKa1,0.65,0.01,Chris Loschen,0hxtm
4,-0.06,SM05,COSMOtherm_FINE17,4.53,4.59,SM05_pKa1,0.65,0.01,Chris Loschen,0hxtm


In [24]:
pKa_rank_comparison_list =[]

# Iterate through methods
for method in prediction_methods:
#for method in ["Full quantum chemical calculation of free energies and fit to experimental pKa"]:

    submission_ID = df_hungarian[df_hungarian["name"] == method]["receipt_id"].values[0]
    df_hungarian_1method = df_hungarian[df_hungarian["name"] == method].reset_index(drop=True)
     
    mol_IDs = list(df_hungarian_1method["Molecule ID"])

    #Iterate through molecules
    for mol_ID in mol_IDs:
        df_hungarian_1method_1mol = df_hungarian_1method[df_hungarian_1method["Molecule ID"] == mol_ID].reset_index(drop=True)
        
        pKa_IDs = df_hungarian_1method_1mol['pKa ID'].values
        
        #Assign Rank order of experimental and predicted pKa.
        exp_pKas = df_hungarian_1method_1mol['pKa (exp)'].values 
        exp_pKa_ranks = ss.rankdata(exp_pKas )
        
        
        pred_pKas = df_hungarian_1method_1mol['pKa (calc)'].values
        pred_pKa_ranks = ss.rankdata(pred_pKas) # rank is given to only matched pred pKas

    
        # Is rank order the same?
        if list(exp_pKa_ranks) == list(pred_pKa_ranks):
            ordered_match = True
        else:
            ordered_match = False

        
        pKa_rank_comparison_list.append([mol_ID, ordered_match, pKa_IDs, exp_pKas, pred_pKas, exp_pKa_ranks, pred_pKa_ranks, submission_ID])
    
    
# Convert to pandas dataframe
df_compare_ranks_hungarian = pd.DataFrame(pKa_rank_comparison_list, columns = ['mol ID', 'ordered match', 'pKa IDs', 'pKa (exp)', 
                                               'pKa (pred)', 'pKa rank (exp)', 'pKa rank (pred)', 'submission ID'])
df_compare_ranks_hungarian = df_compare_ranks_hungarian.astype(str)
df_compare_ranks_hungarian = df_compare_ranks_hungarian_str.drop_duplicates()
df_compare_ranks_hungarian

Unnamed: 0,mol ID,ordered match,pKa IDs,pKa (exp),pKa (pred),pKa rank (exp),pKa rank (pred),submission ID
0,SM01,True,['SM01_pKa1'],[ 9.53],[ 9.11],[ 1.],[ 1.],nb003
1,SM02,True,['SM02_pKa1'],[ 5.03],[ 3.88],[ 1.],[ 1.],nb003
2,SM03,True,['SM03_pKa1'],[ 7.02],[ 9.63],[ 1.],[ 1.],nb003
3,SM04,True,['SM04_pKa1'],[ 6.02],[ 4.53],[ 1.],[ 1.],nb003
4,SM05,True,['SM05_pKa1'],[ 4.59],[ 6.34],[ 1.],[ 1.],nb003
5,SM06,True,['SM06_pKa1' 'SM06_pKa2'],[ 3.03 11.74],[ 0.47 12.67],[ 1. 2.],[ 1. 2.],nb003
7,SM07,True,['SM07_pKa1'],[ 6.08],[ 2.67],[ 1.],[ 1.],nb003
8,SM08,True,['SM08_pKa1'],[ 4.22],[ 5.15],[ 1.],[ 1.],nb003
9,SM09,True,['SM09_pKa1'],[ 5.37],[ 4.66],[ 1.],[ 1.],nb003
10,SM10,True,['SM10_pKa1'],[ 9.02],[ 10.93],[ 1.],[ 1.],nb003


In [25]:
# Print out pKas that don't preserve increasing order when matched my Hungarian algorithm
df_unordered_matching_hungarian = df_compare_ranks_hungarian[df_compare_ranks_hungarian["ordered match"] == False]
df_unordered_matching_hungarian

Unnamed: 0,mol ID,ordered match,pKa IDs,pKa (exp),pKa (pred),pKa rank (exp),pKa rank (pred),submission ID


In [26]:
# Just SM18
df_unordered_matching_hungarian_SM18 = df_compare_ranks_hungarian[df_compare_ranks_hungarian["mol ID"] == 'SM18']
df_unordered_matching_hungarian_SM18

Unnamed: 0,mol ID,ordered match,pKa IDs,pKa (exp),pKa (pred),pKa rank (exp),pKa rank (pred),submission ID
21,SM18,True,['SM18_pKa1' 'SM18_pKa3' 'SM18_pKa2'],[ 2.15 11.02 9.58],[ 1.58 10.13 9.97],[ 1. 3. 2.],[ 1. 3. 2.],nb003
52,SM18,True,['SM18_pKa1' 'SM18_pKa3' 'SM18_pKa2'],[ 2.15 11.02 9.58],[ 0.18 11.75 11.53],[ 1. 3. 2.],[ 1. 3. 2.],nb006
83,SM18,True,['SM18_pKa1' 'SM18_pKa2' 'SM18_pKa3'],[ 2.15 9.58 11.02],[ 2.2 9.92 10.42],[ 1. 2. 3.],[ 1. 2. 3.],mkhqa
114,SM18,True,['SM18_pKa1' 'SM18_pKa2' 'SM18_pKa3'],[ 2.15 9.58 11.02],[ 2.48 9.38 10.5 ],[ 1. 2. 3.],[ 1. 2. 3.],nb007
145,SM18,True,['SM18_pKa1' 'SM18_pKa2' 'SM18_pKa3'],[ 2.15 9.58 11.02],[ 2.14 7.53 9.26],[ 1. 2. 3.],[ 1. 2. 3.],yqkga
176,SM18,True,['SM18_pKa1' 'SM18_pKa3' 'SM18_pKa2'],[ 2.15 11.02 9.58],[ 0.57 11.46 10.57],[ 1. 3. 2.],[ 1. 3. 2.],nb005
207,SM18,True,['SM18_pKa1' 'SM18_pKa2' 'SM18_pKa3'],[ 2.15 9.58 11.02],[ 0.95 9.43 12.75],[ 1. 2. 3.],[ 1. 2. 3.],ds62k
242,SM18,True,['SM18_pKa1' 'SM18_pKa2' 'SM18_pKa3'],[ 2.15 9.58 11.02],[ 2.29 9.71 10.61],[ 1. 2. 3.],[ 1. 2. 3.],37xm8
272,SM18,True,['SM18_pKa1' 'SM18_pKa2' 'SM18_pKa3'],[ 2.15 9.58 11.02],[ -0.26 5. 10.98],[ 1. 2. 3.],[ 1. 2. 3.],hytjn
303,SM18,True,['SM18_pKa1' 'SM18_pKa3' 'SM18_pKa2'],[ 2.15 11.02 9.58],[ 1.81 9.91 9.24],[ 1. 3. 2.],[ 1. 3. 2.],nb002


In [27]:
# SM18 prediction of 0hxtm method was also matched in an order.
df_unordered_matching_hungarian_SM18_0hxtm = df_unordered_matching_hungarian_SM18[df_compare_ranks_hungarian["submission ID"] == '0hxtm']
df_unordered_matching_hungarian_SM18_0hxtm

  


Unnamed: 0,mol ID,ordered match,pKa IDs,pKa (exp),pKa (pred),pKa rank (exp),pKa rank (pred),submission ID
945,SM18,True,['SM18_pKa2' 'SM18_pKa1'],[ 9.58 2.15],[ 1.84 0.5 ],[ 2. 1.],[ 2. 1.],0hxtm


There isn't any matches out of order in this set.

### Was there a out of order match in the past for Hungarian matching? Is it random for SM18 in 0hxtm submission?

In [29]:
# SAMPL6 repository branch pKa_typeIII_analysis3_hungarian
# https://github.com/MobleyLab/SAMPL6/blob/pKa_typeIII_analysis3_hungarian/physical_properties/pKa/analysis/analysis_of_typeIII_predictions/analysis_outputs_hungarian/typeIII_submission_collection.csv
hungarian_collection_a3 = 'typeIII_submission_collection_hungarian_analysis3.csv'
df_hungarian = pd.read_csv(hungarian_collection_a3, index_col=0)


pKa_rank_comparison_list =[]

# Iterate through methods
for method in prediction_methods:
#for method in ["Full quantum chemical calculation of free energies and fit to experimental pKa"]:

    submission_ID = df_hungarian[df_hungarian["name"] == method]["receipt_id"].values[0]
    df_hungarian_1method = df_hungarian[df_hungarian["name"] == method].reset_index(drop=True)
     
    mol_IDs = list(df_hungarian_1method["Molecule ID"])

    #Iterate through molecules
    for mol_ID in mol_IDs:
        df_hungarian_1method_1mol = df_hungarian_1method[df_hungarian_1method["Molecule ID"] == mol_ID].reset_index(drop=True)
        
        pKa_IDs = df_hungarian_1method_1mol['pKa ID'].values
        
        #Assign Rank order of experimental and predicted pKa.
        exp_pKas = df_hungarian_1method_1mol['pKa (exp)'].values 
        exp_pKa_ranks = ss.rankdata(exp_pKas )
        
        
        pred_pKas = df_hungarian_1method_1mol['pKa (calc)'].values
        pred_pKa_ranks = ss.rankdata(pred_pKas) # rank is given to only matched pred pKas

    
        # Is rank order the same?
        if list(exp_pKa_ranks) == list(pred_pKa_ranks):
            ordered_match = True
        else:
            ordered_match = False

        
        pKa_rank_comparison_list.append([mol_ID, ordered_match, pKa_IDs, exp_pKas, pred_pKas, exp_pKa_ranks, pred_pKa_ranks, submission_ID])
    
    
# Convert to pandas dataframe
df_compare_ranks_hungarian = pd.DataFrame(pKa_rank_comparison_list, columns = ['mol ID', 'ordered match', 'pKa IDs', 'pKa (exp)', 
                                               'pKa (pred)', 'pKa rank (exp)', 'pKa rank (pred)', 'submission ID'])

# Print out pKas that don't preserve increasing order when matched my Hungarian algorithm
df_unordered_matching_hungarian = df_compare_ranks_hungarian[df_compare_ranks_hungarian["ordered match"] == False]
df_unordered_matching_hungarian = df_unordered_matching_hungarian.astype(str)
df_unordered_matching_hungarian = df_unordered_matching_hungarian.drop_duplicates()
df_unordered_matching_hungarian

Unnamed: 0,mol ID,ordered match,pKa IDs,pKa (exp),pKa (pred),pKa rank (exp),pKa rank (pred),submission ID
145,SM18,False,['SM18_pKa1' 'SM18_pKa3' 'SM18_pKa2'],[ 2.15 11.02 9.58],[ 2.14 7.53 9.26],[ 1. 3. 2.],[ 1. 2. 3.],yqkga
334,SM18,False,['SM18_pKa3' 'SM18_pKa2' 'SM18_pKa1'],[ 11.02 9.58 2.15],[ 7.59 9.51 1.41],[ 3. 2. 1.],[ 2. 3. 1.],ryzue
574,SM18,False,['SM18_pKa1' 'SM18_pKa3' 'SM18_pKa2'],[ 2.15 11.02 9.58],[ 4.29 6.6 7.43],[ 1. 3. 2.],[ 1. 2. 3.],yc70m
945,SM18,False,['SM18_pKa1' 'SM18_pKa2'],[ 2.15 9.58],[ 1.84 0.5 ],[ 1. 2.],[ 2. 1.],0hxtm


In [30]:
# SAMPL6 repository branch pKa_typeIII_analysis5_hungarian
# https://github.com/MobleyLab/SAMPL6/blob/pKa_typeIII_analysis5/physical_properties/pKa/analysis/analysis_of_typeIII_predictions/analysis_outputs_hungarian/typeIII_submission_collection.csv
hungarian_collection_a5 = 'typeIII_submission_collection_hungarian_analysis5.csv'
df_hungarian = pd.read_csv(hungarian_collection_a5, index_col=0)


pKa_rank_comparison_list =[]

# Iterate through methods
for method in prediction_methods:
#for method in ["Full quantum chemical calculation of free energies and fit to experimental pKa"]:

    submission_ID = df_hungarian[df_hungarian["name"] == method]["receipt_id"].values[0]
    df_hungarian_1method = df_hungarian[df_hungarian["name"] == method].reset_index(drop=True)
     
    mol_IDs = list(df_hungarian_1method["Molecule ID"])

    #Iterate through molecules
    for mol_ID in mol_IDs:
        df_hungarian_1method_1mol = df_hungarian_1method[df_hungarian_1method["Molecule ID"] == mol_ID].reset_index(drop=True)
        
        pKa_IDs = df_hungarian_1method_1mol['pKa ID'].values
        
        #Assign Rank order of experimental and predicted pKa.
        exp_pKas = df_hungarian_1method_1mol['pKa (exp)'].values 
        exp_pKa_ranks = ss.rankdata(exp_pKas )
        
        
        pred_pKas = df_hungarian_1method_1mol['pKa (calc)'].values
        pred_pKa_ranks = ss.rankdata(pred_pKas) # rank is given to only matched pred pKas

    
        # Is rank order the same?
        if list(exp_pKa_ranks) == list(pred_pKa_ranks):
            ordered_match = True
        else:
            ordered_match = False

        
        pKa_rank_comparison_list.append([mol_ID, ordered_match, pKa_IDs, exp_pKas, pred_pKas, exp_pKa_ranks, pred_pKa_ranks, submission_ID])
    
    
# Convert to pandas dataframe
df_compare_ranks_hungarian = pd.DataFrame(pKa_rank_comparison_list, columns = ['mol ID', 'ordered match', 'pKa IDs', 'pKa (exp)', 
                                               'pKa (pred)', 'pKa rank (exp)', 'pKa rank (pred)', 'submission ID'])

# Print out pKas that don't preserve increasing order when matched my Hungarian algorithm
df_unordered_matching_hungarian = df_compare_ranks_hungarian[df_compare_ranks_hungarian["ordered match"] == False]
df_unordered_matching_hungarian

Unnamed: 0,mol ID,ordered match,pKa IDs,pKa (exp),pKa (pred),pKa rank (exp),pKa rank (pred),submission ID


In [31]:
# Just SM18
df_unordered_matching_hungarian_SM18 = df_compare_ranks_hungarian[df_compare_ranks_hungarian["mol ID"] == 'SM18']

# SM18 prediction of yqkga method 
df_unordered_matching_hungarian_SM18_yqkga = df_unordered_matching_hungarian_SM18[df_compare_ranks_hungarian["submission ID"] == 'yqkga']
df_unordered_matching_hungarian_SM18_yqkga

  """


Unnamed: 0,mol ID,ordered match,pKa IDs,pKa (exp),pKa (pred),pKa rank (exp),pKa rank (pred),submission ID
145,SM18,True,"[SM18_pKa1, SM18_pKa2, SM18_pKa3]","[2.15, 9.58, 11.02]","[2.14, 7.53, 9.26]","[1.0, 2.0, 3.0]","[1.0, 2.0, 3.0]",yqkga
146,SM18,True,"[SM18_pKa1, SM18_pKa2, SM18_pKa3]","[2.15, 9.58, 11.02]","[2.14, 7.53, 9.26]","[1.0, 2.0, 3.0]","[1.0, 2.0, 3.0]",yqkga
147,SM18,True,"[SM18_pKa1, SM18_pKa2, SM18_pKa3]","[2.15, 9.58, 11.02]","[2.14, 7.53, 9.26]","[1.0, 2.0, 3.0]","[1.0, 2.0, 3.0]",yqkga


Hungarian matching algorithm doesn't always make matches that break the natural order of pKa values.
Only in cases where the order preserving match and the unordered match have the same cost value, then the results are random.

The hungarian collection set of branch `pKa_typeIII_analysis5` (commit b1bef28) doesn't have any unordered matches.  
The hungarian collection set of branch `pKa_typeIII_analysis3` (commit 70d828e) has unordered matched for SM18 pKas for the following submission files:
0hxtm, yqkga, ryzue, yc70m
