In [3]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from math import sqrt

def calculate_metrics(true_values, pred_values):
    """Score predictions against reference values with three regression metrics.

    Args:
        true_values: Reference targets; passed as the first argument to each
            sklearn metric function.
        pred_values: Predicted targets; passed as the second argument.

    Returns:
        dict: Keys ``"mse"``, ``"mae"`` and ``"r^2"`` (in that order), each
        holding the corresponding metric rounded to 3 decimal places.
    """
    # Label -> scorer; dict order fixes both evaluation order and output key order.
    scorers = {
        "mse": mean_squared_error,
        "mae": mean_absolute_error,
        "r^2": r2_score,
    }
    return {
        label: round(scorer(true_values, pred_values), 3)
        for label, scorer in scorers.items()
    }

In [4]:
import pandas as pd

# NOTE(review): both paths are absolute and machine-specific (Windows drive C:),
# so this cell only runs where that exact checkout exists.
test_acid = r'C:\work\DrugDiscovery\main_git\XAI_Chem\data\pKa_basicity_data\gnn_cv_canon_smiles\test_acid.csv'
test_basic = r'C:\work\DrugDiscovery\main_git\XAI_Chem\data\pKa_basicity_data\gnn_cv_canon_smiles\test_basic.csv'

# Read both CSVs the same way: the first column of each file becomes the index.
test_acid_df, test_basic_df = (
    pd.read_csv(csv_path, index_col=0) for csv_path in (test_acid, test_basic)
)

# Stack the rows of the first frame on top of the second (row-wise concat is the default).
test_df = pd.concat([test_acid_df, test_basic_df])

In [5]:
# Print every test compound's SMILES string next to its pKa value, one per line.
for smiles, pka in zip(test_df['Smiles'], test_df['pKa']):
    print(smiles, pka)

O=C(O)C1CC12CC2(F)F 3.85
O=C(O)C12CC(CO1)C2 3.59
O=C(O)C1CCCC(F)(F)C1 4.18
O=C(O)[C@@H]1C[C@@H]2C[C@@H]2C1 4.24
O=C(O)C1CC2(CC(F)C2)C1 4.39
O=C(O)C1CC12CC2 4.61
O=C(O)CCCC(F)F 4.38
FC(F)(F)C12CCC(CN1)C2 6.23
FC(F)(F)C1CCCN1 6.06
FC(F)(F)C1CCCCN1 6.41
NC1CC12CC2(F)F 7.44
NC1CCCC1(F)F 7.54
FC1(F)CCNCC1 7.75
FC1(F)C2CCC1CNC2 7.69
NC1CCC(F)(F)C1 8.83
FC1CNC1 8.55
N[C@@H]1CC[C@H]1C(F)F 8.98
C[C@@H]1C[C@@H]1N 9.15
FCC1CCCCN1 9.52
FCC1CCCN1 9.55
FCC1CCN1 9.67
NC1CC2(C1)CC(F)C2 9.81
FC(F)(F)C1CCNCC1 9.6
CCCCN 10.5
CC12CCC(CC1)CN2 11.27
CC12CCCC(CN1)C2 10.99


In [6]:
# Hard-coded lookup table: SMILES string -> predicted pKa for that compound.
# The keys appear in the same order as the rows printed from `test_df` earlier
# in the notebook.
# NOTE(review): "YANG" presumably names the external model/tool that produced
# these numbers; the notebook does not show their origin — confirm and cite it.
predicted_using_YANG = {
    "O=C(O)C1CC12CC2(F)F": 3.41,
    "O=C(O)C12CC(CO1)C2": 3.59,
    "O=C(O)C1CCCC(F)(F)C1": 4.13,
    "O=C(O)[C@@H]1C[C@@H]2C[C@@H]2C1": 4.9,
    "O=C(O)C1CC2(CC(F)C2)C1": 4.23,
    "O=C(O)C1CC12CC2": 4.53,
    "O=C(O)CCCC(F)F": 3.76,
    "FC(F)(F)C12CCC(CN1)C2": 10.72,
    "FC(F)(F)C1CCCN1": 9.87,
    "FC(F)(F)C1CCCCN1": 10.2,
    "NC1CC12CC2(F)F": 10.24,
    "NC1CCCC1(F)F": 9.22,
    "FC1(F)CCNCC1": 10.69,
    "FC1(F)C2CCC1CNC2": 10.43,
    "NC1CCC(F)(F)C1": 11.12,
    "FC1CNC1": 12.77,
    "N[C@@H]1CC[C@H]1C(F)F": 10.54,
    "C[C@@H]1C[C@@H]1N": 15.53,
    "FCC1CCCCN1": 11.93,
    "FCC1CCCN1": 11.65,
    "FCC1CCN1": 12.04,
    "NC1CC2(C1)CC(F)C2": 13.88,
    "FC(F)(F)C1CCNCC1": 10.45,
    "CCCCN": 15.88,
    "CC12CCC(CC1)CN2": 14.11,
    "CC12CCCC(CN1)C2": 13.61
}

In [None]:
# `predicted_using_YANG` is defined once, in the cell above. Repeating the same
# literal here only created a second copy that could silently drift out of sync,
# so this cell validates the existing table against the test set instead.
test_smiles = set(test_df['Smiles'])
predicted_smiles = set(predicted_using_YANG)

# Every test compound needs a prediction, and every prediction must belong to
# a test compound; otherwise the metrics would be computed on mismatched data.
missing_predictions = test_smiles - predicted_smiles
unused_predictions = predicted_smiles - test_smiles

assert not missing_predictions, (
    f"Test compounds without a prediction: {missing_predictions}"
)
assert not unused_predictions, (
    f"Predictions for compounds not in the test set: {unused_predictions}"
)

In [7]:
# Pair each true pKa with its prediction by SMILES key rather than by position:
# relying on the dict's insertion order matching the row order of `test_df`
# would silently score the wrong pairs if either were ever reordered.
test_smiles_in_row_order = test_df['Smiles'].tolist()

# Fail loudly if any test compound has no prediction instead of mis-scoring.
unmatched_smiles = [
    smiles for smiles in test_smiles_in_row_order
    if smiles not in predicted_using_YANG
]
if unmatched_smiles:
    raise KeyError(f"No prediction found for SMILES: {unmatched_smiles}")

predicted_values = [predicted_using_YANG[smiles] for smiles in test_smiles_in_row_order]
true_values = test_df['pKa'].tolist()

# Last expression of the cell, so the metrics dict is displayed as the cell output.
calculate_metrics(true_values=true_values,
                  pred_values=predicted_values)

{'mse': 8.465, 'mae': 2.36, 'r^2': -0.478}