In [2]:
import pandas as pd
import numpy as np
from openff.toolkit.topology import Molecule
from openff.units import unit
import seaborn as sb
from scipy import stats
import matplotlib.pyplot as plt
from rdkit.Chem import Draw
from chargecraft.storage.storage import MoleculePropStore
from rdkit.Chem import AllChem

def equalityline(x, y, ax = None, **kwargs):
    """Draw the identity (y = x) line over the combined range of ``x`` and ``y``.

    Parameters
    ----------
    x, y : iterables of mutually comparable values
        Only the smallest and largest values of each are used.
    ax : optional
        Object exposing a ``plot`` method. Any falsy value (including the
        default ``None``) falls back to ``plt.gca()``.
    **kwargs
        Passed through unchanged to ``ax.plot``.
    """
    # Extremes across both data sets, so the line covers every point.
    lowest = min(min(x), min(y))
    highest = max(max(x), max(y))
    span = [lowest, highest]

    target = ax if ax else plt.gca()
    # Using the same span for both coordinates yields the diagonal.
    target.plot(span, span, **kwargs)

def m(x, y, ax = None, **kwargs):
    """Annotate an axes with the root-mean-square error between ``x`` and ``y``.

    Parameters
    ----------
    x, y : array-like
        Values to compare; both are converted to NumPy arrays.
    ax : optional
        Object exposing ``annotate`` and ``transAxes``. Any falsy value
        (including the default ``None``) falls back to ``plt.gca()``.
    **kwargs
        Accepted but not used by this function.
    """
    first = np.asarray(x)
    second = np.asarray(y)
    target = ax if ax else plt.gca()

    # RMSE = sqrt(sum((x - y)^2) / N), with N taken from the first axis of y.
    squared_error = (first - second) ** 2
    mean_squared_error = np.sum(squared_error, axis=0) / second.shape[0]
    rmse = mean_squared_error ** 0.5

    label = f'$RMSE= {float(rmse):.2f}$'
    # Coordinates are in axes-fraction units: top-left corner of the panel.
    target.annotate(
        label,
        xy=(.01, .99),
        xycoords=target.transAxes,
        fontsize=8,
        color='darkred',
        backgroundcolor='#FFFFFF99',
        ha='left',
        va='top',
    )
    
def kde(x, y, ax = None, **kwargs):
    """Scatter ``y`` against ``x``, colouring each point by a Gaussian KDE value.

    Parameters
    ----------
    x, y : array-like
        Point coordinates. Lists, pandas Series and NumPy arrays are all
        accepted; both are coerced to NumPy arrays before use.
    ax : optional
        Object exposing a ``scatter`` method. Any falsy value (including the
        default ``None``) falls back to ``plt.gca()``.
    **kwargs
        Accepted but not forwarded to ``scatter``.
    """
    # Coerce unconditionally. The previous guard converted only when both
    # inputs were *already* ndarrays, so non-array inputs were never converted.
    x = np.asarray(x)
    y = np.asarray(y)
    ax = ax or plt.gca()
    # Density estimated from x alone and evaluated at the y values.
    # NOTE(review): a joint 2-D density would be
    # gaussian_kde(np.vstack([x, y])) evaluated at the same stack; confirm
    # whether the 1-D form is intended before changing it.
    kernel = stats.gaussian_kde(x)(y)
    ax.scatter(x, y, c=kernel, s=5)

In [3]:
# Absolute path of the parquet file read in this cell.
parquet = "/mnt/storage/nobackup/nca121/paper_charge_comparisons/async_chargecraft_more_workers/compare_fda_drugs/fda_drugs_comparison.parquet"
# Read the whole file into a DataFrame.
df = pd.read_parquet(path=parquet)
# Bare trailing expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,am1bcc_charges,am1bcc_dipole,mol_id,charge_model_charges,charge_model_dipoles,charge_model_esp,charge_model_esp_rmse,dipole_model_charges,dipole_model_dipoles,dipole_model_esp,...,esp_model_charges,esp_model_dipoles,esp_model_esp,esp_model_esp_rmse,molecule,grid,geometry,conformer_no,smiles,energy
0,"[-0.11733333333333335, -0.12633333333333335, 0...",0.984904,a3a8dbef357823e19effae02679136f0a73fb43aacfed7...,"[-0.2393501102924347, -0.449552059173584, 0.84...",1.377313,,,"[-0.22086648643016815, -0.48414021730422974, 0...",1.019288,,...,"[-0.23057658970355988, -0.4389609694480896, 0....",1.055032,,,[C:1]([C:2]([C:3](=[O:4])[O:5][H:12])([H:10])[...,,"[0.4816105356331677, -0.28031156018474457, 0.4...",,[H][O][C](=[O])[C]([H])([H])[C]([H])([H])[C]([...,
1,"[-0.09209996666666667, 0.2392000333333333, -0....",0.571263,034077fddb72c16e12a0fde8979346928e5ec29ff061ec...,"[-0.4929315745830536, 0.23280927538871765, -0....",0.927918,,,"[-0.5102951526641846, 0.19728167355060577, -0....",0.566691,,...,"[-0.49134203791618347, 0.17633940279483795, -0...",0.521898,,,[C:1]([C:2]1([H:17])[O:3][C@:4]2([C:5]([H:18])...,,"[-3.5563456356455, 1.1691259459312888, -0.1504...",,[H][C]([H])([H])[C]1([H])[O][C@@]2([C]([H])([H...,
2,"[-0.6180428285714286, 0.6747571714285714, -0.6...",2.633033,9dbe361af618d32b3f2a51c35cea9f9c2b22e862a84d8f...,"[-0.6046799421310425, 0.7969401478767395, -0.9...",2.528747,,,"[-0.5585890412330627, 0.7985678911209106, -0.9...",2.202408,,...,"[-0.5473326444625854, 0.7788166403770447, -0.9...",2.396384,,,[O:1]=[C:2]([N:3]([H:19])[H:20])[c:4]1[c:5]([H...,,"[4.044568506287112, 1.0481069132318785, 2.4300...",,[H][c]1[c]([H])[c]2[c]([c]([H])[c]1[C](=[O])[N...,
3,"[-0.5880333, 0.6827667, -0.5083333, 0.4823667,...",4.299979,46cd1148c7e685071c9847476b5073e05ff62b2e930ec8...,"[-0.5516740679740906, 0.7629846930503845, -0.5...",3.759381,,,"[-0.49918457865715027, 0.7404887676239014, -0....",4.803611,,...,"[-0.49682196974754333, 0.7442572712898254, -0....",3.413103,,,[O:1]=[C:2]([N:3]([C:4]1=[N:5][C:6]([H:23])=[C...,,"[1.4347978321719217, 1.1291519594696937, -0.67...",,[H][C]1=[C]([N+](=[O])[O-])[S][C]([N]([H])[C](...,
4,"[-0.15095454545454545, -0.09395454545454547, -...",2.740803,8be5e491a7ff7516c5a8900cb1ff02255ba0a92c2ffd9d...,"[-0.17944848537445068, -0.08289087563753128, -...",2.612085,,,"[-0.16700813174247742, -0.08650042116641998, -...",2.367650,,...,"[-0.17906750738620758, -0.09176415950059891, -...",2.258413,,,[c:1]1([H:15])[c:2]([H:16])[c:3]([H:17])[c:4]2...,,"[-2.3019895628857534, 1.66089883668639, 0.0488...",,[H][c]1[c]([H])[c]([H])[c]2[c]([c]1[H])[O][N]=...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,"[0.10972777777777779, -0.4225722222222222, 0.1...",2.683916,554332c337858c9e70056fbd6d90796cc6faf047c03b76...,"[-0.10309357941150665, -0.3437732458114624, 0....",2.486548,,,"[-0.08993108570575714, -0.2598826289176941, -0...",2.319923,,...,"[-0.10741200298070908, -0.25548937916755676, 0...",2.211292,,,[C:1]([O:2][C:3]([C@:4]([C:5]([N:6]([C:7]([c:8...,,"[-3.6950358125560223, -1.7242061778198485, -1....",,[H][c]1[c]([H])[c]([H])[c]([C]([H])([H])[N]([H...,
58,"[-0.09064, -0.06573999999999999, -0.09064, -0....",1.642161,349f896ddb9b893655d5f1729720d4a6bc6fe8a928b259...,"[-0.4440229833126068, 0.011362901888787746, -0...",1.913229,,,"[-0.45204710960388184, -0.0014575039967894554,...",1.683096,,...,"[-0.44885510206222534, 0.0028831232339143753, ...",1.686971,,,[C:1]([C:2]([C:3]([H:28])([H:29])[H:30])([C:4]...,,"[4.75571580990446, -1.2370841936522374, 1.1989...",,[H][c]1[c]([O][C]([H])([H])[H])[c]([O][C]([H])...,
59,"[0.1521000188679245, -0.7265999811320755, 0.15...",2.931095,0b300d8c6297dff5414f633b0f016907ab12caa28221f7...,"[-0.2017621397972107, -0.3542194366455078, -0....",3.909910,,,"[-0.1846119910478592, -0.28930678963661194, 0....",2.985963,,...,"[-0.1945943385362625, -0.2557322382926941, -0....",3.419286,,,[C:1]([N:2]1[C:3]([H:31])([H:32])[C:4]([H:33])...,,"[-4.490170826540723, -0.40121464383261407, 2.3...",,[H][C]1=[C]([C]([H])([H])[C@@]2([H])[N]([C]([H...,
60,"[-0.06503999999999999, -0.18103999999999998, 0...",1.892761,af130454564646da0906cc0f3daedda919ec4f1ce6768c...,"[-0.02229616791009903, -0.33492422103881836, 0...",1.616636,,,"[-0.024567941203713417, -0.31594064831733704, ...",1.610689,,...,"[-0.02720537781715393, -0.3451615273952484, 0....",1.393959,,,[c:1]1([H:18])[c:2]([H:19])[c:3]([F:17])[c:4](...,,"[3.7469412265995903, 0.9811082713921565, 0.024...",,[H][C]1=[C]([C](=[O])[N]([H])[H])[N]=[N][N]1[C...,
