In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pytorch_lightning as pl
import numpy as np
import pandas as pd
import torch

##  Datasets
### Dataset

* One-hot encoding of atom types: types = {'H': 0, 'C': 1, 'N': 2, 'O': 3, 'F': 4}

* Atomic number

* Position in xyz 

* Type of hybridization 

* Types of bonds



### Label Description

|Index | Name | Units | Description|
 |:-----|-------|-------|-----------:|
  |0  |index  |   -            |Consecutive, 1-based integer identifier of molecule|
  |1  |mu     |   Debye        |Dipole moment|
  |2  |alpha  |   Bohr^3       |Isotropic polarizability|
  |3  |homo   |   Hartree      |Energy of Highest occupied molecular orbital (HOMO)|
  |4  |lumo   |   Hartree      |Energy of Lowest unoccupied molecular orbital (LUMO)|
 |5 | gap   |    Hartree     | Gap, difference between LUMO and HOMO|
 |6 | r2    |    Bohr^2      | Electronic spatial extent|
 |7 | zpve  |    Hartree     | Zero point vibrational energy|
 |8 | U0    |    Hartree     | Internal energy at 0 K|
 |9 | U     |    Hartree     | Internal energy at 298.15 K|
 |10 | H     |    Hartree     | Enthalpy at 298.15 K|
 |11 | G     |    Hartree     | Free energy at 298.15 K|
 |12 | Cv    |    cal/(mol K) | Heat capacity at 298.15 K|

In [3]:
# Names of the 12 target properties, in the same order as rows 1-12 of the
# label table above. Used below as the "target" column of the median-baseline
# table. Presumably this is also the column order of the dataset's y tensor --
# TODO confirm.
targets = ["mu", "alpha", "homo", "lumo", "gap", "r2", "ZPVE", "U0", "U", "H", "G", "Cv"]

## Calculate MAE based on median

In [4]:
from shared import QM9MainDatamodule

# Instantiate the datamodule with its default arguments and call setup().
# The next cell reads datamodule.train_set (presumably populated by setup() --
# confirm in shared.py).
datamodule = QM9MainDatamodule()
datamodule.setup()

In [5]:
from shared import calculate_metrics
# `y` attribute of the training set's data (presumably the label matrix,
# one row per molecule -- TODO confirm).
ys =  datamodule.train_set.data.y
# Standardize along axis 0: subtract the mean and divide by the std.
# Assumes axis 0 is the sample axis, so every target column is scaled
# independently -- TODO confirm.
ys = (ys - ys.mean(axis=0))/ ys.std(axis=0)
# Median along axis 0 of the standardized labels. `.values` picks the values
# field of the median result (assumes a torch tensor, whose median over a
# dimension returns both values and indices -- TODO confirm).
ms = ys.median(axis=0).values

# Score the constant "always predict the median" baseline with the shared
# helper. Four return values are unpacked and the third one is discarded.
# NOTE(review): binding `_` shadows IPython's last-output variable.
maes, std_maes, _, std_mae = calculate_metrics(ms, ys)
# std_mae is multiplied by 100 to report it as a percentage.
print("std. MAE in % based on median:", std_mae * 100)

std. MAE in % based on median: 78.12502980232239


In [7]:
# Two-column table: target name and the baseline MAE computed in the previous
# cell. Assumes `maes` holds one entry per name in `targets`, in the same
# order -- TODO confirm.
medians = pd.DataFrame({"target":targets, "always_median_model":maes})

In [8]:
from ipynb.fs.full.train_dimenet import GraphQM9Datamodule, DimeNet
from ipynb.fs.full.train_schnet import SchNetModule
from ipynb.fs.full.train_ff import FeedforwadModule, FlattenQM9Datamodule

def get_dict_with_prefix(d, prefix):
    """Select the entries of ``d`` whose key starts with ``prefix``.

    Args:
        d: Mapping with string keys.
        prefix: String that a key must start with to be kept.

    Returns:
        A new dict holding the matching entries, in the iteration order of
        ``d``, with ``prefix`` stripped from the front of every key. ``d``
        itself is not modified.
    """
    cut = len(prefix)
    selected = {}
    for key, value in d.items():
        if not key.startswith(prefix):
            continue
        # Drop the leading prefix; the remainder becomes the new key.
        selected[key[cut:]] = value
    return selected

In [None]:
# Evaluate a saved SchNet checkpoint with trainer.test and collect its "maes/" metrics.
# NOTE(review): absolute, user-specific checkpoint path -- this cell only runs
# on a machine that has exactly this directory layout.
path = "/home/azapala/quantum_mechanics_of_molecules/lightning_logs/schnet/with_gradient_clip/checkpoints/schnet-epoch199-val_loss0.00.ckpt"
# TensorBoard logger rooted at ./lightning_logs/ with run name 'schnet' and
# version 'with_gradient_clip' (the same run/version names appear in `path`).
logger = pl.loggers.tensorboard.TensorBoardLogger("./lightning_logs/", name='schnet', version='with_gradient_clip')

# Rebinds the notebook-level `datamodule` name to a graph datamodule.
datamodule = GraphQM9Datamodule()
# Restore the module from the checkpoint file.
model = SchNetModule.load_from_checkpoint(path)
# gpus=[1] presumably selects the GPU with index 1 -- confirm against the
# installed pytorch_lightning version.
trainer = pl.Trainer(gpus=[1], logger=logger)
res = trainer.test(model, datamodule)

# Take the first element of the test results, keep only the entries whose key
# starts with "maes/" (prefix stripped), and lay them out as one row per key.
# NOTE(review): the column label reads "shcnet", likely a typo for "schnet";
# the saved outputs below use the same label, so confirm before renaming.
schnet = pd.DataFrame.from_dict(get_dict_with_prefix(res[0], "maes/"), orient="index",columns=["%maes_shcnet"])

In [10]:
import torch.nn.functional as F
from shared import Module

class DimeNetModule(Module):
    """Module subclass that holds a DimeNet network and scores it with an L1 loss."""

    def __init__(self, **kwargs):
        """Build the wrapped DimeNet.

        Args:
            **kwargs: Forwarded to ``DimeNet``, except for an optional ``lr``
                entry, which is dropped before the network is constructed.
        """
        super().__init__()
        # 'lr' is not passed on to DimeNet; every other keyword is.
        net_kwargs = {key: value for key, value in kwargs.items() if key != 'lr'}
        self.net = DimeNet(**net_kwargs)

    def step(self, batch, batch_idx):
        """Run the network on one batch and compute the L1 loss.

        Args:
            batch: Object exposing ``z``, ``pos``, ``y`` and ``batch`` attributes.
            batch_idx: Unused here.

        Returns:
            Tuple ``(loss, prediction, target)`` where prediction and target
            are detached from the autograd graph.
        """
        prediction = self.net(batch.z, batch.pos, batch.batch)
        target = batch.y
        loss = F.l1_loss(prediction, target)
        return loss, prediction.detach(), target.detach()

In [None]:
# Evaluate a saved DimeNet checkpoint with trainer.test and collect its "maes/" metrics.
# NOTE(review): absolute, user-specific checkpoint path -- this cell only runs
# on a machine that has exactly this directory layout.
path = "/home/azapala/quantum_mechanics_of_molecules/lightning_logs/dimenet/with_gradient_clip/checkpoints/dimenet-epoch53-val_loss0.00.ckpt"
# TensorBoard logger rooted at ./lightning_logs/ with run name 'dimenet' and
# version 'with_gradient_clip' (the same run/version names appear in `path`).
logger = pl.loggers.tensorboard.TensorBoardLogger("./lightning_logs/", name='dimenet', version='with_gradient_clip')

# Rebinds the notebook-level `datamodule`, `model`, `trainer` and `res` names.
datamodule = GraphQM9Datamodule()
# Restore the DimeNetModule (class defined in the previous cell) from the checkpoint file.
model = DimeNetModule.load_from_checkpoint(path)
# gpus=[1] presumably selects the GPU with index 1 -- confirm against the
# installed pytorch_lightning version.
trainer = pl.Trainer(gpus=[1], logger=logger)
res = trainer.test(model, datamodule)

# Take the first element of the test results, keep only the entries whose key
# starts with "maes/" (prefix stripped), and lay them out as one row per key.
dimenet = pd.DataFrame.from_dict(get_dict_with_prefix(res[0], "maes/"), orient="index",columns=["%maes_dimenet"])

In [None]:
# Evaluate a saved feed-forward checkpoint with trainer.test and collect its "maes/" metrics.
# NOTE(review): absolute, user-specific checkpoint path -- this cell only runs
# on a machine that has exactly this directory layout.
path = "/home/azapala/quantum_mechanics_of_molecules/lightning_logs/feedforward_2_hidden_layers/version_0/checkpoints/feedforward-epoch42-val_loss0.00.ckpt"
# TensorBoard logger rooted at ./lightning_logs/ with run name
# 'feedforward_2_hidden_layers' and version 'version_0' (the same names appear in `path`).
logger = pl.loggers.tensorboard.TensorBoardLogger("./lightning_logs/", name='feedforward_2_hidden_layers', version='version_0')

# Rebinds the notebook-level `datamodule`, `model`, `trainer` and `res` names;
# this model uses FlattenQM9Datamodule rather than the graph datamodule.
datamodule = FlattenQM9Datamodule()
# Restore the module from the checkpoint file.
model = FeedforwadModule.load_from_checkpoint(path)
# gpus=[1] presumably selects the GPU with index 1 -- confirm against the
# installed pytorch_lightning version.
trainer = pl.Trainer(gpus=[1], logger=logger)
res = trainer.test(model, datamodule)

# Take the first element of the test results, keep only the entries whose key
# starts with "maes/" (prefix stripped), and lay them out as one row per key.
ff = pd.DataFrame.from_dict(get_dict_with_prefix(res[0], "maes/"), orient="index",columns=["%maes_ff"])

In [13]:
# Put the per-model result columns side by side (axis=1). `medians` is indexed
# by its "target" column first so that all four frames share the same kind of index.
# Rows are matched on index labels, so this assumes every frame uses the same
# target names as its index -- TODO confirm.
df_res = pd.concat([dimenet, schnet, medians.set_index('target'), ff], axis=1)
# Bare last expression: displays the combined table.
df_res

Unnamed: 0,%maes_dimenet,%maes_shcnet,always_median_model,%maes_ff
mu,0.07011,0.063166,0.771168,0.449393
alpha,0.029109,0.03083,0.76908,0.107129
homo,0.089074,0.095565,0.73481,0.347778
lumo,0.054858,0.054222,0.824307,0.2015
gap,0.061586,0.067103,0.835977,0.237089
r2,0.022543,0.016228,0.710194,0.153225
ZPVE,0.009634,0.011914,0.795175,0.023927
U0,0.011863,0.012979,0.786488,0.037447
U,0.012118,0.011558,0.78664,0.037729
H,0.012576,0.01122,0.786702,0.037535


In [16]:
# Mean of every df_res column (one value per model), displayed as a
# single-column frame.
# NOTE(review): the label "std_mea" looks like a typo for "std_mae" -- confirm
# before renaming, since the saved output uses it.
df_res.mean().to_frame("std_mea")

Unnamed: 0,std_mea
%maes_dimenet,0.034321
%maes_shcnet,0.034562
always_median_model,0.78125
%maes_ff,0.149537
