### LogP dataset:

In [None]:
# torch.optim is needed later in the notebook, where the Adam optimizer is built.
import torch.optim

from dglchem.datasets import LogP
# Instantiate the LogP dataset and keep it in the notebook-global `data`.
# NOTE(review): `log=True` presumably turns on progress/log output while the
# dataset is built — confirm against the LogP constructor.
data = LogP(log=True) 

### Example outputs:

In [None]:
# Draw an example from the dataset held in `data`.
# NOTE(review): 10 is presumably the index of the SMILES entry to render —
# confirm against the draw_smile signature.
data.draw_smile(10)

### Analysis:

In [None]:
from dglchem.utils import DataSet, compound_nums_chart

# Load the processed BradleyDoublePlus pickle into the notebook-global `data`.
# NOTE(review): this is an absolute, machine-specific path and will not
# resolve on another machine.
data = DataSet(
    file_path='/Users/faerte/Desktop/dgl_chem/notebooks/data/processed/BradleyDoublePlus.pickle',
)

# Chart built from the dataset's SMILES strings, drawn at 10 x 4.
compound_nums_chart(
    data.smiles,
    fig_size=(10, 4),
)

In [None]:
from dglchem.utils import DataSet
# Reload the processed BradleyDoublePlus pickle so this cell can be run on its own.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
data = DataSet(file_path='/Users/faerte/Desktop/dgl_chem/notebooks/data/processed/BradleyDoublePlus.pickle')
# NOTE(review): presumably plots molecular weight against the target using the
# values from before standardization — confirm against weight_vs_target_plot.
data.weight_vs_target_plot(pre_standardization=True)

In [None]:
%matplotlib inline

from dglchem.utils import DataSet
# Reload the processed BradleyDoublePlus pickle so this cell can be run on its own.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
data = DataSet(file_path='/Users/faerte/Desktop/dgl_chem/notebooks/data/processed/BradleyDoublePlus.pickle')
# Same plot as the previous cell but with default standardization settings and
# an explicit figure height.
# NOTE(review): save_fig=True presumably writes the figure to disk — confirm
# the output location against weight_vs_target_plot.
data.weight_vs_target_plot(fig_height=3, save_fig=True)

In [None]:
%matplotlib inline

from dglchem.utils import DataSet

# Reload the processed BradleyDoublePlus pickle into the notebook-global `data`.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
data = DataSet(
    file_path='/Users/faerte/Desktop/dgl_chem/notebooks/data/processed/BradleyDoublePlus.pickle',
)

# Request two named plots from the dataset analysis and keep both returned
# objects for later inspection.
# NOTE(review): download=True presumably saves the analysis output — confirm
# against DataSet.analysis.
results, figures = data.analysis(
    download=True,
    plots=[
        'atom_type_frequency',
        'total_num_h_frequency',
    ],
    fig_size=[8, 4],
)

### Butina clustering:

In [None]:
from dglchem.datasets import BradleyDoublePlus
from dglchem.utils.data_splitting import taylor_butina_clustering
# Rebind the notebook-global `data` to a freshly constructed BradleyDoublePlus dataset.
data = BradleyDoublePlus()

# Cluster the dataset with the Taylor-Butina helper.
# NOTE(review): threshold=0.5 is presumably the similarity/distance cutoff used
# to form clusters — confirm which one against taylor_butina_clustering.
clusters = taylor_butina_clustering(data, threshold=0.5)

## GNN Model:

In [None]:
from dglchem.utils import DataSet

# Reload the processed BradleyDoublePlus pickle; the cells below read `data`,
# `train`, `test` and `val` from this cell.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
data = DataSet(file_path='/Users/faerte/Desktop/dgl_chem/notebooks/data/processed/BradleyDoublePlus.pickle')

# NOTE(review): the unpacking assumes get_splits() returns (train, test, val)
# in exactly this order — verify, since a swapped order would silently train
# or evaluate on the wrong subset.
train, test, val = data.get_splits()
# Preview entries 1..9 of the training split (the slice skips index 0).
print(train[1:10])

In [None]:
# Report the node and edge feature widths of the loaded dataset, one per line.
print(
    f'Node feature dimension: {data.num_node_features}',
    f'Edge feature dimension: {data.num_edge_features}',
    sep='\n',
)

### Model definition:

In [None]:
from dglchem.utils import DataSet
from dglchem.models import MGConv
import torch

# Build the MGConv model: input widths come from the loaded dataset, hidden
# widths are 64 for both nodes and edges, with 5 layers.
model = MGConv(
    node_dim=data.num_node_features,
    edge_dim=data.num_edge_features,
    node_hidden_dim=64,
    edge_hidden_dim=64,
    num_layers=5,
)
print(model)

# NOTE(review): `device` is not referenced by any later cell visible in this
# notebook; the model is never explicitly moved to it.
device = torch.device('cpu')

### Loss and Optimizer

In [None]:
from torch import nn

# Mean-squared-error loss, used in its functional form.
loss_func = nn.functional.mse_loss

# Adam over all model parameters with a small L2 penalty (weight decay).
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=5e-4,
)

### Training

In [None]:
from dglchem.utils import train_model

# Train on the `train` split while monitoring the `val` split; the two returned
# loss histories are plotted in the next cell.
# NOTE(review): with early_stopping=True and patience=3 the run presumably ends
# before 100 epochs once the validation loss stops improving — confirm against
# train_model.
train_loss, val_loss = train_model(
    model=model,
    loss_func=loss_func,
    optimizer=optimizer,
    train_data_loader=train,
    val_data_loader=val,
    epochs=100,
    early_stopping=True,
    patience=3,
)

### Loss plot

In [None]:
from dglchem.utils import loss_plot

# Plot both loss histories returned by train_model. The second curve comes
# from the validation loader, not from the held-out test split (which is only
# evaluated in the testing cell), so it is labelled as validation loss.
loss_plot([train_loss, val_loss], ['train loss', 'validation loss'])

### Testing

In [None]:
from dglchem.utils import test_model

# Evaluate the trained model on the `test` split with the same loss function
# used for training; the call is the cell's last expression, so any return
# value is shown as the cell output.
test_model(
    model=model,
    loss_func=loss_func,
    test_data_loader=test,
)