# Quick Success: Materials Search with Deep Learning

The goal of this quick success is simple, yet an actual implementation may take some time. We are going to write an artificial neural network that predicts a property of a material. To design the network we will use PyTorch, which is the most convenient neural-network environment when the work involves defining new layers.

The list of files in your current directory should be:

- This notebook
- quicksuccess_mining.ipynb
- quicksuccess_modules.ipynb
- quicksuccess_net.ipynb
- quicksuccess_train.ipynb

Main libraries:

In [1]:
%matplotlib inline
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
from IPython import display
from torch.utils.data.sampler import SubsetRandomSampler

In [10]:
# (Re-)load the helper notebooks by executing them with %run.
# Presumably this is where the names used further down but not defined in this
# notebook come from (CIFData, collate_pool, CrystalGraphConvNet, Normalizer,
# train, validate, save_checkpoint) — confirm against the individual notebooks.
# Re-run this cell after editing any of them so the changes take effect here.
%run quicksuccess_mining.ipynb
%run quicksuccess_modules.ipynb
%run quicksuccess_net.ipynb
%run quicksuccess_train.ipynb

## Data mining

In [3]:
# Build the dataset from the "Dataset" directory.
# NOTE(review): the keyword meanings are defined by CIFData, which is not
# visible here. Presumably max_num_nbr caps the neighbours kept per atom and
# dmin/radius/step describe the distance grid used for the neighbour features
# (0..8 in steps of 0.2 gives 41 points, matching the 41 neighbour features
# printed further down) — confirm against the CIFData implementation.
dataset = CIFData(
    root_dir="Dataset",
    max_num_nbr=12,
    radius=8,
    dmin=0,
    step=0.2,
)

In [5]:
# Read the feature widths off the first sample; the model constructor needs them.
# NOTE(review): assumes dataset[i][0] is a sequence whose first two entries are
# the atom-feature and neighbour-feature tensors — confirm against CIFData.
orig_atom_fea_len = dataset[0][0][0].shape[-1]
nbr_fea_len = dataset[0][0][1].shape[-1]

# One print call, one summary line per item.
print(
    "Number of structures: {}".format(len(dataset)),
    "Number of features describing one atom: {}".format(orig_atom_fea_len),
    "Number of features describing neighbours: {}".format(nbr_fea_len),
    sep="\n",
)

Number of structures: 291
Number of features describing one atom: 92
Number of features describing neighbours: 41


In [7]:
# Positional train/validation split: the leading 80 % of the dataset indices go
# to training and the rest to validation. The split itself is not shuffled;
# each SubsetRandomSampler only randomises the order inside its own subset.
train_val_ratio = 0.8
indices = list(range(len(dataset)))
split_at = int(train_val_ratio * len(dataset))

train_sampler = SubsetRandomSampler(indices[:split_at])
val_sampler = SubsetRandomSampler(indices[split_at:])

# Both loaders read from the same dataset object and differ only in the sampler.
train_loader = DataLoader(
    dataset,
    batch_size=64,
    sampler=train_sampler,
    collate_fn=collate_pool,
)
val_loader = DataLoader(
    dataset,
    batch_size=64,
    sampler=val_sampler,
    collate_fn=collate_pool,
)

## Creating model

In [8]:
# Network, loss, optimiser, target normaliser and learning-rate schedule.
model = CrystalGraphConvNet(orig_atom_fea_len, nbr_fea_len)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=0,
)
# NOTE(review): the normalizer is built from a single zero rather than from
# training targets. If Normalizer derives a sample standard deviation from its
# argument, a one-element tensor has none — confirm this is intended.
normalizer = Normalizer(torch.zeros(1))
# Scales the learning rate by gamma once scheduler.step() has been called 100 times.
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[100],
    gamma=0.1,
)
# Bare expression so the cell output shows the layer summary.
model

CrystalGraphConvNet(
  (embedding): Linear(in_features=92, out_features=64, bias=True)
  (convs): ModuleList(
    (0): ConvLayer(
      (fc_full): Linear(in_features=169, out_features=128, bias=True)
      (sigmoid): Sigmoid()
      (softplus1): Softplus(beta=1, threshold=20)
      (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (softplus2): Softplus(beta=1, threshold=20)
    )
    (1): ConvLayer(
      (fc_full): Linear(in_features=169, out_features=128, bias=True)
      (sigmoid): Sigmoid()
      (softplus1): Softplus(beta=1, threshold=20)
      (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (softplus2): Softplus(beta=1, threshold=20)
    )
    (2): ConvLayer(
      (fc_full): Linear(in_features=169, out_featu

In [None]:
# Train for a fixed number of epochs, validating and checkpointing after each one.
epochs = 2
# NOTE(review): nothing in this cell appends to loss_save. Presumably train()
# fills it through the shared notebook namespace — confirm, otherwise the plot
# in the next cell stays empty.
loss_save = []

# Start from +inf so the first finite validation error always counts as the
# best so far (this name was previously read before ever being assigned).
best_mae_error = float("inf")

for epoch in range(epochs):
    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch, normalizer)

    # evaluate on validation set
    mae_error = validate(val_loader, model, criterion, normalizer)

    # NaN is the only value that compares unequal to itself.
    if mae_error != mae_error:
        print('Exit due to NaN')
        sys.exit(1)

    scheduler.step()

    # remember the best mae_error and save checkpoint
    is_best = mae_error < best_mae_error
    best_mae_error = min(mae_error, best_mae_error)
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_mae_error': best_mae_error,
        'optimizer': optimizer.state_dict(),
        'normalizer': normalizer.state_dict(),
    }, is_best)

In [None]:
# Plot every recorded loss_save value against its position in the list.
fig, ax = plt.subplots()
ax.plot(np.arange(len(loss_save)), loss_save)
plt.show()