# Check our data

In [3]:
import pandas as pd
import numpy as np
from pymatgen import Element

In [4]:
energy_data = pd.read_csv('mat_energy.csv')
our_atoms = []
for i in range(len(energy_data)):
    tmp = energy_data['material'][i].split('/')[0]
    our_atoms.append(tmp.split('3')[0])
    our_atoms.append(tmp.split('3')[1])
our_atoms = list(set(our_atoms))
print (our_atoms)

['Ru', 'Al', 'Mg', 'Be', 'Rh', 'Pd', 'Cd', 'V', 'Au', 'Ag', 'Mo', 'Os', 'Hf', 'Ta', 'Ni', 'Fe', 'Tl', 'Co', 'Pb', 'Mn', 'W', 'Ti', 'Cu', 'Cr', 'Ir', 'Pt', 'Nb', 'Ba', 'Zn', 'Re', 'Na', 'Lu', 'Y', 'Li', 'Zr', 'Rb', 'Sc', 'Tc', 'K']


In [5]:
our_atoms_no = []
for i in range(len(our_atoms)):
    our_atoms_no.append(Element(our_atoms[i]).data['Atomic no'])
#print (our_atoms_no)
print (sorted(our_atoms_no))

[3, 4, 11, 12, 13, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 56, 71, 72, 73, 74, 75, 76, 77, 78, 79, 81, 82]


In [8]:
Element(our_atoms[0]).data

{'Atomic mass': 101.07,
 'Atomic no': 44,
 'Atomic orbitals': {'1s': -782.918621,
  '2p': -102.333649,
  '2s': -110.536054,
  '3d': -10.195668,
  '3p': -16.145217,
  '3s': -19.366692,
  '4d': -0.210375,
  '4p': -1.667549,
  '4s': -2.628363,
  '5s': -0.152834},
 'Atomic radius': 1.3,
 'Atomic radius calculated': 1.78,
 'Boiling point': '4423 K',
 'Brinell hardness': '2160 MN m<sup>-2</sup>',
 'Bulk modulus': '220 GPa',
 'Coefficient of linear thermal expansion': '6.4 x10<sup>-6</sup>K<sup>-1</sup>',
 'Common oxidation states': [3, 4],
 'Critical temperature': 'no data K',
 'Density of solid': '12370 kg m<sup>-3</sup>',
 'Electrical resistivity': '7.1 10<sup>-8</sup> &Omega; m',
 'Electronic structure': '[Kr].4d<sup>7</sup>.5s<sup>1</sup>',
 'ICSD oxidation states': [2, 3, 4, 5, 6],
 'Ionic radii': {'3': 0.82, '4': 0.76, '5': 0.705, '7': 0.52, '8': 0.5},
 'Liquid range': '1816 K',
 'Melting point': '2607 K',
 'Mendeleev no': 62,
 'Mineral hardness': '6.5',
 'Molar volume': '8.17 cm<sup>3

# Material to Vector (Make data to predict adsorption energy)

In [9]:
class1 = {'fcc': 0, 'hcp': 1}
class2 = {'homo': 0, 'hetero': 1}

In [10]:
def make_dataset(file_name):
    atom2vec = pd.read_csv(file_name, index_col=0).transpose()
    dataset_str1 = []
    dataset_str2 = []
    dataset_str3 = []
    dataset_str4 = []

    for i in range(len(energy_data)):
        tmp = energy_data.iloc[i]['material'].split('/')
        atom1 = list(atom2vec[tmp[0].split('3')[0]])
        atom2 = list(atom2vec[tmp[0].split('3')[1]])
        fcc_or_hcp = class1[tmp[1]]
        homo_or_hetero = class2[tmp[2]]

        input_vec = np.float32(atom1+atom2)
        output_energy = np.float32(energy_data.iloc[i]['energy'])

        if fcc_or_hcp == 0 and homo_or_hetero == 0:
            dataset_str1.append([input_vec, output_energy])
        elif fcc_or_hcp == 0 and homo_or_hetero == 1:
            dataset_str2.append([input_vec, output_energy])
        elif fcc_or_hcp == 1 and homo_or_hetero == 0:
            dataset_str3.append([input_vec, output_energy])
        else:
            dataset_str4.append([input_vec, output_energy])
    
    return dataset_str1, dataset_str2, dataset_str3, dataset_str4

In [11]:
dataset_str1, dataset_str2, dataset_str3, dataset_str4 = make_dataset('atom2vec_AE20.csv')

# Make train/valid/test dataset

In [9]:
import torch
import torch.nn.functional as F

from torch import nn
from torch.utils.data.dataset import random_split

In [10]:
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True

# gpu_option
gpu_use = 1
which_gpu = 0

In [11]:
batch_size = 100

In [12]:
def make_loader(dataset):
    torch.manual_seed(0)
    train, valid, test = random_split(dataset, [int(len(dataset)*0.8), int(len(dataset)*0.1), int(len(dataset)*0.1)])
    
    # Data Loader (Input Pipeline)
    train_loader = torch.utils.data.DataLoader(dataset=train, batch_size=batch_size, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(dataset=valid, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=batch_size, shuffle=False)
    
    return train_loader, valid_loader, test_loader

In [13]:
train_loader1, valid_loader1, test_loader1 = make_loader(dataset_str1)
train_loader2, valid_loader2, test_loader2 = make_loader(dataset_str2)
train_loader3, valid_loader3, test_loader3 = make_loader(dataset_str3)
train_loader4, valid_loader4, test_loader4 = make_loader(dataset_str4)

# Model

In [14]:
import time

In [15]:
class pred_net(nn.Module):
    def __init__(self):
        super(pred_net, self).__init__()
        self.hidden = nn.Linear(64, 10)
        self.out = nn.Linear(10, 1)

    def forward(self, x):
        x = F.relu(self.hidden(x))
        out = self.out(x)
        out = out.view(-1)
        return out

In [16]:
num_epochs = 50000+1
learning_rate = 1e-4
criterion = nn.MSELoss()

In [23]:
def fit(model,train_loader,valid_loader,criterion,learning_rate,num_epochs):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), weight_decay=1e-6)
    
    for epoch in range(num_epochs):
        model.train()
        acc = []
        train_error = 0
        
        for i, data in enumerate(train_loader):
            atom2vec = data[0].type(torch.FloatTensor).cuda(which_gpu)
            real_energy = data[1].type(torch.FloatTensor).cuda(which_gpu)
            
            optimizer.zero_grad()
            pred_energy = model(atom2vec)
            
            loss = criterion(pred_energy, real_energy)
            loss.backward()
            optimizer.step()

            train_error += loss.item()
       
        if epoch % 10000 == 0:
            avg_train_error = train_error/len(train_loader)
            print ("epoch : [%d/%d], The average loss of train error %.4f" % (epoch+1, num_epochs, avg_train_error))
            eval_loss, output_all, label_all = eval(model, valid_loader, criterion)
        
def eval(model,valid_loader,criterion):

    eval_loss = 0.0
    output_all = []
    label_all = []

    model.eval()
    for i, data in enumerate(valid_loader):
        atom2vec = data[0].type(torch.FloatTensor).cuda(which_gpu)
        real_energy = data[1].type(torch.FloatTensor).cuda(which_gpu)

        pred_energy = model(atom2vec)
        loss = criterion(pred_energy, real_energy)
        eval_loss += loss.item()

        output_all.append(pred_energy.data.cpu().numpy())
        label_all.append(real_energy.data.cpu().numpy())

    avg_loss = eval_loss/len(valid_loader)
    print ('The average loss of valid error: {:.4f} \n'. format(avg_loss))

    return avg_loss, output_all, label_all

In [24]:
# load model
model1 = pred_net().cuda(which_gpu)
# run
start_time = time.time()
fit(model1,train_loader1,valid_loader1,criterion,learning_rate,num_epochs)
print("--- %s seconds ---" % (time.time() - start_time))

# evaluation
avg_loss, pred_all, real_all = eval(model1,test_loader1,criterion)

epoch : [1/50001], The average loss of train error 1.6584
The average loss of valid error: 2.2451 

epoch : [10001/50001], The average loss of train error 0.2204
The average loss of valid error: 0.3303 

epoch : [20001/50001], The average loss of train error 0.2511
The average loss of valid error: 0.3224 

epoch : [30001/50001], The average loss of train error 0.1676
The average loss of valid error: 0.3362 

epoch : [40001/50001], The average loss of train error 0.1320
The average loss of valid error: 0.3994 

epoch : [50001/50001], The average loss of train error 0.1267
The average loss of valid error: 0.4648 

--- 304.7345356941223 seconds ---
The average loss of valid error: 0.5773 



In [25]:
pred = list(pred_all[0])
real = list(real_all[0])

idx = 5
print (pred[idx], real[idx])

2.2146153 1.6735339


---

In [26]:
# load model
model2 = pred_net().cuda(which_gpu)
# run
start_time = time.time()
fit(model2,train_loader2,valid_loader2,criterion,learning_rate,num_epochs)
print("--- %s seconds ---" % (time.time() - start_time))

# evaluation
avg_loss, pred_all, real_all = eval(model2,test_loader2,criterion)

epoch : [1/50001], The average loss of train error 1.8616
The average loss of valid error: 1.7083 

epoch : [10001/50001], The average loss of train error 0.1658
The average loss of valid error: 0.2958 

epoch : [20001/50001], The average loss of train error 0.1228
The average loss of valid error: 0.2903 

epoch : [30001/50001], The average loss of train error 0.1769
The average loss of valid error: 0.3075 

epoch : [40001/50001], The average loss of train error 0.1043
The average loss of valid error: 0.3112 

epoch : [50001/50001], The average loss of train error 0.1219
The average loss of valid error: 0.3313 

--- 311.1681635379791 seconds ---
The average loss of valid error: 0.2306 



In [27]:
pred = list(pred_all[0])
real = list(real_all[0])

idx = 0
print (pred[idx], real[idx])

2.702235 2.80992


---

In [28]:
# load model
model3 = pred_net().cuda(which_gpu)
# run
start_time = time.time()
fit(model3,train_loader3,valid_loader3,criterion,learning_rate,num_epochs)
print("--- %s seconds ---" % (time.time() - start_time))

# evaluation
avg_loss, pred_all, real_all = eval(model3,test_loader3,criterion)

epoch : [1/50001], The average loss of train error 3.2894
The average loss of valid error: 3.8498 

epoch : [10001/50001], The average loss of train error 0.1964
The average loss of valid error: 0.5860 

epoch : [20001/50001], The average loss of train error 0.1539
The average loss of valid error: 0.6685 

epoch : [30001/50001], The average loss of train error 0.1348
The average loss of valid error: 0.7260 

epoch : [40001/50001], The average loss of train error 0.1239
The average loss of valid error: 0.7640 

epoch : [50001/50001], The average loss of train error 0.1542
The average loss of valid error: 0.8499 

--- 309.71791529655457 seconds ---
The average loss of valid error: 0.7631 



In [29]:
pred = list(pred_all[0])
real = list(real_all[0])

idx = 0
print (pred[idx], real[idx])

2.7012196 2.9374459


---

In [30]:
# load model
model4 = pred_net().cuda(which_gpu)
# run
start_time = time.time()
fit(model4,train_loader4,valid_loader4,criterion,learning_rate,num_epochs)
print("--- %s seconds ---" % (time.time() - start_time))

# evaluation
avg_loss, pred_all, real_all = eval(model4,test_loader4,criterion)

epoch : [1/50001], The average loss of train error 2.3100
The average loss of valid error: 2.0714 

epoch : [10001/50001], The average loss of train error 0.1654
The average loss of valid error: 0.1900 

epoch : [20001/50001], The average loss of train error 0.1393
The average loss of valid error: 0.1483 

epoch : [30001/50001], The average loss of train error 0.1064
The average loss of valid error: 0.1596 

epoch : [40001/50001], The average loss of train error 0.0969
The average loss of valid error: 0.1665 

epoch : [50001/50001], The average loss of train error 0.1048
The average loss of valid error: 0.1812 

--- 309.3552613258362 seconds ---
The average loss of valid error: 0.3946 



In [31]:
pred = list(pred_all[0])
real = list(real_all[0])

idx = 0
print (pred[idx], real[idx])

2.9658234 3.03737
