## CONTINUOUS_EMBEDDINGS

In [1]:
import os
import pickle as pkl
import sys
import torch
import torch.nn.functional as F
import pandas as pd

sys.path.append(os.path.expanduser('../../ocpmodels/datasets/embeddings'))

In [2]:
def isNaN(num):
    """
    check if NuN
    """
    return num!= num


def convert(group):
    """
    conbert atom Mendeleev long table groups number to short.
    arg: scalar number in range [1, 18]
    """
    if not isNaN(group):
        if group > 7:
            group = 8
        elif group > 10:
            group -= 10

    return group


def fenc(cemb_v, prop_index_k, enc='enc', num_classes=0):
    """
    convert value of atom_prop from list of values in cont embeddings
    to torch.tensor(-1) with F.one_hot encoding if specified
    
    args:: 
    cemb_v : list of values,
    prop_index_k: index of property,
    enc: use F.one_hot only if 'enc'
    num_classes: number of classes for F.one_hot
    
    """
    
    vec = torch.tensor(cemb_v[prop_index_k])
    if enc=='enc':
        if isNaN(vec):
            res = torch.zeros(num_classes)
        else:
            res = F.one_hot(vec.long(), num_classes=num_classes)
    else:
            res = vec

    return res.reshape(-1)

In [3]:
from continuous_embeddings import CONTINUOUS_EMBEDDINGS as cemb
for cemb_v in cemb.values():
    cemb_v.append(convert(cemb_v[0]))

prop_index = {
    0: ['group_l',  'enc', 19],
    1: ['period', 'enc', 8],
    7: ['block',  'enc', 5],
    2: ['electroneg', 'float', 0], 
    3: ['radius', 'float', 0],
    4: ['valence', 'float', 0], 
    5: ['ionization', 'float', 0],  
    6: ['affinity', 'float', 0],
    8: ['volume', 'float', 0],
    9: ['group_s', 'enc', 9]
}      

cemb_v_ts_all = torch.tensor([])

for cemb_k, cemb_v in cemb.items():
    
    cemb_v_ts = torch.Tensor()
    
    for prop_index_k, prop_index_v in prop_index.items():
        
        emb = fenc(cemb_v, prop_index_k, prop_index_v[1], prop_index_v[2])
        cemb_v_ts = torch.cat((cemb_v_ts, emb), dim=0)
        
    cemb_v_ts_all = torch.cat((cemb_v_ts_all, cemb_v_ts.reshape(1,-1)), dim=0)

print(cemb_v_ts_all.size())

torch.Size([101, 47])


In [4]:
with open('custom_embedding_v1.pickle', 'wb') as f:
    f.write(pkl.dumps(cemb_v_ts_all))

In [5]:
from continuous_embeddings import CONTINUOUS_EMBEDDINGS as cemb
df = pd.DataFrame.from_dict(cemb, orient='index', columns=[v[0] for v in prop_index.values()])

In [6]:
df

Unnamed: 0,group_l,period,block,electroneg,radius,valence,ionization,affinity,volume,group_s
0,,,,,,,,,,
1,1.0,1.0,2.187771,31.0,1.0,13.598434,0.754195,1.0,14.100000,1.0
2,18.0,1.0,1.000000,28.0,2.0,24.587387,-19.700001,1.0,31.799999,8.0
3,1.0,2.0,0.048868,128.0,1.0,5.391715,0.618049,1.0,13.100000,1.0
4,2.0,2.0,0.126847,96.0,2.0,9.322699,-2.400000,1.0,5.000000,2.0
...,...,...,...,...,...,...,...,...,...,...
96,10.0,7.0,0.690009,169.0,2.0,5.991400,-3.000000,4.0,18.280001,8.0
97,,,,,,,,,,
98,,,,,,,,,,
99,,,,,,,,,,


In [9]:
at_prop = pd.read_csv('atomic_properties.csv', header=0)

at_prop