In [None]:
# Import the required libraries
import pandas as pd
import numpy as np
import os
from pymatgen.io.xyz import XYZ
from pymatgen.core import Element


In [None]:
# Use pymatgen to read the linkers' .xyz files and prepare the atom counts of each functional group

fg_str = """Name	C	O	H	V	N	P	Ba	Cr	Zn	Cu	I	S	F	Br	Cl	Ni
F	0	0	0	0	0	0	0	0	0	0	0	0	1	0	0	0
Cl	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	0
Br	0	0	0	0	0	0	0	0	0	0	0	0	0	1	0	0
I	0	0	0	0	0	0	0	0	0	0	1	0	0	0	0	0
Me	1	0	3	0	0	0	0	0	0	0	0	0	0	0	0	0
Et	2	0	5	0	0	0	0	0	0	0	0	0	0	0	0	0
Pr	3	0	7	0	0	0	0	0	0	0	0	0	0	0	0	0
HCO	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0
COOH	1	2	1	0	0	0	0	0	0	0	0	0	0	0	0	0
OH	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0
OMe	1	1	3	0	0	0	0	0	0	0	0	0	0	0	0	0
OEt	2	1	5	0	0	0	0	0	0	0	0	0	0	0	0	0
OPr	3	1	7	0	0	0	0	0	0	0	0	0	0	0	0	0
NH2	0	0	2	0	1	0	0	0	0	0	0	0	0	0	0	0
CN	1	0	0	0	1	0	0	0	0	0	0	0	0	0	0	0
NHMe	1	0	4	0	1	0	0	0	0	0	0	0	0	0	0	0
NO2	0	2	0	0	1	0	0	0	0	0	0	0	0	0	0	0
Ph	6	0	5	0	0	0	0	0	0	0	0	0	0	0	0	0
SO3H	0	3	1	0	0	0	0	0	0	0	0	1	0	0	0	0
H	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0"""

# fg maps a functional-group name to {element symbol: atom count}, keeping only
# elements that actually occur in the group. ' ' stands for "no functional group".
fg = {' ': {}}

# Column order of the fg_str table (after the leading "Name" column).
atom_labels = ['C', 'O', 'H', 'V', 'N', 'P', 'Ba', 'Cr', 'Zn', 'Cu', 'I', 'S', 'F', 'Br', 'Cl', 'Ni']
# Running per-element totals over all functional groups; the metal / P columns are ignored.
total = {label: 0 for label in atom_labels
         if label not in ['V', 'P', 'Ba', 'Cr', 'Zn', 'Cu', 'Ni']}

# Skip the header row, then read one functional group per line.
for row in fg_str.split('\n')[1:]:
    name, *counts = row.split('\t')
    group = {}
    for label, count in zip(atom_labels, counts):
        # `total` holds exactly the columns that are not excluded above.
        if label in total:
            count = int(count)
            total[label] += count
            if count > 0:
                group[label] = count
    fg[name] = group


def _load_linker_compositions(directory, labels, with_metal=False):
    """Read every ``*.xyz`` file in *directory* into a composition table.

    The integer after the last ``_`` in the file name is used as the key.

    Args:
        directory: folder containing the linker ``.xyz`` files.
        labels: element symbols to count in each molecule.
        with_metal: when True, also store under ``'metal'`` the first element
            of the molecule whose ``Element(...).is_metal`` is true (taken
            from a set, so the choice is arbitrary if there are several).

    Returns:
        dict mapping linker index -> {'nelec', 'natom', ['metal'], 'el'} where
        ``'el'`` maps each label to its atom count in the molecule.
    """
    compositions = {}
    for filename in os.listdir(directory):
        # Ignore stray directory entries (e.g. `.ipynb_checkpoints`) that
        # would otherwise make the int() conversion below fail.
        if not filename.endswith('.xyz'):
            continue
        index = int(filename.split('_')[-1].replace('.xyz', ''))
        molecule = XYZ.from_file(os.path.join(directory, filename)).molecule
        symbols = [str(Element.from_Z(z)) for z in molecule.atomic_numbers]
        entry = {
            'nelec': molecule.nelectrons,
            'natom': len(symbols),
        }
        if with_metal:
            entry['metal'] = [sym for sym in set(symbols) if Element(sym).is_metal][0]
        entry['el'] = {label: symbols.count(label) for label in labels}
        compositions[index] = entry
    return compositions


# Elements counted for the metal linkers.
atom_labels = {'Cu', 'V', 'Zn', 'O', 'Ni', 'Cr', 'N', 'H', 'C', 'Ba'}
metal_linker = _load_linker_compositions('metal_linker/', atom_labels, with_metal=True)


# Elements counted for the organic linkers.
atom_labels = {'N', 'H', 'C', 'O'}
organic_linker = _load_linker_compositions('organic_linker/', atom_labels)

# Node (m) / edge (n) ratios of the linkers for each topology, obtained by counting.
topo = {
    name: {'m': m, 'n': n}
    for name, m, n in (
        ('acs', 1., 3.),
        ('bcu', 1., 4.),
        ('etb', 1., 2.),
        ('fof', 1., 1.),
        ('nbo', 1., 2.),
        ('pcu', 1., 3.),
        ('pts', 1., 1.),
        ('rht', 3., 2.),
        ('sra', 1., 2.),
        ('tbo', 3., 4.),
        ('the', 3., 8.),
    )
}

In [None]:
# Functions for estimating the number of organic linkers, functional groups and metal linkers.
# There are 2 versions:
# 1. a version that does not lock the number of organic linkers
# 2. a version that locks the number of organic linkers using the node/edge ratio of each topology, with the relation: total number of organic linkers = n*(number of metal linkers) / m
# The number of metal linkers is obtained by dividing the structure's metal-atom count by the metal-atom count of the metal linker; n and m come from the ratio of each topology.
# The principle: keep filling in atom counts, subtract the atom counts of the structure from the resulting totals, and multiply the differences by a weight per atom type.
# The composition with the smallest weighted error is chosen.
# If a functional-group count can be computed directly it is determined first, since some can be read off directly, e.g. halide elements and S.

def fn1IsNotNone_fn2IsNotNone_Lmax(M, fn1, fn2, FG_MAX, Lmax, cal_err):
    """Choose the organic-linker split when both functional-group counts are known.

    Every pair (i1, i2) with i1 >= 1, i2 >= 1 and i1 + i2 == Lmax is evaluated
    once with ``cal_err(i1, i2, fn1, fn2)`` and the pair with the smallest
    error is kept.

    Args:
        M: value stored as the first entry of the returned candidate.
        fn1, fn2: fixed counts of functional groups 1 and 2.
        FG_MAX: unused here; kept for a signature parallel to the sibling searches.
        Lmax: required total i1 + i2.
        cal_err: callable ``(c1, c2, c3, c4) -> error``.

    Returns:
        ``[M, i1, i2, fn1, fn2, err]`` for the best pair, or ``None`` when
        Lmax < 2 or no error is below the initial bound of 1e6.
    """
    err_min = 1e6
    candidate = None

    for i1 in range(1, Lmax):
        # i2 is fully determined by the constraint, so no inner scan is needed.
        i2 = Lmax - i1
        err = cal_err(i1, i2, fn1, fn2)
        if err < err_min:
            err_min = err
            candidate = [M, i1, i2, fn1, fn2, err]

    return candidate
        

def fn1IsNotNone_fn2IsNone_Lmax(M, fn1, fn2, FG_MAX, Lmax, cal_err):
    """Search the linker split and the count of functional group 2 (fn1 known).

    For every pair (i1, i2) with i1 + i2 == Lmax, i4 is scanned over
    ``range(1, FG_MAX)`` and ``cal_err(i1, i2, fn1, i4)`` is evaluated. The i4
    scan stops as soon as an error neither improves the best error so far nor
    gets closer to it than the closest non-improving error of this scan.

    Args:
        M: value stored as the first entry of the returned candidate.
        fn1: fixed count of functional group 1.
        fn2: unused (the count is searched as i4).
        FG_MAX: exclusive upper bound of the i4 scan.
        Lmax: required total i1 + i2.
        cal_err: callable ``(c1, c2, c3, c4) -> error``.

    Returns:
        ``[M, i1, i2, fn1, i4, err]`` for the best combination found, or
        ``None`` when nothing was evaluated below the initial bound of 1e6.
    """
    err_min = 1e6
    candidate = None

    for i1 in range(1, Lmax):
        # i2 follows directly from the constraint i1 + i2 == Lmax.
        i2 = Lmax - i1

        err_prev4 = 1e6
        for i4 in range(1, FG_MAX):

            err = cal_err(i1, i2, fn1, i4)
            derr = np.abs(err - err_min)

            if err < err_min:
                err_min = err
                candidate = [M, i1, i2, fn1, i4, err]
            elif derr < err_prev4:
                # Not better, but still approaching the best error: keep going.
                err_prev4 = derr
            else:
                break

    return candidate
    
    
def fn1IsNone_fn2IsNotNone_Lmax(M, fn1, fn2, FG_MAX, Lmax, cal_err):
    """Search the linker split and the count of functional group 1 (fn2 known).

    For every pair (i1, i2) with i1 + i2 == Lmax, i3 is scanned over
    ``range(1, FG_MAX)`` and ``cal_err(i1, i2, i3, fn2)`` is evaluated. The i3
    scan stops as soon as an error neither improves the best error so far nor
    gets closer to it than the closest non-improving error of this scan.

    Args:
        M: value stored as the first entry of the returned candidate.
        fn1: unused (the count is searched as i3).
        fn2: fixed count of functional group 2.
        FG_MAX: exclusive upper bound of the i3 scan.
        Lmax: required total i1 + i2.
        cal_err: callable ``(c1, c2, c3, c4) -> error``.

    Returns:
        ``[M, i1, i2, i3, fn2, err]`` for the best combination found, or
        ``None`` when nothing was evaluated below the initial bound of 1e6.
    """
    err_min = 1e6
    candidate = None

    for i1 in range(1, Lmax):
        # i2 follows directly from the constraint i1 + i2 == Lmax.
        i2 = Lmax - i1

        err_prev3 = 1e6
        for i3 in range(1, FG_MAX):

            err = cal_err(i1, i2, i3, fn2)
            derr = np.abs(err - err_min)

            if err < err_min:
                err_min = err
                candidate = [M, i1, i2, i3, fn2, err]
            elif derr < err_prev3:
                # Not better, but still approaching the best error: keep going.
                err_prev3 = derr
            else:
                break

    return candidate


def fn1IsNone_fn2IsNone_Lmax(M, fn1, fn2, FG_MAX, Lmax, cal_err):
    """Search the linker split and both functional-group counts.

    For every pair (i1, i2) with i1 + i2 == Lmax and every i3 in
    ``range(1, FG_MAX)``, i4 is scanned over ``range(1, FG_MAX)`` and
    ``cal_err(i1, i2, i3, i4)`` is evaluated. The i4 scan stops as soon as an
    error neither improves the best error so far nor gets closer to it than
    the closest non-improving error of this scan; the i3 scan is exhaustive.

    Args:
        M: value stored as the first entry of the returned candidate.
        fn1, fn2: unused (both counts are searched as i3 and i4).
        FG_MAX: exclusive upper bound of the i3 and i4 scans.
        Lmax: required total i1 + i2.
        cal_err: callable ``(c1, c2, c3, c4) -> error``.

    Returns:
        ``[M, i1, i2, i3, i4, err]`` for the best combination found, or
        ``None`` when nothing was evaluated below the initial bound of 1e6.
    """
    err_min = 1e6
    candidate = None

    for i1 in range(1, Lmax):
        # i2 follows directly from the constraint i1 + i2 == Lmax.
        i2 = Lmax - i1

        for i3 in range(1, FG_MAX):
            err_prev4 = 1e6
            for i4 in range(1, FG_MAX):

                err = cal_err(i1, i2, i3, i4)
                derr = np.abs(err - err_min)

                if err < err_min:
                    err_min = err
                    candidate = [M, i1, i2, i3, i4, err]
                elif derr < err_prev4:
                    # Not better, but still approaching the best error: keep going.
                    err_prev4 = derr
                else:
                    break

    return candidate

#--------------------


def fn1IsNotNone_fn2IsNotNone(M, fn1, fn2, FG_MAX, Lmax, cal_err):
    """Search both organic-linker counts freely; both functional-group counts are fixed.

    Both counts are scanned over ``range(1, FG_MAX)`` and scored with
    ``cal_err(count1, count2, fn1, fn2)``. An inner scan stops once an error
    neither beats the best error so far nor comes closer to it than the
    closest non-improving error of that scan. The outer scan is abandoned when
    an inner scan ended at a value below ``min(3, FG_MAX - 1)``.

    ``Lmax`` is not used. Returns ``[M, count1, count2, fn1, fn2, err]`` for
    the best combination, or ``None`` if nothing scored below 1e6.
    """
    best_err = 1e6
    best = None

    for count1 in range(1, FG_MAX):
        closest_gap = 1e6
        for count2 in range(1, FG_MAX):
            err = cal_err(count1, count2, fn1, fn2)
            if err < best_err:
                best_err = err
                best = [M, count1, count2, fn1, fn2, err]
                continue
            gap = np.abs(err - best_err)
            if not gap < closest_gap:
                break
            closest_gap = gap

        # The inner scan gave up almost immediately: stop searching altogether.
        if count2 < min(3, FG_MAX - 1):
            break

    return best
        

def fn1IsNotNone_fn2IsNone(M, fn1, fn2, FG_MAX, Lmax, cal_err):
    """Search both organic-linker counts and the count of functional group 2.

    All three counts are scanned over ``range(1, FG_MAX)`` and scored with
    ``cal_err(count1, count2, fn1, fg2_count)``. The innermost scan stops once
    an error neither beats the best error so far nor comes closer to it than
    the closest non-improving error of that scan. Each enclosing scan is
    abandoned when the scan directly inside it ended at a value below
    ``min(3, FG_MAX - 1)``.

    ``fn2`` and ``Lmax`` are not used. Returns
    ``[M, count1, count2, fn1, fg2_count, err]`` for the best combination, or
    ``None`` if nothing scored below 1e6.
    """
    best_err = 1e6
    best = None
    early_limit = min(3, FG_MAX - 1)

    for count1 in range(1, FG_MAX):
        for count2 in range(1, FG_MAX):
            closest_gap = 1e6
            for fg2_count in range(1, FG_MAX):
                err = cal_err(count1, count2, fn1, fg2_count)
                if err < best_err:
                    best_err = err
                    best = [M, count1, count2, fn1, fg2_count, err]
                    continue
                gap = np.abs(err - best_err)
                if not gap < closest_gap:
                    break
                closest_gap = gap

            if fg2_count < early_limit:
                break

        if count2 < early_limit:
            break

    return best
    
    
def fn1IsNone_fn2IsNotNone(M, fn1, fn2, FG_MAX, Lmax, cal_err):
    """Search both organic-linker counts and the count of functional group 1.

    All three counts (i1, i2, i3) are scanned over ``range(1, FG_MAX)`` and
    scored with ``cal_err(i1, i2, i3, fn2)``. The i3 scan stops once an error
    neither beats the best error so far nor comes closer to it than the
    closest non-improving error of that scan. Each enclosing scan is abandoned
    when the scan directly inside it ended at a value below 3 (and below
    FG_MAX - 1).

    Args:
        M: value stored as the first entry of the returned candidate.
        fn1: unused (the count is searched as i3).
        fn2: fixed count of functional group 2.
        FG_MAX: exclusive upper bound of all three scans.
        Lmax: unused, as in the sibling unlocked searches.
        cal_err: callable ``(c1, c2, c3, c4) -> error``.

    Returns:
        ``[M, i1, i2, i3, fn2, err]`` for the best combination found, or
        ``None`` if nothing scored below the initial bound of 1e6.
    """
    err_min = 1e6
    candidate = None

    for i1 in range(1, FG_MAX):

        # Bounded by FG_MAX like the sibling unlocked searches. The previous
        # Lmax bound left i2 unbound (UnboundLocalError below) when Lmax <= 1.
        for i2 in range(1, FG_MAX):

            err_prev3 = 1e6
            for i3 in range(1, FG_MAX):

                err = cal_err(i1, i2, i3, fn2)
                derr = np.abs(err - err_min)

                if err < err_min:
                    err_min = err
                    candidate = [M, i1, i2, i3, fn2, err]
                elif derr < err_prev3:
                    err_prev3 = derr
                else:
                    break

            if i3 < 3 and i3 < FG_MAX - 1:
                break

        if i2 < 3 and i2 < FG_MAX - 1:
            break

    return candidate


def fn1IsNone_fn2IsNone(M, fn1, fn2, FG_MAX, Lmax, cal_err):
    """Search both organic-linker counts and both functional-group counts.

    All four counts are scanned over ``range(1, FG_MAX)`` and scored with
    ``cal_err(count1, count2, fg1_count, fg2_count)``. The innermost scan
    stops once an error neither beats the best error so far nor comes closer
    to it than the closest non-improving error of that scan. Each enclosing
    scan is abandoned when the scan directly inside it ended at a value below
    ``min(3, FG_MAX - 1)``.

    ``fn1``, ``fn2`` and ``Lmax`` are not used. Returns
    ``[M, count1, count2, fg1_count, fg2_count, err]`` for the best
    combination, or ``None`` if nothing scored below 1e6.
    """
    best_err = 1e6
    best = None
    early_limit = min(3, FG_MAX - 1)

    for count1 in range(1, FG_MAX):
        for count2 in range(1, FG_MAX):
            for fg1_count in range(1, FG_MAX):
                closest_gap = 1e6
                for fg2_count in range(1, FG_MAX):
                    err = cal_err(count1, count2, fg1_count, fg2_count)
                    if err < best_err:
                        best_err = err
                        best = [M, count1, count2, fg1_count, fg2_count, err]
                        continue
                    gap = np.abs(err - best_err)
                    if not gap < closest_gap:
                        break
                    closest_gap = gap

                if fg2_count < early_limit:
                    break

            if fg1_count < early_limit:
                break

        if count2 < early_limit:
            break

    return best


def getNumOLFG(el_cif, linker1, linker2, linker3, f1, f2, m, n):
    """Estimate linker and functional-group counts for one structure.

    Reads the module-level tables ``metal_linker``, ``organic_linker``, ``fg``
    and ``atom_mass`` (all must be defined before this is called).

    Args:
        el_cif: {element symbol: atom count} of the structure.
        linker1: key into ``metal_linker``.
        linker2, linker3: keys into ``organic_linker``.
        f1, f2: functional-group names (keys of ``fg``; ' ' means none).
        m, n: topology ratio; the organic-linker total is ``int(M * n / m)``.

    Returns:
        ``(n1, n2)`` where ``n1`` is a placeholder list of six ones (the
        unlocked search is currently disabled) and ``n2`` is the candidate
        ``[M, n_ol1, n_ol2, n_fg1, n_fg2, err]`` returned by the matching
        ``*_Lmax`` search (which may be ``None``).
    """
    def _positive(counts):
        # Drop elements that do not occur.
        return {sym: cnt for sym, cnt in counts.items() if cnt > 0}

    def _direct_count(group):
        # Halide and SO3H groups can be counted straight from the structure:
        # one halogen (or one S) atom per group. Anything else is unknown.
        if group in ['F', 'Cl', 'Br', 'I']:
            return el_cif[group]
        if group == 'SO3H':
            return el_cif['S']
        return None

    ml_entry = metal_linker[linker1]
    el_ml = _positive(ml_entry['el'])
    el_ol1 = _positive(organic_linker[linker2]['el'])
    el_ol2 = _positive(organic_linker[linker3]['el'])
    el_fg1 = _positive(fg[f1])
    el_fg2 = _positive(fg[f2])

    # Number of metal linkers: metal atoms in the structure per metal atom in one linker.
    metal = ml_entry['metal']
    M = el_cif[metal] / el_ml[metal]

    Lmax = int(M * n / m)

    if f1 == ' ':
        # Without a first functional group both counts are taken as zero.
        fn1 = fn2 = 0
    else:
        fn1 = _direct_count(f1)
        fn2 = 0 if f2 == ' ' else _direct_count(f2)

    total_atom = np.sum(list(el_cif.values()))
    # Atoms left over once the metal linkers are removed (uses the un-padded el_ml).
    Natom = total_atom - np.sum([M * el_ml[a] for a in el_ml])
    total_mass = np.sum([el_cif[a] * atom_mass[a] for a in el_cif])

    # Re-key every composition on the structure's elements, filling gaps with 0,
    # so that cal_err never has to test for missing keys.
    el_ml = {a: el_ml.get(a, 0) for a in el_cif}
    el_ol1 = {a: el_ol1.get(a, 0) for a in el_cif}
    el_ol2 = {a: el_ol2.get(a, 0) for a in el_cif}
    el_fg1 = {a: el_fg1.get(a, 0) for a in el_cif}
    el_fg2 = {a: el_fg2.get(a, 0) for a in el_cif}

    def cal_err(c1, c2, c3, c4):
        # Mass-weighted absolute composition mismatch, relative to the total mass.
        weighted = [
            np.abs(
                M * el_ml[a] + c1 * el_ol1[a] + c2 * el_ol2[a]
                + c3 * el_fg1[a] + c4 * el_fg2[a] - el_cif[a]
            ) * atom_mass[a]
            for a in el_cif
        ]
        return np.sum(weighted) / total_mass

    Fgmax = int(total_atom - Natom)

    # Pick the search that matches which functional-group counts are already known.
    # Keys are (fn1 is known, fn2 is known).
    searches = {
        (True, True): fn1IsNotNone_fn2IsNotNone_Lmax,
        (True, False): fn1IsNotNone_fn2IsNone_Lmax,
        (False, True): fn1IsNone_fn2IsNotNone_Lmax,
        (False, False): fn1IsNone_fn2IsNone_Lmax,
    }
    search = searches[(fn1 is not None, fn2 is not None)]

    # Placeholder for the unlocked (non-Lmax) estimate, which is not computed.
    n1 = [1] * 6
    n2 = search(M, fn1, fn2, Fgmax, Lmax, cal_err)

    return (n1, n2)

In [None]:
# Load the features extracted from the CIF files by cif_to_npy.ipynb.
# NOTE(review): allow_pickle=True makes np.load unpickle arbitrary objects;
# only use it on files from a trusted source.
structures = np.load('train_structures.npy', allow_pickle=True)

In [None]:
# Build one feature row per training structure and write them to
# train_features_full.csv.

atom_labels = {'Br', 'F', 'Cr', 'Cu', 'N', 'H', 'Zn', 'O', 'P', 'V', 'S', 'Ba', 'C', 'I', 'Ni', 'Cl'}
atom_ol = {'N', 'H', 'C', 'O'}
atom_ml = {'Cu', 'V', 'Zn', 'O', 'Ni', 'Cr', 'N', 'H', 'C', 'Ba'}
# Atomic mass per element label; also read as a global by getNumOLFG.
atom_mass = {a:Element(a).atomic_mass for a in atom_labels}

# Maximum values used for normalisation
MAX_NUM_ATOMS = 2208
MAX_VOL = 85000
MAX_CELL_LENGTH = 61

halides = ['F', 'Cl', 'Br', 'I']
metals = ['Cu', 'Zn', 'V', 'Ni', 'Cr', 'Ba']
electronegative_atoms = ['O', 'N', 'F', 'Cl']

data_ = []

for i, mof in enumerate(structures):

    # NOTE(review): `data` is not defined anywhere in the visible notebook; it
    # is used as a DataFrame with the columns read below and must be loaded
    # before this cell runs.
    _data = data[data['MOFname']==mof['name']]
    if len(_data) == 0:
        continue

    cell = mof['cell']
    el = mof['elements']

    linker1 = _data['metal_linker'].values[0]
    linker2 = _data['organic_linker1'].values[0]
    linker3 = _data['organic_linker2'].values[0]
    f1 = _data['functional_groups-1'].values[0]
    f2 = _data['functional_groups-2'].values[0]
    t = _data['topology'].values[0]
    m = topo[t]['m']
    n = topo[t]['n']

    # Atom count per element, in order of first appearance (single pass).
    el_cif = {}
    for a in el:
        el_cif[a] = el_cif.get(a, 0) + 1

    # sc: the largest power of two (as a float, at least 1) dividing every element count.
    sc = 2.
    while el_cif and not any(count % sc > 0 for count in el_cif.values()):
        sc *= 2
    sc /= 2

    ([n_ml, n_ol1, n_ol2, n_fg1, n_fg2, err], [_n_ml, _n_ol1, _n_ol2, _n_fg1, _n_fg2, _err]) = getNumOLFG(el_cif, linker1, linker2, linker3, f1, f2, m, n)

    # Atom tallies: N counts as half a C, and halides are counted as hydrogen.
    N_C = el_cif.get('C', 0) + 0.5*el_cif.get('N', 0)
    N_H = el_cif.get('H', 0) + np.sum([el_cif.get(h, 0) for h in halides])
    N_O = el_cif.get('O', 0)
    N_N = el_cif.get('N', 0)
    N_metal = np.sum([el_cif.get(x, 0) for x in metals])
    N_elec = np.sum([el_cif.get(x, 0) for x in electronegative_atoms])
    W_elec = np.sum([el_cif.get(x, 0)*atom_mass[x] for x in electronegative_atoms])
    total_degree_unsat = ((2*N_C) + 2 - N_H)/2
    degree_unsat_per_c = total_degree_unsat/N_C

    # The two per-atom / per-coordinate loops that stood here had no body
    # (a syntax error) and the first one overwrote the progress index `i`;
    # they have been removed.

    # NOTE(review): n_ml comes from getNumOLFG's first result, which is the
    # all-ones placeholder, so 'n_ml' is 1 and 'n_ml_m_n' is m/n; confirm this
    # is intended and not meant to be _n_ml.
    d = {
        'MOFname':mof['name'],
        'NUM_ATOMS':len(el),
        'a':cell[0]/MAX_CELL_LENGTH,
        'b':cell[1]/MAX_CELL_LENGTH,
        'c':cell[2]/MAX_CELL_LENGTH,
        'abc':np.mean(cell[0:3])/MAX_CELL_LENGTH,
        'alpha':cell[3]/(np.pi),
        'beta':cell[4]/(np.pi),
        'gamma':cell[5]/(np.pi),
        'alpha-beta-gamma':np.mean(cell[3:6])/np.pi,
        'DEG_UNSAT': total_degree_unsat,
        'DEG_UNSAT_C':degree_unsat_per_c,
        'METAL_C':N_metal/N_C,
        'O_METAL':2*N_O/N_metal,
        'ELEC_FRAC':N_elec/len(el),
        'W_ELEC':W_elec/len(el),
        'O_C':N_O/N_C,
        'N_C':N_N/N_C,
        'N_O':2*N_N/(N_N+N_O),
        'sc':sc,
        'n_ml':n_ml,
        'n_m':n/m,
        'n_ml_m_n':(n_ml)*m/n,

        # Estimates from the Lmax-locked search.
        '_n_ol1':_n_ol1,
        '_n_ol2':_n_ol2,
        '_n_fg1':_n_fg1,
        '_n_fg2':_n_fg2,
        '_err':_err,
    }

    # Fraction of atoms of each element.
    for a in atom_labels:
        d[a] = el_cif.get(a, 0)/len(el)

    if i%1000==0:
        print(i)

    data_.append(d)

# Build the frame from the rows collected above (`data_`).
structure_data = pd.DataFrame(data_)
structure_data.to_csv('train_features_full.csv', index=False)
print('FINISH')

In [None]:
# Load the features extracted from the CIF files by cif_to_npy.ipynb.
# NOTE(review): allow_pickle=True makes np.load unpickle arbitrary objects;
# only use it on files from a trusted source.
structures = np.load('test_structures.npy', allow_pickle=True)

In [None]:
#

# Build one feature row per test structure and write them to
# test_features_full.csv.

atom_labels = {'Br', 'F', 'Cr', 'Cu', 'N', 'H', 'Zn', 'O', 'P', 'V', 'S', 'Ba', 'C', 'I', 'Ni', 'Cl'}
atom_ol = {'N', 'H', 'C', 'O'}
atom_ml = {'Cu', 'V', 'Zn', 'O', 'Ni', 'Cr', 'N', 'H', 'C', 'Ba'}
# Atomic mass per element label; also read as a global by getNumOLFG.
atom_mass = {a:Element(a).atomic_mass for a in atom_labels}

# Maximum values used for normalisation
MAX_NUM_ATOMS = 2208
MAX_VOL = 85000
MAX_CELL_LENGTH = 61

halides = ['F', 'Cl', 'Br', 'I']
metals = ['Cu', 'Zn', 'V', 'Ni', 'Cr', 'Ba']
electronegative_atoms = ['O', 'N', 'F', 'Cl']

data_ = []

for i, mof in enumerate(structures):

    # NOTE(review): `data` is not defined anywhere in the visible notebook; it
    # is used as a DataFrame with the columns read below and must be loaded
    # before this cell runs.
    _data = data[data['MOFname']==mof['name']]
    if len(_data) == 0:
        continue

    cell = mof['cell']
    el = mof['elements']

    linker1 = _data['metal_linker'].values[0]
    linker2 = _data['organic_linker1'].values[0]
    linker3 = _data['organic_linker2'].values[0]
    f1 = _data['functional_groups-1'].values[0]
    f2 = _data['functional_groups-2'].values[0]
    t = _data['topology'].values[0]
    m = topo[t]['m']
    n = topo[t]['n']

    # Atom count per element, in order of first appearance (single pass).
    el_cif = {}
    for a in el:
        el_cif[a] = el_cif.get(a, 0) + 1

    # sc: the largest power of two (as a float, at least 1) dividing every element count.
    sc = 2.
    while el_cif and not any(count % sc > 0 for count in el_cif.values()):
        sc *= 2
    sc /= 2

    ([n_ml, n_ol1, n_ol2, n_fg1, n_fg2, err], [_n_ml, _n_ol1, _n_ol2, _n_fg1, _n_fg2, _err]) = getNumOLFG(el_cif, linker1, linker2, linker3, f1, f2, m, n)

    # Atom tallies: N counts as half a C, and halides are counted as hydrogen.
    N_C = el_cif.get('C', 0) + 0.5*el_cif.get('N', 0)
    N_H = el_cif.get('H', 0) + np.sum([el_cif.get(h, 0) for h in halides])
    N_O = el_cif.get('O', 0)
    N_N = el_cif.get('N', 0)
    N_metal = np.sum([el_cif.get(x, 0) for x in metals])
    N_elec = np.sum([el_cif.get(x, 0) for x in electronegative_atoms])
    W_elec = np.sum([el_cif.get(x, 0)*atom_mass[x] for x in electronegative_atoms])
    total_degree_unsat = ((2*N_C) + 2 - N_H)/2
    degree_unsat_per_c = total_degree_unsat/N_C

    # The two per-atom / per-coordinate loops that stood here had no body
    # (a syntax error) and the first one overwrote the progress index `i`;
    # they have been removed.

    # NOTE(review): n_ml comes from getNumOLFG's first result, which is the
    # all-ones placeholder, so 'n_ml' is 1 and 'n_ml_m_n' is m/n; confirm this
    # is intended and not meant to be _n_ml.
    d = {
        'MOFname':mof['name'],
        'NUM_ATOMS':len(el),
        'a':cell[0]/MAX_CELL_LENGTH,
        'b':cell[1]/MAX_CELL_LENGTH,
        'c':cell[2]/MAX_CELL_LENGTH,
        'abc':np.mean(cell[0:3])/MAX_CELL_LENGTH,
        'alpha':cell[3]/(np.pi),
        'beta':cell[4]/(np.pi),
        'gamma':cell[5]/(np.pi),
        'alpha-beta-gamma':np.mean(cell[3:6])/np.pi,
        'DEG_UNSAT': total_degree_unsat,
        'DEG_UNSAT_C':degree_unsat_per_c,
        'METAL_C':N_metal/N_C,
        'O_METAL':2*N_O/N_metal,
        'ELEC_FRAC':N_elec/len(el),
        'W_ELEC':W_elec/len(el),
        'O_C':N_O/N_C,
        'N_C':N_N/N_C,
        'N_O':2*N_N/(N_N+N_O),
        'sc':sc,
        'n_ml':n_ml,
        'n_m':n/m,
        'n_ml_m_n':(n_ml)*m/n,

        # Estimates from the Lmax-locked search.
        '_n_ol1':_n_ol1,
        '_n_ol2':_n_ol2,
        '_n_fg1':_n_fg1,
        '_n_fg2':_n_fg2,
        '_err':_err,
    }

    # Fraction of atoms of each element.
    for a in atom_labels:
        d[a] = el_cif.get(a, 0)/len(el)

    if i%1000==0:
        print(i)

    data_.append(d)

# Build the frame from the rows collected above (`data_`).
structure_data = pd.DataFrame(data_)
structure_data.to_csv('test_features_full.csv', index=False)
print('FINISH')