In [22]:
from pymatgen.io.cif import CifParser
import numpy as np
import json
import os
import math

In [23]:
os.getcwd()

'C:\\Users\\GrzegorzKaszuba\\PycharmProjects\\cdvae-main\\panna_config'

In [24]:
def cif_to_json(cif_file):
    """
    gets a cif file as an input and converts the info 
    into a json file, which will be used later for 
    Parinnelo vector (gvector) calculation
    """
    # Read the CIF file using pymatgen's CIFParser
    cif_parser = CifParser(cif_file)
    structure = cif_parser.get_structures(primitive=False)[0]

    # Extract relevant information
    atoms_list = []
    for i, site in enumerate(structure):
        atom_id = i + 1
        atom_symbol = site.specie.symbol
        fract_x, fract_y, fract_z = site.frac_coords
        atoms_list.append([atom_id, atom_symbol, [fract_x, fract_y, fract_z], [0, 0, 0]])  # Assume forces as [0, 0, 0]

    # Create the JSON dictionary
    json_dict = {
        "atoms": atoms_list,
        "atomic_position_unit": "cartesian",
        "lattice_vectors": structure.lattice.matrix.tolist(),
        "energy": [0, "Ha"]
    }

    # Convert to JSON string
    json_str = json.dumps(json_dict, indent=4)

    # Write JSON to the output file with .example extension
    base_name = os.path.splitext(os.path.basename(cif_file))[0]
    output_file = f"{base_name}.example"
    with open(output_file, 'w') as f:
        f.write(json_str)

In [25]:
def modify_file(file_path, cif_file):
    """
    Modifies the config file (gvec.ini) for each 
    cif file.
    """
    # get the atom types
    cif_parser = CifParser(cif_file)
    structure = cif_parser.get_structures(primitive=False)[0]
    atom_types = sorted(set([site.specie.symbol for site in structure]))
    
    # Read the content of the file
    with open(file_path, 'r') as f:
        lines = f.readlines()

    # Find the line that starts with "species"
    for i, line in enumerate(lines):
        if line.strip().startswith("species"):
            # Modify the line with the new species list
            lines[i] = f"""species = {", ".join(atom_types)}\n"""
            break

    # Write the modified content back to the file
    with open(file_path, 'w') as f:
        f.writelines(lines)

In [30]:
# create the json files
cif_dir = "./cif"
for cif_file in os.listdir(cif_dir):
    if cif_file.endswith(".cif"):
        cif_file_path = os.path.join(cif_dir, cif_file)
        cif_to_json(cif_file_path)
    
        # create the corresponding config file
        modify_file("./gvec.ini", "./cif/"+cif_file)
        # genrate corresponding gvectors (files with .bin extention)
        !python panna/src/panna/gvect_calculator.py --config ./gvec.ini

INFO - 
    ____   _    _   _ _   _    _           
   |  _ \ / \  | \ | | \ | |  / \     
   | |_) / _ \ |  \| |  \| | / _ \     
   |  __/ ___ \| |\  | |\  |/ ___ \    
   |_| /_/   \_\_| \_|_| \_/_/   \_\ 

 Properties from Artificial Neural Network Architectures

INFO - PBC will be determined by lattice parameters in the json file for each example
INFO - Radial Gaussian centers are set by Rs0_rad, Rc_rad, RsN_rad
INFO - Angular descriptor centers are set by ThetasN
INFO - Radial-angular Gaussian centers are set by Rs0_ang, Rc_ang, RsN_ang
INFO - g type: mBP
INFO - g DOI: 10.1016/j.cpc.2020.107402
INFO - g size: 40
INFO - the computation has been restarted (a file named gvect_already_computed.dat has been found).Already computed files will not be recomputed.
INFO - computed keys 10/10
INFO - --start--
INFO - ----run----
INFO - 
    ____   _    _   _ _   _    _           
   |  _ \ / \  | \ | | \ | |  / \     
   | |_) / _ \ |  \| |  \| | / _ \     
   |  __/ ___ \| |\  | |\  |/ ___ 

In [33]:
def gvector (gvector):
    with open(gvector, "rb") as binary_file:
        bin_version = int.from_bytes(binary_file.read(4),
                                     byteorder='little',
                                    signed=False)
        if bin_version != 0:
            print("Version not supported!")
            exit(1)
        # converting to int to avoid handling little/big endian
        flags = int.from_bytes(binary_file.read(2),
                               byteorder='little',
                               signed=False)
        n_atoms = int.from_bytes(binary_file.read(4),
                                 byteorder='little',
                                 signed=False)
        g_size = int.from_bytes(binary_file.read(4),
                                byteorder='little',
                                signed=False)
        payload = binary_file.read()
        data = np.frombuffer(payload, dtype='<f4')
        en = data[0]
        gvect_size = n_atoms * g_size
        spec_tensor = np.reshape((data[1:1+n_atoms]).astype(np.int32),
                                 [1, n_atoms])
        gvect_tensor = np.reshape(data[1+n_atoms:1+n_atoms+gvect_size],
                    [n_atoms, g_size])
    return (gvect_tensor)

In [36]:
structure_1 = gvector("./Cr.bin")
structure_2 = gvector("./Fe42Cr58_8.bin")
structure_4 = gvector("./Fe42Cr58_0.bin")
structure_3 = gvector("./Ni19Fe11Cr70_4.bin")
structure_5 = gvector("./Zn_test0.bin")
structure_6 = gvector("./Zn_test1.bin")
structure_7 = gvector("./Zn_test_next0.bin")
structure_8 = gvector("./Zn_test_next1.bin")
structure_9 = gvector("./Al.bin")
structure_10 = gvector("./Al_rotated.bin")

In [37]:
print(len(structure_5)) # number of atoms in structure_1
print(structure_5[0]) # gvector of the 1st atom in structure_1
print(len(structure_5[0])) # dimentions of the gvector of the 1st atom in structure_1

54
[2.9347834e+01 2.4922567e+01 1.7698858e+00 4.6712998e-03 2.7403803e-07
 2.9012017e-13 5.0831820e-21 1.4109446e-30 2.0227106e+02 5.0944476e+02
 5.4221948e+02 3.8182495e+02 1.8402454e+02 6.2780087e+01 2.5035017e+01
 7.6397634e+00 1.3455638e+02 2.9238635e+02 2.6474658e+02 1.2896338e+02
 3.9993378e+01 7.7947292e+00 2.6300197e+00 5.4655474e-01 1.5594271e+00
 2.9609280e+00 2.2038701e+00 7.0511818e-01 1.3090208e-01 1.2740296e-02
 2.9011881e-03 3.8018206e-04 2.2306811e-04 3.7946622e-04 2.3215004e-04
 4.8435963e-05 5.1072611e-06 2.5908912e-07 2.8444058e-08 2.2952307e-09]
40


In [42]:
# to see the distance of two structure_1 from itself
dis_mat = []
for i in range (len(structure_10)):
    dis_mat.append(np.linalg.norm(structure_10[i:i+1] - structure_9, axis=1))

min_dis = []
a = np.copy(dis_mat)    
for j in range (len(a)):
    a[j].sort();
    min_dis.append(a[j][0])
    
min_dis

[4807.997,
 4956.372,
 4946.917,
 4934.057,
 5311.956,
 4949.9,
 4890.385,
 5260.064,
 5255.256,
 5043.2666,
 5484.3247,
 5056.8193,
 4858.2188,
 5057.638,
 5072.2246,
 4862.0894,
 5122.5845,
 4864.161,
 4908.9395,
 5172.6636,
 5037.4536,
 5164.74,
 5424.36,
 5287.333,
 5240.1914,
 5598.357,
 5469.609,
 5441.413,
 5625.391,
 5477.478,
 5118.517,
 5436.5566,
 5229.733,
 5074.275,
 5244.081,
 5124.5747,
 4893.5747,
 4992.5566,
 4890.228,
 5004.6914,
 5088.61,
 5145.505,
 5183.4146,
 5314.1655,
 5092.9785,
 5129.6133,
 5240.4854,
 5415.5986,
 5077.0205,
 5109.3413,
 4965.936,
 4924.501,
 4952.7427,
 5063.8276,
 4939.761,
 5102.28,
 5228.5425,
 5281.5327,
 5400.2188,
 5180.1523,
 5255.4966,
 5500.3003,
 5626.922,
 5478.2773,
 5571.9614,
 5369.934,
 5114.8584,
 5292.2065,
 5445.5415,
 5089.0996,
 5159.121,
 5003.8965,
 5288.048,
 5456.577,
 5407.7886,
 5600.05,
 5511.988,
 5591.4404,
 5682.3647,
 5881.481,
 5827.596,
 5881.4814,
 5682.364,
 5827.596,
 5511.9873,
 5600.0483,
 5591.4395,
 545