# Crystal Dataset

In [1]:
import pymatgen
import pymatgen.core.structure
import numpy as np
import os

 Prepare some test data. The expected directory layout is as follows:
 
 ```bash
 ├── data_directory
    ├── file_directory
    │   ├── *.cif
    │   ├── *.cif
    │   └── ...
    ├── file_name.csv
    └── file_name.pymatgen.json
 ```

In [2]:
# Three example crystals, each defined by its lattice vectors, species and
# fractional site coordinates.
test_data = [
    pymatgen.core.Structure(
        lattice=np.array([
            [4.34157255, 0., 2.50660808],
            [1.44719085, 4.09327385, 2.50660808],
            [0., 0., 5.01321616],
        ]),
        species=["Te", "Ba"],
        coords=np.array([[0.5, 0.5, 0.5], [0., 0., 0.]]),
    ),
    pymatgen.core.Structure(
        lattice=np.array([
            [2.95117784, 0., 1.70386332],
            [0.98372595, 2.78239715, 1.70386332],
            [0., 0., 3.40772664],
        ]),
        species=["B", "As"],
        coords=np.array([[0.25, 0.25, 0.25], [0., 0., 0.]]),
    ),
    pymatgen.core.Structure(
        lattice=np.array([
            [4.3015, 0., 0.],
            [-2.15075, 3.725208, 0.],
            [0., 0., 5.2703],
        ]),
        species=["Ba", "Ga", "Si", "H"],
        coords=np.array([
            [0., 0., 0.],
            [0.6666, 0.3333, 0.5423],
            [0.3334, 0.6667, 0.4555],
            [0.6666, 0.3333, 0.8759],
        ]),
    ),
]

# Write one CIF file per structure into ExampleCrystal/CifFiles.
os.makedirs("ExampleCrystal", exist_ok=True)
os.makedirs("ExampleCrystal/CifFiles", exist_ok=True)
for idx, structure in enumerate(test_data):
    structure.to(filename="ExampleCrystal/CifFiles/file_%s.cif" % idx, fmt="cif")

# Table that maps every CIF file name to an index and a label.
csv_data = (
    "file_name,index,label\n"  # Need header!
    "file_0.cif, 0, 98.58577122703691\n"
    "file_1.cif, 1, 701.5857233477558\n"
    "file_2.cif, 2, 1138.5856886491724"
)
with open("ExampleCrystal/data.csv", "w") as csv_file:
    csv_file.write(csv_data)

In [3]:
from kgcnn.data.crystal import CrystalDataset



In [4]:
# Point the dataset at the example data written above: the table
# ExampleCrystal/data.csv and the CIF folder ExampleCrystal/CifFiles.
dataset = CrystalDataset(
    data_directory="ExampleCrystal/", 
    dataset_name="ExampleCrystal", 
    file_name="data.csv", 
    file_directory="CifFiles"
)

Generate a JSON-serialized list of structures from the CIF files via `prepare_data`.

In [5]:
# Read the CIF files named in the "file_name" column of the CSV and store them
# as one JSON file of pymatgen structures (see the log output below).
# NOTE(review): overwrite=True presumably regenerates the JSON even when it
# already exists -- confirm against the kgcnn documentation.
dataset.prepare_data(file_column_name="file_name", overwrite=True)

INFO:kgcnn.data.ExampleCrystal:Read 3 cif-file via pymatgen ...
INFO:kgcnn.data.ExampleCrystal: ... load structure 0 from 3
INFO:kgcnn.data.ExampleCrystal:Exporting as dict for pymatgen ...
INFO:kgcnn.data.ExampleCrystal:Saving structures as .json ...


<kgcnn.data.crystal.CrystalDataset at 0x2064ffe0c40>

Read the dataset into memory with `read_in_memory`.

In [6]:
# Load the structures into the dataset, taking the graph labels from the
# "label" column of the CSV, then show the first entry as a sanity check.
dataset.read_in_memory(label_column_name="label")
print(dataset[0])

INFO:kgcnn.data.ExampleCrystal:Making node features from structure...
INFO:kgcnn.data.ExampleCrystal:Reading structures from .json ...
INFO:kgcnn.data.ExampleCrystal: ... read structures 0 from 3


{'graph_labels': array(98.58577123), 'node_coordinates': array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.31245659e-09, 6.13991078e+00, 2.27324426e-09]]), 'node_frac_coordinates': array([[0. , 0. , 0. ],
       [0.5, 0.5, 0.5]]), 'graph_lattice': array([[ 1.44719085e+00,  4.09327385e+00,  2.50660808e+00],
       [ 1.44719085e+00,  4.09327385e+00, -2.50660808e+00],
       [-2.89438170e+00,  4.09327385e+00,  1.51549528e-09]]), 'abc': array([5.01321616, 5.01321616, 5.01321616]), 'charge': array([0.]), 'volume': array([89.0910946]), 'node_number': array([56, 52])}


Read only the pymatgen structures via `load_pymatgen_structures`. The structures are returned by the function and are not assigned to the dataset.

In [9]:
# Returns the pymatgen structures read from the JSON file; the result is only
# displayed here and is not kept in a variable.
dataset.load_pymatgen_structures()

INFO:kgcnn.data.ExampleCrystal:Reading structures from .json ...


[Structure Summary
 Lattice
     abc : 5.013216160673185 5.013216156734002 5.01321616
  angles : 59.999999978449296 60.00000000444202 59.999999976968624
  volume : 89.09109455639522
       A : 4.34157255 0.0 2.50660808
       B : 1.44719085 4.09327385 2.50660808
       C : 0.0 0.0 5.01321616
     pbc : True True True
 PeriodicSite: Te (2.8944, 2.0466, 5.0132) [0.5000, 0.5000, 0.5000]
 PeriodicSite: Ba (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000],
 Structure Summary
 Lattice
     abc : 3.407726640525688 3.4077266407787095 3.40772664
  angles : 60.000000007559144 60.000000005103 59.99999994981319
  volume : 27.982032094119386
       A : 2.95117784 0.0 1.70386332
       B : 0.98372595 2.78239715 1.70386332
       C : 0.0 0.0 3.40772664
     pbc : True True True
 PeriodicSite: B (0.9837, 0.6956, 1.7039) [0.2500, 0.2500, 0.2500]
 PeriodicSite: As (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000],
 Structure Summary
 Lattice
     abc : 4.3015 4.301499762381023 5.2703
  angles : 90.0 90.

Alternatively, save a list of structures directly to JSON with `save_pymatgen_structures`, without collecting individual files first.

In [8]:
# Serialize the in-memory pymatgen structures in `test_data` straight to JSON.
# NOTE(review): presumably this writes the same JSON file that prepare_data
# produces -- confirm against the kgcnn documentation.
dataset.save_pymatgen_structures(test_data)

INFO:kgcnn.data.ExampleCrystal:Exporting as dict for pymatgen ...
INFO:kgcnn.data.ExampleCrystal:Saving structures as .json ...
