# Crystal Dataset

In [9]:
import pymatgen
import pymatgen.core.structure
import numpy as np
import os

Prepare some test data.

In [10]:
# Three small example crystals; coordinates are passed in pymatgen's default
# (fractional) convention.
test_data = [
    pymatgen.core.Structure(
        lattice=np.array([
            [4.34157255, 0., 2.50660808],
            [1.44719085, 4.09327385, 2.50660808],
            [0., 0., 5.01321616],
        ]),
        species=["Te", "Ba"],
        coords=np.array([[0.5, 0.5, 0.5], [0., 0., 0.]]),
    ),
    pymatgen.core.Structure(
        lattice=np.array([
            [2.95117784, 0., 1.70386332],
            [0.98372595, 2.78239715, 1.70386332],
            [0., 0., 3.40772664],
        ]),
        species=["B", "As"],
        coords=np.array([[0.25, 0.25, 0.25], [0., 0., 0.]]),
    ),
    pymatgen.core.Structure(
        lattice=np.array([
            [4.3015, 0., 0.],
            [-2.15075, 3.725208, 0.],
            [0., 0., 5.2703],
        ]),
        species=["Ba", "Ga", "Si", "H"],
        coords=np.array([
            [0., 0., 0.],
            [0.6666, 0.3333, 0.5423],
            [0.3334, 0.6667, 0.4555],
            [0.6666, 0.3333, 0.8759],
        ]),
    ),
]

# Creating the nested directory also creates its parent "ExampleCrystal".
os.makedirs("ExampleCrystal/CifFiles", exist_ok=True)

# One cif file per structure, named after its position in `test_data`.
for file_index, structure in enumerate(test_data):
    structure.to(
        filename="ExampleCrystal/CifFiles/file_%s.cif" % file_index,
        fmt="cif",
    )

# Table linking each cif file to its label; the first row is the header
# whose column names are referenced by the dataset calls further down.
csv_lines = [
    "file_name,index,label\n",
    "file_0.cif, 0, 98.58577122703691\n",
    "file_1.cif, 1, 701.5857233477558\n",
    "file_2.cif, 2, 1138.5856886491724",
]
csv_data = "".join(csv_lines)
with open("ExampleCrystal/data.csv", "w") as csv_file:
    csv_file.write(csv_data)

In [11]:
### 0. Crystal dataset

Data is organized like:

 ```bash
 data_directory
 ├── file_directory
 │   ├── *.cif
 │   ├── *.cif
 │   └── ...
 ├── file_name.csv
 └── file_name.pymatgen.json
 ```

SyntaxError: invalid syntax (2826302219.py, line 3)

In [3]:
from kgcnn.data.crystal import CrystalDataset



In [4]:
# Point the dataset at the example data written earlier in this notebook:
# "ExampleCrystal/" contains the table "data.csv" and the sub-directory
# "CifFiles" with the individual cif files.
dataset = CrystalDataset(
    data_directory="ExampleCrystal/", 
    dataset_name="ExampleCrystal", 
    file_name="data.csv", 
    file_directory="CifFiles"
)

### 1. Generate a json-serialized list of structures via `prepare_data`

In [5]:
# "file_name" is the header of the CSV column that lists the cif files.
# Per the log output, the cif files are read via pymatgen and the structures
# are saved as .json.
# NOTE(review): overwrite=True presumably regenerates an existing .json file - confirm against the kgcnn docs.
dataset.prepare_data(file_column_name="file_name", overwrite=True)

INFO:kgcnn.data.ExampleCrystal:Read 3 cif-file via pymatgen ...
INFO:kgcnn.data.ExampleCrystal: ... load structure 0 from 3
INFO:kgcnn.data.ExampleCrystal:Exporting as dict for pymatgen ...
INFO:kgcnn.data.ExampleCrystal:Saving structures as .json ...


<kgcnn.data.crystal.CrystalDataset at 0x2ceb8d3db50>

### 2. Read in memory with `read_in_memory`.

In [6]:
# Load the structures from the .json file into the dataset; "label" is the
# header of the CSV column that supplies the values shown as 'graph_labels'.
dataset.read_in_memory(label_column_name="label")
# Show the properties stored for the first crystal.
print(dataset[0])

INFO:kgcnn.data.ExampleCrystal:Making node features from structure...
INFO:kgcnn.data.ExampleCrystal:Reading structures from .json ...
INFO:kgcnn.data.ExampleCrystal: ... read structures 0 from 3


{'graph_labels': array(98.58577123), 'node_coordinates': array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.31245659e-09, 6.13991078e+00, 2.27324426e-09]]), 'node_frac_coordinates': array([[0. , 0. , 0. ],
       [0.5, 0.5, 0.5]]), 'graph_lattice': array([[ 1.44719085e+00,  4.09327385e+00,  2.50660808e+00],
       [ 1.44719085e+00,  4.09327385e+00, -2.50660808e+00],
       [-2.89438170e+00,  4.09327385e+00,  1.51549528e-09]]), 'abc': array([5.01321616, 5.01321616, 5.01321616]), 'charge': array([0.]), 'volume': array([89.0910946]), 'node_number': array([56, 52])}


Read only the pymatgen structures via `get_structures_from_json_file`. The structures are not assigned to the dataset but returned by the function.

In [7]:
# Returns the list of pymatgen structures read from the .json file; as the
# last expression of the cell, the list is displayed below.
dataset.get_structures_from_json_file()

INFO:kgcnn.data.ExampleCrystal:Reading structures from .json ...


[Structure Summary
 Lattice
     abc : 5.01321616 5.013216158484504 5.0132161584845045
  angles : 60.00000002 60.00000001 60.00000001
  volume : 89.09109460256703
       A : 1.4471908506158624 4.093273852854227 2.5066080815154956
       B : 1.4471908506158624 4.093273852854227 -2.506608078484504
       C : -2.8943816986068107 4.093273852854227 1.5154952848206449e-09
     pbc : True True True
 PeriodicSite: Ba (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000]
 PeriodicSite: Te (0.0000, 6.1399, 0.0000) [0.5000, 0.5000, 0.5000],
 Structure Summary
 Lattice
     abc : 3.40772664 3.4077266405150777 3.407726637424612
  angles : 60.000000029999995 60.000000024999984 59.999999995
  volume : 27.98203208499981
       A : 0.9837259499337652 2.7823971493851167 1.7038633194849222
       B : 0.9837259499337652 2.7823971493851167 -1.7038633205150777
       C : -1.9674518897566036 2.7823971493851167 -0.0
     pbc : True True True
 PeriodicSite: B (0.0000, 6.2604, -0.0000) [0.7500, 0.7500, 0.7500]
 Per

Or save a list of pymatgen structures directly to json via `save_structures_to_json_file`, without collecting individual files.

In [8]:
# Write the in-memory list of structures (`test_data`) to the dataset's .json
# file; per the log output they are exported as dicts for pymatgen first.
dataset.save_structures_to_json_file(test_data)

INFO:kgcnn.data.ExampleCrystal:Exporting as dict for pymatgen ...
INFO:kgcnn.data.ExampleCrystal:Saving structures as .json ...


### 3. Generate graph