### Imports and preprocessing run

In [6]:
import os
import numpy as np
import preprocessing as pp
from joblib import dump, load
import time


def preprocessing(directions: list, api_key: str, crystal_system: str, base_dir: str, folder_dir: str, txt_filename: str = None, min_size: int = None, space_group: int = None, plot: bool = False):
    """Collect crystals, preprocess them for the given directions, and save the result.

    The crystals come from one of two sources:

    * if ``txt_filename`` is given (truthy), they are read with
      ``pp.get_crystals_from_file`` from ``os.path.join(base_dir, txt_filename)``;
      ``crystal_system``, ``min_size`` and ``space_group`` are not used in
      this case;
    * otherwise they are fetched with ``pp.retrieve_crystals_from_api``
      (with ``write=False``) and passed through ``pp.get_crystal_info``.

    Either way the crystals are handed to ``pp.get_preprocessed_data``
    together with ``directions`` (``vectors=True``), and the result is
    written with ``pp.save_data`` under ``base_dir`` / ``folder_dir``.

    Args:
        directions: Beam directions forwarded to ``pp.get_preprocessed_data``.
        api_key: API key forwarded to the crystal-retrieval helpers.
        crystal_system: Crystal system used to query the API (API branch only).
        base_dir: Base directory for the input file and for the saved output.
        folder_dir: Passed to ``pp.save_data`` as ``new_dir``.
        txt_filename: Optional file name, relative to ``base_dir``, listing
            the crystals to process.
        min_size: Forwarded to ``pp.retrieve_crystals_from_api`` (API branch only).
        space_group: Forwarded to ``pp.retrieve_crystals_from_api`` (API branch only).
        plot: Forwarded to ``pp.get_preprocessed_data`` in both branches.

    Returns:
        Whatever ``pp.save_data`` returns (presumably the path of the saved
        data, which callers later pass to ``pp.load_data`` -- TODO confirm).
    """
    if txt_filename:
        crystals = pp.get_crystals_from_file(
            filename=os.path.join(base_dir, txt_filename),
            api_key=api_key,
        )
    else:
        crystals = pp.get_crystal_info(
            pp.retrieve_crystals_from_api(
                api_key=api_key,
                crystal_system=crystal_system,
                base_dir=base_dir,
                write=False,
                min_size=min_size,
                space_group=space_group,
            ),
        )

    # Both sources share the same tail. `plot` is forwarded as given; the
    # file branch used to force plot=True regardless of the caller's choice.
    preprocessed = pp.get_preprocessed_data(
        crystals,
        directions,
        plot=plot,
        vectors=True,
    )
    return pp.save_data(preprocessed, base_dir=base_dir, new_dir=folder_dir)



# The Materials Project API key is a credential and must not live in the
# notebook; it is read from the environment instead.
# NOTE(review): a key was previously hard-coded in this cell -- treat it as
# leaked and rotate it.
key = os.environ.get('MP_API_KEY')
if not key:
    raise RuntimeError(
        "Set the MP_API_KEY environment variable to your Materials Project API key."
    )

# Argument of pp.get_cartesian_beam_directions (presumably the upper bound
# on the direction indices -- TODO confirm against the pp module).
upper = 2

# Project root; override with CRYSTAL_SIM_BASE_DIR on other machines.
base_dir = os.environ.get(
    'CRYSTAL_SIM_BASE_DIR',
    '/Users/jonathanchoi/Desktop/GitHub Projects/crystal_sim/',
)

directions = pp.get_cartesian_beam_directions(upper)
# Drop the row at index 3 (NOTE(review): the reason is not visible from this
# notebook -- document why this direction is excluded).
directions = np.delete(directions, 3, axis=0)

# Fetch, preprocess and save up to 3000 cubic crystals of space group 225.
# The captured output shows this run taking ~2800 s.
path = preprocessing(
    directions=directions,
    api_key=key,
    crystal_system="Cubic",
    base_dir=base_dir,
    folder_dir="cubic_F_225_v3",
    min_size=3000,
    space_group=225,
    plot=False,
)

plot = False and 

Retrieving MaterialsDoc documents:   0%|          | 0/9119 [00:00<?, ?it/s]

_________________Summary:_________________

>> F Centered on A: number of crystals - 9119
>> Total number of crystals: 9119

>> Smallest number of crystals: 9119
>> Overwriting the smallest number of available sample points from 9119 to 3000
>> 3000 crystals will be processed



Retrieving MaterialsDoc documents:   0%|          | 0/3000 [00:00<?, ?it/s]

_________________0% complete_________________
>> Crystal No.0: (MPID(mp-1038831), 'Mg3Bi') 
>> Time elapsed: 0.001 s
>> Time diff: 0.001 s
_________________10% complete_________________
>> Crystal No.300: (MPID(mp-1008224), 'CrCo2Si') 
>> Time elapsed: 246.544 s
>> Time diff: 246.543 s
_________________20% complete_________________
>> Crystal No.600: (MPID(mp-1002187), 'TcB') 
>> Time elapsed: 495.931 s
>> Time diff: 249.386 s
_________________30% complete_________________
>> Crystal No.900: (MPID(mp-1009217), 'MnSb') 
>> Time elapsed: 741.894 s
>> Time diff: 245.963 s
_________________40% complete_________________
>> Crystal No.1200: (MPID(mp-977544), 'Hf2ReNi') 
>> Time elapsed: 1422.214 s
>> Time diff: 680.32 s




_________________50% complete_________________
>> Crystal No.1500: (MPID(mp-972949), 'LaSmZn2') 
>> Time elapsed: 1655.986 s
>> Time diff: 233.773 s
_________________60% complete_________________
>> Crystal No.1800: (MPID(mp-862814), 'PaSiTc2') 
>> Time elapsed: 1882.365 s
>> Time diff: 226.378 s
_________________70% complete_________________
>> Crystal No.2100: (MPID(mp-865140), 'MnGaRu2') 
>> Time elapsed: 2113.412 s
>> Time diff: 231.047 s
_________________80% complete_________________
>> Crystal No.2400: (MPID(mp-568914), 'K2WCl6') 
>> Time elapsed: 2348.825 s
>> Time diff: 235.413 s
_________________90% complete_________________
>> Crystal No.2700: (MPID(mp-2232639), 'NaMg(Cu3O4)2') 
>> Time elapsed: 2574.61 s
>> Time diff: 225.785 s
_________________100% complete_________________
>> No. of crystals preprocessed: 3000
>> Total time elapsed: 2797.375 s
______________________________________________________________________

>> Sanity Check: printing element(s) 1 of 3000 from each ar

In [5]:
# Load the previously saved "cubic_F_225_v2" dataset. The location is built
# from base_dir rather than repeating the absolute project path.
# Note: this rebinds `path`, which the preprocessing cell sets to the return
# value of the "cubic_F_225_v3" run.
path = os.path.join(base_dir, 'preprocessed_data', 'cubic_F_225_v2')

data_list = pp.load_data(path)

>> Retrieving: cubic_F_225_v2_regression_1500.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_bravais_2.joblib
>> Retrieving: cubic_F_225_v2_material_ids1500.joblib
>> Retrieving: cubic_F_225_v2_features_2.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_space_2.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_system_2.joblib
>> Retrieving: cubic_F_225_v2_material_ids2.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_system_1500.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_bravais_1500.joblib
>> Retrieving: cubic_F_225_v2_features_1500.joblib
>> Retrieving: cubic_F_225_v2_regression_2.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_space_1500.joblib
>> Sanity Check: printing element(s) 1 of 1500 from each array

[[3.43, 3.43, 3.43, 60.0, 60.0, 60.0]]
['F']
[(MPID(mp-1008785), 'RuO2')]
[[4.618, 3.265, 4.618, 3.265, 6.531, 0.785, 0.786, 0.785, 0.0, 4.618, 5.656, 3.266, 6.531, 5.656, 0.616, 0.955, 0.0, 0.955, 4.618, 3.265, 

In [None]:
from pymatgen.analysis.diffraction.tem import TEMCalculator
from pymatgen.core.structure import Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from mp_api.client import MPRester

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math as m
from decimal import Decimal, ROUND_HALF_UP

    
# Scratch query against the Materials Project API for space group 225.
# The original cell used `api_key` and `crystal_system`, which exist only as
# parameters of preprocessing() and are undefined at notebook level; it now
# uses the notebook-level `key` and the same "Cubic" system as the
# preprocessing run.
with MPRester(api_key=key) as mpr:
    crystals = mpr.materials.search(
        spacegroup_number=225,
        crystal_system=["Cubic"],
        fields=['material_id', 'symmetry'],
    )

In [9]:
def hello(**kwargs):
    """Print the ``min_size`` keyword argument; other keywords are ignored.

    Raises ``KeyError`` if ``min_size`` is not supplied. Returns ``None``.
    """
    min_size = kwargs['min_size']
    print(min_size)


hello(min_size=1, dumb=2)



1
