### Imports

In [1]:
import os
import numpy as np
import preprocessing as pp
from joblib import dump, load
import time

  from .autonotebook import tqdm as notebook_tqdm


## Preprocessing from .txt file
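- To preprocess a specific set of crystals, list their Materials Project IDs (mp-ids) in a .txt file and pass its path (relative to `base_dir`) as `txt_filename`. If `txt_filename` is omitted, crystals are instead retrieved from the API for the given `crystal_system`.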


In [5]:
def preprocessing(directions: list, api_key: str, crystal_system: str, base_dir: str, folder_dir: str, txt_filename: str = None, min_size: int = None):
    """Fetch crystals, preprocess them for the given beam directions, and save the result.

    The crystals come from one of two sources:

    * ``txt_filename`` given: ``pp.get_crystals_from_file`` is called on
      ``os.path.join(base_dir, txt_filename)``.
    * ``txt_filename`` omitted/empty: ``pp.retrieve_crystals_from_api`` is
      called for ``crystal_system`` (with ``write=False``) and its result is
      passed through ``pp.get_crystal_info``.

    Either way the crystals are then fed to ``pp.get_preprocessed_data``
    (``plot=False, vectors=True``) and stored with ``pp.save_data``.

    Args:
        directions: Beam directions forwarded to ``pp.get_preprocessed_data``.
        api_key: API key forwarded to the crystal-retrieval helpers.
        crystal_system: Crystal system to query; only used when
            ``txt_filename`` is not given.
        base_dir: Base directory; used to resolve ``txt_filename`` and passed
            to ``pp.save_data`` / ``pp.retrieve_crystals_from_api``.
        folder_dir: Passed to ``pp.save_data`` as ``new_dir``.
        txt_filename: Path, relative to ``base_dir``, of the .txt file to read
            crystals from. When falsy, the API route is used instead.
        min_size: Forwarded to ``pp.retrieve_crystals_from_api``; only used on
            the API route.

    Returns:
        Whatever ``pp.save_data`` returns (in this notebook's run it is the
        path of the folder the files were saved to).
    """
    start = time.time()

    if txt_filename:
        crystals = pp.get_crystals_from_file(
            filename=os.path.join(base_dir, txt_filename),
            api_key=api_key,
        )
    else:
        crystals = pp.get_crystal_info(
            pp.retrieve_crystals_from_api(
                # Use the caller-supplied key; the original read the notebook
                # global `key` here, ignoring the `api_key` argument.
                api_key=api_key,
                crystal_system=crystal_system,
                base_dir=base_dir,
                write=False,
                min_size=min_size,
            ),
        )

    # Both sources share the same preprocessing and saving pipeline.
    preproc = pp.save_data(
        pp.get_preprocessed_data(
            crystals,
            directions,
            plot=False,
            vectors=True,
        ),
        base_dir=base_dir,
        new_dir=folder_dir,
    )

    print(f">> Time to preprocess: {round(time.time() - start, 2)} s")
    return preproc


In [6]:
# Read the Materials Project API key from the environment instead of committing
# it to the notebook. NOTE(review): the key previously hardcoded here has been
# exposed in version history and should be revoked/regenerated.
key = os.environ.get("MP_API_KEY")
if not key:
    raise RuntimeError("Set the MP_API_KEY environment variable to your Materials Project API key.")

upper = 2
# Path of the mp-id list, relative to base_dir.
relative_path = 'raw_data/test.txt'
# NOTE(review): machine-specific absolute path; adjust for your checkout.
base_dir = '/Users/jonathanchoi/Desktop/GitHub Projects/crystal_sim/'
directions = pp.get_cartesian_beam_directions(upper)
# Drop the entry at index 3 along axis 0 (reason not stated here — TODO confirm).
directions = np.delete(directions, 3, axis=0)

# test for from txt_filename
preprocessing(directions=directions, api_key=key, crystal_system="Cubic", base_dir=base_dir, folder_dir="test_5", txt_filename=relative_path)

Retrieving MaterialsDoc documents: 100%|██████████| 2/2 [00:00<00:00, 74235.47it/s]


_________________0% complete_________________
>> Crystal No.0: (MPID(mp-866016), 'AcBiAu2') 
>> Time elapsed: 0.0 s
>> Time diff: 0.0 s
___________________________________
>> Total time elapsed: 1.676 s
>> 100% complete
______________________________________________________________________

>> Sanity Check: printing element(s) 1 of 2 from each array

[[3.563, 2.52, 5.039, 3.563, 2.52, 0.785, 0.0, 0.786, 0.785, 3.563, 4.364, 2.52, 5.039, 4.364, 0.616, 0.955, 0.0, 0.955, 3.563, 2.52, 5.039, 3.563, 2.52, 0.785, 0.0, 0.786, 0.785, 3.563, 4.364, 2.52, 5.039, 4.364, 0.616, 0.955, 0.0, 0.955, 3.563, 4.364, 2.52, 5.039, 4.364, 0.616, 0.955, 0.0, 0.955, 3.563, 2.52, 5.039, 3.563, 2.52, 0.785, 0.0, 0.786, 0.785]]
[[5.3, 5.3, 5.3, 60.0, 60.0, 60.0]]
[225]
['F']
['Cubic']
[(MPID(mp-866016), 'AcBiAu2')]
>> All files saved to: /Users/jonathanchoi/Desktop/GitHub Projects/crystal_sim/preprocessed_data/test_5
>> Time to preprocess: 3.2 s


'/Users/jonathanchoi/Desktop/GitHub Projects/crystal_sim/preprocessed_data/test_5'

In [7]:
# Load the arrays written by the preprocessing run above. The folder is derived
# from base_dir instead of repeating the machine-specific absolute path;
# "preprocessed_data/test_5" matches the save location printed by that run.
data_list = pp.load_data(os.path.join(base_dir, 'preprocessed_data', 'test_5'))

>> Retrieving: test_5_labels_classification_space_2.joblib
>> Retrieving: test_5_labels_classification_system_2.joblib
>> Retrieving: test_5_labels_classification_bravais_2.joblib
>> Retrieving: test_5_regression_2.joblib
>> Retrieving: test_5_material_ids2.joblib
>> Retrieving: test_5_features_2.joblib
>> Sanity Check: printing element(s) 1 of 2 from each array

[225]
['Cubic']
['F']
[[5.3, 5.3, 5.3, 60.0, 60.0, 60.0]]
[(MPID(mp-866016), 'AcBiAu2')]
[[3.563, 2.52, 5.039, 3.563, 2.52, 0.785, 0.0, 0.786, 0.785, 3.563, 4.364, 2.52, 5.039, 4.364, 0.616, 0.955, 0.0, 0.955, 3.563, 2.52, 5.039, 3.563, 2.52, 0.785, 0.0, 0.786, 0.785, 3.563, 4.364, 2.52, 5.039, 4.364, 0.616, 0.955, 0.0, 0.955, 3.563, 4.364, 2.52, 5.039, 4.364, 0.616, 0.955, 0.0, 0.955, 3.563, 2.52, 5.039, 3.563, 2.52, 0.785, 0.0, 0.786, 0.785]]
