### Imports

In [1]:
import os
import numpy as np
import preprocessing as pp
from joblib import dump, load
import time


def preprocessing(directions: list, api_key: str, crystal_system: str, base_dir: str, folder_dir: str, txt_filename: str = None, min_size: int = None):
    """Run the crystal preprocessing pipeline and persist its result.

    The crystals are obtained from one of two sources:

    * ``txt_filename`` is truthy: ``pp.get_crystals_from_file`` is called with
      ``os.path.join(base_dir, txt_filename)`` and the data is preprocessed
      with ``plot=True``. ``crystal_system`` and ``min_size`` are not used on
      this path.
    * otherwise: ``pp.retrieve_crystals_from_api`` is called with
      ``crystal_system`` and ``min_size`` (``write=False``), its result is
      passed through ``pp.get_crystal_info``, and the data is preprocessed
      with ``plot=False``.

    Either way the crystals go through
    ``pp.get_preprocessed_data(crystals, directions, plot=..., vectors=True)``
    and the result is handed to
    ``pp.save_data(..., base_dir=base_dir, new_dir=folder_dir)``.

    Args:
        directions: Beam directions forwarded to ``pp.get_preprocessed_data``.
        api_key: API key forwarded to the ``pp`` retrieval helpers.
        crystal_system: Crystal system forwarded to
            ``pp.retrieve_crystals_from_api`` (API path only).
        base_dir: Base directory; used to locate ``txt_filename`` and
            forwarded to ``pp.retrieve_crystals_from_api`` / ``pp.save_data``.
        folder_dir: Forwarded to ``pp.save_data`` as ``new_dir``.
        txt_filename: Optional file name, relative to ``base_dir``. When it is
            ``None`` or empty the API path is taken.
        min_size: Optional value forwarded to
            ``pp.retrieve_crystals_from_api`` (API path only).

    Returns:
        Whatever ``pp.save_data`` returns.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    start = time.perf_counter()

    if txt_filename:
        crystals = pp.get_crystals_from_file(
            filename=os.path.join(base_dir, txt_filename),
            api_key=api_key,
        )
        plot = True
    else:
        print("Hi from preprocessing")
        crystals = pp.get_crystal_info(
            pp.retrieve_crystals_from_api(
                api_key=api_key,
                crystal_system=crystal_system,
                base_dir=base_dir,
                write=False,
                min_size=min_size,
            ),
        )
        plot = False

    # Shared tail of both paths: preprocess, save, report the elapsed time.
    preproc = pp.save_data(
        pp.get_preprocessed_data(
            crystals,
            directions,
            plot=plot,
            vectors=True,
        ),
        base_dir=base_dir,
        new_dir=folder_dir,
    )
    print(f">> Time to preprocess: {round(time.perf_counter() - start, 2)} s")
    return preproc



# Read the Materials Project API key from the environment instead of storing
# it in the notebook: anything committed here (source or outputs) is exposed
# to everyone the notebook is shared with. A key that was ever committed
# should be treated as compromised and regenerated.
key = os.environ.get("MP_API_KEY")
if not key:
    raise RuntimeError(
        "Set the MP_API_KEY environment variable to your Materials Project API key."
    )
upper = 2
# Project root. Override with CRYSTAL_SIM_DIR on other machines; the default
# is the original author's checkout (trailing slash kept as in the original).
base_dir = os.environ.get(
    'CRYSTAL_SIM_DIR',
    '/Users/jonathanchoi/Desktop/GitHub Projects/crystal_sim/',
)
directions = pp.get_cartesian_beam_directions(upper)
# Drop the direction stored at row index 3.
# NOTE(review): the reason this particular row is excluded is not recorded
# here — confirm and document it.
directions = np.delete(directions, 3, axis=0)
print(directions)



# No txt_filename is passed, so this exercises the API retrieval branch
# (crystals are fetched via pp.retrieve_crystals_from_api, capped by min_size).
preprocessing(directions=directions, api_key=key, crystal_system="Cubic", base_dir=base_dir, folder_dir="cubic_F_225_v2", min_size=1500)

  from .autonotebook import tqdm as notebook_tqdm


[[0.    0.    1.   ]
 [0.    0.707 0.707]
 [0.    1.    0.   ]
 [0.707 0.    0.707]
 [0.707 0.707 0.   ]
 [1.    0.    0.   ]]
Hi from preprocessing


Retrieving MaterialsDoc documents: 100%|██████████| 9119/9119 [00:00<00:00, 13488.82it/s]


_________________Summary:_________________

>> F Centered on A: number of crystals - 9119
>> Total number of crystals: 9119

>> Smallest number of crystals: 9119
>> Overwriting the smallest number of available sample points from 9119 to 1500
>> 1500 crystals will be processed



Retrieving MaterialsDoc documents: 100%|██████████| 1500/1500 [00:00<00:00, 4337.75it/s] 


_________________0% complete_________________
>> Crystal No.0: (MPID(mp-1008734), 'ThH2') 
>> Time elapsed: 0.001 s
>> Time diff: 0.001 s


KeyboardInterrupt: 

In [5]:
# Folder the preprocessing run saved into. Built from base_dir so the project
# root is defined in one place only; with the default base_dir this is the
# same path string as before.
store = os.path.join(base_dir, 'preprocessed_data', 'cubic_F_225_v2')


data_list = pp.load_data(store)

>> Retrieving: cubic_F_225_v2_regression_1500.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_bravais_2.joblib
>> Retrieving: cubic_F_225_v2_material_ids1500.joblib
>> Retrieving: cubic_F_225_v2_features_2.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_space_2.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_system_2.joblib
>> Retrieving: cubic_F_225_v2_material_ids2.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_system_1500.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_bravais_1500.joblib
>> Retrieving: cubic_F_225_v2_features_1500.joblib
>> Retrieving: cubic_F_225_v2_regression_2.joblib
>> Retrieving: cubic_F_225_v2_labels_classification_space_1500.joblib
>> Sanity Check: printing element(s) 1 of 1500 from each array

[[3.43, 3.43, 3.43, 60.0, 60.0, 60.0]]
['F']
[(MPID(mp-1008785), 'RuO2')]
[[4.618, 3.265, 4.618, 3.265, 6.531, 0.785, 0.786, 0.785, 0.0, 4.618, 5.656, 3.266, 6.531, 5.656, 0.616, 0.955, 0.0, 0.955, 4.618, 3.265, 

In [None]:
from pymatgen.analysis.diffraction.tem import TEMCalculator
from pymatgen.core.structure import Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from mp_api.client import MPRester

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math as m
from decimal import Decimal, ROUND_HALF_UP

    
# `api_key` and `crystal_system` are not defined anywhere at notebook scope
# (they only exist as parameters of `preprocessing`), so this cell raised
# NameError on a fresh kernel. Bind them to the values used by the
# preprocessing cell: the notebook-level `key` and the "Cubic" system.
api_key = key
crystal_system = "Cubic"

# Query the Materials Project for entries in space group 225, requesting only
# the material id and symmetry fields.
with MPRester(api_key=api_key) as mpr:
    crystals = mpr.materials.search(
        # elements=["Si", "O"], # for testing only
        spacegroup_number = 225,
        crystal_system=[crystal_system.capitalize()],
        fields=['material_id', 'symmetry']
    )