In [2]:
import pickle
import numpy as np

# Load the validation records.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# open pickle files that come from a trusted source.
with open('combined_val.pickle', 'rb') as file:
    crystal_dict_list = pickle.load(file)

# The file is only needed for loading, so all processing happens after it is closed.

# Assert that each record in the list has an "angles" property with three values
assert all('angles' in record for record in crystal_dict_list), "record without 'angles'"
assert all(len(record['angles']) == 3 for record in crystal_dict_list), "'angles' must have 3 values"

# Extract HOA and zeolite codes (one array element per record, same order as the list)
hoa = np.array([entry['hoa'] for entry in crystal_dict_list])
zeo_code = np.array([entry['zeolite_code'] for entry in crystal_dict_list])

# Dataset-wide mean and standard deviation of HOA (np.std default, i.e. ddof=0)
mean_hoa = np.mean(hoa)
std_hoa = np.std(hoa)
print(mean_hoa, std_hoa)

# Find unique zeolite codes (np.unique returns them sorted)
unique_zeo_codes = np.unique(zeo_code)

# Find the mean and standard deviation of HOA for each zeolite code
mean_hoa_per_zeo_code = {}
std_hoa_per_zeo_code = {}
for code in unique_zeo_codes:
    # Select the HOA values of the current code with a boolean mask instead of
    # rescanning the whole record list for every code.
    hoa_values_for_code = hoa[zeo_code == code]
    mean_hoa_per_zeo_code[code] = np.mean(hoa_values_for_code)
    std_hoa_per_zeo_code[code] = np.std(hoa_values_for_code)

    print(code, mean_hoa_per_zeo_code[code], std_hoa_per_zeo_code[code])


# Add normalized HOA: z-score of each record relative to its own zeolite code.
# Separate names are used here so the dataset-wide mean_hoa / std_hoa computed
# above are not overwritten by per-code values.
for entry in crystal_dict_list:
    code_mean = mean_hoa_per_zeo_code[entry['zeolite_code']]
    code_std = std_hoa_per_zeo_code[entry['zeolite_code']]
    if code_std == 0:
        # Zero spread means every HOA of this code equals the mean (for example
        # a code with a single record); store 0.0 instead of the NaN from 0/0.
        entry['norm_hoa'] = 0.0
    else:
        entry['norm_hoa'] = (entry['hoa'] - code_mean) / code_std

# Get all BEC zeolites from the dataset
bec_zeolites = [entry for entry in crystal_dict_list if entry['zeolite_code'] == 'BEC']
# Print a count and one sample record rather than dumping every record
# (each record carries long coordinate lists).
print(len(bec_zeolites), 'BEC records')
print(bec_zeolites[:1])

38.707365311804004 6.922252067726768
BEC 36.17701543209876 5.139870753714076
CHA 35.36046219512195 4.458132437813281
ERI 36.56501036585366 4.023677386673377
FER 46.12105914634146 4.412968205420303
HEU 42.913875609756104 7.429379838142961
LTA 32.953580120481924 3.440506270451261
LTL 31.716109756097566 2.715615817758638
MER 37.96675061728396 4.0975307357632245
MTW 44.55826319018405 6.192633825010936
NAT 44.532299397590364 3.852816754379964
YFI 36.831441401273885 7.18378916584895
[{'frac_coords': [[0.877, 0.618, 0.617], [0.382, 0.877, 0.117], [0.123, 0.382, 0.617], [0.618, 0.123, 0.117], [0.123, 0.618, 0.617], [0.618, 0.877, 0.117], [0.877, 0.382, 0.617], [0.382, 0.123, 0.117], [0.123, 0.382, 0.383], [0.618, 0.123, 0.883], [0.877, 0.618, 0.383], [0.382, 0.877, 0.883], [0.877, 0.382, 0.383], [0.382, 0.123, 0.883], [0.123, 0.618, 0.383], [0.618, 0.877, 0.883], [0.807, 0.807, 0.25], [0.193, 0.807, 0.75], [0.193, 0.193, 0.25], [0.807, 0.193, 0.75], [0.193, 0.807, 0.25], [0.807, 0.807, 0.75], 

In [6]:
import pickle

# Collect the cell lengths stored for every zeolite code in the training set.
# The dict keeps one entry per code: its position is where the code was first
# seen, and its value is the 'lengths' of the last record with that code.
with open('combined_train.pickle', 'rb') as file:
    data = pickle.load(file)
    unique_zeolite_codes = {}
    for record in data:
        unique_zeolite_codes[record["zeolite_code"]] = record['lengths']
    print(len(unique_zeolite_codes))


print(unique_zeolite_codes)
print(len(unique_zeolite_codes))

# Number the codes 0..N-1 in the dict's insertion order.
unique_zeolite_codes_mapping = dict(
    zip(unique_zeolite_codes, range(len(unique_zeolite_codes)))
)
print(unique_zeolite_codes_mapping)

26
{'DDRch1': [13.795, 13.795, 40.75], 'DDRch2': [13.795, 13.795, 40.75], 'FAU': [24.345, 24.345, 24.345], 'FAUch': [24.345, 24.345, 24.345], 'ITW': [10.45, 8.954, 8.954], 'MEL': [20.27, 20.27, 13.459], 'MELch': [20.27, 20.27, 13.459], 'MFI': [20.09, 19.738, 13.142], 'MOR': [18.256, 20.534, 7.542], 'RHO': [15.031, 15.031, 15.031], 'TON': [14.1, 17.84, 5.25], 'TON2': [14.105, 17.842, 5.256], 'TON3': [14.105, 17.842, 5.256], 'TON4': [14.105, 17.842, 5.256], 'TONch': [14.105, 17.842, 5.256], 'BEC': [12.77, 12.77, 12.977], 'CHA': [13.675, 13.675, 14.767], 'ERI': [13.054, 13.054, 15.175], 'FER': [19.018, 14.303, 7.541], 'HEU': [17.523, 17.644, 7.401], 'LTA': [11.919, 11.919, 11.919], 'LTL': [18.126, 18.126, 7.567], 'MER': [14.012, 14.012, 9.954], 'MTW': [25.552, 5.256, 12.117], 'NAT': [13.85, 13.85, 6.42], 'YFI': [18.181, 31.841, 12.641]}
26
{'DDRch1': 0, 'DDRch2': 1, 'FAU': 2, 'FAUch': 3, 'ITW': 4, 'MEL': 5, 'MELch': 6, 'MFI': 7, 'MOR': 8, 'RHO': 9, 'TON': 10, 'TON2': 11, 'TON3': 12, 'TON4