In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import pandas as pd

import torch
import torch_geometric.data

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pickled ZeoSynGen dataset object from the working directory.
# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# open pickle files that come from a trusted source.
with open('ZeoSynGen_dataset.pkl', 'rb') as f:
    dataset = pickle.load(f)

In [3]:
# Peek at the first datapoint. Per the saved output below, integer indexing
# returns a tuple mixing tensors, strings and a graph `Data` object.
dataset[0]

(tensor([3.0769e-02, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.5385e-03, 7.6923e-03, 0.0000e+00, 9.5385e-01, 6.1538e-03, 1.5000e+02,
         5.0400e+02]),
 tensor([1.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.9109, 0.6862, 0.0000, 0.7188,
         0.3819, 0.3018, 0.9489]),
 'KOH, Aerosil 200',
 'AFI',
 Data(x=[72, 2], edge_index=[2, 648], edge_vec=[648, 3], code='AFI'),
 tensor([-0.3653,  0.2575,  0.3029, -0.2805,  0.6233,  0.5855,  1.0809, -0.3752,
          1.4155, -0.5759, -0.6293,  1.5647,  0.7716,  0.7555,  0.7690,  0.7832,
          0.7858, -0.0518,  0.0492, -0.4709, -0.5429, -0.2012, -0.1816, -0.1588,
         -0.1586,  0.8087,  1.4051,  0.8113, -0.5515, -0.5512, -0.5509, -0.1609,
         -0.1645, -0.1609, -0.1305, -0.1367, -0.1305, -0.1431, -0.1574, -0.1431,
         -0.1435, -0.1574, -0.1435,  0.6969, -0.3955,  0.1268,  0.0173, -0.1985,
         -0.1868, -0.1993,  0.0492, -0.3287, -0.3958, -0.1117, -0.0832, -0.0230,
         -0.0211,  0.2791, -0.256

In [27]:
# Fetch datapoints 0 and 2 through the dataset's own helper
# (scaled=False, return_dataframe=True); the saved output is a list of DataFrames and lists.
# NOTE(review): in the saved output both rows of the ratio table are identical even
# though their fraction rows differ -- worth checking the inverse-scaling inside the helper.
dataset.get_datapoints_by_index([0,2], scaled=False, return_dataframe=True)

[         Si   Al    P   Ge    B   Na         K        OH    F       H2O  \
 0  0.030769  0.0  0.0  0.0  0.0  0.0  0.001538  0.007692  0.0  0.953846   
 1  0.021959  0.0  0.0  0.0  0.0  0.0  0.002635  0.005929  0.0  0.966184   
 
        sda1  cryst_temp  cryst_time  
 0  0.006154       150.0       504.0  
 1  0.003294       150.0       144.0  ,
    Si/Al  Al/P  Si/Ge   Si/B  Na/T   K/T  OH/T  F/T      H2O/T  sda1/T  \
 0  400.0   0.0   99.0  250.0   0.0  0.05  0.25  0.0  31.000006     0.2   
 1  400.0   0.0   99.0  250.0   0.0  0.05  0.25  0.0  31.000006     0.2   
 
    cryst_temp  cryst_time  
 0         0.0         0.0  
 1         0.0         0.0  ,
 ['KOH, Aerosil 200', 'KOH, Aerosil 200'],
 ['AFI', 'AFI'],
 [Data(x=[72, 2], edge_index=[2, 648], edge_vec=[648, 3], code='AFI'),
  Data(x=[72, 2], edge_index=[2, 648], edge_vec=[648, 3], code='AFI')],
    zeo_num_atoms      zeo_a      zeo_b     zeo_c  zeo_alpha  zeo_beta  \
 0           72.0  13.618734  13.591259  8.322777       90.0

In [None]:
# Manual, step-by-step version of the datapoint lookup (same knobs as the
# `dataset.get_datapoints_by_index` call above: indices, `scaled`, `return_dataframe`).
#
# Inputs (edit these three lines):
dataset_idxs = [0]        # indices of the datapoints to fetch
scaled = False            # False -> undo the quantile / standard scaling
return_dataframe = True   # True  -> wrap each recognised tensor in a labelled DataFrame

if len(dataset_idxs) == 0:
    raise ValueError('dataset_idxs must contain at least one index')


def _to_numpy(tensor):
    """Return a NumPy copy-free view of a tensor that is safe to hand to sklearn / pandas."""
    return tensor.detach().cpu().numpy()


def _as_frame(tensor, columns):
    """Wrap a 2-D tensor in a DataFrame with the given column labels."""
    return pd.DataFrame(_to_numpy(tensor), columns=columns)


# Gather the requested datapoints field by field. Tensor fields are stacked
# into one (n_datapoints, n_features) tensor; every other field becomes a list.
# A single index goes through the same path as several: torch.stack always
# allocates a new tensor, so the in-place inverse-scaling further down can
# never write into the dataset's own storage.
datapoints = [dataset[dataset_idx] for dataset_idx in dataset_idxs]
result = [
    torch.stack(field) if isinstance(field[0], torch.Tensor) else list(field)
    for field in zip(*datapoints)
]

# Column labels, built once. Tensors are recognised purely by their width.
condition_names = ['cryst_temp', 'cryst_time']
frac_columns = list(dataset.frac_names) + condition_names
ratio_columns = list(dataset.ratio_names) + condition_names
zeo_columns = list(dataset.zeo_feat_names)
osda_columns = list(dataset.osda_feat_names)

final_result = []
for info in result:
    # Non-tensor fields and tensors that are not 2-D are passed through untouched.
    if not (isinstance(info, torch.Tensor) and info.dim() == 2):
        final_result.append(info)
        continue

    n_cols = info.shape[1]
    columns = None  # stays None when the width matches no known feature block

    if n_cols == len(frac_columns):
        # Fraction block: no inverse scaling is applied here.
        columns = frac_columns
    elif n_cols == len(ratio_columns):
        columns = ratio_columns
        if not scaled:  # scale back, one quantile transformer per ratio column
            for ratio_idx, ratio in enumerate(dataset.ratio_names):
                qt = dataset.qts[ratio]
                scaled_col = _to_numpy(info[:, ratio_idx].reshape(-1, 1))
                restored = torch.tensor(qt.inverse_transform(scaled_col), dtype=torch.float32)
                # inverse_transform returns shape (n, 1); take the whole column.
                # Indexing with [0] would keep only the first row and broadcast
                # that single value over every datapoint.
                info[:, ratio_idx] = restored[:, 0]
            # NOTE(review): the trailing cryst_temp / cryst_time columns are not
            # touched by this loop, so they stay in whatever scale the dataset
            # stores -- confirm whether they also need inverse-transforming.
    elif n_cols == len(zeo_columns):
        columns = zeo_columns
        if not scaled:  # scale back with the zeolite feature scaler
            info = torch.tensor(dataset.zeo_feat_scaler.inverse_transform(_to_numpy(info)), dtype=torch.float32)
    elif n_cols == len(osda_columns):
        columns = osda_columns
        if not scaled:  # scale back with the OSDA feature scaler
            info = torch.tensor(dataset.osda_feat_scaler.inverse_transform(_to_numpy(info)), dtype=torch.float32)

    if return_dataframe and columns is not None:
        info = _as_frame(info, columns)
    final_result.append(info)

# Display one field of the processed result.
# NOTE(review): index 10 assumes each datapoint has at least 11 fields -- TODO confirm.
final_result[10]