In [1]:
import os
import pickle
from pathlib import Path
import numpy as np

# import required module
import sys
 
# append the path of the
# parent directory
sys.path.append("..")
 
# import method from sibling 
# module
from data_utils import save_samples_as_cifs, save_reconstructions_as_cifs, visualize_trajectory
from utils import retrieve_artifacts_by_name


import env

# Load environment variables through the project's `env` helper.
env.load_envs()

# Resolve the project root from the PROJECT_ROOT variable and verify that the
# directory exists before switching into it.
PROJECT_ROOT: Path = Path(env.get_env("PROJECT_ROOT"))
assert PROJECT_ROOT.exists(), (
    "You must configure the PROJECT_ROOT environment variable in a .env file!"
)

# Run the rest of the notebook from the project root; `cwd` is the base
# directory that the parsing cells join their output folders onto.
os.chdir(PROJECT_ROOT)
cwd = os.getcwd()

  from .autonotebook import tqdm as notebook_tqdm


## Parse samples

In [None]:
# Name of the experiment whose sample artifacts should be parsed (fill in before running).
experiment_name = ""

artifact_files = retrieve_artifacts_by_name(experiment_name, artifact_type='dataset', project='zeogen', entity='glafk')

# Select the artifact paths that contain "samples". Failing loudly here avoids a
# NameError further down, or silently reusing a stale `samples` left in the kernel
# by a previous run.
sample_files = [file for file in artifact_files if "samples" in file]
if not sample_files:
    raise FileNotFoundError(
        f"No artifact file containing 'samples' found for experiment {experiment_name!r}"
    )

# When several files match, the last one is used (same result as loading each
# match in turn, without the redundant reads).
# NOTE: pickle.load can execute arbitrary code; only load artifacts you trust.
with open(sample_files[-1], "rb") as f:
    samples = pickle.load(f)

save_samples_as_cifs(samples, os.path.join(cwd, f"parsed_samples/samples_{experiment_name}"))


In [None]:
# Load a locally stored sample pickle for inspection.
with open("./samples/samples_low_noise.pickle", "rb") as f:
    samples = pickle.load(f)

print(samples.keys())
print(samples["all_frac_coords"].shape)

# Move every field used below to the CPU.
for key in ("atom_types", "angles", "lengths", "num_atoms", "frac_coords", "all_frac_coords"):
    samples[key] = samples[key].cpu()

# Cumulative atom counts (without the final total) are the offsets at which the
# flat atom-type and coordinate arrays are split.
split_points = np.cumsum(samples["num_atoms"])[:-1]
split_atom_types = np.split(samples["atom_types"], split_points)
split_frac_coords = np.split(samples["frac_coords"], split_points)

# NOTE(review): the slicing below hard-codes 50 chunks of 48 columns each along
# the second axis of `all_frac_coords` -- presumably 50 structures with 48 atoms
# apiece; confirm against samples["num_atoms"].
atoms_per_sample = 48
num_trajectories = 50
trajectories = []
for idx in range(num_trajectories):
    start = idx * atoms_per_sample
    trajectories.append(samples["all_frac_coords"][:, start:start + atoms_per_sample])

# One dict per entry of `num_atoms`, pairing the split arrays with that entry's
# lengths and angles.
individual_samples = [
    {
        "atom_types": split_atom_types[i],
        "frac_coords": split_frac_coords[i],
        "lengths": samples["lengths"][i],
        "angles": samples["angles"][i],
    }
    for i in range(len(samples["num_atoms"]))
]

first_trajectory = trajectories[0]
print(first_trajectory.shape)
print(first_trajectory.min(), first_trajectory.max())

# Visualize the last 100 entries along the first axis for index 0 of the second axis.
visualize_trajectory(first_trajectory[-100:, 0, :], individual_samples[0]["lengths"])

## Parse reconstructions

In [None]:
# Name of the experiment whose reconstruction artifacts should be parsed (fill in before running).
experiment_name = ""

artifact_files = retrieve_artifacts_by_name(experiment_name, artifact_type='dataset', project='zeogen', entity='glafk')

# Select reconstruction files but skip the ground-truth ones: the ground-truth
# cell picks paths containing both "reconstructions" and "gt", so without the
# exclusion a ground-truth pickle could overwrite the model reconstructions.
recon_files = [
    file for file in artifact_files
    if "reconstructions" in file and "gt" not in file
]
if not recon_files:
    # Fail loudly instead of hitting a NameError or reusing a stale
    # `reconstructions` left in the kernel by a previous run.
    raise FileNotFoundError(
        f"No reconstruction artifact file found for experiment {experiment_name!r}"
    )

# When several files match, the last one is used.
# NOTE: pickle.load can execute arbitrary code; only load artifacts you trust.
with open(recon_files[-1], "rb") as f:
    reconstructions = pickle.load(f)

# f-string so the experiment name is actually interpolated into the output path
# (previously the literal text "{experiment_name}" ended up in the directory name).
recon_path = os.path.join(cwd, f"parsed_reconstructions/reconstructions_{experiment_name}")
save_reconstructions_as_cifs(reconstructions, recon_path, save_trajectory=True, downsample_trajectory=True, downsample_frame_rate=5)

## Parse reconstruction ground truth

In [None]:
# Name of the experiment whose ground-truth reconstruction artifacts should be parsed
# (fill in before running).
experiment_name = ""

artifact_files = retrieve_artifacts_by_name(experiment_name, artifact_type='dataset', project='zeogen', entity='glafk')

# Ground-truth files are the artifact paths containing both "reconstructions" and "gt".
gt_files = [
    file for file in artifact_files
    if "reconstructions" in file and "gt" in file
]
if not gt_files:
    # Fail loudly instead of hitting a NameError or reusing a stale
    # `reconstructions` left in the kernel by a previous run.
    raise FileNotFoundError(
        f"No ground-truth reconstruction artifact file found for experiment {experiment_name!r}"
    )

# When several files match, the last one is used.
# NOTE: pickle.load can execute arbitrary code; only load artifacts you trust.
with open(gt_files[-1], "rb") as f:
    reconstructions = pickle.load(f)

# f-string so the experiment name is actually interpolated into the output path
# (previously the literal text "{experiment_name}" ended up in the directory name).
recon_path = os.path.join(cwd, f"parsed_reconstructions/reconstructions_{experiment_name}_gt")
save_reconstructions_as_cifs(reconstructions, recon_path, ground_truth=True)