In [15]:
from cryosparc.tools import CryoSPARC
import json
import numpy as np
import pandas as pd
from pathlib import Path
# Connection settings for the CryoSPARC instance live in a JSON file in the
# user's home directory; its keys are passed straight through as keyword
# arguments to the CryoSPARC constructor.
instance_info_path = Path('~/instance-info.json').expanduser()
with instance_info_path.open('r') as f:
    instance_info = json.load(f)

cs = CryoSPARC(**instance_info)
# Stop here if the instance cannot be reached.
assert cs.test_connection()

# Identifiers of the job whose output is analysed below.
# (workspace_number is not referenced by any of the visible cells.)
project_number = "P310"
workspace_number = "W2"
job_number = "J58"

# Resolve project -> job -> the "split_0" output of that job.
project = cs.find_project(project_number)
job = project.find_job(job_number)
results = job.load_output("split_0")


Connection succeeded to CryoSPARC command_core at http://cryoem0.sbi:40002
Connection succeeded to CryoSPARC command_vis at http://cryoem0.sbi:40003
Connection succeeded to CryoSPARC command_rtp at http://cryoem0.sbi:40005


In [16]:
# Build a DataFrame from the rows of the loaded output.
# This cell is slow to run.
full_df = pd.DataFrame(data=results.rows())

In [20]:
# Keep only the columns the duplicate-pose check below reads.
pose_check_columns = [
    'alignments3D/class',
    'alignments3D/pose',
    'uid',
    'sym_expand/src_uid',
]
df = full_df[pose_check_columns]
df

Unnamed: 0,alignments3D/class,alignments3D/pose,uid,sym_expand/src_uid
0,1,"[0.8240723, -2.0247228, 2.1005526]",5102977743274490089,5102977743274490089
1,1,"[1.1745524, -1.2027127, -1.7076085]",7861261775274868257,1634749157119916066
2,1,"[1.6627127, -0.70035493, -0.95698214]",12875478332620446849,12875478332620446849
3,1,"[1.0091319, -0.37099874, -1.9075645]",3944933812928148443,3944933812928148443
4,1,"[-1.6078465, 0.37382147, -0.08112675]",10778458160678512335,10778458160678512335
...,...,...,...,...
77485,2,"[-0.3884624, 0.48980042, 1.3884416]",2644814842965745625,2876748623775182921
77486,2,"[2.4748948, -0.42047793, -1.2445024]",16174953670100347404,16174953670100347404
77487,2,"[0.36773956, 2.1644812, 0.42024016]",10064521577564522981,16174953670100347404
77488,2,"[0.5504791, -1.5322258, -2.2334733]",17905466341131595219,17905466341131595219


In [68]:
import itertools
# 'sym_expand/idx' is zero-based here, so the largest index plus one is taken
# as the number of symmetry copies.
sym_indices = full_df['sym_expand/idx']
max_sym = max(sym_indices) + 1
# All unordered index pairs (i, j) with i < j. For C2 symmetry this is the
# single pair (0, 1); for higher symmetries the count grows as N choose 2.
sym_index_range = range(max_sym)
combs = list(itertools.combinations(sym_index_range, 2))

In [138]:
def check_close_poses(df_group, idx_a, idx_b, treat_classes_separately = False, **kwargs) -> bool:
    """Tell whether two rows of ``df_group`` have element-wise close poses.

    Parameters
    ----------
    df_group
        Frame with 'alignments3D/class' and 'alignments3D/pose' columns.
    idx_a, idx_b
        Positional (``.iloc``) indices of the two rows to compare.
    treat_classes_separately
        When truthy, rows whose 'alignments3D/class' values differ are never
        reported as close, regardless of their poses.
    **kwargs
        Forwarded unchanged to ``np.allclose`` (e.g. ``atol``, ``rtol``).

    Returns
    -------
    bool
        ``False`` if the class check rejects the pair, otherwise the result of
        ``np.allclose`` on the two pose entries.
    """
    class_column = df_group['alignments3D/class']
    classes_differ = class_column.iloc[idx_a] != class_column.iloc[idx_b]
    if treat_classes_separately and classes_differ:
        return False

    # NOTE(review): poses are presumably 3-component rotation vectors in
    # radians (per the tolerance comment at the call site) -- confirm.
    pose_column = df_group['alignments3D/pose']
    first_pose = pose_column.iloc[idx_a]
    second_pose = pose_column.iloc[idx_b]
    return np.allclose(first_pose, second_pose, **kwargs)

# Accumulates the uid of every row found to have a close-pose partner.
duplicate_uids = set()

def check_all_combs(df_group, **kwargs):
    """Record the uids of every pair of rows in ``df_group`` with close poses.

    Each unordered pair of row positions in ``df_group`` is passed to
    ``check_close_poses``; when it reports a match, both rows' 'uid' values
    are added to the module-level ``duplicate_uids`` set.

    Pairs are enumerated from the group's own length rather than from a
    precomputed list sized for a full set of symmetry copies, so a group
    with fewer rows (including a single row) is handled without an
    out-of-bounds positional lookup. For a full-size group the pairs
    visited are the same as before.

    Parameters
    ----------
    df_group
        Frame with a 'uid' column plus the columns ``check_close_poses`` reads.
    **kwargs
        Forwarded unchanged to ``check_close_poses``.

    Returns
    -------
    None
        Results are reported only through ``duplicate_uids``.
    """
    row_positions = range(len(df_group))
    for idx_a, idx_b in itertools.combinations(row_positions, 2):
        if check_close_poses(df_group, idx_a, idx_b, **kwargs):
            duplicate_uids.add(df_group['uid'].iloc[idx_a])
            duplicate_uids.add(df_group['uid'].iloc[idx_b])


# atol is the number of radians by which the pose vectors may differ
# **in each of the three axes** and still be counted as the same pose.
# np.allclose's default rtol also applies, since it is not overridden here.
#
# pi / 2 is very permissive: you almost certainly want a much smaller number.
pose_atol = np.pi / 2

# One group per source particle: rows sharing a 'sym_expand/src_uid'.
for src_uid, sym_copies in df.groupby('sym_expand/src_uid'):
    check_all_combs(sym_copies, treat_classes_separately = True, atol = pose_atol)

In [139]:
# One uid per line; set iteration order is arbitrary.
print('\n'.join(str(uid) for uid in duplicate_uids))

10166874555787842557
2477621879404028084
