# Tests

## New API points

### Molecule

* `__init__(hierarchy_schemes=hierarchy_flavor)`

```python
def test_molecule_init_no_hierarchy(self):
    """Test that a molecule built with no arguments has no hierarchy schemes registered."""
    # NOTE(review): test_molecule_default_none_hierarchy_schemes expects the
    # default flavor for a bare Molecule() instead -- the two need reconciling.
    molecule = Molecule()
    assert molecule.hierarchy_schemes == {}
    
def test_molecule_init_with_hierarchy_flavor(self):
    """Test that every scheme of a flavor passed to the constructor is registered by name."""
    molecule = Molecule(hierarchy_schemes=HIER_FLAVOR_DEFAULT)
    # Same number of registered schemes as entries in the flavor ...
    assert len(HIER_FLAVOR_DEFAULT) == len(molecule.hierarchy_schemes)
    # ... and each one is retrievable under its own name.
    for scheme in HIER_FLAVOR_DEFAULT:
        registered_scheme = molecule.hierarchy_schemes[scheme.name]
        assert registered_scheme == scheme
    
def test_molecule_init_with_hierarchy_schemes(self):
    """Test that explicitly built hierarchy schemes are registered by the constructor."""
    residues_scheme = HierarchyScheme(
        name='residues',
        uniqueness_criteria=['chain', 'residue_num', 'residue_name'],
    )
    chains_scheme = HierarchyScheme(name='chains', uniqueness_criteria=['chain'])
    hier_schemes = [residues_scheme, chains_scheme]
    molecule = Molecule(hierarchy_schemes=hier_schemes)
    assert len(molecule.hierarchy_schemes) == 2
    # Each scheme must be retrievable under its own name.
    for scheme in hier_schemes:
        assert molecule.hierarchy_schemes[scheme.name] == scheme

def test_molecule_init_with_incompatible_hierarchy_schemes(self):
    """Test incompatible hierarchy schemes fail to be registered."""
    # Both schemes are named 'residues' but disagree on the uniqueness criteria.
    hier_schemes = [HierarchyScheme(name='residues',
                                    uniqueness_criteria=['chain', 'residue_num', 'residue_name']),
                    HierarchyScheme(name='residues',
                                    uniqueness_criteria=['chain', 'residue_index', 'residue_type'])]
    # TODO: replace the '...' placeholder with the real error-message pattern.
    with pytest.raises(HierarchySchemeAlreadyRegisteredForName, match='...'):
        Molecule(hierarchy_schemes=hier_schemes)
        
        
def test_molecule_init_with_hierarchy_schemes_compared_to_added_at_runtime(self):
    """Test that schemes given at construction equal the same schemes registered afterwards."""
    built_with_schemes = Molecule(hierarchy_schemes=HIER_FLAVOR_DEFAULT)
    # Start from the empty flavor, then register the default flavor at runtime.
    registered_later = Molecule(hierarchy_schemes=HIER_FLAVOR_NONE)
    registered_later.register_hierarchy_schemes(HIER_FLAVOR_DEFAULT)
    assert built_with_schemes.hierarchy_schemes == registered_later.hierarchy_schemes
```

* merge_molecules
* smirks_reaction(?)
* mutate
* perceive_residues

```python
@pytest.mark.parametrize('mapped_smiles,atom_names,residue_names,residue_nums', loaded_data)
def test_molecule_(self, mapped_smiles, atom_names, residue_names, residue_nums):
    # TODO: the name and body of this test were never written in the notes.
    # The arguments mirror the parametrized names so pytest can inject them.
    ...

 # Think about safe "default" behavior with respect to chains 
 # We want to AVOID situations where everything added to a Topology is "chain A", and ALSO
 # AVOID situations where the chain already assigned by a user gets overwritten by adding it to a Topology.

def test_molecule_perceive_residues_iterator(self):
    """Test residue iterator perception."""
    # expected_residues is a dict of the form
    # {('A', 1, 'ALA'): <HierElement with indices 0..10>, ...}
    expected_residues = {...}
    protein = Molecule.from_file(...)
    protein.perceive_residues()
    # Can we make sure the order is always the same?
    # JW -- Let's not check order in this test, just test for dict equivalence.
    assert len(expected_residues) == len(protein.residues)
    # .items() is required: iterating a dict directly yields only its keys.
    for residue_tuple, residue_hier_ele in expected_residues.items():
        assert protein.residues[residue_tuple] == residue_hier_ele

    # assert protein.residues == expected_residues
    # API sketches kept from the notes. They used the undefined names
    # `residues` and `atom_indices`, so they are recorded as comments:
    # [atom.name for atom in residues[0].atoms]
    # [protein.atoms[at_idx] for at_idx in residues[0].atom_indices]
    # ele = HierarchyElement(atom_indices, hierarchy_scheme=None)

```

* perceive_hierarchy
* @classmethod? register_chemical_residue_substructures({residue SMARTS: (residue name, atom names)})
* ??register_typed_residue_substructures({residue networkx graph: ([formal charges], [bond orders])})
   * This could be redundant with the method above
* deregister_residue_substructures
* register_hierarchy_schemes

```python

 # Let's say that a HIER_FLAVOR is just a list of HierarchySchemes    

def test_molecule_register_deregister_hierarchy(self):
    """Test hierarchy schemes can be registered in and deregistered from a molecule."""
    molecule = Molecule()
    # Register a new hierarchy flavor; registering it a second time must fail.
    molecule.register_hierarchy_schemes(HIER_FLAVOR_DEFAULT)
    with pytest.raises(HierarchySchemeAlreadyRegisteredForName, match='...'):
        molecule.register_hierarchy_schemes(HIER_FLAVOR_DEFAULT)
    # The failed second registration must not have changed the molecule.
    # (The original checked an undefined `topology` here.)
    assert len(HIER_FLAVOR_DEFAULT) == len(molecule.hierarchy_schemes)

    for hier_scheme in HIER_FLAVOR_DEFAULT:
        assert molecule.hierarchy_schemes[hier_scheme.name] == hier_scheme

    # Deregister one scheme by name; deregistering it again must fail.
    hier_scheme_name_to_deregister = HIER_FLAVOR_DEFAULT[0].name
    molecule.deregister_hierarchy_schemes([hier_scheme_name_to_deregister])
    assert len(HIER_FLAVOR_DEFAULT) - 1 == len(molecule.hierarchy_schemes)
    assert hier_scheme_name_to_deregister not in molecule.hierarchy_schemes
    with pytest.raises(HierarchySchemeNotFound, match='...'):
        molecule.deregister_hierarchy_schemes([hier_scheme_name_to_deregister])
```

* deregister_hierarchy_schemes
* hierarchy_schemes

```python
def test_molecule_default_none_hierarchy_schemes(self):
    """Test default and empty hierarchy flavors."""
    offmol = Molecule()
    # A flavor is a list of HierarchySchemes while `hierarchy_schemes` is keyed
    # by scheme name, so the two are compared entry by entry.
    assert len(HIER_FLAVOR_DEFAULT) == len(offmol.hierarchy_schemes)
    for hier_scheme in HIER_FLAVOR_DEFAULT:
        assert offmol.hierarchy_schemes[hier_scheme.name] == hier_scheme
    # The constructor keyword is `hierarchy_schemes`, as in the other init tests.
    offmol = Molecule(hierarchy_schemes=HIER_FLAVOR_NONE)
    assert {} == offmol.hierarchy_schemes
```

* from_pdb_file
    * _from_pdb_with_atom_and_residue_names(typ_mol)
    * _from_pdb_with_conect(typ_mol) # uses _registered_residue_substructures
* residues (and other iterators) 
* to_openeye (update)
* to_rdkit (update)
* to_dict (update/obliterate with pydantic serialization)
* to_file (update)
* add_conformers (make better API -- let users specify a correct shaped array)


#### Troubleshooting/caveats

In [1]:
top = Topology.from_molecules([water, protein, ethanol])

top.chains[1].residues[5]

res = top.residues[0]
res.atoms[0].index(reference='topology')
> 3 # The whole-topology index of the atom
 # OR
res.atoms[0].topology_atom_index
> 3 # The whole-topology index of the atom
res.atoms[0].reference_molecule_atom_index == res.atoms[0].index(reference='reference_molecule')
> 0 # The index of the atom in the reference molecule
res.atoms[0].topology_molecule_atom_index  == res.atoms[0].index(reference='topology_molecule')
> 0 # The "local" index of the atom in the topology molecule

ele = top.topology_molecules[5].residues[0]
ele.atoms[0].topology_atom_index
> 200
ele.atoms[0].topology_molecule_atom_index
> 0
ele.atoms[0].index
> 0

ele = top.residues[500]
ele.atoms[0].topology_atom_index
> 200
ele.atoms[0].topology_molecule_atom_index
> 0
ele.atoms[0].index
> 200


type(res.atoms[0])
> TopologyAtom


res = protein.residues[0]
res.atoms[0].index
> 0
type(res.atoms[0])
> Atom

 
class HierarchyElementDefinedWithAtomIndices:
    """Hierarchy element sketch that stores atom indices local to its molecule.

    Atoms and topology-wide indices are resolved on demand through the
    molecule referenced by the hierarchy scheme.
    """

    def __init__(self, atom_indices, hierarchy_scheme):
        """Store the molecule-local atom indices and the owning hierarchy scheme."""
        self._topology_molecule_atom_indices = atom_indices
        self._hierarchy_scheme = hierarchy_scheme
        # Filled lazily by identifier(); must exist before the first lookup.
        self._cached_identifier = None

    def topology_atom_indices(self):
        """Yield the topology index of each atom via ``local_idx_to_top_idx``."""
        for top_mol_at_idx in self._topology_molecule_atom_indices:
            yield self._hierarchy_scheme.molecule.local_idx_to_top_idx(top_mol_at_idx)

    def identifier(self):
        """Return the key under which this element is stored in the scheme.

        The scheme's ``elements`` mapping is searched once and the result is
        cached. Returns None if the element is not found.
        """
        if self._cached_identifier is None:
            for identifier, element in self._hierarchy_scheme.elements.items():
                if element == self:
                    self._cached_identifier = identifier
                    break
        return self._cached_identifier

    def atoms(self) -> "Iterator[Atom | TopologyAtom]":
        """Yield Atoms for a Molecule, or TopologyAtoms for a TopologyMolecule."""
        mol = self._hierarchy_scheme.molecule
        for at_idx in self._topology_molecule_atom_indices:
            if isinstance(mol, Molecule):
                yield mol.atoms[at_idx]
            elif isinstance(mol, TopologyMolecule):
                yield mol.topology_atoms[at_idx]

class HierarchyElementDefinedWithAtoms:
    """Hierarchy element sketch that stores the atom objects themselves."""

    def __init__(self, atoms, hierarchy_scheme):
        """Store the atoms of this element and the owning hierarchy scheme."""
        self._atoms = atoms
        self._hierarchy_scheme = hierarchy_scheme

    def atom_indices(self):
        """Yield each atom's index: molecule index for Atom, topology index for TopologyAtom."""
        for atom in self._atoms:
            if isinstance(atom, Atom):
                yield atom.molecule_atom_index
            elif isinstance(atom, TopologyAtom):
                yield atom.topology_atom_index
                
[atom.name for atom in hierarchy_element.atoms]


atom = topology.chains[1].residues[5].atoms[7]
atom.residue
> <HierElement with name 'residues' with id ('A', 10, 'ALA')>
atom.residue.chain
> <HierElement with name 'chains' with id ('A',)>
type(atom)
> AtomView
atom.reference_atom
> Atom in some Molecule
atom
> <AtomView of <AtomView of <AtomView of <Atom in some Molecule>>>>
atom.index(reference='residue')
> 7

atom2 = topology.atoms[105]
atom2.chain
> AttributeError???
type(atom2)
> TopologyAtom

SyntaxError: invalid syntax (<ipython-input-1-dff5a00bb734>, line 7)

### Atom

* metadata (dataclass preferred, but dict would work too)
* json

```python
def test_atom_metadata_json_roundtrip_preserves_types(self):
    """Test atom metadata preserves types when serializing to json.

    The metadata is filled with one value per type of interest, including
    strings that look like other JSON values ('3.14', '[1, 1, 2, 3, 5]', 'null'),
    then serialized with ``json()`` and parsed back with ``Atom.parse_raw``.
    """
    atom = Atom(atomic_number=6, formal_charge=0, is_aromatic=False)
    atom.metadata['some_float'] = 3.14
    # A float with an integral value.
    atom.metadata['some_other_float'] = 1.
    atom.metadata['some_list'] = [1, 1, 2, 3, 5]
    atom.metadata['some_bool'] = False
    atom.metadata['some_int'] = 4
    # Strings that textually resemble a float and a list.
    atom.metadata['some_str'] = '3.14'
    atom.metadata['some_other_str'] = '[1, 1, 2, 3, 5]'
    # NOTE(review): JSON has no tuple type -- confirm the tuple survives the roundtrip.
    atom.metadata['some_tuple'] = ('a', 1)
    atom.metadata['some_dict'] = {'key1': 'value1', 'key2': None}
    # None versus the string 'null'.
    atom.metadata['some_none'] = None
    atom.metadata['some_null_str'] = 'null'
    json_object = atom.json()
    new_atom = Atom.parse_raw(json_object)
    assert new_atom.metadata == atom.metadata  # May be more complex if we need to mind order
```


### TypedMolecule

Much like the "normal" stuff, just not being able to return formal charge or bond orders.

For now try to mirror the current API.

* `__init__(**kwargs)`

* add_atom(mass, element, **kwargs)
    * Why not pass a typed atom directly? --> add_atom(typed_atom, **kwargs)

```python
def test_typed_molecule_add_atom(self):
    """Test that an atom added to an empty typed molecule ends up as its last atom."""
    molecule = TypedMolecule()
    carbon_atom = TypedAtom(mass=carbon.mass, element=carbon)
    molecule.add_atom(carbon_atom)
    assert molecule.n_atoms == 1
    # The new atom is expected at the end of the list/iterator.
    last_atom = molecule.atoms[-1]
    assert last_atom == carbon_atom
    
def test_typed_molecule_add_atom_with_molecule(self):
    # This is not possible with current API
    """Test adding an atom that already belongs to another molecule."""
    other_typed_molecule = TypedMolecule()
    # `element=carbon` matches how TypedAtom is built in the other tests.
    typed_atom_with_molecule = TypedAtom(mass=carbon.mass,
                                         element=carbon,
                                         molecule=other_typed_molecule
                                        )
    typed_molecule = TypedMolecule()
    with pytest.raises(AtomAlreadyInMolecule, match="..."):
        typed_molecule.add_atom(atom=typed_atom_with_molecule)
```
* add_bond(atom1, atom2, **kwargs)
    * Why not pass a typed bond directly? --> add_bond(typed_bond, **kwargs)

```python
def test_typed_molecule_add_bond(self):
    """Test that a newly added bond is included in the molecule."""
    typed_molecule = TypedMolecule.from_file(...)
    current_n_bonds = typed_molecule.n_bonds
    # Assume it has at least two atoms
    atom1 = typed_molecule.atoms[...]
    atom2 = typed_molecule.atoms[...]
    typed_molecule.add_bond(atom1=atom1, atom2=atom2)  # no bond order
    # `==` compares; the original `=` was a syntax error.
    assert typed_molecule.n_bonds == current_n_bonds + 1
    
def test_typed_molecule_add_already_existing_bond(self):
    """Test that re-adding the last bond of a molecule raises BondAlreadyInMolecule."""
    # TODO: We should allow this to happen -- typedmols are messy
    molecule = TypedMolecule.from_file(...)
    # The last bond is used as the already-existing one.
    existing_bond = molecule.bonds[-1]
    with pytest.raises(BondAlreadyInMolecule, match='...'):
        molecule.add_bond(atom1=existing_bond.atom1, atom2=existing_bond.atom2)

def test_typed_molecule_add_atoms_and_bonds(self):
    """Test typed molecule creation from the addition of atoms and bonds."""
    # This is a 'minimal' test -- TODO: everything-bagel test: when it contains more than just mass, element and bonds.
    typed_molecule_template = TypedMolecule.from_file(...)  # or fixture object?
    typed_molecule = TypedMolecule()
    # adding atoms
    for atom in typed_molecule_template.atoms:
        typed_molecule.add_atom(mass=atom.mass, element=atom.element)
    # adding bonds (the original called the molecule itself instead of add_bond)
    # NOTE(review): bond.atom1/atom2 belong to the template molecule; confirm
    # whether add_bond should receive the new molecule's own atoms instead.
    for bond in typed_molecule_template.bonds:
        typed_molecule.add_bond(atom1=bond.atom1, atom2=bond.atom2)
    assert typed_molecule == typed_molecule_template
    

```
* add_virtual_site(atoms, n_particles, **kwargs)
* atoms
* bonds
* virtual_sites
* particles
* n_atoms

```python
def test_typed_molecule_n_atoms(self):
    """Test that n_atoms agrees with the length of the atoms collection."""
    molecule = TypedMolecule.from_file(...)
    reported_count = molecule.n_atoms
    assert reported_count == len(molecule.atoms)
```
* n_bonds

```python
def test_typed_molecule_n_bonds(self):
    """Test that n_bonds agrees with the length of the bonds collection."""
    molecule = TypedMolecule.from_file(...)
    reported_count = molecule.n_bonds
    assert reported_count == len(molecule.bonds)
```
* n_virtual_sites
* n_particles
* angles/propers/impropers(?optional?)
* add_conformer
* conformers
    * Should mimic current Molecule API
* to_file (initially just support PDB, possibly as typ_mol.to_mdtraj().to_pdb() )
* to_networkx()

```python
def test_typed_molecule_to_networkx(self):
    """Test that the networkx graph has one node per atom and one edge per bond."""
    molecule = TypedMolecule.from_file(...)
    nx_graph = molecule.to_networkx()
    # Nodes correspond to atoms.
    node_count = len(nx_graph.nodes)
    assert node_count == molecule.n_atoms
    # Edges correspond to bonds.
    edge_count = len(nx_graph.edges)
    assert edge_count == molecule.n_bonds
```
* from_file (initially just PDB)

```python
def test_typed_molecule_from_pdb_file(self):
    """Test molecule is correctly read from PDB file.

    The expected atom and bond counts are still ``...`` placeholders.
    """
    typed_molecule = TypedMolecule.from_file('1aki_with_h.pdb')  # seems like a good candidate for PDB
    # Check number of atoms (TODO: fill in the expected count)
    assert typed_molecule.n_atoms == ...
    # Check number of bonds (TODO: fill in the expected count)
    assert typed_molecule.n_bonds == ...
```

### TypedBond

* `__init__(atom1, atom2, molecule, **kwargs)`
    
```python
def test_typed_bond_constructor(self):
    """Test bond is correctly constructed and added to the molecule."""
    # The original used `typed_molecule` without ever creating it.
    typed_molecule = TypedMolecule(...)  # maybe from file?
    typed_atom1 = TypedAtom(...)
    typed_atom2 = TypedAtom(...)
    typed_bond = TypedBond(atom1=typed_atom1, atom2=typed_atom2, molecule=typed_molecule)
    # Constructing the bond with a molecule is expected to append it to that molecule.
    assert typed_bond == typed_molecule.bonds[-1]
```
* atom1
* atom2

```python
def test_typed_bond_atom1_atom2(self):
    """Test the bond exposes the atoms it was constructed with as atom1 and atom2."""
    # Renamed: the original reused the name test_typed_bond_constructor, which
    # would shadow the constructor test of the same name.
    # The original also used `typed_molecule` without ever creating it.
    typed_molecule = TypedMolecule(...)  # maybe from file?
    typed_atom1 = TypedAtom(...)
    typed_atom2 = TypedAtom(...)
    typed_bond = TypedBond(atom1=typed_atom1, atom2=typed_atom2, molecule=typed_molecule)
    assert typed_bond.atom1 == typed_atom1
    assert typed_bond.atom2 == typed_atom2
```

* atom1_index
* atom2_index



### TypedAtom
* `__init__(mass, element, molecule=None, **kwargs)`
    * Chat with MT about the requirements of mass and 'element'.

```python
def test_typed_atom_constructor_without_molecule(self):
    """Test TypedAtom creation when a molecule isn't specified."""
    typed_atom = TypedAtom(mass=carbon.mass, element=carbon)
    # Would the element always have the mass info?
    assert typed_atom.mass == carbon.mass
    # `==` compares; the original `=` was a syntax error.
    assert typed_atom.element == carbon
    # What about the kwargs?
    assert typed_atom.molecule is None
    
def test_typed_atom_constructor_with_molecule(self):
    """
    Test TypedAtom creation when a molecule is specified.

    Tests specified molecule gets updated correctly: the new atom is expected
    to be the last atom of the molecule. Two alternative checks are sketched,
    one for an indexable ``atoms`` and one for an iterator-only ``atoms``.
    """
    typed_molecule = TypedMolecule(...)  # maybe from file?
    typed_atom = TypedAtom(mass=carbon.mass,
                           element=carbon,
                           molecule=typed_molecule
                          )
    # Assuming it gets added at the end of the atoms list/iterator
    assert typed_atom == typed_molecule.atoms[-1]
    # OR (if iterator)
    # better way to get last item in atoms?
    # Exhaust the iterator so that `atom` is left bound to the last item.
    for atom in typed_molecule.atoms:
        pass
    assert atom == typed_atom
```
* molecule_atom_index

```python
def test_typed_atom_empty_molecule_atom_index(self):
    """Test that molecule_atom_index raises ValueError for an atom without a molecule."""
    orphan_atom = TypedAtom(mass=carbon.mass, element=carbon)
    # Currently a ValueError is raised when atom doesn't belong to a molecule
    expected_message = "This Atom does not belong to a Molecule object"
    with pytest.raises(ValueError, match=expected_message):
        orphan_atom.molecule_atom_index
    
    
def test_typed_atom_with_molecule_atom_index(self):
    """Test typed atom has the expected molecule atom index."""
    typed_molecule = TypedMolecule(...)  # or maybe from file?
    # `element=carbon`: a bare positional `element` after a keyword argument
    # was a syntax error in the original.
    typed_atom = TypedAtom(mass=carbon.mass,
                           element=carbon,
                           molecule=typed_molecule
                          )
    # TODO: IP--test mol index with .add_molecule() in TypedMolecule tests
    # The atom is expected to be the last one, hence index n_atoms - 1.
    atom_index = typed_molecule.n_atoms - 1
    assert typed_atom.molecule_atom_index == atom_index
    # OR (if list)
    atom_index = typed_molecule.atoms.index(typed_atom)
    assert typed_atom.molecule_atom_index == atom_index
```

* molecule_particle_index

```python
def test_typed_atom_empty_molecule_particle_index(self):
    """Test typed atom particle index is empty when there is no specified molecule."""
    typed_atom = TypedAtom(mass=carbon.mass, element=carbon)
    # Currently a ValueError is raised when atom doesn't belong to a molecule
    with pytest.raises(ValueError,
                       match="This Atom does not belong to a Molecule object"
                      ):
        # The property under test is the particle index; the original
        # accessed molecule_atom_index here (copy-paste from the test above).
        typed_atom.molecule_particle_index
        
def test_typed_atom_with_molecule_particle_index(self):
    """Test typed atom has the expected molecule particle index."""
    # Renamed: the original reused the name of the atom-index test above,
    # which would shadow that test.
    typed_molecule = TypedMolecule(...)  # or maybe from file?
    typed_atom = TypedAtom(mass=carbon.mass,
                           element=carbon,
                           molecule=typed_molecule
                          )
    # TODO: IP--test mol index with .add_molecule() in TypedMolecule tests
    # Expect the index to be given by n_atoms
    particle_index = typed_molecule.n_atoms - 1
    # The original compared against `atom_index` before it was assigned.
    assert typed_atom.molecule_particle_index == particle_index
    # OR (if list)
    particle_index = typed_molecule.particles.index(typed_atom)
    assert typed_atom.molecule_particle_index == particle_index
```

* bonds
* bonded_atoms
* ?? partial_charge
* ?? from_atom (from cheminformatics atom)
* metadata

### TypedVirtualSite
* `__init__(atoms, n_particles, **kwargs)`
* particles

```python
class TypedVirtualSite(BaseModel):
    """Sketch of a virtual site defined over a set of atoms of a typed molecule."""

    def __init__(self, atoms, n_particles, **kwargs):
        """Store the parent atoms, the number of particles and any extra keyword data."""
        # NOTE(review): if BaseModel is pydantic's, the model presumably needs
        # declared fields and a super().__init__() call before attributes can
        # be assigned -- confirm against the pydantic version in use.
        self.atoms = atoms
        self.n_particles = n_particles
        # The original wrote into self.some_dict without ever creating it.
        self.some_dict = dict(kwargs)

    def particles(self):
        """Yield one TypedVirtualParticle per particle index of this site."""
        for i in range(self.n_particles):
            yield TypedVirtualParticle(self, i)

    def molecule_virtual_site_index(self):
        """Return this site's position in the virtual_sites of its first atom's molecule."""
        return self.atoms[0].molecule.virtual_sites.index(self)
    
```

### TypedVirtualParticle

* `__init__(virtual_site, orientation, **kwargs)`
* molecule_particle_index


### Topology

* `__init__()`

* coordinates
    * Maybe take the first conformer as coordinates.
    * Return NaNs or None for missing coordinate data.
* replace_molecule
* register_hierarchy_scheme
    * Would run recursively in the molecules.

```python

 # These have to be changed in terms of the new architectural changes

@pytest.mark.parametrize('when_to_perceive_hierarchy_on_mols', (1, 2, 3))
def test_topology_hierarchy_transfer_from_mols(self, when_to_perceive_hierarchy_on_mols):
    """Test hierarchy schemes are transferred from the topology molecules.

    The parameter selects where hierarchy perception is triggered:
    1 -- on the source molecules, before they are added to the topology;
    2 -- on each topology molecule, after they have been added;
    3 -- on the topology as a whole.
    """
    # Molecules with a specific hierarchy flavor
    molecule1 = some_fixture_molecule()
    molecule2 = some_other_fixture_molecule()
    if when_to_perceive_hierarchy_on_mols == 1:
        molecule1.perceive_hierarchy()
        molecule2.perceive_hierarchy()

    topology = Topology()
    # Assuming molecules have atoms and hierarchy information/schemes.
    topology.add_molecules([molecule1, molecule2])
    # Let's ensure that topology.from_molecules just does a very simple "add_molecules" loop so that we don't have to test both

    # The topology is expected to hold its own object, not the one passed in.
    assert topology.molecules[0] is not molecule1

    # We should also test that this updates the topology iterator
    if when_to_perceive_hierarchy_on_mols == 2:
        topology.molecules[0].perceive_hierarchy()
        topology.molecules[1].perceive_hierarchy()

    if when_to_perceive_hierarchy_on_mols == 3:
        topology.perceive_hierarchy()

    mol_iter_1_eles = [*molecule1.iterator_1_name] + [*molecule2.iterator_1_name]
    for top_ele, mol_ele in zip(topology.iterator_1_name, mol_iter_1_eles):
        assert isinstance(mol_ele, MoleculeHierarchyElement)
        assert isinstance(top_ele, TopologyHierarchyElement)
        assert top_ele.molecule_element == mol_ele # This is OK
        assert top_ele.molecule_element is mol_ele # This is slightly preferable, if the implementation will allow
        assert top_ele.molecule_atom_indices == mol_ele.molecule_atom_indices

        # Atoms of the second molecule are offset by the size of the first.
        # The offset is applied per index so this works for any index sequence
        # (the original added an int to the whole sequence).
        if top_ele.molecule == molecule1:
            offset = 0
        else:
            offset = molecule1.n_atoms
        expected_indices = [idx + offset for idx in mol_ele.molecule_atom_indices]
        assert list(top_ele.topology_atom_indices) == expected_indices
        
 # The same thing as above, but change an element on one molecule and ensure that the topology iterator reflects that change
 # Same thing as above, but delete all the elements on one molecule and ensure the topology iterator reflects that change
 # Same thing as above, but fully delete all the elements under iterator_1_name in both mols, and ensure that the topology iterator either 
 #       returns an empty list/doesn't yield anything; OR
 #       raises an error of some sort
 # 
        
 # A test like the below, that reflects that the residue iterator DOES NOT update with changes to atom metadata
 # Then show that the iterator DOES update after calling perceive_hierarchy
def test_hierarchy_element_metadata_setter_and_iterator_update(self):
    """Test the residue iterator only reflects atom metadata changes after perceive_hierarchy.

    Also checks that hierarchy element attributes cannot be assigned directly.
    """
    # NOTE(review): `topology` is not created in this sketch -- presumably a fixture.
    new_res_nums = []
    for residue in topology.residues:
        new_residue_number = residue.residue_num + 3
        new_res_nums.append(new_residue_number)
        # Don't support this: direct assignment on the element must fail.
        with pytest.raises(AttributeError):
            residue.residue_num = new_residue_number
        with pytest.raises(DontDoThatException):
            residue.id = ...

        for atom in residue.atoms:
            atom.metadata['residue_num'] = residue.residue_num + 3
        # The element must still report the old number at this point.
        assert residue.residue_num != new_residue_number

    # Re-perceiving picks up the metadata written above.
    topology.perceive_hierarchy()
    for residue, new_residue_num in zip(topology.residues, new_res_nums):
        assert residue.residue_num == new_residue_num


    
@pytest.mark.parametrize('use_from_molecules', (False, True))
def test_topology_hierarchy_transfer_from_top_mols_partial_overlap(self, use_from_molecules):
    """Test scheme transfer when one scheme is shared and one exists on a single molecule.

    Runs once through Topology.from_molecules and once through add_molecule.
    """
    hier_scheme_1 = HierarchyScheme(name='residues',
                                    uniqueness_criteria=['chain', 'residue_num', 'residue_name'])
    hier_scheme_2 = HierarchyScheme(name='chains',
                                    uniqueness_criteria=['chain'])
    # molecule1 has only the shared scheme; molecule2 has both.
    molecule1 = Molecule(hierarchy_schemes=[hier_scheme_1])
    molecule2 = Molecule(hierarchy_schemes=[hier_scheme_1, hier_scheme_2])
    if use_from_molecules:
        topology = Topology.from_molecules([molecule1, molecule2])
    else:
        topology = Topology()
        topology.add_molecule(molecule1)
        topology.add_molecule(molecule2)
    assert len(topology.hierarchy_schemes) == 2
    # The scheme present on only one molecule must still reach the topology.
    assert topology.hierarchy_schemes[hier_scheme_2.name] == hier_scheme_2

@pytest.mark.parametrize('use_from_molecules', (False, True))
def test_topology_hierarchy_transfer_from_top_mols_no_overlap(self, use_from_molecules):
    """Test scheme transfer when the two molecules carry differently named schemes.

    Runs once through Topology.from_molecules and once through add_molecule.
    """
    hier_schemes_mol1 = HierarchyScheme(name='residues',
                                        uniqueness_criteria=['chain', 'residue_num', 'residue_name'])
    hier_schemes_mol2 = HierarchyScheme(name='chains',
                                        uniqueness_criteria=['chain'])
    molecule1 = Molecule(hierarchy_schemes=[hier_schemes_mol1])
    molecule2 = Molecule(hierarchy_schemes=[hier_schemes_mol2])

    if use_from_molecules:
        topology = Topology.from_molecules([molecule1, molecule2])
    else:
        topology = Topology()
        topology.add_molecule(molecule1)
        topology.add_molecule(molecule2)
    # Both schemes are expected on the topology, each under its own name.
    assert len(topology.hierarchy_schemes) == 2
    assert topology.hierarchy_schemes[hier_schemes_mol1.name] == hier_schemes_mol1
    assert topology.hierarchy_schemes[hier_schemes_mol2.name] == hier_schemes_mol2

@pytest.mark.parametrize('use_from_molecules', (False, True))
def test_topology_hierarchy_transfer_from_top_mols_incompatible_partial_overlap(self, use_from_molecules):
    """Test that same-named schemes with different uniqueness criteria cannot be combined.

    Both molecules share an identical 'chains' scheme but define 'residues'
    with different criteria. Runs through both from_molecules and add_molecule.
    """
    hier_schemes_mol1 = [HierarchyScheme(name='residues',
                                         uniqueness_criteria=['chain', 'residue_num', 'residue_name']),
                         HierarchyScheme(name='chains',
                                         uniqueness_criteria=['chain'])
                        ]
    hier_schemes_mol2 = [HierarchyScheme(name='residues',
                                         uniqueness_criteria=['chain', 'residue_index', 'residue_type']),
                         HierarchyScheme(name='chains',
                                         uniqueness_criteria=['chain'])
                        ]
    molecule1 = Molecule(hierarchy_schemes=hier_schemes_mol1)
    molecule2 = Molecule(hierarchy_schemes=hier_schemes_mol2)

    # The conflicting scheme is the first entry ('residues'); the original
    # referenced an undefined `hier_scheme_mol1`.
    conflicting_name = hier_schemes_mol1[0].name
    with pytest.raises(IncompatibleHierarchySchemeError, match=f'Cannot combine hierarchy schemes with name "{conflicting_name}"'):
        if use_from_molecules:
            topology = Topology.from_molecules([molecule1, molecule2])
        else:
            topology = Topology()
            topology.add_molecule(molecule1)
            topology.add_molecule(molecule2)

@pytest.mark.parametrize('use_from_molecules', (False, True))
def test_topology_hierarchy_transfer_from_top_mols_incompatible_overlap_uniqueness_order(self, use_from_molecules):
    """Test that same-named schemes whose criteria differ only in order cannot be combined.

    Runs once through Topology.from_molecules and once through add_molecule.
    """
    hier_schemes_mol1 = [HierarchyScheme(name='residues',
                                         uniqueness_criteria=['chain', 'residue_num', 'residue_name'])
                        ]
    hier_schemes_mol2 = [HierarchyScheme(name='residues',
                                         uniqueness_criteria=['residue_num', 'residue_name', 'chain'])
                        ]
    molecule1 = Molecule(hierarchy_schemes=hier_schemes_mol1)
    molecule2 = Molecule(hierarchy_schemes=hier_schemes_mol2)

    # The original referenced an undefined `hier_scheme_mol1`.
    conflicting_name = hier_schemes_mol1[0].name
    with pytest.raises(IncompatibleHierarchySchemeError, match=f'Cannot combine hierarchy schemes with name "{conflicting_name}"'):
        if use_from_molecules:
            topology = Topology.from_molecules([molecule1, molecule2])
        else:
            topology = Topology()
            topology.add_molecule(molecule1)
            topology.add_molecule(molecule2)
```

* deregister_hierarchy_scheme
    * Already tested in register_hierarchy_scheme
* mdtraj_select
```python
def test_topology_mdtraj_selection(self):
    """Test selection gives the same atoms compared to mdtraj topology selection."""
    molecule1 = Molecule.from_pdb_file(...)
    molecule2 = Molecule.from_pdb_file(...)
    topology = Topology.from_molecules([molecule1, molecule2])
    selection_str = 'name CA and resid 7 to 10'
    mdtraj_top = topology.to_mdtraj()
    mdtraj_output = mdtraj_top.select(selection_str)
    # IP: Do we expect orders to be the same?
    # Compared as lists: mdtraj's select presumably returns a numpy array, and
    # `assert array == array` is ambiguous for more than one element.
    assert list(topology.mdtraj_select(selection_str)) == list(mdtraj_output)
```
* perceive_residues
```python
def test_topology_perceive_residues_iterator(self):
    """Test that residues perceived on the topology equal those perceived on the molecule."""
    mol = Molecule.from_file(...)
    top = Topology()
    top.add_molecule(mol)
    # Perception is run at the molecule level first, then at the topology level.
    mol.perceive_residues()
    top.perceive_residues()

    topology_residues = top.residues
    assert topology_residues == mol.residues
```
* perceive_hierarchy
    
```python
def test_perceive_hierarchy(self):
    """Test that two residues are perceived when atoms carry two distinct residue labels."""
    mol = Molecule.from_file('ethanol.sdf')
    residue_1_atoms = [0,1,4,5,6,7,8]
    residue_2_atoms = [2,9]
    # Atoms listed in residue_1_atoms get residue X/1; all others get Y/2.
    for atom in mol:
        if atom.molecule_atom_index in residue_1_atoms:
            atom.metadata['chain'] = 'A'
            atom.metadata['residue_name'] = 'X'
            atom.metadata['residue_num'] = 1
        else:
            atom.metadata['chain'] = 'A'
            atom.metadata['residue_name'] = 'Y'
            atom.metadata['residue_num'] = 2

    # The original called the misspelled `percieve_hierarchy`.
    mol.perceive_hierarchy()
    assert len([*mol.residues]) == 2
    ...
    
def test_perceive_hierarchy_missing_info(self):
    """Test that no residues are perceived when atoms carry no 'chain' metadata.

    Only 'residue_name' and 'residue_num' are set on the atoms.
    """
    mol = Molecule.from_file('ethanol.sdf')
    residue_1_atoms = [0,1,4,5,6,7,8]
    residue_2_atoms = [2,9]
    for atom in mol:
        in_first_residue = atom.molecule_atom_index in residue_1_atoms
        res_name, res_num = ('X', 1) if in_first_residue else ('Y', 2)
        atom.metadata['residue_name'] = res_name
        atom.metadata['residue_num'] = res_num

    mol.perceive_hierarchy()
    perceived = [*mol.residues]
    assert len(perceived) == 0
    expected_ids = []
    assert expected_ids == [ele.id for ele in mol.residues]
    ...

def test_perceive_hierarchy_custom_uniqueness(self):
    """Test perception after 'residues' uniqueness criteria are replaced with ones not using 'chain'.

    The atoms carry only 'residue_name' and 'residue_num'. The criteria are set
    to ('residue_num', 'residue_name') before perceiving, and the element ids
    are expected in that same order.
    """
    mol = Molecule.from_file('ethanol.sdf')
    residue_1_atoms = [0,1,4,5,6,7,8]
    residue_2_atoms = [2,9]
    for atom in mol:
        if atom.molecule_atom_index in residue_1_atoms:
            atom.metadata['residue_name'] = 'X'
            atom.metadata['residue_num'] = 1
        else:
            atom.metadata['residue_name'] = 'Y'
            atom.metadata['residue_num'] = 2 # Will the type info (integer) survive a JSON roundtrip? Will depend on what pydantic supports.

    # Override the criteria on the registered scheme, then re-perceive.
    mol.hierarchy_schemes['residues'].uniqueness_criteria = ('residue_num', 'residue_name')
    mol.perceive_hierarchy()
    assert len([*mol.residues]) == 2
    expected_ids = [(1, 'X'), (2, 'Y')]
    assert expected_ids == [ele.id for ele in mol.residues]
    ...    
```

Here's a bad design that's currently in the toolkit

```python
molecule.properties
offmol.properties['color'] = 'green'
offmol.properties['atom_map'] = dict'{1:2, 2:3, 3:4}'
pydantic: offmol.json and offmol.dict 
current functional "exchange point" with all serialization formats (json, bson, etc): offmol.to_dict
```

* residues (and other iterators) 
* add_molecule (update to include copying in metadata, TypedMolecules, appending hierarchies)

```python
def test_topology_add_single_molecule_copy_metadata(self):
    """Test metadata is copied from atoms to topology atoms using a single molecule."""
    molecule = Molecule.from_file(...)
    # IP -- Would this fill the metadata?
    molecule.perceive_hierarchy(...)
    topology = Topology()
    # The original misspelled this as `toplogy`.
    topology.add_molecule(molecule)
    # loop through all atoms and check metadata
    # (the original zipped against an undefined `molecule1`)
    for top_atom, mol_atom in zip(topology.topology_atoms, molecule.atoms):
        assert top_atom.metadata == mol_atom.metadata

def test_topology_add_typed_molecule(self):
    """Test that adding a TypedMolecule gives one topology molecule with a TypedMolecule reference."""
    molecule = TypedMolecule.from_file(...)
    top = Topology()
    top.add_molecule(molecule)
    n_topology_molecules = len(top.topology_molecules)
    assert n_topology_molecules == 1
    first_reference = top.reference_molecules[0]
    assert isinstance(first_reference, TypedMolecule)

topology.topology_molecules[0].residue[('A', 10, 'ALA')] --> HierElement or KeyError
topology.residues[('A',10,'ALA')] --> list/iterator of HierElement??

class Topology:
    """Sketch of the residue accessors exposed at the topology level."""

    def residues(self):
        """Yield every residue of every topology molecule that exposes residues.

        Topology molecules without a ``residues`` attribute are skipped.
        """
        for top_mol in self.topology_molecules:
            # hasattr() takes the object first and the attribute name second.
            if hasattr(top_mol, 'residues'):
                yield from top_mol.residues

    # Looks like IP's "get_residues"
    def residue(self, item):  # item is tuple, or integer index
        """Yield the residue matching ``item`` from each molecule that has one.

        Parameters
        ----------
        item
            Residue key forwarded unchanged to each molecule's ``residue()``
            (a tuple or an integer index).

        Yields
        ------
        The value returned by ``top_mol.residue(item)`` for every topology
        molecule that exposes residues and does not raise ``KeyError``.
        """
        for top_mol in self.topology_molecules:
            if not hasattr(top_mol, 'residues'):
                continue
            # Keep the try body to the lookup only, so a KeyError thrown into
            # the generator by the consumer is never swallowed here.
            try:
                match = top_mol.residue(item)
            except KeyError:
                # This molecule has no such residue; keep searching the others.
                continue
            yield match
        # Design alternative still under discussion: collect the matches in a
        # list and return it instead of yielding them lazily.

def test_topology_add_molecule_hier_element_collision(self):
    """Test that overlapping topology residues get extended when adding molecules.

    The same protein is added twice, so every residue identifier is expected
    to show up twice in the topology.
    """
    # This could be useful for testing you get duplicated elements from the same query
    protein1 = Molecule.from_pdb_file(...)
    n_residues_protein1 = len(protein1.residues)
    topology = Topology()
    topology.add_molecule(protein1)
    topology.add_molecule(protein1)
    assert len(topology.residues) == 2 * n_residues_protein1
    ### 2021-04-29 --Let's leave the expected behavior undefined for now -- Will depend
    # Check first part is NOT still protein1.residues, since each atom should be present twice
    ala_10_id = ('A', 10, 'ALA')
    ala_10s_found = sum(
        1 for residue in topology.residues if residue.identifier == ala_10_id
    )
    assert ala_10s_found == 2
    assert len(topology.residues[ala_10_id]) == 2 * len(protein1.residues[ala_10_id])
    
    
    #    assert topology.residues[residue_idx].topology_atom_indices == protein1.residues[residue_idx].atom_indices
    
    
def test_topology_add_molecule_extend_residues(self):
    """Test topology residues get extended when adding molecules.

    After each molecule is added, the topology residues are compared with
    the residues of the molecule they came from.
    """
    protein1 = Molecule.from_pdb_file(...)
    protein2 = Molecule.from_pdb_file(...)
    n_residues_protein1 = len(protein1.residues)
    n_residues_protein2 = len(protein2.residues)
    total_residues = n_residues_protein1 + n_residues_protein2
    topology = Topology()
    topology.add_molecule(protein1)
    # The first block of topology residues must map onto protein1's residues.
    for residue_idx in range(n_residues_protein1):
        assert topology.residues[residue_idx].topology_atom_indices == protein1.residues[residue_idx].atom_indices
    topology.add_molecule(protein2)
    # Check the rest corresponds to protein2.residues
    # TODO: Check indices , make them work!
    # NOTE(review): the topology atom indices of the second molecule may need
    # offsetting by protein1's atom count before this comparison -- confirm.
    for residue_idx in range(n_residues_protein1, total_residues):
        # Topology residue indices continue after protein1's residues, so
        # shift back to get the matching index within protein2.
        protein2_residue_idx = residue_idx - n_residues_protein1
        assert topology.residues[residue_idx].topology_atom_indices == protein2.residues[protein2_residue_idx].atom_indices
    
def test_topology_add_molecule_extend_iterators(self):
    """Test that all common compatible iterators from the molecules are included in the topology"""
    protein1 = Molecule.from_pdb_file(...)
    protein2 = Molecule.from_pdb_file(...)
    # Assume the molecules already have some exposed iterators and hierarchy schemes
    all_hierarchy_schemes = copy.deepcopy(protein1.hierarchy_schemes)
    # hierarchy_schemes is used as a name -> scheme mapping elsewhere in this
    # document, so it is merged with update() (a dict has no extend()).
    # NOTE(review): a scheme in protein2 with the same name replaces the one
    # from protein1 here -- the intended collision handling is still open.
    all_hierarchy_schemes.update(protein2.hierarchy_schemes)
    all_hierarchy_names = []
    ### STUCK
```
* to_openmm (update)
* to_mdtraj (update)




### TopologyAtom

* metadata -- API point. Data lives in the topology molecule.

```python
 # Parametrize using atom with and without metadata?
def test_topology_atom_metadata_from_molecule(self):
    """Test atom metadata points to its topology molecule metadata."""
    # Assuming there is some topology atom already existing.
    topology_molecule = topology_atom.topology_molecule
    # Topology molecule should know about the atom indices
    # Topology atoms should know about their index
    top_atom_index = topology_atom.topology_atom_index
    # Design option 1: atom_metadata is a method taking the atom index.
    assert topology_atom.metadata == topology_molecule.atom_metadata(top_atom_index)
    # OR as iterable
    # Design option 2: atom_metadata is an indexable collection. Only one of
    # the two forms is expected to survive once the API is settled.
    assert topology_atom.metadata == topology_molecule.atom_metadata[top_atom_index]
```

### TopologyMolecule

* `__init__()` 
    * update with atom_metadata and make sure ref_mol does not have it
    
```python
def test_topology_molecule_fill_atom_metadata(self):
    """Test topology molecule metadata gets atom metadata from molecule.

    The molecule's metadata is populated first, then a TopologyMolecule is
    built from it and its metadata is compared with the molecule's.
    """
    molecule = Molecule.from_file(...)
    # This fills metadata
    molecule.perceive_residues()
    topology = Topology()
    topology_molecule = TopologyMolecule(reference_molecule=molecule,
                                         topology=topology
                                        )
    assert topology_molecule.metadata == molecule.metadata
    
def test_topology_molecule_reference_molecules_no_metadata(self):
    """Test reference molecules have empty metadata"""
    molecule = Molecule.from_file(...)
    molecule.perceive_residues()
    topology = Topology()
    topology_molecule = TopologyMolecule(reference_molecule=molecule,
                                         topology=topology
                                        )
    # Read the reference molecule from the object built above; nothing in
    # this test adds it to the topology, so topology.topology_molecules
    # cannot be relied on to contain it.
    reference_molecule = topology_molecule.reference_molecule
    # Whether "empty" means None or {} is still undecided; accept either, as
    # asserting both forms in sequence could never pass.
    assert reference_molecule.metadata in (None, {})
```
* Take first conformer (if present) as coordinates

```python
def test_topology_molecule_take_first_conformer(self):
    """Test topology molecule gets first conformer from molecule."""
    molecule = Molecule.from_pdb_file(...)  # Should it get conformers by default?
    topology = Topology()
    # Should be just add_molecule?
    topology_molecule = TopologyMolecule(reference_molecule=molecule,
                                         topology=topology
                                        )
    # Currently TopologyMolecule doesn't have conformers
    assert topology_molecule.n_conformers == 1
    # Presumably conformers are coordinate arrays, for which a bare `==`
    # yields an element-wise result whose truth value is ambiguous; reduce it
    # explicitly. np.all() also works if the comparison yields a single bool.
    assert np.all(topology_molecule.conformers[0] == molecule.conformers[0])
```
* perceive_residues
    * Should have a model/known molecule to compare with and get the expected results (positive control).
    * Should have a known molecule that fails (nonsensical residues?) (negative control).
* perceive_hierarchy
* residues (and other iterators) 
* ref_molecule (Test it is a copy of the reference Molecule)

```python
def test_topology_molecule_reference_molecule_is_new_object(self):
    """Check that a topology stores its own reference molecule object.

    The reference molecule held by the first topology molecule must not be
    the very same object as the molecule that was passed to add_molecule.
    """
    source_molecule = Molecule.from_file(...)
    topology = Topology()
    topology.add_molecule(source_molecule)
    first_topology_molecule = topology.topology_molecules[0]
    stored_reference = first_topology_molecule.reference_molecule
    assert stored_reference is not source_molecule
```

* atom_metadata
* coordinates -- let users specify a single correctly shaped array.

```python
def test_topology_molecule_coordinates_wrong_shape(self):
    """Test topology molecules coordinates do not accept wrong shapes.

    Two badly shaped arrays are tried: one with an extra row (atom) and one
    with only two columns. Each assignment must raise
    IncorrectCoordinateArrayShape.
    """
    # Create a wrong sized matrix and try using it as coordinates
    molecule = Molecule.from_file(...)
    topology = Topology()
    topology.add_molecule(molecule)
    # Wrong number of rows/atoms
    # The array is built outside the raises-block so that only the
    # assignment itself can satisfy the expected exception.
    coordinates_array = np.random.rand(molecule.n_atoms+1, 3) * unit.angstroms
    with pytest.raises(IncorrectCoordinateArrayShape, match='...'):
        topology.topology_molecules[0].coordinates = coordinates_array
    # Wrong number of columns (2 spatial components instead of 3)
    coordinates_array = np.random.rand(molecule.n_atoms, 2) * unit.angstroms
    with pytest.raises(IncorrectCoordinateArrayShape, match='...'):
        topology.topology_molecules[0].coordinates = coordinates_array
        
def test_topology_molecule_coordinates_right_shape(self):
    """Check that topology molecule coordinates come out as (n_atoms, 3).

    The molecule is loaded from a PDB file and added to an empty topology;
    the coordinates of the first topology molecule are then inspected.
    """
    pdb_molecule = Molecule.from_pdb_file(...)
    top = Topology()
    top.add_molecule(pdb_molecule)
    # Assuming it automatically transfers the coordinates from the molecule
    expected_shape = (pdb_molecule.n_atoms, 3)
    first_topology_molecule = top.topology_molecules[0]
    assert np.shape(first_topology_molecule.coordinates) == expected_shape
```



### HierarchyFlavor
* Three types of flavors: None, DEFAULT and No-chains.
* List of HierarchySchemes


### HierarchyScheme
* `__init__(uniqueness_key, iterator_name)`
* `__eq__ (for whether two schemes can safely be appended)`
    * JW -- Don't think we need this.
    * What we really mean when two schemes are equal.
* is_compatible_with
* parent
* elements (return list of HierarchyElement's)
* Test uniqueness_criteria order (different order -> different schemes)
* name -- check for safety (registry behavior)
* `__getitem__` (accept: tuple, integer or slice/list)

```python

```

### MoleculeHierarchyElement

* `__init__()`
* `__eq__`
* atoms
* atom_indices
* hierarchy_scheme
* index/identifier
* Nice to have, but not necessary: EITHER
    * (best case) magical indirection that allows `protein.chains[0].residues[10]`
    * magical slicing that allows `protein.chains[0].iterators.residues[10]`
    * magical slicing that allows `protein.chains[0].iterators['residues'][10]`
    * magical slicing that allows `protein.iterators.chains[0].iterators['residues'][10]`

* iterators (returns self.hierarchy_scheme.topology.hierarchy_schemes)

```python
protein.iterators['chains'][1].iterators['residues'][10]

for chain in protein.chains:
    for residue in chain.residues:
        ...

class TopologyHierarchyElement:
    """Sketch of how a hierarchy element could expose nested iterators."""

    def iterators(self):
        """Return the topology's hierarchy schemes restricted to this element.

        Every scheme registered on ``self.hierarchy_scheme.topology`` is
        deep-copied, and the copy keeps only (deep copies of) the elements
        that share at least one topology atom index with this element.

        Returns
        -------
        dict
            Maps each hierarchy name to its filtered scheme copy. The
            original schemes and elements are left untouched.
        """
        new_iterators = {}
        selected = set(self.topology_atom_indices)
        all_schemes = self.hierarchy_scheme.topology.hierarchy_schemes
        for hierarchy_name, hierarchy_scheme in all_schemes.items():
            new_hier_scheme = copy.deepcopy(hierarchy_scheme)
            # An element is kept as soon as it overlaps the selection;
            # isdisjoint() stops at the first shared index and needs no
            # intermediate set.
            new_hier_scheme.elements = [
                copy.deepcopy(element)
                for element in hierarchy_scheme.elements
                if not selected.isdisjoint(element.topology_atom_indices)
            ]
            new_iterators[hierarchy_name] = new_hier_scheme
        return new_iterators
                        

```

### TopologyHierarchyElement

* `__init__()`
* `__eq__`
* topology_atoms
* topology_atom_indices
* topology_molecule_atom_indices
* hierarchy_scheme
* index/identifier


* Test running merge_molecules on two molecules that have existing hierarchies with the SAME name, but DIFFERENT HierarchySchemes behind them.
    * An error should be raised, instructing the user to EITHER run merge_molecules with transfer_hierarchy_schemes=False
      OR delete one of the colliding hierarchy schemes before merging.
    * Upon deleting the colliding HierarchyScheme from the second molecule, the iterator should immediately disappear
      (`mol2.delete_hier_scheme('residues')` `mol2.residues --> AttributeError`) 
    * If both mol1 and mol2 have iterators with the same name, they should be appended
    * If ONLY mol1 OR mol2 has an iterator, the final mol should have that iterator, but immediately following
      the merge, only some of the atoms in the new molecule (only those originally in iterators) should be included
    * Atoms that were deleted should not appear in any iterators after the merge
    * Residues/HierElements that had all their atoms deleted MAY appear in the new molecule
    * Residues/HierElements with the same value of `uniqueness_key` should be merged in the new molecule

* Test that running perceive_residues on (a TypedMolecule, or a TopologyMolecule with its reference as a typed molecule, or a Topology containing a TypedMolecule) does NOT clear the existing data 

```python
new_mol.perceive_hierarchy([atom_indices], [hierarchy_scheme_names])
#new_mol.perceive_hierarchy({'residues': Molecule.HierScheme, 'chains': Molecule2.HierScheme})
new_mol.register_hierarchy_scheme(hier_scheme_with_name_residues)
new_mol.register_hierarchy_scheme(hier_scheme_with_name_chains)
new_mol.perceive_hierarchy(['residues', 'chains'])                            
new_mol.hierarchy_schemes
> {'residues': <HierScheme with name 'residues'>, 'chains': <HierScheme2 with name 'chains'>...}
```