In [1]:
from typing import Any
from abc import ABC, abstractmethod

from numpy import ndarray
from moddipic.core.data.abstract import Representation

import pathlib

class MoleculeRep(Representation, ABC):
    """Abstract root shared by every molecule representation in this cell.

    Adds no behaviour of its own on top of ``Representation``.
    """

class Molecule3DRep(MoleculeRep, ABC):
    """Abstract molecule representation that exposes ``update_coordinates``.

    Concrete subclasses have to override :meth:`update_coordinates`; until
    they do, they stay abstract and cannot be instantiated.
    """

    @abstractmethod
    def update_coordinates(self, coords: ndarray):
        """Update this representation from the given coordinates.

        Args:
            coords: Coordinates as a NumPy array.
                NOTE(review): the expected shape/units are not specified
                here -- confirm against the concrete implementations.
        """

class Molecule1DRep(MoleculeRep, ABC):
    """Marker base class tagging a molecule representation as "1D".

    Adds no behaviour; ``SMILESRep`` in this cell derives from it.
    """

class SmallMolRep(MoleculeRep, ABC):
    """Marker base class tagging a representation as usable for small molecules.

    Adds no behaviour of its own.
    """

class MacroMolRep(MoleculeRep, ABC):
    """Marker base class tagging a representation as usable for macromolecules.

    Adds no behaviour of its own.
    """

class SMILESRep(SmallMolRep, Molecule1DRep):
    """Small-molecule, 1D representation whose data is a SMILES string."""

    rep_name = "smiles"

    def __init__(self, smiles: str):
        """Hand ``smiles`` to the parent initializer as ``data``.

        Args:
            smiles: The SMILES string to wrap.

        Raises:
            AssertionError: If ``smiles`` is not a ``str`` (only while
                assertions are enabled).
        """
        assert isinstance(smiles, str)
        super().__init__(data=smiles)

class PDBPathRep(SmallMolRep, MacroMolRep, Molecule3DRep):
    """Molecule representation whose data is the absolute path of a PDB file.

    The path is normalised to an absolute string at construction time; the
    file itself is neither opened nor checked for existence here.
    """

    rep_name = "pdb_path"

    def __init__(self, path: str):
        """Store the absolute form of ``path`` via the parent initializer.

        Args:
            path: Location of the PDB file; relative paths are resolved
                against the current working directory.

        Raises:
            TypeError: If ``path`` cannot be interpreted as a filesystem
                path by ``pathlib.Path``.
        """
        # str(Path(...).absolute()) is always a str, so no further type
        # assertion is needed after the conversion.
        super().__init__(data=str(pathlib.Path(path).absolute()))

    def update_coordinates(self, coords: ndarray):
        """Explicitly unsupported for path-based representations.

        ``Molecule3DRep`` declares this method abstract; without an override
        this class could never be instantiated. Rewriting coordinates inside
        the referenced file is not implemented, so fail loudly instead.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "PDBPathRep does not support updating coordinates."
        )

In [2]:
# Sanity check: PDBPathRep reaches MoleculeRep through its base classes.
issubclass(PDBPathRep, MoleculeRep)

True

In [11]:
from typing import List, Type

from moddipic.core.data.abstract import Data
from moddipic.core.data import LigandData

class Batched(Data):
    """A (possibly nested) batch of ``Data`` objects.

    The batch records a "batch type": a list made of one ``Batched`` entry
    per nesting level followed by the class of the innermost elements, e.g.
    ``[Batched, Batched, LigandData]`` for a batch of batches of ligands.

    The type is inferred from the FIRST element only; the remaining elements
    are currently not checked for consistency (see the TODO in ``__init__``).

    NOTE(review): ``Data.__init__`` is not called here -- confirm that the
    base class does not require initialisation.
    """

    def __init__(self, data_list: List[Data]):
        """Wrap ``data_list`` and infer the batch type.

        Args:
            data_list: Non-empty sequence of ``Data`` (or ``Batched``)
                objects. The sequence is stored by reference, not copied.

        Raises:
            ValueError: If ``data_list`` is empty, or its first element is
                not a ``Data`` instance.
        """
        self._data_list = data_list
        self.obtain_batch_type()
        # TODO: Assert if with every branch of the batched tree, type is same.

    def obtain_batch_type(self):
        """Compute and store the batch type from the first element.

        Raises:
            ValueError: If the batch is empty (there is no element to infer
                a type from), or the first element is not a ``Data``.
        """
        # An empty batch used to surface as an opaque IndexError from the
        # indexing below; report the actual problem instead.
        if len(self._data_list) == 0:
            raise ValueError("Batch should contain at least one Data object.")

        selected_data = self._data_list[0]
        if isinstance(selected_data, Batched):
            # Nested batch: reuse the child's already-computed type chain.
            btype_child = selected_data.batch_type
        elif isinstance(selected_data, Data):
            btype_child = [type(selected_data)]
        else:
            raise ValueError("Batch should only consist of Data class.")

        self._batch_type: List[Type] = [Batched] + btype_child

    @property
    def batch_type(self) -> List[Type]:
        """Full type chain, outermost ``Batched`` first, leaf class last."""
        return self._batch_type  # TODO: return in a format easier to understand

    @property
    def shallow_dtype(self) -> Type:
        """Class of the direct elements (``Batched`` for nested batches)."""
        return self._batch_type[1]

    @property
    def basic_dtype(self) -> Type:
        """Class of the innermost (leaf) elements."""
        return self._batch_type[-1]

    @property
    def depth(self) -> int:
        """Number of ``Batched`` levels, this one included."""
        return len(self._batch_type) - 1

    def __len__(self):
        """Number of direct elements in this batch."""
        return len(self._data_list)

    def __iter__(self):
        """Iterate over the direct elements of this batch."""
        return iter(self._data_list)

    def __getitem__(self, index):
        """Return the direct element(s) selected by ``index``."""
        return self._data_list[index]

In [14]:
# Number of direct elements in a batch built from two ligands.
len(Batched([LigandData(SMILESRep("C")), LigandData(SMILESRep("CC"))]))

2

In [16]:
# Check that a default-constructed LigandData is an instance of Data.
isinstance(LigandData(), Data)

True

In [20]:
# Build a second, fresh instance from the class of an existing one.
type(LigandData())()

<moddipic.core.data.data_types.LigandData at 0x7f7c09cc3fa0>

In [11]:
from molsberry.core.data import BatchedData, MoleculeData, SMILESRep

# Two independent groups of molecules; each group becomes its own batch.
mol_datas = [MoleculeData(SMILESRep(smi)) for smi in ("CCC", "CC")]
mol_datas2 = [MoleculeData(SMILESRep(smi)) for smi in ("CCO", "CO")]

# Merge both batches, then read the SMILES content of element [0][0].
(
    BatchedData.merge([BatchedData(mol_datas), BatchedData(mol_datas2)])[0][0]
    .get_representation(SMILESRep)
    .content
)

'CCC'

In [12]:
from molsberry.core.data import Batched, MoleculeData, SMILESRep
from itertools import product, repeat

# Toy input: list-valued entries are iterated over, plain strings are
# carried along unchanged as a single constant group.
d = {
    "a": [1, 2, 3],
    "b": [4, 5, 6],
    "c": [11, 12, 13, 14, 15],
    "d": 'DDD',
    "e": 'EEE'
}
# Keys inside one tuple advance together (zipped); separate tuples are
# combined with each other through a Cartesian product below.
paired_keyss = (("a", "b"), ("c", ))


def _zip_group(source, keys):
    """Return one ``{key: value}`` dict per position of the zipped columns.

    Args:
        source: Mapping from key to a sequence of values.
        keys: Keys whose sequences are walked in lockstep.

    Returns:
        A list of dicts; its length is that of the shortest selected
        sequence, because ``zip`` stops at the shortest input.
    """
    return [dict(zip(keys, values)) for values in zip(*(source[k] for k in keys))]


g1_keys = paired_keyss[0]
g1_pair = _zip_group(d, g1_keys)
g2_keys = paired_keyss[1]
g2_pair = _zip_group(d, g2_keys)
# Constant (non-iterated) entries form a single one-element group.
g3_keys = ["d", "e"]
g3_pair = [{k: v for k, v in d.items() if k in g3_keys}]


# Every combination of one dict from each group.
list(product(g1_pair, g2_pair, g3_pair))

[({'a': 1, 'b': 4}, {'c': 11}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 1, 'b': 4}, {'c': 12}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 1, 'b': 4}, {'c': 13}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 1, 'b': 4}, {'c': 14}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 1, 'b': 4}, {'c': 15}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 2, 'b': 5}, {'c': 11}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 2, 'b': 5}, {'c': 12}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 2, 'b': 5}, {'c': 13}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 2, 'b': 5}, {'c': 14}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 2, 'b': 5}, {'c': 15}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 3, 'b': 6}, {'c': 11}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 3, 'b': 6}, {'c': 12}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 3, 'b': 6}, {'c': 13}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 3, 'b': 6}, {'c': 14}, {'d': 'DDD', 'e': 'EEE'}),
 ({'a': 3, 'b': 6}, {'c': 15}, {'d': 'DDD', 'e': 'EEE'})]

In [11]:
# Inspect the constant group.
# NOTE(review): the visible definition of g3_pair is a one-element list of
# dicts, but the recorded output is a zip object -- presumably left over from
# an earlier definition; re-run the notebook top to bottom to refresh it.
g3_pair

<zip at 0x7fa64b4fbf40>

In [10]:
# For each key group: zip its columns into per-position dicts, then take the
# Cartesian product across all groups.
list(product(*(
    [dict(zip(keys, row)) for row in zip(*(d[key] for key in keys))]
    for keys in paired_keyss
)))

[({'a': 1, 'b': 4}, {'c': 11}),
 ({'a': 1, 'b': 4}, {'c': 12}),
 ({'a': 1, 'b': 4}, {'c': 13}),
 ({'a': 1, 'b': 4}, {'c': 14}),
 ({'a': 1, 'b': 4}, {'c': 15}),
 ({'a': 2, 'b': 5}, {'c': 11}),
 ({'a': 2, 'b': 5}, {'c': 12}),
 ({'a': 2, 'b': 5}, {'c': 13}),
 ({'a': 2, 'b': 5}, {'c': 14}),
 ({'a': 2, 'b': 5}, {'c': 15}),
 ({'a': 3, 'b': 6}, {'c': 11}),
 ({'a': 3, 'b': 6}, {'c': 12}),
 ({'a': 3, 'b': 6}, {'c': 13}),
 ({'a': 3, 'b': 6}, {'c': 14}),
 ({'a': 3, 'b': 6}, {'c': 15})]

In [12]:
# Hard-coded copy of the grouped combinations so this cell runs on its own.
data = [
    ({'a': 1, 'b': 4}, {'c': 11}),
    ({'a': 1, 'b': 4}, {'c': 12}),
    ({'a': 1, 'b': 4}, {'c': 13}),
    ({'a': 1, 'b': 4}, {'c': 14}),
    ({'a': 1, 'b': 4}, {'c': 15}),
    ({'a': 2, 'b': 5}, {'c': 11}),
    ({'a': 2, 'b': 5}, {'c': 12}),
    ({'a': 2, 'b': 5}, {'c': 13}),
    ({'a': 2, 'b': 5}, {'c': 14}),
    ({'a': 2, 'b': 5}, {'c': 15}),
    ({'a': 3, 'b': 6}, {'c': 11}),
    ({'a': 3, 'b': 6}, {'c': 12}),
    ({'a': 3, 'b': 6}, {'c': 13}),
    ({'a': 3, 'b': 6}, {'c': 14}),
    ({'a': 3, 'b': 6}, {'c': 15}),
]


def merge_dicts(dicts):
    """Merge any number of dicts into a single new dict.

    Args:
        dicts: Iterable of dicts, e.g. one tuple produced by
            ``itertools.product`` over several groups.

    Returns:
        A new dict holding every key; when a key occurs more than once the
        value from the later dict wins (same rule as ``{**a, **b}``).
    """
    merged = {}
    for part in dicts:
        merged.update(part)
    return merged


# Works for tuples of any length, not only pairs, so the three-group
# combinations built elsewhere in this notebook can be flattened as well.
merged_data = [merge_dicts(parts) for parts in data]
print(merged_data)


[{'a': 1, 'b': 4, 'c': 11}, {'a': 1, 'b': 4, 'c': 12}, {'a': 1, 'b': 4, 'c': 13}, {'a': 1, 'b': 4, 'c': 14}, {'a': 1, 'b': 4, 'c': 15}, {'a': 2, 'b': 5, 'c': 11}, {'a': 2, 'b': 5, 'c': 12}, {'a': 2, 'b': 5, 'c': 13}, {'a': 2, 'b': 5, 'c': 14}, {'a': 2, 'b': 5, 'c': 15}, {'a': 3, 'b': 6, 'c': 11}, {'a': 3, 'b': 6, 'c': 12}, {'a': 3, 'b': 6, 'c': 13}, {'a': 3, 'b': 6, 'c': 14}, {'a': 3, 'b': 6, 'c': 15}]


In [33]:
# NOTE(review): `list_of_dicts` is not defined in any visible cell --
# presumably it came from a deleted or out-of-order cell; confirm and define
# it explicitly so the notebook survives Restart & Run All.
list_of_dicts

[{'a': 1, 'b': 4}, {'a': 2, 'b': 5}, {'a': 3, 'b': 6}]