In [49]:
from pymol import cmd
import pandas as pd
import torch
import re
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
try:
    cmd.load('cox1.pdb')
    cmd.load('cox2.pdb')
    print('Load models from local files.')
except:
    cmd.fetch('6y3c')
    cmd.fetch('5ikr')
    cmd.set_name('5ikr', 'cox2')
    cmd.set_name('6y3c', 'cox1')
    print(f'Chain labels on Cox2{cmd.get_chains('cox2')}')
    cmd.remove('organic')
    cmd.remove('inorganic')
    cmd.remove('solvent')
    cmd.remove('cox2 and chain B')
    cmd.remove('hydrogens')
    print(f'Chain labels on Cox2 (monomer){cmd.get_chains('cox2')}')
    print(f'Chain labels on Cox1{cmd.get_chains('cox1')}')
    cmd.save('cox1.pdb', 'cox1')
    cmd.save('cox2.pdb', 'cox2')
finally:
    c1 = cmd.get_model('cox1')
    c2 = cmd.get_model('cox2')

Load models from local files.


In [6]:
import numpy as np
def get_coord(obj):
    name_list = []
    coord_list = []
    for i in range(len(obj.atom)):
        element_name = obj.atom[i].name
        coordinate = obj.atom[i].coord
        name_list.append(element_name)
        coord_list.append(coordinate)

    df = pd.DataFrame(
        {'n': name_list, 
        'c': coord_list}
    )
    return df

df_c1 = get_coord(c1)
df_c1


Unnamed: 0,n,c
0,N,"[1.9709999561309814, -83.97200012207031, 1.736..."
1,CA,"[1.625, -83.79100036621094, 3.134000062942505]"
2,C,"[1.8530000448226929, -82.3740005493164, 3.6219..."
3,O,"[2.9739999771118164, -81.86699676513672, 3.578..."
4,N,"[0.7879999876022339, -81.73600006103516, 4.097..."
...,...,...
4513,O,"[-49.4119987487793, -31.339000701904297, 19.80..."
4514,CB,"[-47.69300079345703, -33.000999450683594, 18.4..."
4515,CG,"[-46.624000549316406, -32.9119987487793, 19.57..."
4516,OD1,"[-46.94300079345703, -32.50699996948242, 20.71..."


In [None]:
class lDDT():

    # variable interpretation:
    # o, obj: pdb object

    def __init__(self, *obj, cutoff, thresholds):
        self.all_objects = obj

        for i, o in enumerate(obj):
            obj_name = f'obj{i}'
            setattr(self, obj_name , o)

        self.cutoff = cutoff
        self.thresholds = thresholds

    @property
    def element_names(self):
        all_names = []
        for object in self.all_objects:
            name_list = [object.atom[i].name for i in range(len(object.atom))]
            all_names.append(name_list)
        return all_names
    
    @property
    def coordinates(self):
        all_coords = []
        for object in self.all_objects:
            coord_list = [object.atom[i].coord for i in range(len(object.atom))]
            all_coords.append(coord_list)
        return all_coords
    
    def save(self, i):
        if i >= len(self.all_objects):
            raise ValueError('expect i <= number of objects')
        element_name = self.element_names[i]
        coordinate = self.coordinates[i]
        df = pd.DataFrame(
            {
                'n': element_name, 
                'c': coordinate
            }
        )
        return df
           
    # Method uses CUDA to accelerate computation
    def get_dist(self, residue_alignment_file, ):
        
        tensor_coor1 = torch.tensor(self.coordinates[0], device=device)
        tensor_coor2 = torch.tensor(self.coordinates[1], device=device)
        dist_obj1 = torch.cdist(tensor_coor1, tensor_coor1)
        dist_obj2 = torch.cdist(tensor_coor2, tensor_coor2)
        mask = (dist_obj1<self.cutoff)
        
l = lDDT(c1, c2, cutoff=15, thresholds=[0.5, 1.0, 2.0, 4.0])

AttributeError: 'Indexed' object has no attribute 'get_fastastr'

In [None]:
fasta1 = cmd.get_fastastr('cox1')
re.findall()

'>cox1_A\nGAPTPVNPCCYYPCQHQGICVRFGLDRYQCDCTRTGYSGPNCTIPGLWTWLRNSLRPSPSFTHFLLTHGR\nWFWEFVNATFIREMLMRLVLTVRSNLIPSPPTYNSAHDYISWESFSNVSYYTRILPSVPKDCPTPMGTKG\nKKQLPDAQLLARRFLLRRKFIPDPQGTNLMFAFFAQHFTHQFFKTSGKMGPGFTKALGHGVDLGHIYGDN\nLERQYQLRLFKDGKLKYQVLDGEMYPPSVEEAPVLMHYPRGIPPQSQMAVGQEVFGLLPGLMLYATLWLR\nEHNRVCDLLKAEHPTWGDEQLFQTTRLILIGETIKIVIEEYVQQLSGYFLQLKFDPELLFGVQFQYRNRI\nAMEFNHLYHWHPLMPDSFKVGSQEYSYEQFLFNTSMLVDYGVEALVDAFSRQIAGRIGGGRNMDHHILHV\nAVDVIRESREMRLQPFNEYRKRFGMKPYTSFQELVGEKEMAAELEELYGDIDALEFYPGLLLEKCHPNSI\nFGESMIEIGAPFSLKGLLGNPICSPEYWKPSTFGGEVGFNIVKTATLKKLVCLNTKTCPYVSFRVPD\n'

In [46]:
cmd.get_fastastr('cox2').strip()

'>cox2_A\nNPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVV\nNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLP\nDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQR\nKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRV\nCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFN\nTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASI\nDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETM\nVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSVP'

In [32]:
cox1 = l.save(10)
cox1

ValueError: expect i <= number of objects

In [38]:
coor1 = torch.tensor(l.coordinates[0], device=device)
coor2 = torch.tensor(l.coordinates[1], device=device)
cd1 = torch.cdist(coor1, coor1)
cd2 = torch.cdist(coor2, coor2)

In [39]:
diff = torch.abs(cd1 - cd2)
diff.shape

RuntimeError: The size of tensor a (4518) must match the size of tensor b (4470) at non-singleton dimension 1

In [37]:
sum(sum(cd1<0))

tensor(0)

In [31]:
coor1.shape

torch.Size([4518, 3])

In [16]:
np.pow([1, 2, 3], 2)

array([1, 4, 9])

In [7]:
df_c1.to_parquet('cox1.parquet', index=False)

In [6]:
cox1 = pd.read_parquet('cox1.parquet')
cox1['c'].values

array([array([  1.97099996, -83.97200012,   1.73699999]),
       array([  1.625     , -83.79100037,   3.13400006]),
       array([  1.85300004, -82.37400055,   3.62199998]), ...,
       array([-46.62400055, -32.91199875,  19.5739994 ]),
       array([-46.94300079, -32.50699997,  20.71100044]),
       array([-45.45800018, -33.2480011 ,  19.27599907])], dtype=object)

In [8]:
import numpy as np
np.stack(cox1['c'].values)

array([[  1.97099996, -83.97200012,   1.73699999],
       [  1.625     , -83.79100037,   3.13400006],
       [  1.85300004, -82.37400055,   3.62199998],
       ...,
       [-46.62400055, -32.91199875,  19.5739994 ],
       [-46.94300079, -32.50699997,  20.71100044],
       [-45.45800018, -33.2480011 ,  19.27599907]])

In [9]:
torch.tensor(np.stack(cox1['c'].values))

tensor([[  1.9710, -83.9720,   1.7370],
        [  1.6250, -83.7910,   3.1340],
        [  1.8530, -82.3740,   3.6220],
        ...,
        [-46.6240, -32.9120,  19.5740],
        [-46.9430, -32.5070,  20.7110],
        [-45.4580, -33.2480,  19.2760]], dtype=torch.float64)

In [51]:
cmd.select('cox2_maskseq', 'cox2 and resi 1-89 and resi 91-586 and backbone')
cmd.select('cox1_maskseq', 'cox1 and resi 14-599 and backbone')

2228

In [54]:
#select cox2_maskseq, cox2 and (resi -104 | resi 106-) and name CA
cmd.select('cox2_maskseq', 'cox2 and (resi -104 | resi 106-) and name CA')

549

In [56]:
#select cox1_maskseq, (cox1 and resi 34-77 | resi 79-583) and name CA
cmd.select('cox1_maskseq', 'cox1 and (resi 34-77 | resi 79-583) and name CA')

549