In [35]:
import numpy as np
import re
from pymol import cmd

In [25]:
class NW():
    """Needleman-Wunsch global alignment of two sequences with a linear gap penalty.

    Rows of the score matrix correspond to positions in ``seqa`` and columns to
    positions in ``seqb``; row 0 / column 0 represent the empty prefix.

    Parameters
    ----------
    seqa, seqb : str
        The two sequences to align.
    match : number
        Score added when the two residues are equal.
    mismatch : number
        Score added when the two residues differ.
    gap : number
        Score added for every gap position (usually negative).
    """

    def __init__(
            self, seqa:str, seqb:str, 
            match, mismatch, gap
    ):
        self.seqa = seqa
        self.seqb = seqb
        self.match_score = match
        self.mismatch_score = mismatch
        self.gap_score = gap
        # Interior cells start as +inf (i.e. "not scored yet"); the first row and
        # column are the cumulative gap penalties of aligning a prefix to nothing.
        self.matrix = np.full((len(seqa) + 1, len(seqb) + 1), np.inf)
        self.matrix[:, 0] = [self.gap_score * i for i in range(self.matrix.shape[0])]
        self.matrix[0, :] = [self.gap_score * j for j in range(self.matrix.shape[1])]

    def _scoring(self):
        """Fill the interior of the score matrix and return it.

        Each cell is the best of three moves:
          * diagonal (upper-left) + match/mismatch score,
          * left  + gap score (gap inserted in ``seqa``),
          * upper + gap score (gap inserted in ``seqb``).
        """
        n_rows, n_cols = self.matrix.shape
        for row_idx in range(1, n_rows):
            for col_idx in range(1, n_cols):
                if self.seqa[row_idx - 1] == self.seqb[col_idx - 1]:
                    substitution = self.match_score
                else:
                    substitution = self.mismatch_score

                diagonal = self.matrix[row_idx - 1, col_idx - 1] + substitution
                left = self.matrix[row_idx, col_idx - 1] + self.gap_score
                upper = self.matrix[row_idx - 1, col_idx] + self.gap_score

                self.matrix[row_idx, col_idx] = max(diagonal, left, upper)

        return self.matrix

    def _traceback(self,):
        """Walk back from the bottom-right cell to the origin.

        Move preference on ties is diagonal, then left, then up. Returns two
        equally long lists of single characters, with '-' marking a gap.
        """
        seqa_output, seqb_output = [], []
        row = self.matrix.shape[0] - 1
        col = self.matrix.shape[1] - 1

        while row > 0 or col > 0:
            current_score = self.matrix[row, col]

            if row > 0 and col > 0:
                match_val = self.match_score if self.seqa[row-1] == self.seqb[col-1] else self.mismatch_score
                if current_score == self.matrix[row - 1, col - 1] + match_val:
                    seqa_output.append(self.seqa[row - 1])
                    seqb_output.append(self.seqb[col - 1])
                    row -= 1
                    col -= 1
                    continue

            # On the first row only a left move is possible, so take it without
            # comparing scores: the boundary holds gap*i, which is not always
            # bit-identical to the running sum gap*(i-1)+gap for float gaps.
            if col > 0 and (row == 0 or current_score == self.matrix[row, col - 1] + self.gap_score):
                seqa_output.append('-')
                seqb_output.append(self.seqb[col - 1])
                col -= 1
            else:
                # Remaining option is the upward move (row > 0 is guaranteed here).
                # Taking it unconditionally ensures the loop always makes progress.
                seqa_output.append(self.seqa[row - 1])
                seqb_output.append('-')
                row -= 1

        seqa_output.reverse()
        seqb_output.reverse()
        return seqa_output, seqb_output

    def align(self):
        """Score the matrix, trace back, and return the two aligned sequences as lists."""
        self._scoring()
        seqa_output, seqb_output = self._traceback()
        return seqa_output, seqb_output

In [21]:
# Read the FASTA-style file and capture the line following each '>' header.
with open('F:/coxpdb.txt', 'r') as f:
    content = f.read()
    sequences = re.findall('>.*\n(.*)', content)

# Walk the first two records column by column and keep only the columns in
# which neither record has a gap character.
kept_first, kept_second = [], []
for i in range(len(sequences[0])):
    if sequences[0][i] == '-' or sequences[1][i] == '-':
        continue
    kept_first.append(sequences[0][i])
    kept_second.append(sequences[1][i])

seq1 = ''.join(kept_first)
seq2 = ''.join(kept_second)

In [28]:
# Globally align the two gap-stripped sequences and show the aligned
# residue lists returned by NW.align().
nw = NW(
    seq1,
    seq2,
    match=1,
    mismatch=-1,
    gap=-2,
)
m, n = nw.align()
print(m, n)

['-', '-', '-', '-', '-', '-', 'N', 'P', 'C', 'C', 'S', 'H', 'P', 'C', 'Q', 'N', 'R', 'G', 'V', 'C', 'M', 'S', 'V', 'G', 'F', 'D', 'Q', 'Y', 'K', 'C', 'D', 'C', 'T', 'R', 'T', 'G', 'F', 'Y', 'G', 'E', 'N', 'C', 'S', 'T', 'P', 'E', 'F', 'L', 'T', 'R', 'I', 'K', 'L', 'F', 'L', 'K', 'P', 'T', 'P', 'N', 'T', 'V', 'H', 'Y', 'I', 'L', 'T', 'H', 'F', 'K', 'G', 'F', 'W', 'N', 'V', 'V', 'N', 'N', 'I', 'P', 'F', 'L', 'R', 'N', 'A', 'I', 'M', 'S', 'Y', 'V', 'L', 'T', 'S', 'R', 'S', 'H', 'L', 'I', 'D', 'S', 'P', 'P', 'T', 'Y', 'N', 'A', 'D', 'Y', 'G', 'Y', 'K', 'S', 'W', 'E', 'A', 'F', 'S', 'N', 'L', 'S', 'Y', 'Y', 'T', 'R', 'A', 'L', 'P', 'P', 'V', 'P', 'D', 'D', 'C', 'P', 'T', 'P', 'L', 'G', 'V', 'K', 'G', 'K', 'K', 'Q', 'L', 'P', 'D', 'S', 'N', 'E', 'I', 'V', 'E', 'K', 'L', 'L', 'L', 'R', 'R', 'K', 'F', 'I', 'P', 'D', 'P', 'Q', 'G', 'S', 'N', 'M', 'M', 'F', 'A', 'F', 'F', 'A', 'Q', 'H', 'F', 'T', 'H', 'Q', 'F', 'F', 'K', 'T', 'D', 'H', 'K', 'R', 'G', 'P', 'A', 'F', 'T', 'N', 'G', 'L', 'G', 'H',

In [32]:
# Collapse the first list returned by nw.align() into a single string.
# `seperator` is reused by the following cell.
seperator = ''
seperator.join(m)

'------NPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSVP'

In [33]:
# Collapse the second list returned by nw.align() into a single string
# (relies on `seperator` defined in the previous cell).
seperator.join(n)

'GAPTPVNPCCYYPCQHQGICVRFGLDRYQCDCTRTGYSGPNCTIPGLWTWLRNSLRPSPSFTHFLLTHGRWFWEFV-NATFIREMLMRLVLTVRSNLIPSPPTYNSAHDYISWESFSNVSYYTRILPSVPKDCPTPMGTKGKKQLPDAQLLARRFLLRRKFIPDPQGTNLMFAFFAQHFTHQFFKTSGKMGPGFTKALGHGVDLGHIYGDNLERQYQLRLFKDGKLKYQVLDGEMYPPSVEEAPVLMHYPRGIPPQSQMAVGQEVFGLLPGLMLYATLWLREHNRVCDLLKAEHPTWGDEQLFQTTRLILIGETIKIVIEEYVQQLSGYFLQLKFDPELLFGVQFQYRNRIAMEFNHLYHWHPLMPDSFKVGSQEYSYEQFLFNTSMLVDYGVEALVDAFSRQIAGRIGGGRNMDHHILHVAVDVIRESREMRLQPFNEYRKRFGMKPYTSFQELVGEKEMAAELEELYGDIDALEFYPGLLLEKCHPNSIFGESMIEIGAPFSLKGLLGNPICSPEYWKPSTFGGEVGFNIVKTATLKKLVCLNTKTCP----YV-'

In [36]:
# Load the two structures from local files when available; otherwise fetch
# them, strip everything except the protein chains of interest, and save the
# cleaned structures locally for the next run.
try:
    cmd.load('cox1.pdb')
    cmd.load('cox2.pdb')
    print('Load models from local files.')
except Exception:
    # `except Exception` (not a bare except) so that KeyboardInterrupt and
    # SystemExit still propagate instead of triggering a download.
    cmd.fetch('6y3c')
    cmd.fetch('5ikr')
    cmd.set_name('5ikr', 'cox2')
    cmd.set_name('6y3c', 'cox1')
    # Inner double quotes keep these f-strings valid before Python 3.12;
    # the printed text is unchanged.
    print(f'Chain labels on Cox2{cmd.get_chains("cox2")}')
    cmd.remove('organic')
    cmd.remove('inorganic')
    cmd.remove('solvent')
    cmd.remove('cox2 and chain B')
    cmd.remove('hydrogens')
    print(f'Chain labels on Cox2 (monomer){cmd.get_chains("cox2")}')
    print(f'Chain labels on Cox1{cmd.get_chains("cox1")}')
    cmd.save('cox1.pdb', 'cox1')
    cmd.save('cox2.pdb', 'cox2')

# Deliberately outside a `finally:` block: if both the local load and the
# fetch fallback fail, the original error should surface rather than being
# masked by a second failure from get_model.
c1 = cmd.get_model('cox1')
c2 = cmd.get_model('cox2')

Load models from local files.


In [50]:
class lDDT():

    def __init__(self, objects:list, obj_names:list, cutoff:int|float, thresholds:list):
        self.all_objects = objects
        self.all_names = obj_names
        for i, o in enumerate(objects):
            obj_name = f'obj{i}'
            setattr(self, obj_name , o)

        self.cutoff = cutoff
        self.thresholds = thresholds

    @property
    def fasta(self):
        all_fasta = []
        for name in self.all_names:
            obj_fasta = cmd.get_fastastr(f'{name}')
            obj_fasta = re.findall(r'\n(\w*)', obj_fasta)
            seperator = ''
            obj_fasta = seperator.join(obj_fasta)
            all_fasta.append(obj_fasta)
        return all_fasta
    
    def read_fasta(self):
        with open('F:/coxpdb.txt', 'r') as f:
            content = f.read()
            sequences = re.findall('>.*\n(.*)', content)

        seq1 = ''
        seq2 = ''
        for i in range(len(sequences[0])):
            if sequences[0][i] != '-' and sequences[1][i] !='-':
                seq1 += sequences[0][i]
                seq2 += sequences[1][i]

        return seq1, seq2

    def sequence_alignment(self, method='NW', match=1, mismatch=-1, gap=-2):
        if method == 'NW':
            nw = NW(*self.fasta,match=match, mismatch=mismatch, gap=gap)
            seq1_,seq2_ = nw.align()
            seperator = ''
            seq1_ = seperator.join(seq1_)
            seq2_ = seperator.join(seq2_)
            return seq1_, seq2_
        
    def res_selection(self):
        pass


# Build the helper for the two loaded structures and display their sequences.
l = lDDT(
    [c1, c2],
    ['cox1', 'cox2'],
    cutoff=15,
    thresholds=[0.5, 1.0, 2.0, 4.0],
)
l.fasta
            



['GAPTPVNPCCYYPCQHQGICVRFGLDRYQCDCTRTGYSGPNCTIPGLWTWLRNSLRPSPSFTHFLLTHGRWFWEFVNATFIREMLMRLVLTVRSNLIPSPPTYNSAHDYISWESFSNVSYYTRILPSVPKDCPTPMGTKGKKQLPDAQLLARRFLLRRKFIPDPQGTNLMFAFFAQHFTHQFFKTSGKMGPGFTKALGHGVDLGHIYGDNLERQYQLRLFKDGKLKYQVLDGEMYPPSVEEAPVLMHYPRGIPPQSQMAVGQEVFGLLPGLMLYATLWLREHNRVCDLLKAEHPTWGDEQLFQTTRLILIGETIKIVIEEYVQQLSGYFLQLKFDPELLFGVQFQYRNRIAMEFNHLYHWHPLMPDSFKVGSQEYSYEQFLFNTSMLVDYGVEALVDAFSRQIAGRIGGGRNMDHHILHVAVDVIRESREMRLQPFNEYRKRFGMKPYTSFQELVGEKEMAAELEELYGDIDALEFYPGLLLEKCHPNSIFGESMIEIGAPFSLKGLLGNPICSPEYWKPSTFGGEVGFNIVKTATLKKLVCLNTKTCPYVSFRVPD',
 'NPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRK

In [52]:
# Align the two object sequences with the default NW settings
# (match=1, mismatch=-1, gap=-2) and display the aligned strings.
l.sequence_alignment()

('GAPTPVNPCCYYPCQHQGICVRFGLDRYQCDCTRTGYSGPNCTIPGLWTWLRNSLRPSPSFTHFLLTHGRWFWEFV-NATFIREMLMRLVLTVRSNLIPSPPTYNSAHDYISWESFSNVSYYTRILPSVPKDCPTPMGTKGKKQLPDAQLLARRFLLRRKFIPDPQGTNLMFAFFAQHFTHQFFKTSGKMGPGFTKALGHGVDLGHIYGDNLERQYQLRLFKDGKLKYQVLDGEMYPPSVEEAPVLMHYPRGIPPQSQMAVGQEVFGLLPGLMLYATLWLREHNRVCDLLKAEHPTWGDEQLFQTTRLILIGETIKIVIEEYVQQLSGYFLQLKFDPELLFGVQFQYRNRIAMEFNHLYHWHPLMPDSFKVGSQEYSYEQFLFNTSMLVDYGVEALVDAFSRQIAGRIGGGRNMDHHILHVAVDVIRESREMRLQPFNEYRKRFGMKPYTSFQELVGEKEMAAELEELYGDIDALEFYPGLLLEKCHPNSIFGESMIEIGAPFSLKGLLGNPICSPEYWKPSTFGGEVGFNIVKTATLKKLVCLNTKTCPYVSFRVPD',
 '------NPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQ

In [None]:
# Sample FASTA text: a '>cox1_A' header followed by newline-wrapped sequence lines.
# Presumably copied from cmd.get_fastastr output to try out the parsing regex — TODO confirm.
t = '>cox1_A\nGAPTPVNPCCYYPCQHQGICVRFGLDRYQCDCTRTGYSGPNCTIPGLWTWLRNSLRPSPSFTHFLLTHGR\nWFWEFVNATFIREMLMRLVLTVRSNLIPSPPTYNSAHDYISWESFSNVSYYTRILPSVPKDCPTPMGTKG\nKKQLPDAQLLARRFLLRRKFIPDPQGTNLMFAFFAQHFTHQFFKTSGKMGPGFTKALGHGVDLGHIYGDN\nLERQYQLRLFKDGKLKYQVLDGEMYPPSVEEAPVLMHYPRGIPPQSQMAVGQEVFGLLPGLMLYATLWLR\nEHNRVCDLLKAEHPTWGDEQLFQTTRLILIGETIKIVIEEYVQQLSGYFLQLKFDPELLFGVQFQYRNRI\nAMEFNHLYHWHPLMPDSFKVGSQEYSYEQFLFNTSMLVDYGVEALVDAFSRQIAGRIGGGRNMDHHILHV\nAVDVIRESREMRLQPFNEYRKRFGMKPYTSFQELVGEKEMAAELEELYGDIDALEFYPGLLLEKCHPNSI\nFGESMIEIGAPFSLKGLLGNPICSPEYWKPSTFGGEVGFNIVKTATLKKLVCLNTKTCPYVSFRVPD\n'


In [None]:
class lDDT():
    """Wraps one or more structure models and exposes per-atom views of them.

    NOTE: this redefinition shadows the earlier ``lDDT`` class in this notebook,
    which takes ``(objects, obj_names, cutoff, thresholds)`` instead.

    Parameters
    ----------
    *obj
        Structure models; each must expose an ``atom`` sequence. Every model is
        also stored as ``self.obj0``, ``self.obj1``, ...
        (assumed to be what ``cmd.get_model`` returns — TODO confirm)
    cutoff
        Stored as ``self.cutoff``; not used by the members defined here.
    thresholds
        Stored as ``self.thresholds``; not used by the members defined here.
    """

    # variable interpretation:
    # o, obj: pdb object

    # Standard three-letter to one-letter amino-acid codes; anything else maps to 'X'.
    _THREE_TO_ONE = {
        'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C',
        'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
        'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
        'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V',
    }

    def __init__(self, *obj, cutoff, thresholds):
        self.all_objects = obj

        for i, o in enumerate(obj):
            setattr(self, f'obj{i}', o)

        self.cutoff = cutoff
        self.thresholds = thresholds

    @property
    def fasta(self):
        """Return one one-letter sequence string per model, in atom order.

        A new residue is emitted whenever the (chain, resi, resn) of an atom
        differs from that of the previous atom.
        Assumes each atom exposes ``chain``, ``resi`` and ``resn`` attributes
        — TODO confirm against the model type actually passed in.
        """
        all_fastas = []
        for model in self.all_objects:
            letters = []
            previous_residue = None
            for atom in model.atom:
                residue = (atom.chain, atom.resi, atom.resn)
                if residue != previous_residue:
                    letters.append(self._THREE_TO_ONE.get(atom.resn, 'X'))
                    previous_residue = residue
            all_fastas.append(''.join(letters))
        return all_fastas

    @property
    def element_names(self):
        """Return, for each model, the list of ``atom.name`` values in atom order."""
        return [[atom.name for atom in model.atom] for model in self.all_objects]

    @property
    def coordinates(self):
        """Return, for each model, the list of ``atom.coord`` values in atom order."""
        return [[atom.coord for atom in model.atom] for model in self.all_objects]