In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
class Protein:
    '''
    Protein class that takes a sequence and returns a SPARC prediction, optionally with a PDB file for mutational improvement of melting point. Computes features for the sequence and stores them as attributes of the object. Full list of features can be accessed by calling the object's .features attribute.
    
    Initial attributes:
    seq: str, protein sequence (stored upper-cased as .sequence)
    PDB: str, name of PDB file in ./data/pdbs directory, optional for SPARC but required for .mutate() method
    dpath: str, path to directory where temp files will be stored, defaults to ./data/fastas
    S4path: str, path to directory where S4pred is stored, required for SPARC, defaults to ./data/s4pred
    Note: both path defaults are made absolute when the class is defined, i.e. relative to the working directory at that moment.
    
    Methods:
    mutate( ): performs mutational improvement of melting point, requires PDB file (raises ValueError without one). Returns [mutatedmp, mutatedseq, mutatedmpdiff, mutationlist] and adds them to the object under the same names
    mutreduce( ): reduces amount of mutations while still maintaining improved melting point. Requires mutate() method to be used first (raises AttributeError otherwise). Returns [mut_mp_reduced, mut_seq_reduced, mutationlist_reduced], adds them to the object under the same names and also stores .removed_mutations
    '''
    def __init__(self, seq:str, PDB = None, dpath = os.path.abspath('./data/fastas'), S4path = os.path.abspath('./data/s4pred')):
        self.PDB = PDB
        # Project import kept local to the constructor, as in the original notebook.
        from SPARC import SPARC
        self.sequence = seq.upper()
        result = SPARC(self.sequence, 'placeholder', dpath, S4path)
        self.SPARC = result[0][0]
        self.features = result[1]
        self.features_scaled = result[2]
        self.features_pca = result[3]
        # Expose every entry of .features as an attribute named after its index label.
        # NOTE(review): a label equal to an existing attribute or method name would overwrite it — confirm the feature names.
        for i in self.features.index:
            setattr(self, i, self.features[i])
    def __str__(self):
        return f"Protein with predicted melting point of {str(self.SPARC).strip('[]')} °C"
    @staticmethod
    def _point_mutations(reference, variant):
        # One label per differing position: '<reference residue><1-based position><variant residue>'.
        # zip() stops at the shorter input, so sequences of different length are compared
        # over their common prefix instead of raising IndexError.
        return [f'{ref}{pos}{var}' for pos, (ref, var) in enumerate(zip(reference, variant), start=1) if ref != var]
    def mutate(self,pqr_output_path='./data/pqrs',iterations = 100, threshhold = 1000, pdb_path = './data/pdbs'):
        '''
        Run prot_mut on the object's PDB file and store the outcome on the object.

        pqr_output_path, iterations, threshhold: forwarded unchanged to prot_mut
        pdb_path: str, directory that contains the PDB file, defaults to ./data/pdbs
        Returns [mutatedmp, mutatedseq, mutatedmpdiff, mutationlist].
        Raises ValueError if the object was created without a PDB file.
        '''
        if self.PDB is None:
            raise ValueError('mutate() requires a PDB file: create the Protein with PDB=<file name>')
        from function_mut import prot_mut
        output_mut = prot_mut(pdb_path = pdb_path, pdb_file=self.PDB,pqr_output_path=pqr_output_path,iterations = iterations, threshhold = threshhold)
        # [0][1][0] is a one-element array in the notebook output; its single value is kept.
        self.mutatedmp = output_mut[0][1][0][0]
        # NOTE(review): the recorded mutate() output shows this as a list of one-letter residues, not a str.
        self.mutatedseq = output_mut[1][0]
        self.mutatedmpdiff = self.mutatedmp - self.SPARC
        self.mutationlist = self._point_mutations(self.sequence, self.mutatedseq)
        return [self.mutatedmp, self.mutatedseq, self.mutatedmpdiff, self.mutationlist]
    def mutreduce(self,name = 'proteinxyz'):
        '''
        Run mutation_decreaser on the result of mutate() and store the outcome on the object.

        name: str, forwarded unchanged to mutation_decreaser
        Returns [mut_mp_reduced, mut_seq_reduced, mutationlist_reduced]; .removed_mutations is stored as well.
        Raises AttributeError if .mutatedmp or .mutatedseq has not been set yet.
        '''
        missing = [attr for attr in ('mutatedmp', 'mutatedseq') if not hasattr(self, attr)]
        if missing:
            raise AttributeError(f"mutreduce() needs {', '.join(missing)}; call mutate() first")
        from function_mut import mutation_decreaser
        outputdecreaser = mutation_decreaser(mut_temp = self.mutatedmp, wt_temp = self.SPARC, wt_protein = self.sequence, mut_protein = self.mutatedseq, name = name)
        self.mut_mp_reduced = outputdecreaser[2]
        self.mut_seq_reduced = outputdecreaser[0]
        self.mutationlist_reduced = self._point_mutations(self.sequence, self.mut_seq_reduced)
        # Positions where the reduced sequence no longer carries the residue introduced by mutate().
        self.removed_mutations = [f'{res}{pos}' for pos, (res, kept) in enumerate(zip(self.mutatedseq, self.mut_seq_reduced), start=1) if res != kept]
        return [self.mut_mp_reduced, self.mut_seq_reduced, self.mutationlist_reduced]

In [9]:
# Build a Protein from a sequence plus the PDB file name that Protein.mutate() later hands to prot_mut.
# NOTE(review): the recorded run of this cell ended in a pandas circular-import AttributeError — re-run on a fresh kernel.
qwer = Protein(seq = 'MSKIRVSLLGSTGMVGQKMVRLLENHPYIELAKVSASPNNTGKRYIDAVRWVENSEIPEYVSDMNLVSSDPNDHRDVDFVLSALPSEIAEGIETRLVSNGINVISNASPLRMRSDIPLINPEINYEHLYMLEDRDTKYVKNPNCTTTIMSMPLFDIINSDYERMYLTTMQAVSGAGFSGLPYMAINNNIIPYINGEEEKIPAEISKIFGYRNDDKIVNRNIKMSVTTVRVPVAVDHAGVLYINIKNFDIENFIKDIRNFKPLSRFSGLTMAPRQPIIIHEKNDAPQVHDVSGMEIHIGRLSYNDDTLRMYILGDNLIRGAAGITLLTLELMHAMKLDN', PDB = 'AF-Q6KZA0-F1.pdb')

AttributeError: partially initialized module 'pandas' has no attribute '_pandas_parser_CAPI' (most likely due to a circular import)

In [None]:
# Smoke run of mutate() with iterations=1 and threshhold=1; the returned list is displayed as the cell output.
qwer.mutate(iterations=1,threshhold=1)

Random mutation finished


[74.81022816078658,
 ['M',
  'S',
  'K',
  'I',
  'R',
  'V',
  'S',
  'L',
  'L',
  'G',
  'S',
  'T',
  'G',
  'M',
  'V',
  'G',
  'Q',
  'K',
  'M',
  'V',
  'R',
  'L',
  'L',
  'E',
  'N',
  'H',
  'P',
  'Y',
  'I',
  'E',
  'L',
  'A',
  'K',
  'V',
  'S',
  'A',
  'S',
  'P',
  'N',
  'N',
  'T',
  'G',
  'K',
  'R',
  'Y',
  'I',
  'D',
  'A',
  'V',
  'R',
  'W',
  'V',
  'E',
  'N',
  'S',
  'E',
  'I',
  'P',
  'E',
  'Y',
  'V',
  'S',
  'D',
  'M',
  'N',
  'L',
  'V',
  'S',
  'S',
  'D',
  'P',
  'N',
  'D',
  'H',
  'R',
  'D',
  'V',
  'D',
  'F',
  'V',
  'L',
  'S',
  'A',
  'L',
  'P',
  'S',
  'E',
  'I',
  'A',
  'E',
  'G',
  'I',
  'E',
  'T',
  'R',
  'L',
  'V',
  'S',
  'N',
  'G',
  'I',
  'N',
  'V',
  'I',
  'S',
  'N',
  'A',
  'S',
  'P',
  'L',
  'R',
  'M',
  'R',
  'S',
  'D',
  'I',
  'P',
  'L',
  'I',
  'N',
  'P',
  'E',
  'I',
  'N',
  'Y',
  'E',
  'H',
  'L',
  'Y',
  'M',
  'L',
  'E',
  'D',
  'R',
  'D',
  'T',
  'K',
  'Y',
  'V',
  'K',


In [None]:
# Compare the SPARC value stored by __init__ with the value stored by mutate().
print(qwer.SPARC)
print(qwer.mutatedmp)

49.3233842458866
49.3233842458866


In [None]:
class test:
    '''Scratch class for checking how a class-level default and per-instance attributes interact.'''
    # Class-level default; tzui() shadows it on the instance it is called on.
    test3 = None
    def __init__(self,name):
        self.label = name
    def __str__(self):
        # The original returned f'{self}', which calls __str__ again and never terminates.
        return f'{self.label}'
    def tzui(self,test4):
        '''Store test4 as this instance's test3.'''
        self.test3 = test4

In [None]:
# Exercise the scratch class: tzui() was never called, so the class-level default of test3 is printed.
qwert = test('qwer')
qwert.label  # bare expression that is not the last line of the cell, so nothing is displayed
print(qwert.test3)

None


In [None]:
from function_mut import mutation_decreaser

In [None]:
# .sequence is the upper-cased input sequence stored by Protein.__init__.
print(qwer.sequence)

MSKIRVSLLGSTGMVGQKMVRLLENHPYIELAKVSASPNNTGKRYIDAVRWVENSEIPEYVSDMNLVSSDPNDHRDVDFVLSALPSEIAEGIETRLVSNGINVISNASPLRMRSDIPLINPEINYEHLYMLEDRDTKYVKNPNCTTTIMSMPLFDIINSDYERMYLTTMQAVSGAGFSGLPYMAINNNIIPYINGEEEKIPAEISKIFGYRNDDKIVNRNIKMSVTTVRVPVAVDHAGVLYINIKNFDIENFIKDIRNFKPLSRFSGLTMAPRQPIIIHEKNDAPQVHDVSGMEIHIGRLSYNDDTLRMYILGDNLIRGAAGITLLTLELMHAMKLDN


In [None]:
# Manually overwrite the mutated sequence so mutation_decreaser can be called without running mutate().
# NOTE(review): this string is 143 characters long per the len() cell below — confirm that a length different from qwer.sequence is intended.
qwer.mutatedseq = 'MQVLAKENIKLNQTVSSKEEAIKLAGQTLQWERTVTEDYISKMFEREETSSTFMGNFIAIPHGTEEAKSEVLHSGISIIQIPEGVEYGEGNTAKVVFGIAGKNNEHLDILSNIAIICSEEENIERLISAKSEEDLIAIFNEVN'

In [None]:
# Length of the manually assigned mutated sequence.
len(qwer.mutatedseq)

143

In [None]:
# Direct call with the same keyword arguments that Protein.mutreduce() passes.
yxc = mutation_decreaser(mut_temp = qwer.mutatedmp, wt_temp = qwer.SPARC, wt_protein = qwer.sequence, mut_protein = qwer.mutatedseq, name = 'proteinxy')

In [None]:
# Index 2 is the element that Protein.mutreduce() stores as .mut_mp_reduced.
yxc[2]

49.730312839077804

In [None]:
from function_mut import prot_mut

In [None]:
# Relative paths such as './data/pdbs' are resolved against this working directory.
# NOTE(review): the saved output exposes a local user directory — clear it before sharing the notebook.
os.getcwd()

'c:\\Users\\tobia\\OneDrive\\Documents\\Uni\\FS 4\\Bioinfo Projekt\\topic04_02'

In [None]:
# Direct single-iteration call of prot_mut on another PDB file.
# NOTE(review): pqr_output_path is './data/pdbs' here, while Protein.mutate() defaults to './data/pqrs' — confirm which directory is intended.
lkj = prot_mut(pdb_path = './data/pdbs',pdb_file = 'AF-C0H3V2-F1.pdb', pqr_output_path='./data/pdbs',iterations=1,threshhold=1)

Pqr file already exists
fasta file already exists
fas file already exists
Random mutation finished


In [None]:
# One-element array; Protein.mutate() keeps element [0] of it as .mutatedmp.
lkj[0][1][0]

array([55.84154306])

In [None]:
# Scratch check of the mutation labels: '<old character><1-based position><new character>'
# for every position at which the two test strings differ.
teststring1 = 'QWERTZUIOPQWERIOPQOKASNDKOWJNFONAOSFBAOSBDNJAWOFJOWAJNBOFDBWAOAJSFBASJBDOFJAWOAJDSBAOSJBDOJBWOQUBOBDWQJFWQNFCWQOWQBFGOJWQBFOWJBNDnbwqodbuNDBOWQbOBQOWJDBOQWBDOBQOWBDOQWBDOBWQODBWQOBDOQBWDOJNWQPDKMQWPNPDKNWQPDNPIWQNDPNWQPNDPWQBDBPQWIBDPIBWQQWERTZUIOPQWERIOPQOKASNDKOWJNFONAOSFBAOSBDNJAWOFJOWAJNBOFDBWAOAJSFBASJBDOFJAWOAJDSBAOSJBDOJBWOQUBOBDWQJFWQNFCWQOWQBFGOJWQBFOWJBNDnbwqodbuNDBOWQbOBQOWJDBOQWBDOBQOWBDOQWBDOBWQODBWQOBDOQBWDOJNWQPDKMQWPNPDKNWQPDNPIWQNDPNWQPNDPWQBDBPQWIBDPIBWQQWERTZUIOPQWERIOPQOKASNDKOWJNFONAOSFBAOSBDNJAWOFJOWAJNBOFDBWAOAJSFBASJBDOFJAWOAJDSBAOSJBDOJBWOQUBOBDWQJFWQNFCWQOWQBFGOJWQBFOWJBNDnbwqodbuNDBOWQbOBQOWJDBOQWBDOBQOWBDOQWBDOBWQODBWQOBDOQBWDOJNWQPDKMQWPNPDKNWQPDNPIWQNDPNWQPNDPWQBDBPQWIBDPIBWQQWERTZUIOPQWERIOPQOKASNDKOWJNFONAOSFBAOSBDNJAWOFJOWAJNBOFDBWAOAJSFBASJBDOFJAWOAJDSBAOSJBDOJBWOQUBOBDWQJFWQNFCWQOWQBFGOJWQBFOWJBNDnbwqodbuNDBOWQbOBQOWJDBOQWBDOBQOWBDOQWBDOBWQODBWQOBDOQBWDOJNWQPDKMQWPNPDKNWQPDNPIWQNDPNWQPNDPWQBDBPQWIBDPIBWQ'
teststring2 = 'QWERTZUIOPQWERIOPQOKASNDKOWJNFONAOSFBAOSBDNJAWOFJOWAJNROFDBWAOAJSFBASJBDOFJAWOAJDSBAOSJBDOJBWOQUBOBDWQJFWQNFCWQOWQBFGOJWQBFOWJBNDnbwqodbuNDBOWQbOBQOWJDBOQWBDOBQOWBDOQWBDOBWQODBWQOBDOQBWDOJNWQPDKMQWPNPDKNWQPDNPIWQNDPNWQPNDPWQBDBPQWIBDPIBWQQWRRTZUIOPQWERIOPQOKASNDKOWJNFONAOSFBAOSBDNJAWOFJOWAJNBOFDBWAOAJSFBASJBDOFJAWOAJDSBAOSJBDOJBWOQUBOBDWQJFWQNFCWQOWQBFGOJWQRFOWJBNDnbwqodbuNDBOWQbOBQOWJDBOQWBDOBQOWBDOQWBDOBWQODBWQOBDOQBWDOJNWQPDKMQRPNPDKNWQPDNPIWQNDPNWQPNDPWQBDBPQWIBDPIBWQQWERTZUIOPQWERIOPQOKASNDKOWJNFONAOSFBAOSBDNJAWOFJOWAJNBOFDBWAOAJSFBASJBDOFJAWOAJDSBAOSJBDOJBWOQUBOBDWQJFWQNFCWQOWQBFGOJWQBFOWJBNDnbwqodbuNDBOWQbOBQOWJDBOQWBDOBQOWBDOQWBDOBWQODBWQOBDOQBWDOJNWQPDKMQWPNPDKNWQPDNPIWQNDPNWQPNDPWQBDBPQWIBDPIBWQQWERTZUIOPQWERIOPQOKASNDKOWJNFONAOSFBAOSBDNJAWOFJOWAJNBOFDBWAOAJSFBASJBDOFJAWOAJDSBAOSJBDOJBWOQUBOBDWQJFWQNFCWQOWQBFGOJWQBFOWJBNDRbwqodbuNDBOWQbOBQOWJDBOQWBDOBQOWBDOQWBDOBWQODRWQOBDOQBWDOJNWQPDKRQWPNPDKNWQPDNPIWQNDRRRRRRDPWQBDBPQWIBDPIBWQ'
# teststring2 is indexed by position (not zipped), so it must be at least as long as teststring1.
testl = [
    f'{old}{pos}{teststring2[pos - 1]}'
    for pos, old in enumerate(teststring1, start=1)
    if old != teststring2[pos - 1]
]
testl

['B55R',
 'E241R',
 'B360R',
 'W435R',
 'n844R',
 'B890R',
 'M909R',
 'P929R',
 'N930R',
 'W931R',
 'Q932R',
 'P933R',
 'N934R']

In [None]:
from function_mut import functional_aa

In [None]:
# Positional arguments: a PDB directory, a PDB file name and a PQR directory.
# NOTE(review): presumably the same order as prot_mut's pdb_path, pdb_file, pqr_output_path — confirm against function_mut.
ghj = functional_aa('./data/pdbs','AF-C0H3V2-F1.pdb','./data/pqrs')

Pqr file already exists


In [None]:
ghj  # column 2 is the amino-acid number

array([['C0H3V2', 'MET', '1', '18', 'Cluster 0'],
       ['C0H3V2', 'GLN', '2', '20', 'Cluster 0'],
       ['C0H3V2', 'GLN', '2', '22', 'Cluster 0'],
       ['C0H3V2', 'GLN', '2', '25', 'Cluster 0'],
       ['C0H3V2', 'GLN', '2', '27', 'Cluster 0'],
       ['C0H3V2', 'GLN', '2', '29', 'Cluster 0'],
       ['C0H3V2', 'VAL', '3', '46', 'Salt_bridge'],
       ['C0H3V2', 'LEU', '4', '64', 'Cluster 0'],
       ['C0H3V2', 'LEU', '4', '66', 'Cluster 0'],
       ['C0H3V2', 'LEU', '4', '68', 'Cluster 0'],
       ['C0H3V2', 'ALA', '5', '81', 'Cluster 0'],
       ['C0H3V2', 'LYS', '6', '83', 'Cluster 0'],
       ['C0H3V2', 'LYS', '6', '85', 'Cluster 0'],
       ['C0H3V2', 'LYS', '6', '90', "['Hbond_don', 'Hbond_don']"],
       ['C0H3V2', 'GLU', '7', '113', 'Cluster 0'],
       ['C0H3V2', 'GLU', '7', '115', 'Cluster 0'],
       ['C0H3V2', 'GLU', '7', '117', 'Cluster 0'],
       ['C0H3V2', 'ILE', '9', '140', 'Salt_bridge'],
       ['C0H3V2', 'LYS', '10', '164', 'Cluster 1'],
       ['C0H3V2', 'LYS'

In [None]:
# Boolean mask: keep only the rows whose column 4 equals 'Salt_bridge'.
ghj[ghj[:,4]=='Salt_bridge']

array([['C0H3V2', 'VAL', '3', '46', 'Salt_bridge'],
       ['C0H3V2', 'ILE', '9', '140', 'Salt_bridge'],
       ['C0H3V2', 'GLU', '20', '327', 'Salt_bridge'],
       ['C0H3V2', 'TYR', '34', '533', 'Salt_bridge'],
       ['C0H3V2', 'PRO', '61', '962', 'Salt_bridge'],
       ['C0H3V2', 'PRO', '61', '963', 'Salt_bridge']], dtype='<U32')

In [3]:
test123 = Protein('RQNPWLTFSQSPVTVDGKVEKLKQFVKVTETGSKKVVTVTGIVFSASIDRPYYSFTVNPSSNPTSDRVYGVDITFLHSDHLIRNQWSPHGNLVRIPMEKAGNNGDPLPNKSDQYLGNGMNSDKEILGGASGYSHPTLVTKKGGGSGGGSGGGGGGSLGGGGGGGSGGGLSGGGGGGGGSGGGGGGGRTPSFGGGLRQNPWLTFSQSPVTVDGKVEKLKQFVKVTETGSKKVVTVTGIVFSASIDRPYYSFTVNPSSNPTSDRVYGVDITFLHSDHLIRNQWSPHGNLVRIPMEKAGNNGDPLPNKSDQYLGNGMNSDKEILGGASGYSHPTLVTKKGGGSGGGSGGGGGGSLGGGGGGGSGGGLSGGGGGGGGSGGGGGGGRTPSFGGGLRQNPWLTFSQSPVTVDGKVEKLKQFVKVTETGSKKVVTVTGIVFSASIDRPYYSFTVNPSSNPTSDRVYGVDITFLHSDHLIRNQWSPHGNLVRIPMEKAGNNGDPLPNKSDQYLGNGMNSDKEILGGASGYSHPTLVTKKGGGSGGGSGGGGGGSLGGGGGGGSGGGLSGGGGGGGGSGGGGGGGRTPSFGGGLRQNPWLTFSQSPVTVDGKVEKLKQFVKVTETGSKKVVTVTGIVFSASIDRPYYSFTVNPSSNPTSDRVYGVDITFLHSDHLIRNQWSPHGNLVRIPMEKAGNNGDPLPNKSDQYLGNGMNSDKEILGGASGYSHPTLVTKKGGGSGGGSGGGGGGSLGGGGGGGSGGGLSGGGGGGGGSGGGGGGGRTPSFGGGLRQNPWLTFSQSPVTVDGKVEKLKQFVKVTETGSKKVVTVTGIVFSASIDRPYYSFTVNPSSNPTSDRVYGVDITFLHSDHLIRNQWSPHGNLVRIPMEKAGNNGDPLPNKSDQYLGNGMNSDKEILGGASGYSHPTLVTKKGGGSGGGSGGGGGGSLGGGGGGGSGGGLSGGGGGGGGSGGGGGGGRTPSFGGGLRQNPWLTFSQSPVTVDGKVEKLKQFVKVTETGSKKVVTVTGIVFSASIDRPYYSFTVNPSSNPTSDRVYGVDITFLHSDHLIRNQWSPHGNLVRIPMEKAGNNGDPLPNKSDQYLGNGMNSDKEILGGASGYSHPTLVTKKGGGSGGGSGGGGGGSLGGGGGGGSGGGLSGGGGGGGGSGGGGGGGRTPSFGGGLRQNPWLTFSQSPVTVDGKVEKLKQFVKVTETGSKKVVTVTGIVFSASIDRPYYSFTVNPSSNPTSDRVYGVDITFLHSDHLIRNQWSPHGNLVRIPMEKAGNNGDPLPNKSDQYLGNGMNSDKEILGGASGYSHPTLVTKKGGGSGGGSGGGGGGSLGGGGGGGSGGGLSGGGGGGGGSGGGGGGGRTPSFGGGLRQNPWLTFSQSPVTVDGKVEKLKQFVKVTETGSKKVVTVTGIVFSASIDRPYYSFTVNPSSNPTSDRVYGVDITFLHSDHLIRNQWSPHGNLVRIPMEKAGNNGDPLPNKSDQYLGNGMNSDKEILGGASGYSHPTLVTKKGGGSGGGSGGGGGGSLGGGGGGGSGGGLSGGGGGGGGSGGGGGGGRTPSFGGGLRQNPWLTFSQSPVTVDGKVEKLKQFVKVTETGSKKVVTVTGIVFSASIDRPYYSFTVNPSSNPTSDRVYGVDITFLHSDHLIRNQWSPHGNLVRIPMEKAGNNGDPLPNKSDQYLGNGMNSDKEILGGASGYSHPTLVTKKGGGSGGGSGGGGGGSLGGGGGGGSGGGLSGGGGGGGGSGGGGGGGRTPSFGGGLRQNPWLTFSQSPVTVDGKVEKLKQFVKVTETGSKKVVTVTGIVFSASIDRPYYSFTVNPSSNPTSDRVYGVDITFLHSDHLIRNQWSPHGNLVRIPMEKAGNNGDPLPNKSDQYLGNGMNSDKEILGGASGYSHPTLVTKKGGGSGGGSGGGGGGSLGGGGGGGSGGGLSGGGGGGGGSGGGGGGGRTPSFGGGL')

  data = np.loadtxt(faspath, dtype={'names': ('index', 'col1', 'col2', 'val1', 'val2', 'val3'),'formats': ('i4', 'S1', 'S1', 'f4', 'f4', 'f4')})
