In [1]:
# Parses FASTA file using BioPython Library

from Bio import SeqIO

for seq_record in SeqIO.parse("6ezq.fasta.txt", "fasta"):
    print((seq_record.id))
    print((repr(seq_record.seq)))
    print((len(seq_record)))

6EZQ:A|PDBID|CHAIN|SEQUENCE
Seq('DAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPFEDHVKLVNEVTEFAKTCV...LGL', SingleLetterAlphabet())
585


In [2]:
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq

my_seq = seq_record.seq
# for index, letter in enumerate(my_seq):
#     print("%i %s" % (index, letter))

In [3]:
from Bio.Seq import Seq
from Bio import Alphabet
from Bio.Alphabet import Reduced
my_protein = seq_record.seq
# Used BioPython hp conversion function from Library. Tested with the
# Amino Acid Properties to be the same classification

new_protein = Seq('', Alphabet.Reduced.HPModel())

for aa in my_protein:
    new_protein += Alphabet.Reduced.hp_model_tab[aa]


new_protein

Seq('PPPPPPHPPPHPPHPPPPHPPHHHHPHPPHHPPHPHPPPHPHHPPHPPHPPPHH...HPH', HPModel())

In [4]:
import sys

EMPTY = ' '  # This represents an empty character in the grid
best_grids = []  # This is a list of the grids that contain the best foldings

#---Main function definition---#


def main():

    #--Local variable initialization--#
    current_element_idx = 0  # Index of the current element in protein
    protein = 'HPPPPPPHH'  # Protein we are folding
    # protein = "HHPPHHHPHHPH" # Protein we are folding
    current_grid = []  # Grid in which we are currently folding
    current_num_H_bonds = 0
    max_num_H_bonds = 0  # Counters
    print("Hello protein folders, beginning program")  # Print a greeting

    #-Fill the grid with empty characters-#
    # For each row of the grid
    for row in range(len(protein) * 2 - 1):

        # Create a new empty list for that row
        current_grid.append([])

        # For each column of the grid
        for col in range(len(protein) * 2 - 1):

            # Add an empty character at the given row/column
            current_grid[row].append(EMPTY)

    # Start the first protein element in the middle of the grid
    current_row = current_col = int((len(protein) * 2 - 1) / 2)

    # Recursively find the best foldings, filling best_grid
    print(("Best number of H-H bonds",
           fold(protein, max_num_H_bonds, current_element_idx, current_grid,
                current_row, current_col, ' ', current_num_H_bonds)))

    #-Print the best grids-#
    # For each of the best grids
    for grid in range(len(best_grids)):

        # Print the index of the grid
        print("Grid ", grid, ":")

        # For each row in the grid
        for row in range(len(protein) * 2 - 1):

            # For each column in the grid
            for col in range(len(protein) * 2 - 1):

                # Print the character at the given row and
                # column
                print(best_grids[grid][row][col], end=' ')

            # Print a line in between the rows
            print('')


#---Fold function definition---#


def fold(protein, max_num_H_bonds, current_element_idx, current_grid,
         current_row, current_col, direction, current_num_H_bonds):

    # Determine the new current row and column based on the current
    # direction
    if direction == 'R':
        current_col += 1
    elif direction == 'D':
        current_row += 1
    elif direction == 'L':
        current_col -= 1
    elif direction == 'U':
        current_row -= 1

    # If we are able to place an element at the current row and column
    if current_grid[current_row][current_col] == EMPTY:

        #-Make a copy of the current grid before we change it-#
        new_grid = []
        for row in range(len(protein) * 2 - 1):
            new_grid.append([])
            for col in range(len(protein) * 2 - 1):
                new_grid[row].append(current_grid[row][col])

        # Place the protein in the new grid
        new_grid[current_row][current_col] = protein[current_element_idx]

        # Check for H-H bonds in the current fold
        if protein[current_element_idx] == 'H':

            # Check to the left
            if current_col > 0 and new_grid[current_row][current_col -
                                                         1] == 'H':
                current_num_H_bonds += 1

            # Check above
            if current_row > 0 and new_grid[current_row -
                                            1][current_col] == 'H':
                current_num_H_bonds += 1

            # Check to the right
            if current_col < len(new_grid[current_row]) - 1 and new_grid[
                    current_row][current_col + 1] == 'H':
                current_num_H_bonds += 1

            # Check below
            if current_row < len(new_grid) - 1 and new_grid[
                    current_row + 1][current_col] == 'H':
                current_num_H_bonds += 1

        # Move on to the next element index
        current_element_idx += 1

        # If not end of string, choose each direction and recur
        if current_element_idx != len(protein):
            for direction in ['R', 'D', 'L', 'U']:
                max_num_H_bonds = fold(protein, max_num_H_bonds,
                                       current_element_idx, new_grid,
                                       current_row, current_col, direction,
                                       current_num_H_bonds)

        else:
            # If end of string, check if the current fold has more
            # H-H bonds than the max we found before.
            # if true update the max.  If we have the same number of
            # H-H bonds, append the current grid to the list of best
            # grids
            if current_num_H_bonds > max_num_H_bonds:
                max_num_H_bonds = current_num_H_bonds
                del best_grids[:]
                best_grids.append(new_grid)
            elif current_num_H_bonds == max_num_H_bonds:
                best_grids.append(new_grid)

    # Return the count of the maximum number of H-H bonds
    return max_num_H_bonds


##---Call main function-----##
main()

Hello protein folders, beginning program
('Best number of H-H bonds', 2)
Grid  0 :
                                  
                                  
                                  
                                  
                                  
                                  
                                  
                                  
                H P P P           
                H P P P           
                H                 
                                  
                                  
                                  
                                  
                                  
                                  
Grid  1 :
                                  
                                  
                                  
                                  
                                  
                                  
                                  
                                  
                H P P P         

                                  
                                  
                                  
                                  
                                  
                                  
                                  
Grid  18 :
                                  
                                  
                                  
                                  
                                  
                                  
                                  
                                  
                H P               
              H H P P             
                P P P             
                                  
                                  
                                  
                                  
                                  
                                  
Grid  19 :
                                  
                                  
                                  
                                 

                                  
                                  
                                  
                                  
                                  
                  P P             
                H P P             
              H H P P             
                                  
                                  
                                  
                                  
                                  
                                  
                                  
Grid  33 :
                                  
                                  
                                  
                                  
                                  
                                  
                P P P             
              H H P P             
                H P               
                                  
                                  
                                  
                                  
         

                                  
                                  
                                  
                                  
                                  
Grid  49 :
                                  
                                  
                                  
                                  
                                  
                                  
                                  
                  H               
                H H P             
                P P P             
                  P P             
                                  
                                  
                                  
                                  
                                  
                                  
Grid  50 :
                                  
                                  
                                  
                                  
                                  
                                 

                                  
                                  
                                  
                                  
                                  
Grid  68 :
                                  
                                  
                                  
                                  
                                  
                                  
                                  
                                  
            H H H                 
            P P P                 
            P P P                 
                                  
                                  
                                  
                                  
                                  
                                  
Grid  69 :
                                  
                                  
                                  
                                  
                                  
                                 

                                  
                                  
                                  
                                  
              P P                 
              P H H               
              P H                 
              P P                 
                                  
                                  
                                  
                                  
                                  
                                  
                                  
Grid  88 :
                                  
                                  
                                  
                                  
                                  
                                  
                                  
                  H               
              P H H P             
              P P P P             
                                  
                                  
                                  
         

                                  
            P P H                 
            P H H                 
            P P P                 
                                  
                                  
                                  
                                  
                                  
                                  
Grid  106 :
                                  
                                  
                                  
                                  
                                  
                                  
                                  
                                  
            P P H                 
            P P H H               
            P P                   
                                  
                                  
                                  
                                  
                                  
                                  
Grid  107 :
                               

                                  
            P P                   
            P P H                 
            P P H H               
                                  
                                  
                                  
                                  
                                  
                                  
                                  
Grid  125 :
                                  
                                  
                                  
                                  
                                  
                                  
                                  
            P P                   
            P P H                 
            P P H                 
                H                 
                                  
                                  
                                  
                                  
                                  
                                  
Grid  12

              P P P               
                                  
                                  
                                  
                                  
                                  
                                  
                                  
Grid  141 :
                                  
                                  
                                  
                                  
                                  
                                  
                                  
              H P P               
              H H P               
              P P P               
                                  
                                  
                                  
                                  
                                  
                                  
                                  
Grid  142 :
                                  
                                  
                               

                                  
                                  
                                  
                                  
                                  
                P P P             
                P   P             
                H H P             
                  H               
                                  
                                  
                                  
                                  
                                  
                                  
                                  
Grid  159 :
                                  
                                  
                                  
                                  
                                  
                                  
                P P P             
                P H P             
                H H P             
                                  
                                  
                                  
        

                                  
                                  
                                  
                                  
                                  
