In [1]:
import csv

# 5UDC

In [8]:
# This script removes a segment from the SEQRES records of 5UDC in three passes:
#   1) delete_lines()  - delete the residues of the segment
#   2) fill_lines()    - fill SEQRES lines that are no longer full (13 residues)
#   3) reindex_lines() - reindex SEQRES lines so that indexes are consecutive
# Output: PDB file with desired SEQRES sequence removed

# Set parameters
seqres_records = [8, 9, 10, 11]
seqres_chains = ['F', 'A', 'D']
# numRes after deletion; the 38 deleted residues are 6 (record 8, residues 7-12)
# + 13 + 13 (records 9 and 10) + 6 (record 11, residues 0-5).
# 568 is presumably the original numRes of these chains -- TODO confirm.
seqres_numres = 568-38
offset_start = 7 # first residue of segment to delete -- starts from first residue (index 0) of first seqres_record
offset_end = 6 # one after the last residue to include in segment to delete -- in the last seqres_record

# Read and transform everything BEFORE opening the output file, so that a
# missing input or a failure in one of the passes does not leave a truncated
# output file behind.
# newline='' is how the csv module expects its file objects to be opened.
with open("../data/5udc/5udc_noloop.pdb", 'r', newline='') as f:
    # Splitting on single spaces keeps every run of blanks as empty-string
    # fields; the helper functions rely on those positions.
    lines = list(csv.reader(f, delimiter=' '))

# Call delete_lines()
lines_deleted = delete_lines(lines, seqres_records, seqres_chains, seqres_numres, offset_start, offset_end)

# Call fill_lines()
lines_filled = fill_lines(lines_deleted)

# Call reindex_lines()
lines_reindexed = reindex_lines(lines_filled)

# NOTE(review): csv.writer terminates rows with '\r\n' by default, so the
# output has CRLF line endings -- confirm downstream tools accept that.
with open("../data/5udc/5udc_noloop_noseqres.pdb", "w", newline='') as output:
    writer = csv.writer(output, delimiter=' ')
    writer.writerows(lines_reindexed)


# 4JHW

In [5]:

# Remove a segment from the SEQRES records of 4JHW:
#   1) delete_lines()  - delete the residues of the segment
#   2) fill_lines()    - fill SEQRES lines that are no longer full (13 residues)
#   3) reindex_lines() - reindex SEQRES lines so that indexes are consecutive
# Output: PDB file with desired SEQRES sequence removed

# Set parameters
seqres_records = [6, 7]
seqres_chains = ['F']
# numRes after deletion; the 11 deleted residues are 5 (record 6, residues 8-12)
# + 6 (record 7, residues 0-5).
# 498 is presumably the original numRes of chain F -- TODO confirm.
seqres_numres = 498-11
offset_start = 8 # first residue of segment to delete -- starts from first residue (index 0) of first seqres_record
offset_end = 6 # one after the last residue to include in segment to delete -- in the last seqres_record

# Read and transform everything BEFORE opening the output file, so that a
# missing input or a failure in one of the passes does not leave a truncated
# output file behind.
# newline='' is how the csv module expects its file objects to be opened.
with open("../data/4jhw/4jhw_noloop.pdb", 'r', newline='') as f:
    # Splitting on single spaces keeps every run of blanks as empty-string
    # fields; the helper functions rely on those positions.
    lines = list(csv.reader(f, delimiter=' '))

# Call delete_lines()
lines_deleted = delete_lines(lines, seqres_records, seqres_chains, seqres_numres, offset_start, offset_end)

# Call fill_lines()
lines_filled = fill_lines(lines_deleted)

# Call reindex_lines()
lines_reindexed = reindex_lines(lines_filled)

# NOTE(review): csv.writer terminates rows with '\r\n' by default, so the
# output has CRLF line endings -- confirm downstream tools accept that.
with open("../data/4jhw/4jhw_noloop_noseqres.pdb", "w", newline='') as output:
    writer = csv.writer(output, delimiter=' ')
    writer.writerows(lines_reindexed)


# Functions

In [2]:
def delete_lines(lines, seqres_records, seqres_chains, seqres_numres, offset_start, offset_end):
    """Delete a contiguous run of residues from the SEQRES records of some chains.

    Each row is the list of fields obtained by splitting one PDB line on single
    spaces, so runs of blanks show up as empty-string fields. Two SEQRES layouts
    are recognised (3-digit serial numbers are not handled):
      * 1-digit serial: row[1] == row[2] == '', numRes at row[6], residues from row[8]
      * 2-digit serial: numRes at row[5], residues from row[7]
    numRes is assumed to occupy a single field at that position (true for
    3-digit counts) -- TODO confirm for other chain lengths.

    lines : list of rows read in from file
    seqres_records : list of SEQRES line index(es) to remove residues from
    seqres_chains : list of chain(s) from which to remove SEQRES sequences
    seqres_numres : (int) number of residues in SEQRES chain after removing desired sequence
    offset_start : (int) index of first residue in first seqres_record to delete, starts from index 0
    offset_end : (int) index of one after the last residue (in the last seqres_record) to include in segment to delete

    Returns a new list of rows. For every SEQRES row of a selected chain numRes
    is rewritten; the first selected record keeps residues before offset_start,
    the last keeps residues from offset_end on, and records in between are
    dropped. When seqres_records names a single record both offsets apply to
    that one record. Rows of the input are never modified in place.
    """
    # Hoisted out of the loop; default=None keeps an empty selection harmless.
    first_record = min(seqres_records, default=None)
    last_record = max(seqres_records, default=None)

    lines_deleted = []
    for line in lines:
        # Blank rows and non-SEQRES records pass through untouched.
        if not line or line[0] != "SEQRES":
            lines_deleted.append(line)
            continue

        fields = [element for element in line if element != '']
        record_number = int(fields[1])
        chain = fields[2]
        if chain not in seqres_chains:
            lines_deleted.append(line)
            continue

        # Index of the first residue field depends on the serial-number width.
        residue_start = 8 if line[1] == '' and line[2] == '' else 7

        # Work on a copy so the caller's rows stay intact.
        line = list(line)
        # Edit numRes in SEQRES record (two fields before the first residue)
        line[residue_start - 2] = str(seqres_numres)

        # Delete residues in specific seqres_records for seqres_chains
        if record_number in seqres_records:
            is_first = record_number == first_record
            is_last = record_number == last_record
            if is_first and is_last:
                # Segment starts and ends inside the same record: keep both sides.
                line = line[:residue_start + offset_start] + line[residue_start + offset_end:]
            elif is_first:
                line = line[:residue_start + offset_start]
            elif is_last:
                line = line[:residue_start] + line[residue_start + offset_end:]
            else:
                # Record lies entirely inside the segment: do not append line
                continue
        lines_deleted.append(line)
    return lines_deleted

In [3]:
def fill_lines(lines_deleted):
    """Fill in incomplete SEQRES lines after deletion of residues.

    E.g. if one line now only has 5 residues, this function moves the first 8
    residues of the following line of the same chain up to complete it; the
    shortfall then ripples down through the rest of that chain.

    Rows are lists of fields from splitting a PDB line on single spaces. Two
    SEQRES layouts are recognised (3-digit serial numbers are not handled):
      * 1-digit serial: row[1] == row[2] == '', chain at row[4], residues from row[8]
      * 2-digit serial: chain at row[3], residues from row[7]

    lines_deleted : list of rows outputted by delete_lines()

    Returns a new list of rows. Non-SEQRES rows pass through untouched, a
    SEQRES row is only ever topped up when the row directly before it in the
    output is a SEQRES row of the same chain, and records whose residues were
    all moved up are dropped. Serial numbers are not updated here.
    """
    residues_per_record = 13

    lines = []
    for line in lines_deleted:
        # The row a fill would go into is always the last row emitted so far.
        previous_line = lines[-1] if lines else None
        both_seqres = (
            bool(line) and line[0] == "SEQRES"
            and bool(previous_line) and previous_line[0] == "SEQRES"
        )
        if both_seqres:
            # Index of the first residue field; the chain id sits four fields earlier.
            previous_start = 8 if previous_line[1] == '' and previous_line[2] == '' else 7
            current_start = 8 if line[1] == '' and line[2] == '' else 7
            previous_chain = previous_line[previous_start - 4]
            current_chain = line[current_start - 4]

            previous_residues = [res for res in previous_line[previous_start:] if res != '']
            current_residues = [res for res in line[current_start:] if res != '']
            missing = residues_per_record - len(previous_residues)

            # Check if there are missing residues and if previous chain matches the current chain
            # --> fill previous line with current residues
            if missing > 0 and previous_chain == current_chain:
                # Drop trailing padding fields first so the moved residues
                # land directly after the existing ones.
                previous_body = previous_line[previous_start:]
                while previous_body and previous_body[-1] == '':
                    previous_body.pop()
                # Replace the previous row in place of re-slicing the whole list.
                lines[-1] = previous_line[:previous_start] + previous_body + current_residues[:missing]

                if missing >= len(current_residues):
                    # Every residue of the current row moved up, so the row
                    # itself disappears (>= avoids emitting a header-only record).
                    continue
                # Update current line with residues added to previous line removed
                line = line[:current_start] + current_residues[missing:]
        lines.append(line)
    return lines
            

In [7]:
def reindex_lines(lines_filled):
    """Re-index SEQRES lines after filling in incomplete SEQRES lines.

    E.g. if one line has index 9 and the next line of the same chain has index
    12, this function sets the second line to index 10 and all subsequent line
    indexes of that chain in consecutive order.

    Rows are lists of fields from splitting a PDB line on single spaces. Two
    SEQRES layouts are recognised (3-digit serial numbers are not handled):
      * 1-digit serial: row[1] == row[2] == '', serial at row[3], chain at row[4]
      * 2-digit serial: serial at row[2], chain at row[3]
    A row is converted between the two layouts when its new serial number
    crosses the 9/10 boundary.

    lines_filled : list of rows outputted by fill_lines()

    Returns a new list of rows. A SEQRES row is renumbered only when the row
    directly before it is a SEQRES row of the same chain; the first record of
    each chain keeps its serial number. Rows of the input are never modified in
    place.
    """
    lines = []
    for line in lines_filled:
        # Compare against the previous row as it was emitted (i.e. already
        # renumbered), taken from this function's own output rather than from
        # any variable outside the function.
        previous_line = lines[-1] if lines else None
        both_seqres = (
            bool(line) and line[0] == "SEQRES"
            and bool(previous_line) and previous_line[0] == "SEQRES"
        )
        if both_seqres:
            previous_single_digit = previous_line[1] == '' and previous_line[2] == ''
            current_single_digit = line[1] == '' and line[2] == ''
            previous_line_chain = previous_line[4] if previous_single_digit else previous_line[3]
            current_line_chain = line[4] if current_single_digit else line[3]

            if previous_line_chain == current_line_chain:
                previous_line_index = int(previous_line[3] if previous_single_digit else previous_line[2])
                current_line_index = int(line[3] if current_single_digit else line[2])
                expected_index = previous_line_index + 1

                if current_line_index != expected_index:
                    # Everything from the chain id onwards is kept as is.
                    tail = line[4:] if current_single_digit else line[3:]
                    # A 1-digit serial needs one extra empty field in front of it
                    # so that it stays right-justified when written back.
                    if expected_index < 10:
                        serial_fields = ['', str(expected_index)]
                    else:
                        serial_fields = [str(expected_index)]
                    # Build a new row instead of editing the caller's row.
                    line = line[:2] + serial_fields + tail
        lines.append(line)
    return lines
            
            