### Loading GEM-PRO dataframe

In [2]:
import pandas as pd

Let's try to do a case study with ARGSS...its uniprot ID is P00966

In [3]:
GP = pd.read_csv('DF_GEMPRO.csv', index_col=0)
# forcing gene IDs to be read as strings
GP['m_gene_original'] = GP['m_gene_original'].astype(str)
GP['m_gene_entrez'] = GP['m_gene_entrez'].astype(str)
GP['m_gene_isoform'] = GP['m_gene_isoform'].astype(str)
# this searches for an ID and prints out which row it is in
gene_id = raw_input("What is the uniprot ID?   ") # this can be modified to ask for gene original, entrez, uniprot, isoform id, refseq etc.


GP[GP.u_uniprot_acc == gene_id.upper()]

What is the uniprot ID?   p00966


Unnamed: 0,m_gene_original,m_gene_entrez,m_gene_isoform,u_uniprot_acc,u_isoform_id,u_refseq,u_ensp,u_seq_len,u_seq,u_reviewed,...,ssb_p_aln_coverage,ssb_p_percent_seq_ident,ssb_p_no_deletions_in_pdb,ssb_p_aln_coverage_sim,ssb_si_score,ssb_rez_score,ssb_raw_score,ssb_above_cutoffs,ssb_rank,ssb_best_file
3049,445.1,445,1,P00966,P00966-1,NP_446464,ENSP00000253004,412,MSSKGSVVLAYSGGLDTSCILVWLKEQGYDVIAYLANIGQKEDFEE...,True,...,402,0.975728,True,402,1.565458,1.066667,2.632125,True,1,2nz2.pdb


In [4]:
gene_original_id = raw_input("What is the gene original ID?   ")

What is the gene original ID?   445.1


In [5]:
# This extracts all chains present
chains_avail = GP[GP.m_gene_original == gene_original_id].p_chains
chains_present = ""
for a in chains_avail:
    chains_present = a
    
#print "These chains are present in the pdb structure: %s" %(chain_strings)
#pdb_chain_choose = raw_input("Which chain are you interested in?   ")

In [6]:
# This automatically displays/chooses which chain to align as it is the "best"; a string of A,B,C is returned
best_pdb_chain = GP[GP.m_gene_original == gene_original_id].p_chain_uniprot_map.values[0][2]

## Outputs gene of interest, most importantly identifies the best pdb structure. As can be seen, the pdb file is 2nz2

#### download the pdb file and get its sequence

In [7]:
# load Biopython PDB packages

# PDBList to download PDBs
from Bio.PDB.PDBList import PDBList
pdbl = PDBList()

# PDBParser to load and work with files
from Bio.PDB.PDBParser import PDBParser
parser = PDBParser()

import urllib2
import uuid

pdb_name = raw_input("What is the pdb ID?   ")

# download pdb
pdb_file_path = pdbl.retrieve_pdb_file(pdb_name)

What is the pdb ID?   2nz2
Structure exists: '/Users/LAURENCE/Desktop/Senior Design/Untitled Folder/nz/pdb2nz2.ent' 


In [8]:
pdb_structure = parser.get_structure('ARGSS', pdb_file_path)

In [9]:
# get the ligands within this file for display
# from: http://stackoverflow.com/questions/25718201/remove-heteroatoms-from-pdb
ligands = []

for residue in pdb_structure.get_residues():
    tags = residue.get_full_id()
    # tags contains a tuple with (Structure ID, Model ID, Chain ID, (Residue ID))
    # Residue ID is a tuple with (*Hetero Field*, Residue ID, Insertion Code)

    # Thus you're interested in the Hetero Field, that is empty if the residue
    # is not a hetero atom or have some flag if it is (W for waters, H, etc.)
    if tags[3][0] != " " and tags[3][0] != "W":
        ligands.append(tags[3][0].split('_')[1].strip())
    else:
        continue
        
print(ligands)

['NA', 'ASP', 'CIR']


In [10]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC
from Bio.PDB import Polypeptide

In [11]:
def get_pdb_seq(structure):
    '''
    Takes in a Biopython structure object and returns a list of the structure's sequences
    :param structure: Biopython structure object
    :return: Dictionary of sequence strings with chain IDs as the key
    '''
    
    structure_seqs = {}
    
    # loop over each chain of the PDB
    for chain in structure[0]:
        
        chain_it = iter(chain) 
        
        chain_seq = ''
        tracker = 0
        
        # loop over the residues
        for res in chain.get_residues():
            # NOTE: you can get the residue number too
            res_num = res.id[1]
            
            # double check if the residue name is a standard residue
            # if it is not a standard residue (ie. selenomethionine),
            # it will be filled in with an X on the next iteration)
            if Polypeptide.is_aa(res, standard=True):
                full_id = res.get_full_id()
                end_tracker = full_id[3][1]
                i_code = full_id[3][2]
                aa = Polypeptide.three_to_one(res.get_resname())
                
                # tracker to fill in X's
                if end_tracker != (tracker + 1):# and first == False:
                    if i_code != ' ':
                        chain_seq += aa
                        tracker = end_tracker + 1
                        continue
                    else:
                        chain_seq += 'X'*(end_tracker - tracker - 1)
                        
                chain_seq += aa
                tracker = end_tracker
                
            else:
                continue

        structure_seqs[chain.get_id()] = chain_seq

    return structure_seqs

In [12]:
# represented in a single string
pdb_sequence = get_pdb_seq(pdb_structure)
string_pdb_seq = pdb_sequence[best_pdb_chain]

In [13]:
# outputs a fasta file format
faa_out1 = '> '
faa_out2 = '%s pdb sequence fasta' %(pdb_name)
faa_out3 = '\n%s' %(string_pdb_seq)
faa_out = faa_out1 + faa_out2 + faa_out3
print faa_out

> 2nz2 pdb sequence fasta
XXXKGSVVLAYSGGLDTSCILVWLKEQGYDVIAYLANIGQKEDFEEARKKALKLGAKKVFIEDVSREFVEEFIWPAIQSSALYEDRYLLGTSLARPCIARKQVEIAQREGAKYVSHGATGKGNDQVRFELSCYSLAPQIKVIAPWRMPEFYNRFKXRNDLMEYAKQHGIPIPVTPKNPWSMDENLMHISYEAGILENPKNQAPPGLYTKTQDPAKAPNTPDILEIEFKKGVPVKVTNVKDGTTHQTSLELFMYLNEVAGKHGVGRIDIVENRFIGMKSRGIYETPAGTILYHAHLDIEAFTMDREVRKIKQGLGLKFAELVYTGFWHSPECEFVRHCIAKSQERVEGKVQVSVLKGQVYILGRESPLSLYNEELVSXNVQGDYEPTDATGFININSLRLKEYHRLQSXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXD


In [14]:
file = open(faa_out2 + '.faa', "w")
# note the fasta file name is named faa_out2
file.write(faa_out)
file.close()

### Writing FASTA files

In [15]:
import os.path
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC

def write_fasta_file(sequence, fileout):
    '''
    This writes a fasta file for a SeqRecord object. It also checks if the file exists already and returns the filename.
    
    Input: sequence - Biopython SeqRecord object, identification - ID of the sequence.
    Output: Filename of fasta file
    '''
    
    outfile = "%s" % fileout
    if os.path.isfile(outfile):
        print 'FASTA file already exists %s' % outfile
        return outfile
    else:
        SeqIO.write(sequence, outfile, "fasta")
        return outfile

In [16]:
# example: for gene argss

# getting the IDs and making output file name 
seq_id = GP[GP.m_gene_original == gene_original_id].u_isoform_id.values[0]
# the /tmp/ in '/tmp/' + seq_id + '.faa' puts it in a temporary folder; I will remove the temp saving for now
seq_output = seq_id + '.faa'

# getting the sequence and making it into a Biopython SeqRecord object
seq = GP[GP.m_gene_original == gene_original_id].u_seq.values[0]
seq_biop = SeqRecord(Seq(seq, IUPAC.protein),id=seq_id,description='uniprot sequence')

# writing the SeqRecord object (formats it as FASTA file)
out_file = write_fasta_file(seq_biop, seq_output)

FASTA file already exists P00966-1.faa


In [17]:
# just saving in tmp for this example, all fasta files were already written in "sequence_files"
# !cat $out_file

## aligning 2 FASTA files

In [18]:
import os.path
from Bio.Emboss.Applications import NeedleCommandline

def run_alignment(fasta1_id, fasta1, fasta2_id, fasta2):
    '''
    Runs the needle alignment program and writes the result to a file. Returns the filename. Standard gap inputs are used.
    
    Input:  fasta1 - fasta file name ("reference" sequence)
            fasta2 - fasta file name (what you're interested in aligning)
    Output: alignment_file - file name of alignment
    '''

    alignment_file = "%s_%s_align.txt" % (fasta1_id, fasta2_id)
    
    if os.path.isfile(alignment_file):
        print 'Alignment %s file already exists' % alignment_file
        return alignment_file

    else:
        print '**RUNNING ALIGNMENT FOR %s AND %s**' % (fasta1_id, fasta2_id)
        needle_cline = NeedleCommandline(asequence=fasta1, bsequence=fasta2, gapopen=10, gapextend=0.5, outfile=alignment_file)
        stdout, stderr = needle_cline()
        return alignment_file

In [19]:
SEQUENCE_FILES = '/Users/LAURENCE/Desktop/Senior Design/Untitled Folder'
UNIPROT_FILES = '/Users/LAURENCE/Desktop/Senior Design/Untitled Folder'
PDB_SEQ_FILES = '/Users/LAURENCE/Desktop/Senior Design/Untitled Folder'

# 1. get the uniprot sequence file
seq_id = GP[GP.m_gene_original == gene_original_id].u_isoform_id.values[0]
seq_fasta = os.path.join(UNIPROT_FILES, seq_id + '.faa')

if os.path.exists(seq_fasta):
    print('found uniprot fasta file {}'.format(seq_fasta))
    
# 2. get the pdb sequence file
pdb_id = GP[GP.m_gene_original == gene_original_id].ssb_best_file.values[0].strip('.pdb')
pdb_fasta = os.path.join(PDB_SEQ_FILES, faa_out2 + '.faa')

if os.path.exists(pdb_fasta):
    print('found pdb fasta file {}'.format(pdb_fasta))
    
# 3. run the alignment using the function above
os.chdir('/tmp/')
alignment_filename = run_alignment(seq_id, seq_fasta, pdb_id, pdb_fasta)

found uniprot fasta file /Users/LAURENCE/Desktop/Senior Design/Untitled Folder/P00966-1.faa
found pdb fasta file /Users/LAURENCE/Desktop/Senior Design/Untitled Folder/2nz2 pdb sequence fasta.faa
Alignment P00966-1_2nz2_align.txt file already exists


In [20]:
!cat $alignment_filename

########################################
# Program: needle
# Rundate: Thu  4 Feb 2016 17:38:50
# Commandline: needle
#    -outfile P00966-1_2nz2_align.txt
#    -asequence "/Users/LAURENCE/Desktop/Senior Design/Untitled Folder/P00966-1.faa"
#    -bsequence "/Users/LAURENCE/Desktop/Senior Design/Untitled Folder/argss_temp.faa"
#    -gapopen 10
#    -gapextend 0.5
# Align_format: srspair
# Report_file: P00966-1_2nz2_align.txt
########################################

#
# Aligned_sequences: 2
# 1: P00966-1
# 2: ARGSS
# Matrix: EBLOSUM62
# Gap_penalty: 10.0
# Extend_penalty: 0.5
#
# Length: 501
# Identity:     402/501 (80.2%)
# Similarity:   402/501 (80.2%)
# Gaps:          89/501 (17.8%)
# Score: 2100.0
# 
#

P00966-1           1 MSSKGSVVLAYSGGLDTSCILVWLKEQGYDVIAYLANIGQKEDFEEARKK     50
                     ...|||||||||||||||||||||||||||||||||||||||||||||||
ARGSS              1 XXXKGSVVLAYSGGLDTSCILVWLKEQGYDVIAYLANIGQKEDFEEARKK     50

P00966-1          51 

### loading alignment file as a dataframe
this code parses a needle alignment file and makes it into a dataframe
each row of the dataframe is a position in the reference sequence
it also tells you what parts are deleted, mutated, etc.

In [21]:
import numpy as np
from Bio import AlignIO
from collections import defaultdict

def get_alignment_allpos_df(alignment_file, a_seq_id=None, b_seq_id=None):
    '''
    Takes in a needle alignment file and returns a pandas dataframe of the results
    Input: alignment_file - the path to the alignment file, 
            a_seq_id - optional ID of the reference sequence, 
            b_seq_id - optional ID of the second sequence
    Output: alignment_df - a pandas dataframe of the alignment results
    '''
    alignments = list(AlignIO.parse(alignment_file, "emboss"))

    appender = defaultdict(dict)
    idx = 0
    for alignment in alignments:
    #         if not switch:
        if not a_seq_id:
            a_seq_id = list(alignment)[0].id
        a_seq = str(list(alignment)[0].seq)
        if not b_seq_id:
            b_seq_id = list(alignment)[1].id
        b_seq = str(list(alignment)[1].seq)

        a_idx = 1
        b_idx = 1

        for i, (a,b) in enumerate(zip(a_seq,b_seq)):
            if a == b and a != '-' and b != '-':
                aa_flag = 'match'
            if a != b and a == '-' and b != '-':
                aa_flag = 'insertion'
            if a != b and a != '-' and b == '-':
                aa_flag = 'deletion'
            if a != b and a != '-' and b == 'X':
                aa_flag = 'unresolved'
            if a != b and b != '-' and a == 'X':
                aa_flag = 'unresolved'
            elif a != b and a != '-' and b != '-':
                aa_flag = 'mutation'
                
            appender[idx]['Uniprot_ID'] = a_seq_id
            appender[idx]['Structure'] = b_seq_id
            appender[idx]['type'] = aa_flag
            
            if aa_flag == 'match' or aa_flag == 'unresolved' or aa_flag == 'mutation':
                appender[idx]['Uniprot_sequence'] = a
                appender[idx]['Uniprot_sequence_position'] = a_idx
                appender[idx]['PDB_sequence'] = b
                appender[idx]['PDB_sequence_position'] = b_idx
                a_idx += 1
                b_idx += 1

            if aa_flag == 'deletion':
                appender[idx]['Uniprot_sequence'] = a
                appender[idx]['Uniprot_sequence_position'] = a_idx
                a_idx += 1

            if aa_flag == 'insertion':
                appender[idx]['PDB_sequence'] = b
                appender[idx]['PDB_sequence_position'] = b_idx
                b_idx += 1
            
            idx += 1

    alignment_df = pd.DataFrame.from_dict(appender, orient='index')
    alignment_df = alignment_df[['Uniprot_ID', 'Structure', 'type', 'Uniprot_sequence', 'Uniprot_sequence_position', 'PDB_sequence', 'PDB_sequence_position']].fillna(value=np.nan)
    
    return alignment_df

### Let's take this information and use it to display relevant annotations

In [22]:
# load Biopython PDB packages

# PDBList to download PDBs
from Bio.PDB.PDBList import PDBList
pdbl = PDBList()

# PDBParser to load and work with files
from Bio.PDB.PDBParser import PDBParser
parser = PDBParser()

import urllib2
import uuid

In [23]:
structure = parser.get_structure('someprotein', pdb_file_path)

In [24]:
# get the ligands within this file for display
# from: http://stackoverflow.com/questions/25718201/remove-heteroatoms-from-pdb
ligands = []

for residue in structure.get_residues():
    tags = residue.get_full_id()
    # tags contains a tuple with (Structure ID, Model ID, Chain ID, (Residue ID))
    # Residue ID is a tuple with (*Hetero Field*, Residue ID, Insertion Code)

    # Thus you're interested in the Hetero Field, that is empty if the residue
    # is not a hetero atom or have some flag if it is (W for waters, H, etc.)
    if tags[3][0] != " " and tags[3][0] != "W":
        ligands.append(tags[3][0].split('_')[1].strip())
    else:
        continue
        
print(ligands)

['NA', 'ASP', 'CIR']


In [25]:
def get_pdb_seq2(structure):
    '''
    Takes in a Biopython structure object and returns a list of the structure's sequences
    :param structure: Biopython structure object
    :return: Dictionary of sequence strings with chain IDs as the key
    '''
    
    structure_seqs = {}
    
    # loop over each chain of the PDB
    for chain in structure[0]:
        
        chain_it = iter(chain) 
        
        chain_seq = []
        tracker = 0
        
        # loop over the residues
        for res in chain.get_residues():
            # NOTE: you can get the residue number too
            res_num = res.id[1]
            
            # double check if the residue name is a standard residue
            # if it is not a standard residue (ie. selenomethionine),
            # it will be filled in with an X on the next iteration)
            # TODO: except when it's at the beginning or end...
            if Polypeptide.is_aa(res, standard=True):
                full_id = res.get_full_id()
                end_tracker = full_id[3][1]
                i_code = full_id[3][2]
                aa = Polypeptide.three_to_one(res.get_resname())
                
                # tracker to fill in X's
                if end_tracker != (tracker + 1):
                    if i_code != ' ':
                        chain_seq.append((aa,end_tracker))
                        tracker = end_tracker + 1
                        continue
                    else:
                        xes = 'X'*(end_tracker - tracker - 1)
                        for x in xes:
                            chain_seq.append((x,end_tracker))
                        
                chain_seq.append((aa,end_tracker))
                tracker = end_tracker
                
            else:
                continue

        structure_seqs[chain.get_id()] = chain_seq

    return structure_seqs

In [26]:
my_structure_sequence = get_pdb_seq2(structure)

In [27]:
from Bio.PDB import Selection

In [28]:
get_alignment_allpos_df(alignment_filename).head(30) #How many rows does the user want to see?

Unnamed: 0,Uniprot_ID,Structure,type,Uniprot_sequence,Uniprot_sequence_position,PDB_sequence,PDB_sequence_position
0,P00966-1,ARGSS,mutation,M,1,X,1
1,P00966-1,ARGSS,mutation,S,2,X,2
2,P00966-1,ARGSS,mutation,S,3,X,3
3,P00966-1,ARGSS,match,K,4,K,4
4,P00966-1,ARGSS,match,G,5,G,5
5,P00966-1,ARGSS,match,S,6,S,6
6,P00966-1,ARGSS,match,V,7,V,7
7,P00966-1,ARGSS,match,V,8,V,8
8,P00966-1,ARGSS,match,L,9,L,9
9,P00966-1,ARGSS,match,A,10,A,10


In [30]:
# again, open debate as to how this can be called
my_mutation_resnum = int(raw_input("What is the corresponding mutation on the PDB structure?   "))

What is the corresponding mutation on the PDB structure?   6


In [31]:
# let's get the info from the structure
my_mutation_residue = structure[0][best_pdb_chain][my_mutation_resnum]
print my_mutation_residue

<Residue SER het=  resseq=6 icode= >


In [32]:
# we can use the Selection class to select all atoms of this residue
# 'A' here stands for ATOM (http://biopython.org/DIST/docs/api/Bio.PDB.Selection-module.html)
atom_list = Selection.unfold_entities(my_mutation_residue, 'A')

# then you can format this information for PV:
for a in atom_list:
    print('{}.{}.{}').format('A',my_mutation_resnum,a.id)

A.6.N
A.6.CA
A.6.C
A.6.O
A.6.CB
A.6.OG


In [33]:
# how to guide user through this??
label_input = raw_input("Copy and paste the desired label  ")

Copy and paste the desired label  A.6.CB


In [34]:
class PDBViewer_label(object):
    '''
    Contributed by: Ali Ebrahim
    '''
    
    def __init__(self, f):
        self.pdb = open(f).read()

    def _repr_html_(self):
        div_id = str(uuid.uuid4())
        
        return """<div id="%s" style="width: 800px; height: 600px">
    <div>
    
        <!--testing static label-->
        <style>
            .static-label {
                position: absolute;
                background: #0000;
                text-align: right;
                z-index: 1;
                font-weight: bold;
                width: 800px;
            }
        </style>
        
        <script>
            require.config({
                paths: {
                    "pv": "//biasmv.github.io/pv/js/pv.min"
                }
            });
            
            require(["pv"], function(pv) {
                pdb = "%s";
                
                <!--append the static label to the parent-->
                var parent = document.getElementById('%s');
                var staticLabel = document.createElement('div');
                staticLabel.innerHTML = 'myProtein';
                staticLabel.className = 'static-label';
                parent.appendChild(staticLabel);
                
                <!--load the structure-->
                structure = pv.io.pdb(pdb);
                
                <!--choose atom to label-->
                var carbonAlpha = structure.atom("%s");
                
                viewer = pv.Viewer(parent, {
                    width: '800',
                    height: '600',
                    antialias: true,
                    outline: true,
                    quality: 'medium',
                    style: 'hemilight',
                    background: 'white',
                    animateTime: 500,
                    selectionColor: '#f00'
                });
                
                <!--misc viewer functions-->
                viewer.fitParent();
                viewer.cartoon('molecule', structure);
                viewer.centerOn(structure);
                
                <!--atom label options-->
                var options = {
                 fontSize : 16, fontColor: '#f22', backgroundAlpha : 0.4
                };
                
                <!--display the label-->
                viewer.label('label', carbonAlpha.qualifiedName(), carbonAlpha.pos(), options);
                
                <!--not sure how the auto zoom works-->
                viewer.autoZoom();
            });
        </script>
        """ % (div_id, self.pdb.replace("\n", "\\n"), div_id, label_input)

In [35]:
PDBViewer_label(pdb_file_path)

options:
    how many chains to display, which ones to display
    where is the mutation? On which chain?
    Where is the active site? On which chains?

In [36]:
print "These are the chains present in the structure:   " + chains_present
chain_display = raw_input("Would you like to display all chains (answer with yes or no)?   ")
if chain_display.upper() == 'YES':
    cnames_pv_var = ''
    structure_var = 'structure'
    chain_iso = ''
elif chain_display.upper() == 'NO':
    cnames = raw_input("Type in the chain you would like to display:   ")
    cnames_pv = "cname: '" + cnames + "'"
    cnames_pv_var = "var chain = structure.select({cname: '" + cnames + "'})"
    structure_var = 'chain'
else:
    print "not sure what's here yet..."

These are the chains present in the structure:   ['A']
Would you like to display all chains (answer with yes or no)?   yes


In [37]:
bind_site_avail = raw_input("Is there a binding or active site you would like to display (answer with yes or no)?   ")
if bind_site_avail.upper() == 'YES':
    chain_res_disp_choice = raw_input("Would you like to display the site on both chains (answer with yes or no)?   ")
    if chain_res_disp_choice.upper() == 'YES':
        chain_iso = ''
        site_start = int(raw_input("Which AA sequence does it start at?   "))
        site_end = int(raw_input("Which AA sequence does it end at?   "))
        rnums_seq_script = str(site_start)
        res_loop_counter = site_start
        for a in range(site_end - site_start):
            res_loop_counter = res_loop_counter + 1
            rnums_seq_script += ', ' + str(res_loop_counter)
        rnums_script = "rnums : [" + rnums_seq_script + "]"
        is_res = ''
    elif chain_res_disp_choice.upper() == 'NO':
        chain_res_disp_chain_choice = raw_input("Which chain would you like to display the site on?   ")
        chain_iso = 'cname: \'' + chain_res_disp_chain_choice + '\','
        site_start = int(raw_input("Which AA sequence does it start at?   "))
        site_end = int(raw_input("Which AA sequence does it end at?   "))
        rnums_seq_script = str(site_start)
        res_loop_counter = site_start
        for a in range(site_end - site_start):
            res_loop_counter = res_loop_counter + 1
            rnums_seq_script += ', ' + str(res_loop_counter)
        rnums_script = "rnums : [" + rnums_seq_script + "]"
        is_res = ''
    else:
        print "tbd"
elif bind_site_avail.upper() == 'NO':
    site_start = 0
    site_end = 0
    rnums_script = ''
    is_res = '//'
else:
    print "tbd"
    
print chain_iso
print rnums_script

Is there a binding or active site you would like to display (answer with yes or no)?   yes
Would you like to display the site on both chains (answer with yes or no)?   no
Which chain would you like to display the site on?   A
Which AA sequence does it start at?   16
Which AA sequence does it end at?   20
cname: 'A',
rnums : [16, 17, 18, 19, 20]


In [38]:
class PDBViewer_options(object):
    '''
    Contributed by: Ali Ebrahim
    '''
    
    def __init__(self, f):
        self.pdb = open(f).read()

    def _repr_html_(self):
        div_id = str(uuid.uuid4())
        
        return """<div id="%s" style="width: 800px; height: 600px">
    <div>
    
        <!--testing static label-->
        <style>
            .static-label {
                position: absolute;
                background: #0000;
                text-align: right;
                z-index: 1;
                font-weight: bold;
                width: 800px;
            }
        </style>
        
        <script>
            require.config({
                paths: {
                    "pv": "//biasmv.github.io/pv/js/pv.min"
                }
            });
            
            require(["pv"], function(pv) {
                pdb = "%s";
                
                <!--append the static label to the parent-->
                var parent = document.getElementById('%s');
                var staticLabel = document.createElement('div');
                staticLabel.innerHTML = 'myProtein';
                staticLabel.className = 'static-label';
                parent.appendChild(staticLabel);
                
                <!--load the structure-->
                structure = pv.io.pdb(pdb);
                
                // select a chain to display and see if user wants to only display one
                %s
                
                <!--choose atom to label-->
                var carbonAlpha = structure.atom('A.27.CA');
                
                // choose a ligand to color (later on), if want to see on both chains remove cname
                %s var residues = structure.select({%s %s});
                
                viewer = pv.Viewer(parent, {
                    width: '800',
                    height: '600',
                    antialias: true,
                    outline: true,
                    quality: 'medium',
                    style: 'hemilight',
                    background: 'white',
                    animateTime: 500,
                    selectionColor: '#f00'
                });
            
                
                <!--misc viewer functions-->
                viewer.fitParent();
                
                // add cartoon visualization
                viewer.cartoon('molecule', %s);
                
                // color the selected residues in red, and display as red lines
                %s viewer.spheres('residues', residues,  { color: pv.color.uniform('red') });
                
                // center on the structure
                viewer.centerOn(%s);
                
                <!--atom label options-->
                var options = {
                 fontSize : 16, fontColor: '#f22', backgroundAlpha : 0.4
                };
                
                <!--display the label-->
                viewer.label('label', carbonAlpha.qualifiedName(), carbonAlpha.pos(), options);
                
                <!--not sure how the auto zoom works-->
                viewer.autoZoom();
            });
        </script>
r        """ % (div_id, self.pdb.replace("\n", "\\n"), div_id, cnames_pv_var, is_res, chain_iso, rnums_script, structure_var, is_res, structure_var)

In [39]:
pdb_file_path2 = pdbl.retrieve_pdb_file('2VFA')

Structure exists: '/Users/LAURENCE/Desktop/Senior Design/Untitled Folder/vf/pdb2vfa.ent' 


In [40]:
PDBViewer_options(pdb_file_path2)