## Workshop #2: PyRosetta

In [None]:
import math
import os

from pyrosetta import *
from pyrosetta.toolbox import pose_from_rcsb
# Initialise PyRosetta once, up front, before any cell below builds or loads a pose.
init()



In [None]:
# Address of the machine running PyMOL, taken from the "my_ip" environment
# variable (raises KeyError when the variable is not set).
my_ip = os.environ["my_ip"]

# Fetch PDB entry 1YY8 from the RCSB and load it as a pose.
pose = pose_from_rcsb("1YY8")

# Summarise the structure: full pose dump, sequence, and residue count.
print("pose: {}".format(pose))
print("sequence: {}".format(pose.sequence()))
print("protein has {} residues.".format(pose.total_residue()))

# Look at pose residue 500 and translate between pose and PDB numbering.
print("residue 500: {}".format(pose.residue(500).name()))
pdb_numbering = pose.pdb_info()
print("chain {} number {}".format(pdb_numbering.chain(500), pdb_numbering.number(500)))
print("internal number resi 100, chain A: {}".format(pdb_numbering.pdb2pose('A', 100)))

In [None]:
# --- Visualising the protein in PyMOL ---------------------------------------
from pyrosetta import PyMOLMover
from pyrosetta.toolbox import get_secstruct

# Mover that sends poses to the PyMOL session at my_ip, port 65000.
pymol = PyMOLMover(my_ip, 65000)
pymol.apply(pose)

# --- Secondary structure, torsion angles, and bond geometry -----------------
print("secondary structure:")
get_secstruct(pose)

# Backbone and first side-chain torsion of residue 5.
print("phi(5): {}".format(pose.phi(5)))
print("psi(5): {}".format(pose.psi(5)))
print("chi(1,5): {}".format(pose.chi(1, 5)))

# Atom identifiers for atoms 1, 2 and 3 of residue 5 (used as N, CA and C).
# These names are reused by later cells.
R5N, R5CA, R5C = [AtomID(atom_no, 5) for atom_no in (1, 2, 3)]

conformation = pose.conformation()
print("N-CA bond length: {}".format(conformation.bond_length(R5N, R5CA)))
print("CA-C bond length: {}".format(conformation.bond_length(R5CA, R5C)))
print("N-CA-C bond angle: {}".format(conformation.bond_angle(R5N, R5CA, R5C)))



In [None]:
# Manipulating protein geometry and PyMOL visualisation.
# Relies on R5N / R5CA / R5C (AtomIDs of residue 5) and `pymol` from the
# previous cell.

# Torsions of residue 5: phi, psi, and the first side-chain chi.
pose.set_phi(5, -60)
pose.set_psi(5, -43)
pose.set_chi(1, 5, 180)

# N-CA bond length of residue 5.
pose.conformation().set_bond_length(R5N, R5CA, 1.5)

# N-CA-C bond angle of residue 5: 110 degrees, passed in radians.
# math.radians replaces the hand-typed 3.14159 so the conversion is exact.
pose.conformation().set_bond_angle(R5N, R5CA, R5C, math.radians(110.))

# Push the modified pose to PyMOL.
pymol.apply(pose)

In [None]:
# Bond length and bond angle of residue 5 computed from atom coordinates.
# Relies on `pose` and the R5N / R5CA / R5C AtomIDs from earlier cells.

N_xyz = pose.residue(5).xyz("N")
CA_xyz = pose.residue(5).xyz("CA")

# Vector pointing from N to CA.
N_CA_vector = CA_xyz - N_xyz

# norm is a method: it has to be called, otherwise the bound-method object
# itself is printed instead of the length.
print(N_CA_vector.norm())
print(N_CA_vector)

# Take the length from the live vector rather than from coordinates pasted
# out of an earlier run, so the value follows any change made to the pose.
N_CA_vector_norm = N_CA_vector.norm()
print("N-CA bond length: {}".format(N_CA_vector_norm))

# bond_angle's result is treated as radians here and converted to degrees.
N_CA_C_ba = pose.conformation().bond_angle(R5N, R5CA, R5C)
print("N-CA-C bond angle: {}".format(N_CA_C_ba))
N_CA_C_ba_deg = math.degrees(N_CA_C_ba)
print(N_CA_C_ba_deg)

print("N {}".format(N_xyz))
print("CA {}".format(CA_xyz))


Programming Exercise 1: Torsion angle. Use the vector objects to write a script that calculates the torsion angle defined by four arbitrary atoms. This will require some knowledge of vector calculus. Hint: you will need to calculate the normal vectors of the two planes that define the dihedral angle.


Ideal helix. Write a program to create a 20-residue ideal helix by setting the φ and ψ
angles to the typical values for an α-helix. To start, use pose =
pose_from_sequence("AAA", "fa_standard") to create a new pose, except
using 20 “A”s in the argument to create a 20-residue polyalanine.
Output your structure using pose.dump_pdb("helix.pdb").

In [None]:
# Build a 20-residue polyalanine peptide.
pose = pose_from_sequence("A" * 20, "fa_standard")

# Set phi/psi on every residue. Pose residues are numbered from 1, so the
# range has to run to total_residue() inclusive; a fixed upper bound of 20
# would leave the last residue untouched.
# NOTE(review): psi = -60 is kept as written; the single-residue example
# earlier in this notebook uses psi = -43 -- confirm which value is intended.
for resid in range(1, pose.total_residue() + 1):
    pose.set_phi(resid, -60)
    pose.set_psi(resid, -60)

# Write the structure to disk.
pose.dump_pdb("helix.pdb")

# Show it in PyMOL (uses the `pymol` mover created in an earlier cell).
pymol.apply(pose)

Ideal strand. Write a program to create a 20-residue ideal β-strand by setting the φ and
ψ angles to values in the middle of the β region of the Ramachandran plot.

In [None]:
# Build a 20-residue polyalanine peptide.
pose = pose_from_sequence("A" * 20, "fa_standard")

# Set phi/psi on every residue to beta-strand values (phi = -140, psi = 130).
# Pose residues are numbered from 1, so the range has to run to
# total_residue() inclusive; a fixed upper bound of 20 would leave the last
# residue untouched.
for resid in range(1, pose.total_residue() + 1):
    pose.set_phi(resid, -140)
    pose.set_psi(resid, 130)

# Write the structure to disk.
pose.dump_pdb("B_strand.pdb")

# Show it in PyMOL (uses the `pymol` mover created in an earlier cell).
pymol.apply(pose)

#### Secondary structure propensities.
Write a program to calculate the propensity of each
residue type to appear in a helix. Loop through all residues in a protein, and count each
alanine that is in a helix, sheet, or loop according to some φ/ψ-based criteria. The
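propensity of alanine for helices can then be calculated as

$$P_{\alpha}^{\mathrm{Ala}} = \frac{N_{\alpha}^{\mathrm{Ala}} \,/\, N_{\alpha}^{\mathrm{total}}}{\left(N_{\alpha}^{\mathrm{Ala}} + N_{\beta}^{\mathrm{Ala}} + N_{L}^{\mathrm{Ala}}\right) \,/\, \left(N_{\alpha}^{\mathrm{total}} + N_{\beta}^{\mathrm{total}} + N_{L}^{\mathrm{total}}\right)}$$

where $N_{\alpha}^{\mathrm{Ala}}$, $N_{\beta}^{\mathrm{Ala}}$, and $N_{L}^{\mathrm{Ala}}$ are the counts of alanine residues in helices, sheets, and
loops, respectively, and $N_{\alpha}^{\mathrm{total}}$, $N_{\beta}^{\mathrm{total}}$, and $N_{L}^{\mathrm{total}}$ are the counts of all residues in
helices, sheets, and loops, respectively. (Note that terminal residues have different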
names in Rosetta than internal ones; e.g., an N-terminal ALA has the name
ALA_p:NtermProteinFull.)

In [None]:
# Scratch peptide: 19 residues, one of each standard amino acid except valine.
pose = pose_from_sequence("GALMFWKQESPICYHRNDT", "fa_standard")

# Dump phi, then psi, for positions 1 through 19 (one value per line).
for resid in range(1, 20):
    for torsion_of in (pose.phi, pose.psi):
        print(torsion_of(resid))

# TODO: the propensity calculation itself is still unsolved -- this cell only
# prints the backbone torsions and does not classify or count residues.

Idealize a protein. Write a program that sets all bond lengths and angles to their Engh
& Huber ideal values. Test your program using a structure from the pdb. What happens
to the resulting protein? Why?

In [None]:
# Idealize backbone bond angles of 1YY8 and show the result in PyMOL.
pose = pose_from_rcsb("1YY8")

# Target angles in degrees.
# NOTE(review): values are kept as written in this notebook; confirm them
# against the Engh & Huber tables.
N_CA_C_DEG = 110.
N_CA_CB_DEG = 112.
CB_CA_C_DEG = 110.

# Walk every residue of the pose (residues are numbered from 1), not just the
# first 19.
for resid in range(1, pose.total_residue() + 1):
    residue = pose.residue(resid)
    if not residue.is_protein():
        continue  # backbone atom names below only apply to amino acids

    # Resolve atoms by name instead of by fixed index: index 5 is not CB in
    # every residue type (glycine has no CB at all).
    N = AtomID(residue.atom_index("N"), resid)
    Ca = AtomID(residue.atom_index("CA"), resid)
    C = AtomID(residue.atom_index("C"), resid)
    Cb = AtomID(residue.atom_index("CB"), resid) if residue.has("CB") else None

    # Angles are passed in radians.
    pose.conformation().set_bond_angle(N, Ca, C, math.radians(N_CA_C_DEG))
    if Cb is not None:
        pose.conformation().set_bond_angle(N, Ca, Cb, math.radians(N_CA_CB_DEG))
        pose.conformation().set_bond_angle(Cb, Ca, C, math.radians(CB_CA_C_DEG))

# TODO: bond lengths are not idealized yet; only the three angles above are set.
pymol.apply(pose)
   
    