# DDG Stuff
This is the iPython Notebook containing a lot of my code for working with the Zemu DDG dataset. Each entry has its own little description below.
## Contents:
* [RunScript](#RunScript)<br>
* [RosettaOut_Parser](#RosettaOut_Parser.py)<br>
* [List_PDBs](#List_PDBs.py)<br>
* [PDB_REDO .pdb Scraper](#PDB_REDO-.pdb-Scraper)<br>
* [JSON File Editor](#JSON-File-Editor)<br>
* [PDB_REDO Data Setup](#PDB_REDO-data-setup)<br>
* [Resfile_Editor](#Resfile_Editor.py)<br>
* [Strip PDB_REDO .pdb's](#Strip-PDBs)<br>
* [Zip](#Zip)<br>
<br>
* [Scratch Space](#Scratch-Space)

## RunScript
This is a modified version of SubmitRun_DDG_Zemu_General.py for locally troubleshooting RosettaScripts (specifically DDG_Test.xml). 

In [None]:
#Modified SubmitRun_DDG_Zemu_General.py for local troubleshooting of DDG_Test.xml
#3/26/16

import os
import subprocess
import time
import sys
import shutil
import inspect
import gzip
import tempfile
import re
import json
from Bio.PDB import *
from datetime import *

# Every relative path used further down in this cell ('data/...', 'output/')
# is resolved against the directory selected here. The commented-out line is
# an alternative location for the same TestJobs directory.
#os.chdir('/Users/jameslucas/Kortemme_Rotation/TestJobs')
os.chdir('/home/james.lucas/Rotation/DDGBenchmarks_Test/TestJobs')

#From Kyle's Finalize.py
def read_mutations_resfile(filenum_dir):
    """Read the mutation entries of ``mutations_repack.resfile``.

    Everything up to and including the first line that begins with
    ``start`` is ignored. Each following non-blank line must consist of
    exactly four whitespace-separated fields.

    Args:
        filenum_dir: Directory that contains ``mutations_repack.resfile``.

    Returns:
        A list of ``[pdb_resnum, chain, pikaa, mut_res]`` lists (all strings),
        in file order.

    Raises:
        IOError/OSError: If the resfile cannot be opened.
        ValueError: If a non-blank line after ``start`` does not have exactly
            four fields.
    """
    resfile = os.path.join(filenum_dir, 'mutations_repack.resfile')
    mutations = []
    with open(resfile, 'r') as f:
        post_start = False
        for line in f:
            if post_start:
                fields = line.split()
                if not fields:
                    # Blank (or whitespace-only) lines, e.g. a trailing
                    # newline at the end of the file, carry no mutation.
                    continue
                pdb_resnum, chain, pikaa, mut_res = fields
                mutations.append([pdb_resnum, chain, pikaa, mut_res])
            elif line.startswith('start'):
                post_start = True
    return mutations

#From Kyle's Finalize.py
def find_neighbors(filenum_dir, pdb_path, neighbor_distance = 8.0):
    """Collect residues whose CB atom is close to a mutated residue's CB atom.

    The mutations are read from the resfile in ``filenum_dir`` and looked up
    in the first model of the structure parsed from ``pdb_path``. A residue
    is a neighbor when the CB-CB distance to any mutated residue is strictly
    below ``neighbor_distance`` (the mutated residue itself qualifies, its
    distance being zero). Residue pairs where either side has no CB atom are
    skipped.

    Args:
        filenum_dir: Directory holding ``mutations_repack.resfile``.
        pdb_path: Path of the PDB file to parse.
        neighbor_distance: Distance cutoff (presumably in Angstroms, the
            unit of PDB coordinates -- confirm).

    Returns:
        A set of ``(residue_id, chain_id)`` tuples, where ``residue_id`` is
        the Bio.PDB residue id tuple.
    """
    mutations = read_mutations_resfile(filenum_dir)
    structure = PDBParser(PERMISSIVE=1).get_structure('Open', pdb_path)

    # There should only be one model in PDB file
    model_count = sum(1 for _ in structure.get_models())
    assert( model_count == 1 )

    model = structure[0]
    chain_ids = [chain.get_id() for chain in model.get_chains()]
    neighbors = set()
    for res_id, chain_id, pikaa, mut_aa in mutations:
        mut_chain = str(chain_id)
        # A resfile position is either a plain number or a number followed
        # by a one-character insertion code.
        try:
            mut_pos = int(res_id)
            mut_insertion_code = ' '
        except ValueError:
            mut_pos = int(res_id[:-1])
            mut_insertion_code = res_id[-1]

        mut_residue = model[mut_chain][(' ', mut_pos, mut_insertion_code)]
        for chain in chain_ids:
            residue_ids = [res.get_id() for res in model[chain].get_residues()]
            for residue in residue_ids:
                try:
                    # Kyle note - might be good to do something else for consistency, since not all residues have CB
                    dist = mut_residue['CB'] - model[chain][residue]['CB']
                except KeyError:
                    continue
                if dist < neighbor_distance:
                    neighbors.add((residue, chain))

    return neighbors

#Parses dataset .json file and outputs chain to move and input PDB file directory
def json_parser(json_path = "data/blank_job_dict.json"):
    """Return the chain to move and the input PDB path of the first job.

    The "first" job is the entry whose key sorts first. Keys are compared as
    strings, so '10' sorts before '2'.

    Args:
        json_path: Path of the job dictionary JSON file. Defaults to the
            location this notebook has always used, relative to the current
            working directory.

    Returns:
        A ``(chaintomove, inputdir)`` tuple: the entry's
        ``"%%chainstomove%%"`` value and the first item of its
        ``"input_file_list"``.
    """
    # The context manager closes the file even if the JSON is malformed.
    with open(json_path) as jsonload:
        jsonfile = json.load(jsonload)
    key = sorted(jsonfile.keys())[0] #SGE_TASK_ID
    entry = jsonfile[key]
    chaintomove = entry["%%chainstomove%%"]
    inputdir = entry['input_file_list'][0]
    return chaintomove, inputdir

#Finds neighbors within 8A and adds position and Chain information to a list
def neighbors_list(pdb_filepath, pdb_file):
    """Build the comma-separated pivot residue list for one job.

    Every neighbor returned by ``find_neighbors`` (cutoff 8) contributes its
    own residue number and the numbers directly before and after it, each
    suffixed with the chain id. Candidates that do not exist in the
    structure are dropped.

    Compared with the earlier version of this function:
      * the structure is parsed from ``pdb_file`` rather than from one
        hard-coded PDB, so the existence check matches the job at hand;
      * residues are only paired with the chain they belong to (previously
        every residue number was combined with every chain id);
      * the chain id, not the repr of the chain object, is used for residues
        with an insertion code;
      * the result is sorted so repeated runs give the same string.

    Args:
        pdb_filepath: Directory holding ``mutations_repack.resfile``.
        pdb_file: Path of the PDB file for this job.

    Returns:
        A string such as ``'44E,45E,46E'``; empty if nothing matches.
    """
    neighbors = find_neighbors(pdb_filepath, pdb_file, 8)

    #Generating pivotlist with sets: each neighbor plus its +1/-1 positions
    pivotlist = set()
    for res_id, chain_id in neighbors:
        for offset in (-1, 0, 1):
            pivotlist.add(str(res_id[1] + offset) + str(chain_id))

    #Residues that really exist in this structure, labelled the same way
    structure = PDBParser().get_structure('TEST', pdb_file)
    all_residues = set()
    for chain in Selection.unfold_entities(structure, 'C'):
        chain_id = str(chain.get_id())
        for residue in chain.get_residues():
            res_id = residue.get_id()
            if res_id[2] != ' ':
                # NOTE(review): insertion-coded residues are labelled
                # number + code + chain, which a pivotlist entry (number +
                # chain) never equals -- confirm this exclusion is intended.
                all_residues.add(str(res_id[1]) + str(res_id[2]) + chain_id)
            else:
                all_residues.add(str(res_id[1]) + chain_id)

    return ','.join(sorted(pivotlist.intersection(all_residues)))

#Reads resfile and returns mutation position+chain
def resfile_stuff(pdb_filepath):
    """Return one ``position + chain`` label (e.g. ``'45A'``) per mutation.

    Args:
        pdb_filepath: Directory holding ``mutations_repack.resfile``.

    Returns:
        A list of strings in resfile order.
    """
    return [entry[0] + entry[1] for entry in read_mutations_resfile(pdb_filepath)]
    
#Prints CMD input with PDBID, associated mutation, and pivot residues
def bash(chaintomove, inputdir, outputdir):
    """Assemble and print the rosetta_scripts command line for one job.

    Nothing is executed: the code that creates the output directory and runs
    Rosetta is kept below as comments. The current working directory and the
    assembled command are printed.

    Changes from the earlier version:
      * ``-nstruct``/``5`` and ``-mute``/``<tracer>`` are separate list
        items, as a shell would pass them, so ``arg`` can be handed to
        ``subprocess.Popen`` unchanged (the printed command is the same
        apart from a dropped trailing space);
      * the unused ``target``/``targetlist`` values are no longer computed.

    Args:
        chaintomove: Value passed to the protocol as the ``chain`` script
            variable.
        inputdir: PDB path with exactly three components,
            ``<data>/<filenum>/<name>.pdb``.
        outputdir: Output root. It is concatenated directly with the file
            number, so it has to end with a path separator.

    Returns:
        The file number, i.e. the middle component of ``inputdir``.
    """
    #Removes PDB file from path, saves in variable filenum_dir
    data, filenum, pdbtemp = [part for part in str(inputdir).split('/') if part]
    filenum_dir = data + "/" + filenum
    PDBID = pdbtemp[:-4]  # only needed by the disabled progress message below
    predIDoutdir = outputdir + filenum

#    #Makes a folder for data dumping
#    print 'Making directory %s%s...' %(outputdir, filenum)
#    os.makedirs(predIDoutdir)

    #Assigns function output to variables for bash input (pivot_residues, resfile_relpath)
    #Both paths are expressed relative to the job's output directory, which
    #is the directory the disabled run code would start Rosetta in.
    pivot_residues = neighbors_list(filenum_dir, inputdir)
    resfile_relpath = os.path.relpath(filenum_dir, predIDoutdir)
    pdb_relpath = os.path.relpath(inputdir, predIDoutdir)

    print(os.getcwd())

#    arg = ['/Users/jameslucas/Rosetta/main/source/bin/rosetta_scripts.macosclangrelease',
    arg = ['/home/james.lucas/Rosetta/main/source/bin/rosetta_scripts.linuxgccrelease',
           '-s',
           pdb_relpath,
           '-parser:protocol',
           '../../../RosettaScripts/DDG_Protocol_v1.xml',
           '-ignore_unrecognized_res',
           '-parser:script_vars',
           'resfile_relpath=%s' %(resfile_relpath),
           'pivot_residues=%s' %(pivot_residues),
           'chain=%s' %(chaintomove),
           '-nstruct',
           '5',
           '-mute',
           'protocols.canonical_sampling.MetropolisHastingsMover'
          ]

    submit = ' '.join(arg)
    print(submit)
#    print 'Working on: %s %s' %(filenum, PDBID)

#    outfile_path = os.path.join(predIDoutdir, 'rosetta.out')
#    rosetta_outfile = open(outfile_path, 'w')
#    print 'Running RosettaScript...'
#    rosetta_process = subprocess.Popen(arg, stdout=rosetta_outfile, cwd=predIDoutdir)
#    return_code = rosetta_process.wait()
#    print 'Task return code:', return_code, '\n'
#    rosetta_outfile.close()

    return filenum

#Define paths
# bash() appends the file number directly to this string, so the trailing
# slash is required.
outputdir = 'output/'

#ACTION!!!
# Take the first job from the job dictionary and print its Rosetta command.
chaintomove, inputdir = json_parser()
filenum = bash(chaintomove, inputdir, outputdir)
#print 'FINISHED!!!'


## RosettaOut_Parser.py
This script parses DDG_Test.xml RosettaScript output and extracts individual and sum DDG energies into a bunch of nested dictionaries (which I understand is basically json format?).

Goes through a filetree formatted like this:

Working directory (specified by my_working_directory variable)<br>
----PredictionID 1<br>
--------rosetta.out<br>
----PredictionID 2<br>
--------rosetta.out<br>
----PredictionID 3<br>
--------rosetta.out<br>

In [60]:
#Iterates through directories to open Rosetta.out and creates Nested Dictionaries:
#>Prediction ID
#>>Structure ID (nstruct 1-100)
#>>>Structure Type (WT, Mut, DDG)
#>>>>Score Type

import os
import linecache
import json
import datetime

#Parses rosetta.out and Runscript output and adds values to dictionary fattydict
#Counts unfinished jobs in dictionary unfinished
def parse_rosetta_out(workingdir, verbose = True, expected_structs = 50,
                      log_prefix = "SubmitRun_DDG_Zemu_General_v2.py.o"):
    """Parse every numbered task directory below ``workingdir``.

    For each subdirectory whose name is an integer, ``rosetta.out`` is parsed
    into per-structure WT/Mutant score dictionaries, and any file whose name
    starts with ``log_prefix`` is parsed for start/end time, return code and
    peak memory.

    Fixes relative to the earlier version:
      * memory readings given in gigabytes are now stored (a ``continue``
        used to skip the assignment);
      * the "not a number" message names the offending directory instead of
        formatting an undefined or stale variable;
      * blank lines in ``rosetta.out`` are skipped instead of raising
        ``IndexError``;
      * all files are closed after reading;
      * the log files are looked up in the task directory itself rather than
        in a path rebuilt from the parsed number.

    Args:
        workingdir: Directory containing one subdirectory per prediction ID.
        verbose: When true, print how many structures each task completed.
        expected_structs: Tasks with fewer completed structures than this
            are reported as unfinished.
        log_prefix: File-name prefix of the submit-script log to parse.

    Returns:
        ``(fattydict, unfinished)``. ``fattydict`` maps prediction ID (int)
        to a dict with key ``'structIDs'`` (structure number -> ``{'WT':
        {...}, 'Mutant': {...}, 'Runtime': float}``) plus whatever log fields
        were found. ``unfinished`` maps prediction ID to its completed
        structure count. A task without ``rosetta.out`` gets an empty
        ``'structIDs'`` dict and is not listed in ``unfinished``.
    """
    fattydict = {}
    unfinished = {}

    task_dirs = [os.path.join(workingdir, d) for d in os.listdir(workingdir)
                 if os.path.isdir(os.path.join(workingdir, d))]

    #For each subdirectory (PredictionID) in the working directory (output/)...
    for task_dir in task_dirs:
        try:
            i = int(os.path.basename(task_dir))
        except ValueError:
            print('Directory %s is not a number' % task_dir)
            continue

        fattydict[i] = {'structIDs': {}}
        filename = os.path.join(task_dir, "rosetta.out")

        if not os.path.isfile(filename):
            print('Missing output file: %s' % filename)
            continue

        completed = _parse_struct_scores(filename, fattydict[i]['structIDs'])

        if verbose:
            print('%s: %s structures completed' % (i, completed))

        #Parse output file for Max VMem usage (GB), start/end times, and return code
        for doc in os.listdir(task_dir):
            if doc.startswith(log_prefix):
                _parse_submit_log(os.path.join(task_dir, doc), fattydict[i])

        #Keeps track of unfinished jobs
        if completed < expected_structs:
            unfinished[i] = completed

    return fattydict, unfinished


def _parse_struct_scores(filename, struct_ids):
    """Fill ``struct_ids`` from one rosetta.out; return the completed count.

    Score blocks alternate WT, Mutant, WT, ... and each block ends with a
    ``Sum ddg:`` line. After a Mutant block the pair is stored under the
    current structure number; a ``reported success in`` line then adds the
    runtime and advances the structure number.
    """
    score_list = ["fa_atr","fa_rep","fa_sol","fa_intra_rep","mm_bend","fa_elec","hbond_sr_bb","hbond_lr_bb","hbond_bb_sc","hbond_sc","rama","omega","fa_dun","p_aa_pp","yhh_planarity","ref"]
    temp_dict = {'WT': {}, 'Mutant': {}}
    structID = 1
    counter = 0  # number of "Sum ddg" lines seen so far

    with open(filename, 'r') as rosetta_out:
        for line in rosetta_out:
            fields = line.split()
            if not fields:
                continue

            #WT or Mutant scores for current structID
            struct_type = 'WT' if counter % 2 == 0 else 'Mutant'

            #Looks for scoretype as first phrase in line, adds score to dict if present
            if any(score in fields[0] for score in score_list):
                temp_dict[struct_type][fields[0]] = float(fields[1])

            if "Sum ddg: " in line:
                temp_dict[struct_type][fields[0]] = float(fields[2])
                counter = counter + 1
                if counter % 2 == 0 and temp_dict['Mutant']:
                    struct_ids[structID] = temp_dict
                    temp_dict = {'WT': {}, 'Mutant': {}}

            if "reported success in" in line:
                struct_ids[structID]['Runtime'] = float(fields[5])
                structID = structID + 1

    return structID - 1


def _parse_submit_log(logfile, task_record):
    """Copy start/end time, return code and peak memory (GB) into ``task_record``."""
    date_format_string = '%Y-%m-%d %H:%M:%S'
    with open(logfile, 'r') as log:
        for line in log:
            if line.startswith("Starting time:"):
                starting_time = line[15:].strip()
                task_record['Starting time'] = datetime.datetime.strptime(starting_time, date_format_string)
            elif line.startswith("Ending time:"):
                ending_time = line[13:].strip()
                task_record['Ending time'] = datetime.datetime.strptime(ending_time, date_format_string)
            elif line.startswith("Task return code:"):
                task_record['Task return code'] = int(line.split()[3])
            elif line.startswith("Max virtual memory usage:"):
                reading = line.split()[4]
                mem_usage = float(reading[:-1])
                size = reading[-1:]
                if size == 'M':
                    mem_usage = mem_usage / 1000
                elif size != 'G':
                    print("Memory usage not measured in MB or GB!")
                task_record['Max virtual memory usage:'] = mem_usage

def main(my_working_directory = '/Users/jameslucas/Kortemme_Rotation/TestJobs/output/Rosettaout_test/'):
    """Parse all task directories in one output directory and print the result.

    Args:
        my_working_directory: Directory with one subdirectory per prediction
            ID. Defaults to the test directory this notebook was written
            against; pass another path (e.g. ``os.getcwd() + '/'``) to parse
            a different run.

    Side effects:
        Prints the directory and the parsed dictionary, then changes the
        process working directory to ``my_working_directory`` (where the
        disabled JSON dump below would write its file).
    """
    print(my_working_directory)
    parsed_dict, unfinished_jobs = parse_rosetta_out(my_working_directory)

    print(parsed_dict)
    os.chdir(my_working_directory)

    # NOTE(review): parsed_dict holds datetime objects ('Starting time',
    # 'Ending time'), which json.dumps cannot serialise as-is; convert them
    # before re-enabling this line.
#    open("DDG_Data.json", "w").write(json.dumps(parsed_dict, sort_keys=True,separators=(',', ': ')))

if __name__ == '__main__':
    main()

/Users/jameslucas/Kortemme_Rotation/TestJobs/output/Rosettaout_test/
68328: 50 structures completed
{68328L: {'Max virtual memory usage:': 0.8949, 'Starting time': datetime.datetime(2016, 4, 12, 23, 9, 56), 'Ending time': datetime.datetime(2016, 4, 13, 10, 48, 2), 'Task return code': 0L, 'structIDs': {1: {'WT': {'hbond_bb_sc': -1.103, 'ref': 0.0, 'omega': 0.0, 'Sum': -37.7484, 'hbond_sr_bb': 0.0, 'yhh_planarity': 0.0, 'fa_dun': 0.0, 'fa_intra_rep': 0.0, 'fa_atr': -55.41, 'p_aa_pp': 0.0, 'fa_sol': 30.969, 'mm_bend': 0.0, 'hbond_sc': -2.604, 'fa_rep': 4.594, 'hbond_lr_bb': -5.731, 'rama': 0.0, 'fa_elec': -8.462}, 'Runtime': 810.0, 'Mutant': {'hbond_bb_sc': -0.927, 'ref': 0.0, 'omega': 0.0, 'Sum': -35.809, 'hbond_sr_bb': 0.0, 'yhh_planarity': 0.0, 'fa_dun': 0.0, 'fa_intra_rep': 0.0, 'fa_atr': -53.133, 'p_aa_pp': 0.0, 'fa_sol': 32.185, 'mm_bend': 0.0, 'hbond_sc': -3.254, 'fa_rep': 4.209, 'hbond_lr_bb': -6.088, 'rama': 0.0, 'fa_elec': -8.801}}, 2: {'WT': {'hbond_bb_sc': -1.164, 'ref': 0.0, 

## List_PDBs.py
Does pretty much what it says, lists all of the .pdb files in subdirectories within a specified directory (specified by workingdir variable). Probably should have used a set to make the list non-redundant, but I just took the output and sorted it in excel. :|

In [None]:
#Prints out all PDB files in subdirectories of workingdir
#List_PDBs.py
import os

workingdir = '/Users/jameslucas/Kortemme_Rotation/PDB_REDO'
#workingdir = '/netapp/home/james.lucas/160315-kyleb_james-backrub-rscript/data/'
#workingdir = '/Users/jameslucas/Kortemme_Rotation/output/'

# Print the name of every entry directly inside workingdir that ends in
# '.pdb', in the order os.listdir returns them.
pdb_names = [entry for entry in os.listdir(workingdir) if entry.endswith('.pdb')]
for i in pdb_names:
    print(i)

# Disabled variant that looks one level down, inside each subdirectory:
#for i in os.listdir(workingdir):
#    if os.path.isdir(workingdir+i):
#        for j in os.listdir(workingdir+i):
#            if j.endswith('.pdb'):
#                print j
#            else:
#                continue
#    else:
#        continue

## PDB_REDO .pdb Scraper
Made a non-redundant list of PDBs using List_PDBs.py and Excel (yay) to download from the PDB_REDO database. Takes each PDBID from the list and downloads the PDB_REDO version. Heads up, many of the PDB_REDO versions of the structures in the Zemu dataset do not exist. The script still reads the .csv and creates a blank .pdb file if the PDB_REDO version doesn't exist, so I did need to go back and toss out the empty .pdb files. 

In [None]:
#Downloads all PDB_REDO's from a list
import pandas as pd
import requests
import os

os.chdir('/Users/jameslucas/Kortemme_Rotation')
cwd = os.getcwd()
new_wd = cwd + '/PDB_REDO'

# Create the download directory on first use. Checking explicitly (instead
# of a bare except around mkdir) keeps real failures such as permission
# errors visible.
if os.path.isdir(new_wd):
    print('PDB_REDO already exists!')
else:
    os.mkdir(new_wd)

os.chdir(new_wd)

df = pd.read_csv('/Users/jameslucas/Kortemme_Rotation/PDB_List.csv')

# Each row supplies a PDB id (first column) and a chain label (second
# column). The PDB_REDO URL is built from the id and its two middle
# characters.
for i,j in df.iterrows():
    pdb_id = j.iloc[0]
    chains = j.iloc[1]
    url = 'http://www.cmbi.ru.nl/pdb_redo/%s/%s/%s_final.pdb' %(pdb_id[1:-1], pdb_id, pdb_id)
    r = requests.get(url)
    out_name = "%s_%s" % (pdb_id.upper(), chains)
    print(out_name)
    if r.status_code != 200:
        # No PDB_REDO entry (or the server refused): do not write the error
        # body out as if it were a structure file.
        print('  no PDB_REDO structure for %s (HTTP %s), skipped' % (pdb_id, r.status_code))
        continue
    with open(out_name + ".pdb", "wb") as pdbfile:
        pdbfile.write(r.content)

print('DONE!')


## JSON File Editor
This script edits entries in a .json file to match the contents of a desired data directory. This was written to remove PredictionID directories from the Zemu dataset .json file with missing/nonexistent PDB_REDO structures. Takes the PDB name from a json (jsoninfo) and checks against directory containing PDB_REDO .pdb's (stuff). Continues over .json entry if PDB exists and deletes entry if PDB is missing.

In [None]:
#Removes unnecessary entries in json file based on existing PDB_REDO pdb's in a directory
import json
import os
import re

#Initialize stuff
stuff = '/Users/jameslucas/Kortemme_Rotation/PDB_REDO/'
os.chdir(stuff)
with open("/Users/jameslucas/Kortemme_Rotation/data/blank_job_dict.json") as json_in:
    jsoninfo = json.load(json_in)
del_list = []

#Checks if PDB_REDO exists for each jsoninfo entry, adds unwanted entries to list
for i in jsoninfo:
    # Only the file name (last path component) of the job's first input file
    # matters here, whatever the depth of the path.
    pdbfile = jsoninfo[i]["input_file_list"][0].split('/')[-1]
    if not os.path.isfile(stuff + pdbfile):
        del_list.append(i)

#Sorts list for aesthetics
del_list.sort()
print(del_list)

#Deletes unwanted entries from jsoninfo
for item in del_list:
    jsoninfo.pop(item)

#Write updated jsoninfo to file (in the PDB_REDO directory, see chdir above)
with open("blank_job_dict_updated.json", "w") as json_out:
    json_out.write(
        json.dumps(jsoninfo, sort_keys=True,separators=(',', ': ')))

## PDB_REDO data setup
For each entry in a .json file (jsoninfo), this script takes all of the information from the original Zemu dataset (source) sans .pdb and combines it with the correct PDB_REDO .pdb file from another directory (PDB_REDO) in a new destination folder (dest).

In [None]:
#Copies subdirectories into new data directory, ignores .pdb's
#Copies and replaces REDO_PDB .pdb's into new data directory
import os
import shutil
import json

with open("/netapp/home/james.lucas/PDB_REDO/blank_job_dict_updated.json") as json_in:
    jsoninfo = json.load(json_in)
source = '/netapp/home/james.lucas/160322-james-backrub-rscript-full/data'
dest = '/netapp/home/james.lucas/Zemu-PDB_REDO_Dataset/data'
PDB_REDO = '/netapp/home/james.lucas/PDB_REDO/'
for i in jsoninfo:
    # File name of the job's first input file. str.split is used so this
    # cell does not depend on the re module, which it never imports.
    pdbfile = jsoninfo[i]["input_file_list"][0].split('/')[-1]
    # os.path.join supplies the separator that plain concatenation dropped
    # (source and dest have no trailing slash, so source + i pointed at
    # '.../data<key>' instead of '.../data/<key>').
    task_source = os.path.join(source, i)
    task_dest = os.path.join(dest, i)
    #Copy Resfiles and stuff from source to PDB_REDO data directory
    shutil.copytree(task_source, task_dest, ignore = shutil.ignore_patterns('*.pdb'))
    #Copy PDB_REDO pdb's from PDB_REDO directory to new data directory
    # NOTE(review): this expects the structure at PDB_REDO/<key>/<pdbfile>;
    # confirm that per-key layout exists on this machine.
    shutil.copy2(os.path.join(PDB_REDO, i, pdbfile), os.path.join(task_dest, pdbfile))

## Resfile_Editor.py
Literally iterates through all directories in /data and replaces 'NATRO' with 'NATAA' in mutations.resfile and dumps that into a new file mutations_repack.resfile.

In [None]:
#Edits all resfiles to allow for repack only
import os

def resfile_editor(workingdir):
    """Write a repack-only copy of the resfile in every subdirectory.

    For each directory directly inside ``workingdir``, ``mutations.resfile``
    is copied to ``mutations_repack.resfile`` with every line that consists
    solely of ``NATRO`` replaced by ``NATAA``. All other lines are copied
    unchanged.

    Paths are built with ``os.path.join``, so ``workingdir`` works with or
    without a trailing slash, and both files are closed after each
    directory.

    Args:
        workingdir: Directory whose subdirectories each hold a
            ``mutations.resfile``.

    Raises:
        IOError/OSError: If a subdirectory has no readable
            ``mutations.resfile``.
    """
    for i in os.listdir(workingdir):
        task_dir = os.path.join(workingdir, i)
        if not os.path.isdir(task_dir):
            continue
        with open(os.path.join(task_dir, 'mutations.resfile'), 'r') as resfile:
            with open(os.path.join(task_dir, 'mutations_repack.resfile'), 'w') as outfile:
                for line in resfile:
                    if line.strip() == 'NATRO':
                        outfile.write('NATAA\n')
                    else:
                        outfile.write(line)
            
# Move into the data directory (expressed relative to the current working
# directory), then pass its absolute path to resfile_editor.
datapath = os.path.relpath('/Users/jameslucas/Kortemme_Rotation/data', os.getcwd())
os.chdir(datapath)
# The trailing slash matters: resfile_editor builds its paths by plain
# string concatenation with workingdir.
workingdir = os.getcwd() + '/'
resfile_editor(workingdir)

## Strip PDBs
This script reads PDB IDs from a .csv (generated from List_PDBs.py, manually sorted for PDB_REDO only structures) and opens the original pdb file (pdb_original). It looks at the name to see which chains are needed and only writes those chains to a new PDB file (pdb_stripped).

In [None]:
import pandas as pd
import os

def strip_pdbs(df):
    """Write chain-filtered copies of the PDB files listed in ``df``.

    For every row, ``<cwd>/PDB_REDO/<PDB>_<Chains>.pdb`` is read and
    ``<cwd>/PDB_REDO_Stripped/<PDB>_<Chains>.pdb`` is written. ATOM and
    HETATM records are kept only when their chain identifier occurs in the
    row's ``Chains`` string; every other line is copied unchanged.

    The record name and chain identifier are read from their fixed columns
    (1-6 and 22 of the PDB format) rather than by splitting on whitespace.
    Splitting mis-parses records whose columns touch, e.g. ``A1000`` for
    chain A, residue 1000, and such atoms used to be dropped. Both files are
    closed after each row, so the output is fully flushed.

    Args:
        df: DataFrame with string columns ``'PDB'`` and ``'Chains'``.
    """
    for i in df.iterrows():
        row = i[1]
        chains = row['Chains']
        pdb_name = row['PDB'].upper() + '_' + chains + '.pdb'
        with open(os.getcwd() + '/PDB_REDO/' + pdb_name, 'r') as pdb_original:
            with open(os.getcwd() + '/PDB_REDO_Stripped/' + pdb_name, 'w') as pdb_stripped:
                for line in pdb_original:
                    record = line[0:6].strip()
                    if record == 'ATOM' or record == 'HETATM':
                        # Index 21 is column 22, the chain identifier. A
                        # record too short to have one is not kept.
                        if len(line) > 21 and line[21] in chains:
                            pdb_stripped.write(line)
                    else:
                        pdb_stripped.write(line)
                
# NOTE(review): this is an absolute path at the filesystem root, whereas
# strip_pdbs reads its structures from <cwd>/PDB_REDO/ -- confirm the CSV
# really lives at /PDB_REDO and not in <cwd>/PDB_REDO.
df = pd.read_csv('/PDB_REDO/PDB_List_REDO.csv')

# Create the output directory on first use. Testing for it explicitly
# (rather than a bare except around mkdir) keeps other failures, such as a
# permission error, from being reported as "already exists".
if os.path.isdir('PDB_REDO_Stripped'):
    print('PDB_REDO_Stripped already exists')
else:
    os.mkdir('PDB_REDO_Stripped')

strip_pdbs(df)

## Zip
Goes through the subdirectories in output and zips each one that contains all of the desired files. Deleting the original directory afterwards (!!!) is currently commented out in the code. Set the home variable to the output directory.

In [None]:
import os
import shutil
import zipfile

def zippify(outputdir, expected_files = 14):
    """Zip every directory under ``outputdir`` that holds a full set of files.

    A directory qualifies when it directly contains exactly
    ``expected_files`` files. Its files are stored, under their bare names,
    in ``<directory>.zip`` next to the directory. The original directory is
    left in place (the removal call stays disabled).

    Unlike the earlier version this does not ``os.chdir`` into each
    directory, so the process working directory is untouched and a relative
    ``outputdir`` keeps working for the whole walk.

    Args:
        outputdir: Root directory to walk.
        expected_files: Number of files that marks a directory as complete.
    """
    for dirname, subdirs, files in os.walk(outputdir):
        if len(files) != expected_files: ##Number of desired files present
            continue
        # The context manager finalises the archive even if a write fails.
        with zipfile.ZipFile('%s.zip' % dirname, mode = 'w') as myzip:
            for j in files:
                # Second argument is the name inside the archive: the bare
                # file name, as when the file was added from inside dirname.
                myzip.write(os.path.join(dirname, j), j)
        #os.removedirs(dirname)
            
# os.chdir() returns None, so keep the path itself in ``home`` and change
# directory in a separate step.
home = '/home/james/DDGBenchmarks_Test/'
os.chdir(home)
#outputdir = os.chdir('/netapp/home/james.lucas/160322-james-backrub-rscript-full/output')
outputdir = os.getcwd()
zippify(outputdir)

# Scratch Space
An area to test bits of code and keep old cells

In [None]:
#Zip
# The import had been fused into the comment above ("#Zipimport os"), which
# left os undefined when this cell is run in a fresh kernel.
import os

# NOTE: ``id`` shadows the builtin of the same name; kept because the name
# is visible to the rest of the notebook.
id = 99
pwd = os.getcwd()
print(pwd)

# Last four characters of the padded id string
new_id = ('12345678' + str(id))[-4:]
print(new_id)

In [None]:
target = ['23A','34B','45C','56','67','78','89']

# Comma-separated form of target, without a trailing comma
targetlist = ','.join(target)
print(targetlist)

import os
print(os.getcwd())

In [None]:
import os

os.chdir('/home/james.lucas/Rotation/DDGBenchmarks_Test/data/')
data = os.getcwd()

# Print the name of every entry found inside each subdirectory of the data
# directory (which is the current directory after the chdir above).
subdirectories = (entry for entry in os.listdir(os.getcwd()) if os.path.isdir(entry))
for datadir in subdirectories:
    for files in os.listdir(datadir):
        print(files)
    

In [None]:
#qsub commands
qsub
qhold (job #)
qrls (job #).(task #'s):1

In [None]:
#Submit script!!!
#OLD VERSION, NO LONGER IN USE

import json
import re
import pandas as pd
from finalize_JL import find_neighbors
from finalize_JL import read_mutations_resfile
import subprocess

#Parses dataset .json file and outputs chain to move and input PDB file directory
def json_parser(json_path = "/Users/jameslucas/Kortemme_Rotation/blank_job_dict_copy.json"):
    """Return the chain to move and the input PDB path of the first job.

    The "first" job is the entry whose key sorts first (string comparison).

    Args:
        json_path: Path of the job dictionary JSON file; defaults to the
            copy this old script always read.

    Returns:
        A ``(chaintomove, directory)`` tuple: the entry's
        ``"%%chainstomove%%"`` value and the first item of its
        ``"input_file_list"``.
    """
    # The context manager closes the file, which the earlier version never did.
    with open(json_path) as asdf:
        jsonfile = json.load(asdf)

    key = sorted(jsonfile.keys())[0]
    entry = jsonfile[key]
    chaintomove = entry["%%chainstomove%%"]
    directory = entry['input_file_list'][0]

    return chaintomove, directory

#Finds neighbors within 8A and adds position and Chain information
def neighbors_list(pdb_filepath, pdb_file):
    """Return the neighbors of the mutation site as ``'45A,46A,...'``.

    Each ``(residue_id, chain_id)`` pair from ``find_neighbors`` (cutoff 8)
    becomes residue number + insertion code (if any) + chain id. The label
    is built from the tuple fields directly. The earlier version ran a regex
    over ``str(tuple)``, which split insertion-coded residues into two
    entries and put a comma between a negative residue number and its chain.

    NOTE(review): number + insertion code + chain follows the labelling used
    for insertion-coded residues in the newer RunScript cell; confirm it is
    what the Rosetta protocol expects.

    Args:
        pdb_filepath: Job data directory, relative to the Kortemme_Rotation
            directory.
        pdb_file: PDB path, relative to the same directory.

    Returns:
        The comma-separated labels, in the iteration order of the neighbor
        set; empty if there are no neighbors.
    """
    neighbors = find_neighbors("/Users/jameslucas/Kortemme_Rotation/"+pdb_filepath, "/Users/jameslucas/Kortemme_Rotation/" + pdb_file, 8)
    pivots = []
    for res_id, chain_id in neighbors:
        pivots.append(str(res_id[1]) + str(res_id[2]).strip() + str(chain_id))
    return ','.join(pivots)

#Reads resfile and returns mutation position, chain and type
def resfile_stuff(pdb_filepath):
    """Return ``(position, chain, mut_to)`` of the last mutation in the resfile.

    The resfile is read from ``pdb_filepath`` below the Kortemme_Rotation
    directory. When it lists several mutations only the final one is
    reported; when it lists none the function fails because the three names
    are never assigned.
    """
    entries = read_mutations_resfile("/Users/jameslucas/Kortemme_Rotation/"+pdb_filepath)
    for entry in entries:
        # Fields are [position, chain, PIKAA keyword, new residue]
        position, chain, mut_to = entry[0], entry[1], entry[3]
    return position, chain, mut_to
    
#Runs rosetta_scripts for one PDB, its mutation, and its pivot residues
def bash(chaintomove, pdb_file):
    """Run the DDG_Test.xml RosettaScript for one job (old local version).

    Changes from the earlier version:
      * ``os`` is imported locally (this cell never imported it);
      * ``-nstruct`` and ``5`` are separate argv items, as a shell would
        pass them;
      * the function waits for Rosetta to finish while the log file is
        still open, and returns its exit code (it used to return ``None``
        immediately, leaving the process unattended);
      * an existing per-job output directory no longer aborts a rerun.

    Args:
        chaintomove: Unused here; kept for the caller's signature.
        pdb_file: PDB path with exactly three components,
            ``<data>/<filenum>/<name>.pdb``, relative to the
            Kortemme_Rotation directory.

    Returns:
        The exit code of the rosetta_scripts process.

    Raises:
        KeyError: If the mutant residue is not one of the 20 one-letter
            codes in the lookup table.
    """
    import os

    #Working directory
    workingdir = '/Users/jameslucas/Kortemme_Rotation/output/'
    #Removes PDB file from path, saves in variable data_dir
    data, filenum, pdbtemp = [part for part in str(pdb_file).split('/') if part]
    data_dir = data + "/" + filenum
    PDBID = pdbtemp[:-4]

    #Makes a folder for data dumping
    task_outdir = workingdir + filenum
    if not os.path.isdir(task_outdir):
        os.mkdir(task_outdir)

    #Dictionary: 1- to 3-letter code
    res_dict = {
        'A':'ALA',
        'C':'CYS',
        'D':'ASP',
        'E':'GLU',
        'F':'PHE',
        'G':'GLY',
        'H':'HIS',
        'I':'ILE',
        'K':'LYS',
        'L':'LEU',
        'M':'MET',
        'N':'ASN',
        'P':'PRO',
        'Q':'GLN',
        'R':'ARG',
        'S':'SER',
        'T':'THR',
        'V':'VAL',
        'W':'TRP',
        'Y':'TYR'
    }

    #Assigns function output to variables for bash input (pivot_residues, target, new_res)
    target, chain, new_res_one = resfile_stuff(data_dir)
    new_res_three = res_dict[new_res_one]
    pivot_residues = neighbors_list(data_dir, pdb_file)

    #Local Testing - All things
    arg = ['/Users/jameslucas/rosetta_src_2016.02.58402_bundle/main/source/bin/rosetta_scripts.macosclangrelease',
           '-s',
           '/Users/jameslucas/Kortemme_Rotation/%s/%s.pdb' %(data_dir, PDBID),
           '-parser:protocol',
           '/Users/jameslucas/Kortemme_Rotation/DDG_Test.xml',
           '-ignore_unrecognized_res',
           '-out:path:pdb',
           task_outdir,
           '-parser:script_vars',
           'target=%s' %(target),
           'new_res=%s' %(new_res_three),
           'pivot_residues=%s' %(pivot_residues),
           '-nstruct',
           '5'
          ]

    # Rosetta's stdout goes to one shared log directly in workingdir; it is
    # overwritten on every call.
    outfile_path = os.path.join(workingdir, 'rosetta.out')
    with open(outfile_path, 'w') as rosetta_outfile:
        rosetta_process = subprocess.Popen(arg, stdout=rosetta_outfile, cwd=workingdir)
        return rosetta_process.wait()

#    subprocess.call(arg)

#    subprocess.call(arg)
        
# Take the first job from the job dictionary and hand it to bash(), which
# launches rosetta_scripts for it.
chaintomove, pdb_file = json_parser()
bash(chaintomove, pdb_file)

In [None]:
##pyRMSD method for mutation_rmsd() in StructuralAnalysisMethods
# NOTE: this is a pasted function body without its ``def`` line. ref_atoms,
# alt_atoms, np, pyRMSD and availableCalculators are all defined outside this
# cell, so it cannot run on its own.
    ref_sorted = []
    alt_sorted = []
    
    # Pair each reference atom with every alternate atom whose full-id fields
    # 2, 3 and 4 are equal (presumably chain id, residue id and atom id in
    # Bio.PDB's full-id tuple -- confirm), collecting the coordinates of both
    # in the same order.
    for i in ref_atoms:
        for j in alt_atoms:
            if i.get_full_id()[2] == j.get_full_id()[2]:
                if i.get_full_id()[3] == j.get_full_id()[3]:
                    if i.get_full_id()[4] == j.get_full_id()[4]:
                        ref_sorted.append(i.get_coord())
                        alt_sorted.append(j.get_coord())
   
    ref_atoms_array = np.asarray(ref_sorted)
    alt_atoms_array = np.asarray(alt_sorted)
    
    print ref_atoms_array
    print alt_atoms_array
    
    # NOTE(review): this compares the lengths of the input lists, not of the
    # matched coordinate lists built above -- confirm which was intended.
    assert( len(ref_atoms) == len(alt_atoms) )
    # Use the CUDA calculator when it is listed as available, otherwise the
    # serial one.
    if 'QCP_CUDA_MEM_CALCULATOR' in availableCalculators():
        pyrmsd_calc = 'QCP_CUDA_MEM_CALCULATOR'
    else:
        pyrmsd_calc = 'QCP_SERIAL_CALCULATOR'
    calculator = pyRMSD.RMSDCalculator.RMSDCalculator(pyrmsd_calc, np.array([ref_atoms_array, alt_atoms_array]))
    
    # Same arrays printed a second time, after the calculator was built
    print ref_atoms_array
    print alt_atoms_array
    
    # First entry of the pairwise RMSD matrix of the two coordinate sets
    return calculator.pairwiseRMSDMatrix()[0]