From 94cdf61d9c67f70d5411b2fa43740b4950ec4720 Mon Sep 17 00:00:00 2001
From: Niels Hulstaert <nielshulstaert@gmail.com>
Date: Wed, 21 Oct 2015 15:58:46 +0200
Subject: [PATCH] command line script added

---
 scop3D/command_line.py | 591 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 591 insertions(+)
 create mode 100644 scop3D/command_line.py

diff --git a/scop3D/command_line.py b/scop3D/command_line.py
new file mode 100644
index 0000000..3b1f4cc
--- /dev/null
+++ b/scop3D/command_line.py
@@ -0,0 +1,591 @@
+#__author__ = 'elien'
+#scop3D 2.0
+#situation: protein + multiple sequences as input
+
+from Bio.Align.Applications import MuscleCommandline
+from StringIO import StringIO
+from Bio import AlignIO
+from Bio.Align import AlignInfo
+from weblogolib import *
+from Bio.Blast import NCBIWWW
+from Bio.Blast import NCBIXML
+from Bio import PDB
+from Bio.PDB.Polypeptide import PPBuilder
+from Bio import SeqIO
+from Bio.PDB import *
+
+import sys
+import shutil
+import os
+import subprocess
+import math
+from math import log
+
+#multiple sequence alignment
+#1. Retrieval of data input
+print('!The analysis can be performed with maximum 500 sequences!')
+myProtMultInput = raw_input('Enter a filename: ')
+#   test to see if the file was loaded
+with open(myProtMultInput, 'r') as f:
+    first_line = f.readline()
+    print('The first line of your input is printed as verification that the correct file is used in the analysis:')
+    print(first_line)
+myMuscleInput = myProtMultInput
+
+myCount = 0
+myInputanalyze = open(myProtMultInput, 'r')
+myInputlines = myInputanalyze.readlines()
+myInputanalyze.close()
+for myInputline in myInputlines:
+    if myInputline.startswith('>'):
+        myCount = myCount + 1
+print('number of sequences in analysis: '+str(myCount))
+
+#2. Multiple sequence alignment with Muscle
+#       Reference: Edgar RC (2004) MUSCLE: multiple sequence alignment with high accuracy and high throughput.
+#                   Nucleic Acids Res 32(5), 1792-1797
+print('You have to specify your muscle location:')
+myMuscleLoc = raw_input('Enter your muscle location: ')
+print(myMuscleLoc)
+myMuscleExe = myMuscleLoc+'\muscle.exe'
+print(myMuscleExe)
+myAlignmentOutput = 'output_multiple_sequence_alignment.txt'
+muscle_cline = MuscleCommandline(myMuscleExe, input=myMuscleInput, out=myAlignmentOutput, clwstrict=True)
+muscle_cline()
+alignment = open(myAlignmentOutput, "r")
+print_alignment = alignment.read()
+print print_alignment
+alignment.close()
+
+#3. Determination of similarity and identity between the different sequences
+os.mkdir('temp_sec')
+os.mkdir('pairwise_alignments')
+#for each sequence a fasta-file will be created with the aid of seqretsplit from emboss
+#   Rice P, Longden I, Bleasby A (2000) EMBOSS: The European Molecular Biology Open Software Suite.
+#   Trends in Genetics 16(6), 276-277
+subprocess.call(['C:\mEMBOSS\seqretsplit.exe','-nofeature', '-sequence', myProtMultInput, '-nofirstonly', '-auto', '-osdirectory_outseq', 'temp_sec'])
+
+myFastafiles = os.listdir('temp_sec')
+myFastaIter = iter(myFastafiles)
+
+for myFasta in myFastaIter:
+    if myFasta.endswith('fasta'):
+        print(myFasta)
+        myFastab = 'temp_sec\\'+myFasta
+        for myFasta2 in os.listdir('temp_sec'):
+            myFasta2b = 'temp_sec\\'+myFasta2
+
+#a pairwise alignment among all sequences is performed with the aid of needle
+#   Needleman SB, Wunsch CD (1970) A general method applicable to the search for similarities in the amino acid sequence of two proteins
+#   J Mol Biol 48(3), 443-453
+            subprocess.call(['C:\mEMBOSS\\needle.exe', '-asequence', myFastab, '-bsequence', myFasta2b, '-noendweight', '-endopen', '10.0', '-endextend', '0.5', '-brief', '-aformat', 'srspair', '-auto', '-aname_outfile', myFasta+'_'+myFasta2, '-adirectory_outfile', 'pairwise_alignments'])
+
+        os.remove(myFastab)
+        print(myFastafiles)
+
+os.rmdir('temp_sec')
+
+#similarity and identity between the different sequences is retrieved
+myIdent = open('identity','w')
+myIdentrel = open('identity_percentage','w')
+mySim = open('similarity','w')
+mySimrel = open('similarity_percentage','w')
+
+myIdentdict = {}
+myIdentreldict = {}
+mySimdict = {}
+mySimreldict = {}
+
+myAlignfiles = os.listdir('pairwise_alignments')
+for myAlign in myAlignfiles:
+    if myAlign.endswith('.needle'):
+        print(myAlign)
+        myAlignb = 'pairwise_alignments\\'+myAlign
+        myAlignfile = open(myAlignb, 'r')
+        mySeq1 = 'ND'
+        mySeq2 = 'ND'
+        for myLine in myAlignfile:
+            if '# 1: ' in myLine:
+                mySeq1 = myLine[5:-1]
+            if '# 2: ' in myLine:
+                mySeq2 = myLine[5:-1]
+            if '# Identity' in myLine:
+                mySplit = myLine.split()
+                myIdentity = mySplit[2]
+                myIdentityrel = mySplit[3][1:-1]
+            if '# Similarity' in myLine:
+                mySplit = myLine.split()
+                mySimilarity = mySplit[2]
+                mySimilarityrel = mySplit[3][1:-1]
+        print('sequences: '+mySeq1+' and '+mySeq2+' identity: '+myIdentity+' identity percent: '+myIdentityrel+' similarity: '+mySimilarity+' similarity percent: '+mySimilarityrel)
+        myIdentdict[mySeq1+'\t'+mySeq2] = myIdentity
+        myIdentreldict[mySeq1+'\t'+mySeq2] = myIdentityrel
+        mySimdict[mySeq1+'\t'+mySeq2] = mySimilarity
+        mySimreldict[mySeq1+'\t'+mySeq2] = mySimilarityrel
+        myAlignfile.close()
+
+for myKey in myIdentdict:
+    myIdent.write(myKey+'\t'+myIdentdict[myKey]+'\n')
+for myKey in myIdentreldict:
+    myIdentrel.write(myKey+'\t'+myIdentreldict[myKey]+'\n')
+for myKey in mySimdict:
+    mySim.write(myKey+'\t'+mySimdict[myKey]+'\n')
+for myKey in mySimreldict:
+    mySimrel.write(myKey+'\t'+mySimreldict[myKey]+'\n')
+
+myIdent.close()
+myIdentrel.close()
+mySim.close()
+mySimrel.close()
+
+#4. Determination of the consensus sequence
+#   Rice P, Longden I, Bleasby A (2000) EMBOSS: The European Molecular Biology Open Software Suite.
+#   Trends in Genetics 16(6), 276-277
+print("calculation of the consensus sequence")
+subprocess.call(['C:\mEMBOSS\cons.exe', '-sequence', myAlignmentOutput, '-plurality', '1', '-identity', '0', '-auto', '-outseq', 'consensus', '-name', 'consensus'])
+myConsensus = open('consensus', "r")
+print_consensus = myConsensus.readlines()
+print(print_consensus)
+shutil.copy2('consensus', 'consensus.fasta')
+myCons = ""
+for myLine in print_consensus:
+    if myLine.startswith('>'):
+        myCons = ""
+    else:
+        myLine = myLine[:-1]
+        myCons = "".join((myCons, myLine))
+myConsensus.close()
+
+#5. Calculation of the abundance matrices - biopython module
+myMatrixinput = AlignIO.read(myAlignmentOutput, "clustal")
+mySummary_align = AlignInfo.SummaryInfo(myMatrixinput)
+myFrequency_matrix = mySummary_align.pos_specific_score_matrix(myCons)
+myFrequency_matrix_str = str(myFrequency_matrix)
+
+#Abundance matrix in absolute values
+print(myFrequency_matrix_str)
+myAbunout = open('abundance_matrix.txt', 'w')
+myAbunout.write(myFrequency_matrix_str)
+myAbunout.close()
+
+#Abundance matrix in percentage
+myAbunout2 = open('abundance_matrix.txt', 'r')
+myLines = myAbunout2.readlines()
+myAbunout2.close()
+
+myAbunperout = open('abundance_matrix_percentage.txt', 'w')
+for myLine in myLines:
+    mySplit = myLine.split()
+    if myLines.index(myLine) == 0:
+        myLine = myLine[:-1]
+        myAbunperout.write('\t'+"\t".join(mySplit))
+    else:
+        myLine = myLine[:-1]
+        for mySplitel in mySplit:
+            if mySplit.index(mySplitel) == 0:
+                myAbunperout.write('\n'+mySplit[0]+'\t')
+            else:
+                myPercent = (float(mySplitel) * 100) / float(myCount)
+                #print(myPercent)
+                myAbunperout.write(str(myPercent)+'\t')
+myAbunperout.close()
+
+#6. Calculation of the entropy
+#retrieval of the input needed (abundance_matrix.txt)
+myAbinput = open('Abundance_matrix.txt', 'r')
+myAbinputlines = myAbinput.readlines()
+myAbinput.close()
+
+myEntropyout = open('entropy.txt', 'w')
+
+print('line with residues:'+myAbinputlines[0])
+myAAlist = myAbinputlines[0].split()
+print(myAAlist)
+myN = len(myAAlist)
+print(str(myN))
+
+for myAbinputline in myAbinputlines:
+    mySum = 0
+    if myAbinputline == myAbinputlines[0]:
+        print("already processed: "+ myAbinputline)
+    else:
+        print(myAbinputline)
+        myEsplit = myAbinputline.split()
+        for myE in myEsplit:
+            if myE == myE[0]:
+                print('residue: '+ myE)
+                myEntropyout.write(myE)
+            else:
+                if myE != '0.0':
+                    myPi = (float(myE) / float(myCount))
+                    print('pi = '+str(myPi))
+                    mySum = mySum + (myPi * math.log(myPi,2))
+                    print("mySum = "+str(mySum))
+        mySum = -float(mySum)
+        myEntropy = (float(mySum) / math.log(float(myN),2)) * 100
+        print('entropy: '+str(myEntropy))
+        if myEntropy == -0.0:
+            myEntropy = '0.00'
+            print(myEntropy)
+            myEntropyout.write('\t'+myEntropy+'\n')
+        else:
+            myRound = round(myEntropy, 2)
+            print(myRound)
+            myEntropyout.write('\t'+str(myRound)+'\n')
+
+myEntropyout.close()
+
+#retrieval of the percent value for the consensus residue
+myAbperinput = open('abundance_matrix_percentage.txt', 'r')
+myAbperinputlines = myAbperinput.readlines()
+myAbperinput.close()
+
+myConsAbperout = open('consensus_frequency_mostabundant.txt', 'w')
+myConsVarperout = open('variation.txt', 'w')
+
+for myAbperinputline in myAbperinputlines:
+    if myAbperinputline == myAbperinputlines[0]:
+        print(myAbperinputline)
+    else:
+        print(myAbperinputline)
+        myApsplit = myAbperinputline.split()
+        myConsAbperout.write(myApsplit[0])
+        myConsVarperout.write(myApsplit[0])
+        print(myApsplit[0])
+        print(max(myApsplit[1:]))
+        myVar = 100 - float(max(myApsplit[1:]))
+        myConsAbperout.write('\t'+max(myApsplit[1:])+'\n')
+        myConsVarperout.write('\t'+str(myVar)+'\n')
+
+myConsAbperout.close()
+myConsVarperout.close()
+
+#6. Calculation of the Weblogo
+#   Crooks GE, Hon G, Chandonia JM, Brenner SE (2004) WebLogo: a sequence logo generator.
+#   Genome Research 14, 1188-1190
+
+fin = open("output_multiple_sequence_alignment.txt")
+seqs = read_seq_data(fin)
+data = LogoData.from_seqs(seqs)
+options = LogoOptions()
+options.title = "sequence logo"
+format = LogoFormat(data, options)
+fout = open("logo.eps", "w")
+eps = eps_formatter(data, format)
+fout.write(eps)
+
+myAAdict = {'A':'ALA', 'C':'CYS', 'D':'ASP', 'E':'GLU', 'F':'PHE', 'G':'GLY', 'H':'HIS', 'I':'ILE',
+			'K':'LYS', 'L':'LEU', 'M':'MET', 'N':'ASN', 'P':'PRO', 'Q':'GLN', 'R':'ARG', 'S':'SER',
+			'T':'THR', 'V':'VAL', 'W':'TRP', 'Y':'TYR'}
+
+#7. Retrieval of the pdb-file
+print('Do you have a pdb-file of the protein you would like to analyze? Yes/No')
+myAnswer = raw_input('Your answer: ')
+print(myAnswer)
+myPDBfile = ()
+if myAnswer == 'Yes':		#when the user has a pdb-file
+	myPDBfile = raw_input('Enter your pdb-filename (without extensions): ')
+	with open(myPDBfile+'.pdb', 'r') as f:
+		myFirstline = f.readline()
+		print(myFirstline)
+elif myAnswer == 'No':		#when the user does not have a pdb-file, a blast against the pdb will be performed
+	print('Blast (www) will be performed with the consensus sequence')
+	mySeq = open('consensus').read()
+	print(mySeq)
+	print('The blast might take some time.')
+	result_handle = NCBIWWW.qblast('blastp', 'pdb', mySeq)
+	myBlastoutput = open('blast_results.xml', 'w')
+	myBlastoutput.write(result_handle.read())
+	myBlastoutput.close()
+	result_handle.close()
+	
+	blastresult_handle = open('blast_results.xml')
+	blastsummary = open('blast_summary.txt', 'w')
+	blast_record = NCBIXML.read(blastresult_handle)
+	global choose_pdbChoices
+	for alignment in blast_record.alignments:
+		for hsp in alignment.hsps:
+			title = alignment.title
+			mySplit = title.split('|')
+			myPdb = mySplit[3]
+			e_value = hsp.expect
+			length = alignment.length
+			identities = hsp.identities
+			positives = hsp.positives
+			gaps = hsp.gaps
+			blastsummary.write("title: " + str(title) + "\n" + "score: " + str(e_value) + "\n" +
+						"length: " + str(length) + "\n" +
+						"identities: " + str(identities) + "\n" +
+						"positives: " + str(positives) + "\n" +
+						"gaps: " + str(gaps) + "\n" + myPdb + "\n\n" )
+			print("title: " + str(title[1:100]) + " ..." +"\n" + "score: " + str(e_value) + "\n" +
+						"length: " + str(length) + "\n" +
+						"identities: " + str(identities) + "\n" +
+						"positives: " + str(positives) + "\n" +
+						"gaps: " + str(gaps) + "\n" + myPdb + "\n\n" )
+	blastsummary.close()
+
+	print('Which PDB-file would you like to work with?')
+	print('As a rule of thumb, a structure with high sequence coverage, identity and similarity is preferred.')
+	myPDBfile = raw_input('Your choice of PDB: ')
+
+else:
+	print('Your answer was neither Yes or No. Scop3D will end.')
+	sys.exit()
+
+#8. adjustment of the pdb-file
+print('pdbfile which you will work with: '+myPDBfile)
+
+#8a. selection of the chains to adjust
+structure = ()
+
+try:
+	pdbl = PDB.PDBList()
+	pdbl.retrieve_pdb_file(myPDBfile)
+	parser = PDB.PDBParser()
+	structure = parser.get_structure(myPDBfile, myPDBfile[1:3].lower()+'/pdb'+myPDBfile.lower()+'.ent')
+
+	if myAnswer == 'No':
+		io = PDBIO()
+		io.set_structure(structure)
+		io.save(myPDBfile+'.pdb')
+
+except:
+	print('you structure is not available in the PDB')
+	parser = PDB.PDBParser()
+	structure = PDB.PDBParser().get_structure(myPDBfile,myPDBfile+'.pdb')
+
+chains = [chain for chain in structure.get_chains()]
+print(chains)
+ppb = PPBuilder()
+for chain in chains:
+		print(chain)
+ 		print(chain.get_id())
+ 		for chainseq in ppb.build_peptides(chain):
+ 			print(chainseq.get_sequence())
+ 			mySeq = chainseq.get_sequence()
+
+ 			myFastaA = '>'+myPDBfile+'_'+chain.get_id()+'\n'+chainseq.get_sequence()
+ 			print(myFastaA)
+ 			myFastaAfile = open(myPDBfile+'_'+chain.get_id()+'.fasta', 'w')
+ 			myFastaAfile.write('>'+myPDBfile+'_'+chain.get_id()+'\n'+str(mySeq))
+ 			myFastaAfile.close()
+
+ 			myFastafiles = os.listdir('.')
+ 			myFastaIter = iter(myFastafiles)
+
+ 			for myFasta in myFastaIter:
+ 				if myPDBfile+'_'+chain.get_id() in myFasta:
+ 					print(myFasta)
+
+ 					#a pairwise alignment among all sequences is performed with the aid of needle
+ 					#   Needleman SB, Wunsch CD (1970) A general method applicable to the search for similarities in the amino acid sequence of two proteins
+ 					#   J Mol Biol 48(3), 443-453
+ 					subprocess.call(['C:\mEMBOSS\\needle.exe', '-asequence', myFasta, '-bsequence', 'consensus', '-noendweight', '-endopen', '10.0', '-endextend', '0.5', '-brief', '-aformat', 'markx3', '-auto'])
+
+myEntinput = open('entropy.txt', 'r')
+myEntlines = myEntinput.readlines()
+myEntinput.close()
+myVarinput = open('variation.txt', 'r')
+myVarlines = myVarinput.readlines()
+myVarinput.close()
+
+print('Do you know which chains have to be adjusted? Yes/No')
+myAnswer = raw_input('Your answer: ')
+print(myAnswer)
+if myAnswer == 'Yes':
+	print('Give the chains you would like to adjust. (Example: A,C,D)')
+	myChainanswer = raw_input('Your answer: ')
+	myChainlist = myChainanswer.split(',')
+	print(myChainlist)
+
+	for myEl in myChainlist:
+		myReorganize = open(myPDBfile+'_'+myEl+'_sequences', 'w')
+ 		myIterator = SeqIO.parse(myPDBfile+'_'+myEl+'.needle', 'fasta')
+ 		mySeqlist = list(myIterator)
+ 		for mySeq in mySeqlist:
+ 			print(mySeq)
+ 			myReorganize.write('>'+str(mySeq.description)+'\n'+str(mySeq.seq)+'\n')
+ 		myReorganize.close()
+
+ 		myDatafile = open(myPDBfile+'_'+myEl+'_data.txt', 'w')
+ 		myReorganize = open(myPDBfile+'_'+myEl+'_sequences', 'r')
+ 		myReorganizelines = myReorganize.readlines()
+ 		myReorganize.close()
+ 		myDatafile.write(myReorganizelines[0][1:7]+'\t')
+ 		myDatafile.write(myReorganizelines[2][1:7]+'\n')
+ 		myListcons = list(myReorganizelines[3])
+ 		myListchain = list(myReorganizelines[1])
+ 		for i in range(len(myListchain)):
+ 			if myListchain[i] != '-':
+				myDatafile.write(myListchain[i]+'\t')
+				myDatafile.write(myListcons[i]+'\t')
+				n = i
+				print(n)
+				if myListcons[i] == '-':
+					myEntropy = '120.00'
+					myDatafile.write(myEntropy+'\t')
+					myVariation = '120.00'
+					myDatafile.write(myVariation+'\n')
+					n = i+1
+				else:
+					if n > len(myEntlines)-1:
+						print('end of the data')
+						break
+					else:
+						line = myEntlines[n][:-1]
+						myList = line.split('\t')
+						print(myList)
+						print(myList[1])
+						myEntropy = "%.2f" % float(myList[1])
+						myDatafile.write(myEntropy+'\t')
+						myLine = myVarlines[n][:-1]
+						myVarlist = myLine.split('\t')
+						print(myVarlist)
+						myVariation = "%.2f" % float(myVarlist[1])
+						myDatafile.write(myVariation+'\n')
+		myDatafile.close()
+
+else:
+	myIdentdict = {}
+	myFiles = os.listdir('.')
+	myFilesiter = iter(myFiles)
+	for myFile in myFilesiter:
+		if myFile.endswith('.needle') and myFile.startswith(myPDBfile.lower()):
+ 			print('bestand: '+myFile)
+			myChain = myFile[-8:-7].upper()
+ 			print('keten: '+myChain)
+ 			myAlignchain = open(myFile, 'r')
+ 			for myLine in myAlignchain:
+ 				if myLine.startswith('# Identity:'):
+ 					print('identiteitlijn: '+myLine)
+ 					mySplit = myLine.split('(')
+ 					myIdent = mySplit[1][:-3]
+ 					print('identiteit: '+myIdent)
+ 					myIdentdict[myChain] = myIdent
+	print('chainsdictionary: ')
+	print(myIdentdict)
+
+	for key,val in myIdentdict.iteritems():
+ 		if val == max(myIdentdict.values()):
+ 			print(key)
+ 			myReorganize = open(myPDBfile+'_'+key+'_sequences', 'w')
+ 			myIterator = SeqIO.parse(myPDBfile+'_'+key+'.needle', 'fasta')
+ 			mySeqlist = list(myIterator)
+ 			for mySeq in mySeqlist:
+ 				print(mySeq)
+ 				myReorganize.write('>'+str(mySeq.description)+'\n'+str(mySeq.seq)+'\n')
+ 			myReorganize.close()
+
+ 			myDatafile = open(myPDBfile+'_'+key+'_data.txt', 'w')
+ 			myReorganize = open(myPDBfile+'_'+key+'_sequences', 'r')
+ 			myReorganizelines = myReorganize.readlines()
+ 			myReorganize.close()
+ 			myDatafile.write(myReorganizelines[0][1:7]+'\t')
+ 			myDatafile.write(myReorganizelines[2][1:7]+'\n')
+ 			myListcons = list(myReorganizelines[3])
+ 			myListchain = list(myReorganizelines[1])
+ 			for i in range(len(myListchain)):
+ 				if myListchain[i] != '-':
+					myDatafile.write(myListchain[i]+'\t')
+					myDatafile.write(myListcons[i]+'\t')
+					n = i
+					print(n)
+					if myListcons[i] == '-':
+						myEntropy = '120.00'
+						myDatafile.write(myEntropy+'\t')
+						myVariation = '120.00'
+						myDatafile.write(myVariation+'\n')
+						n = i+1
+					else:
+						if n > len(myEntlines)-1:
+							print('end of the data')
+							break
+						else:
+							line = myEntlines[n][:-1]
+							myList = line.split('\t')
+							print(myList)
+							print(myList[1])
+							myEntropy = "%.2f" % float(myList[1])
+							myDatafile.write(myEntropy+'\t')
+							myLine = myVarlines[n][:-1]
+							myVarlist = myLine.split('\t')
+							print(myVarlist)
+							myVariation = "%.2f" % float(myVarlist[1])
+							myDatafile.write(myVariation+'\n')
+			myDatafile.close()
+
+#8b. adjustment of the B-values (towards entropy and conservation)
+myChainlist = []
+myFiles = os.listdir('.')
+myFilesiter = iter(myFiles)
+for myFile in myFilesiter:
+ 	if myFile.endswith('_data.txt') and myFile.startswith(myPDBfile):
+  		print('bestand: '+myFile)
+ 		myChain = myFile[-10:-9].upper()
+  		print('keten: '+myChain)
+ 		myChainlist.extend(myChain)
+print(myChainlist)
+
+myPDBin = open(myPDBfile+'.pdb', 'r')
+myLines = myPDBin.readlines()
+myPDBin.close()
+
+myPDBout = open(myPDBfile+'_entropy.pdb','w')
+myPDBvarout = open(myPDBfile+'_variation.pdb','w')
+
+for myLine in myLines:
+ 	if myLine.startswith('ATOM'):
+  		print('line: '+myLine)
+  		myChain = myLine[21:22]
+  		myRespdb = myLine[17:20]
+  		myBvalue = myLine[60:66]
+  		myPos = myLine[22:26]
+  		print('summary: '+myChain+'\t'+myRespdb+'\t'+myBvalue+'\t'+myPos)
+  		myI = myLines.index(myLine)
+  		print('line index: '+str(myI))
+
+  		if myChain in myChainlist:
+  			myBinfo = open(myPDBfile+'_'+myChain+'_data.txt', 'r')
+  		 	myBlines = myBinfo.readlines()
+  		 	myBinfo.close()
+
+   			mySplit = myBlines[1].split('\t')
+  		 	myRes = mySplit[0]
+  		 	print('res from data: '+myRes)
+			myEnt = mySplit[2]
+  		 	myVar = mySplit[3].strip('\n').strip('\r')
+
+  		 	if myRespdb == myAAdict[myRes.upper()]:
+  				myPDBout.write((myLine[0:60]+' '+myEnt+'\n'))
+				myPDBvarout.writelines((myLine[0:60]+' '+myVar+'\n'))
+
+  				myIplus = myI + 1
+  				myTest = myLines[myIplus]
+ 				print('control of next line: '+myTest)
+  				if myTest[22:26] != myPos:
+  		 	 		myBinfo = open(myPDBfile+'_'+myChain+'_data.txt', 'w')
+  		 	 		counter = 0
+  		 	 		for myBline in myBlines:
+ 						if counter == 1:
+ 						 	print('line to remove: '+myBline)
+ 							counter = counter + 1
+ 						else:
+ 							myBinfo.write(myBline)
+ 							counter = counter + 1
+ 					myBinfo.close()
+
+  			else:
+  				print('look at this!')
+ 				print(myRespdb+'-'+myRes+'-'+myAAdict[myRes])
+
+  		else:
+  			myPDBout.write(myLine[0:60]+'120.00'+'\n')
+			myPDBvarout.write(myLine[0:60]+'120.00'+'\n')
+
+  	else:
+  		myPDBout.write(myLine)
+		myPDBvarout.write(myLine)
+
+myPDBout.close()
+myPDBvarout.close()
\ No newline at end of file