Permalink
Browse files

Adding NMR files kindly contributed by Bob Bussell

  • Loading branch information...
chapmanb
chapmanb committed Jan 21, 2004
1 parent 842e2c3 commit 7bcfbce3af0896a0befecf8079de433542b6068f
Showing with 502 additions and 0 deletions.
  1. +75 −0 Bio/NMR/NOEtools.py
  2. +7 −0 Bio/NMR/__init__.py
  3. +235 −0 Bio/NMR/xpktools.py
  4. +1 −0 CONTRIB
  5. +14 −0 Doc/examples/nmr/noed.xpk
  6. +169 −0 Doc/examples/nmr/simplepredict.py
  7. +1 −0 setup.py
View
@@ -0,0 +1,75 @@
+# NOEtools.py: A python module for predicting NOE coordinates from
+# assignment data.
+#
+# The input and output are modelled on nmrview peaklists.
# This module is suitable for directly generating an nmrview
+# peaklist with predicted crosspeaks directly from the
+# input assignment peaklist.
+
import string
import sys

# Make a development copy of xpktools importable.
# NOTE(review): hard-coded developer path — the packaged Bio.NMR.xpktools
# should normally already be on sys.path; confirm this hack is still needed.
# Bug fix: the original did sys.path = [sys.path, "..."], which nested the
# existing path list inside a new list and broke every subsequent import.
sys.path.append("/usr/people/robert/development/xpktools/")
import xpktools
+
def predictNOE(peaklist, originNuc, detectedNuc, originResNum, toResNum):
    """Predict the i->j NOE position based on self-peak (diagonal) assignments.

    Example: predictNOE(peaklist, "N15", "H1", 10, 12)
    where peaklist is of the type xpktools.Peaklist would generate a .xpk
    file entry for a crosspeak that originated on N15 of residue 10 and
    ended up as magnetization detected on the H1 nucleus of residue 12.

    Returns the predicted peaklist line, or an empty string when either
    residue number is absent from the peaklist.

    CAVEAT: the initial peaklist is assumed to be diagonal (self peaks
    only) and currently no checking is done to ensure that this
    assumption holds true.  Check your peaklist for errors and
    off-diagonal peaks before attempting to use predictNOE.
    """
    returnLine = ""  # the modified line to be returned to the caller

    datamap = _data_map(peaklist.datalabels)

    # Column indices for keying into each data line; +1 skips the leading
    # entry-number field that precedes the labelled columns.
    originAssCol = datamap[originNuc + ".L"] + 1
    originPPMCol = datamap[originNuc + ".P"] + 1
    detectedPPMCol = datamap[detectedNuc + ".P"] + 1

    # Build the residue dictionary once (each call rebuilds it from scratch).
    res_dict = peaklist.residue_dict(detectedNuc)

    if str(toResNum) in res_dict and str(originResNum) in res_dict:
        detectedList = res_dict[str(toResNum)]
        originList = res_dict[str(originResNum)]
        returnLine = detectedList[0]

        # These values are the same for every detected line, so compute
        # them once instead of once per loop iteration as before.  (The
        # original also computed an average detected PPM that was never
        # used; that dead computation has been removed.)
        aveOriginPPM = _col_ave(originList, originPPMCol)
        originAss = originList[0].split()[originAssCol]

        # Splice the origin assignment and its average shift into the line.
        returnLine = xpktools.replace_entry(returnLine, originAssCol + 1, originAss)
        returnLine = xpktools.replace_entry(returnLine, originPPMCol + 1, aveOriginPPM)

    return returnLine
+
+
+def _data_map(labelline):
+# Generate a map between datalabels and column number
+# based on a labelline
+ i=0 # A counter
+ datamap={} # The data map dictionary
+ labelList=string.splitfields(labelline) # Get the label line
+
+ # Get the column number for each label
+ for i in range(len(labelList)):
+ datamap[labelList[i]]=i
+
+ return datamap
+
+def _col_ave(list,col):
+# Compute average values from a particular column in a string list
+ sum=0; n=0
+ for element in list:
+ sum=sum+string.atof(string.split(element)[col])
+ n=n+1
+ return sum/n
View
@@ -0,0 +1,7 @@
+"""Code for working with NMR data
+
+This directory currently contains contributions from
+
+Bob Bussell <rgb2003@med.cornell.edu> -- NOEtools and xpktools
+"""
+__all__ = ["NOEtools", "xpktools"]
View
@@ -0,0 +1,235 @@
+# xpktools.py: A python module containing function definitions and classes
+# useful for manipulating data from nmrview .xpk peaklist files.
+#
+# ********** INDEX of functions and classes **********
+#
+# XpkEntry class: A class suited for handling single lines of
+# non-header data from an nmrview .xpk file. This class
+# provides methods for extracting data by the field name
+# which is listed in the last line of the peaklist header.
+
+
+import string
+
+# * * * * * INITIALIZATIONS * * * * *
+HEADERLEN=6
+# * * * * * _______________ * * * * *
+
class XpkEntry:
    """A single non-header data line from an nmrview .xpk file.

    Usage: XpkEntry(entry, headline) where entry is the data line from an
    nmrview .xpk file and headline is the header line that names the
    fields, which is typically the sixth line of the header (counting
    from 1).

    Values are accessed through the ``fields`` dictionary by their name
    in the header line: self.fields["H1.P"] returns the H1.P entry, and
    self.fields["entrynum"] returns the line number (first field of the
    line).  (The original docstring said ``self.field``; the attribute
    has always been ``self.fields``.)
    """

    def __init__(self, entry, headline):
        # The first token of a data line is the entry number, so data
        # token i+1 pairs with header label i.  zip truncates safely when
        # the two lists differ in length (the original loop raised an
        # uncaught IndexError when the header was the shorter one) and
        # replaces the original loop with its dead i=0 / i=i+1 counter.
        datlist = entry.split()
        headlist = headline.split()
        self.fields = dict(zip(headlist, datlist[1:]))

        try:
            self.fields["entrynum"] = datlist[0]
        except IndexError:
            # Blank entry line: leave the dictionary empty.
            pass
+
class Peaklist:
    """An entire nmrview .xpk peaklist file.

    The six header lines are available as the attributes firstline,
    axislabels, dataset, sw, sf and datalabels; the data lines are
    available as the list ``self.data``.
    """

    def __init__(self, infn):
        """Read the .xpk file named infn.

        infn -- path of the peaklist file; the first six lines are taken
                as the header, everything after that as data lines.
        """
        self.data = []  # newline-stripped data lines, in file order

        # ``with`` closes the file even on error (the original leaked the
        # handle).  split("\n")[0] strips the trailing newline exactly as
        # the original string.split(line, "\012")[0] did.
        with open(infn, "r") as infile:
            self.firstline = infile.readline().split("\n")[0]
            self.axislabels = infile.readline().split("\n")[0]
            self.dataset = infile.readline().split("\n")[0]
            self.sw = infile.readline().split("\n")[0]
            self.sf = infile.readline().split("\n")[0]
            self.datalabels = infile.readline().split("\n")[0]

            # Read in the data lines to a list.
            line = infile.readline()
            while line:
                self.data.append(line.split("\n")[0])
                line = infile.readline()

    def residue_dict(self, index):
        """Return a dictionary of data lines indexed by residue number.

        index -- nucleus name as it appears in the .xpk label line
                 ("H1" or "15N2" for example)

        Keys are residue numbers as strings, each mapping to the list of
        data lines for that residue; the special keys "maxres" and
        "minres" hold the largest and smallest residue numbers as ints
        (-1 when the peaklist has no data lines).  The result is also
        stored as ``self.dict`` for backward compatibility.
        """
        maxres = -1
        minres = -1

        self.dict = {}
        for line in self.data:
            # The assignment label looks like "<residue>.<atom>"; the
            # residue number is the part before the dot.
            label = XpkEntry(line, self.datalabels).fields[index + ".L"]
            res = int(label.split(".")[0])

            # Track the global residue range (-1 means "unset").
            if maxres == -1:
                maxres = res
            if minres == -1:
                minres = res
            maxres = max(maxres, res)
            minres = min(minres, res)

            # Group all lines belonging to the same residue under one key.
            self.dict.setdefault(str(res), []).append(line)

        self.dict["maxres"] = maxres
        self.dict["minres"] = minres

        return self.dict

    def write_header(self, outfn):
        """Write the six header lines to the file named outfn."""
        outfile = _try_open_write(outfn)
        for headline in (self.firstline, self.axislabels, self.dataset,
                         self.sw, self.sf, self.datalabels):
            outfile.write(headline)
            outfile.write("\n")
        outfile.close()
+
+def _try_open_read(fn):
+# Try to open a file for reading. Exit on IOError
+ try:
+ infile=open(fn,'r')
+ except IOError, e:
+ print "file", fn, "could not be opened for reading - quitting."
+ sys.exit(0)
+ return infile
+
+def _try_open_write(fn):
+# Try to open a file for writing. Exit on IOError
+ try:
+ infile=open(fn,'w')
+ except IOError, e:
+ print "file", fn, "could not be opened for writing - quitting."
+ sys.exit(0)
+ return infile
+
+
def replace_entry(line, fieldn, newentry):
    """Replace the fieldn'th whitespace-delimited entry of line with newentry.

    No padding is implemented currently: spacing will change if the
    original field entry and the new field entry are of different
    lengths.  This function depends on _find_start_entry.
    """
    start = _find_start_entry(line, fieldn)
    # Length of the existing entry at that position (str.split replaces
    # string.splitfields, which was removed in Python 3).
    oldlen = len(line[start:].split()[0])
    return line[:start] + str(newentry) + line[start + oldlen:]
+
+def _find_start_entry(line,n):
+ # find the starting point character for the n'th entry in
+ # a space delimited line. n is counted starting with 1
+ # The n=1 field by definition begins at the first character
+ # This function is used by replace_entry
+
+ infield=0 # A flag that indicates that the counter is in a field
+
+ if (n==1):
+ return 0 # Special case
+
+ # Count the number of fields by counting spaces
+ c=1
+ leng=len(line)
+
+ # Initialize variables according to whether the first character
+ # is a space or a character
+ if (line[0]==" "):
+ infield=0
+ field=0
+ else:
+ infield=1
+ field=1
+
+
+ while (c<leng and field<n):
+ if (infield):
+ if (line[c]==" " and not (line[c-1]==" ")):
+ infield=0
+ else:
+ if (not line[c]==" "):
+ infield=1
+ field=field+1
+
+ c=c+1
+
+ return c-1
+
+
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input .xpk files.

    fn_list   -- list of .xpk peaklist file names
    datalabel -- the data element to report (e.g. "H1.P")
    keyatom   -- the nucleus used to index the table (e.g. "15N2")

    Returns a list of newline-terminated, tab-separated lines, one per
    residue number in the global residue range; "*" marks files that
    have no entry for a residue.
    """
    outlist = []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # Find the global max and min residue numbers over all files.
    minr = dict_list[0]["minres"]
    maxr = dict_list[0]["maxres"]
    for res_dict in dict_list:  # renamed: "dict" shadowed the builtin
        maxr = max(maxr, res_dict["maxres"])
        minr = min(minr, res_dict["minres"])

    for res in range(minr, maxr + 1):  # step through residue numbers
        parts = [str(res)]
        # zip pairs each residue dictionary with its label line,
        # replacing the original manual "count" index.
        for res_dict, label in zip(dict_list, label_line_list):
            if str(res) in res_dict:
                parts.append(XpkEntry(res_dict[str(res)][0], label).fields[datalabel])
            else:
                parts.append("*")
        outlist.append("\t".join(parts) + "\n")

    return outlist
+
+def _sort_keys(dict):
+ keys=dict.keys()
+ sorted_keys=keys.sort()
+ return sorted_keys
+
def _read_dicts(fn_list, keyatom):
    """Read multiple .xpk files into a list of residue dictionaries.

    fn_list -- list of .xpk peaklist file names
    keyatom -- nucleus name used to index each residue dictionary

    Returns [dict_list, datalabel_list]: the residue dictionary and the
    data-label line for each input file, in order.
    """
    dict_list = []
    datalabel_list = []
    for fn in fn_list:
        peaklist = Peaklist(fn)
        # "res_dict" renamed from "dict", which shadowed the builtin.
        res_dict = peaklist.residue_dict(keyatom)
        dict_list.append(res_dict)
        datalabel_list.append(peaklist.datalabels)

    return [dict_list, datalabel_list]
View
@@ -11,6 +11,7 @@ Sebastian Bassi <sbassi@asalup.org>
Yves Bastide <ybastide@irisa.fr>
Yair Benita <Y.Benita@pharm.uu.nl>
Peter Bienstman <Peter.Bienstman@rug.ac.be>
+Bob Bussell <rgb2003@med.cornell.edu>
Diego Brouard <diego@conysis.com>
Hye-Shik Chang <perky@fallin.lv>
Jeffrey Chang <jchang@smi.stanford.edu>
View
@@ -0,0 +1,14 @@
+label dataset sw sf
+H1 15N2 N15
+test.nv
+{1571.86 } {1460.01 } {1460.00 }
+{599.8230 } { 60.7860 } { 60.7860 }
+ H1.L H1.P H1.W H1.B H1.E H1.J 15N2.L 15N2.P 15N2.W 15N2.B 15N2.E 15N2.J N15.L N15.P N15.W N15.B N15.E N15.J vol int stat
+0 3.hn 8.853 0.021 0.010 ++ 0.000 3.n 120.104 0.344 0.010 PP 0.000 3.n 120.117 0.344 0.010 PP 0.000 1.18200 1.18200 0
+1 4.hn 8.685 0.030 0.010 ++ 0.000 4.n 112.262 1.852 0.010 E+ 0.000 4.n 112.260 1.852 0.010 E+ 0.000 0.99960 0.99960 0
+2 5.hn 8.878 0.026 0.010 ++ 0.000 5.n 117.892 0.449 0.010 EE 0.000 5.n 117.894 0.449 0.010 EE 0.000 0.93720 0.93720 0
+3 6.hn 8.978 0.010 0.010 ++ 0.000 6.n 120.085 0.343 0.010 PP 0.000 6.n 120.101 0.343 0.010 PP 0.000 0.57500 0.57500 0
+4 7.hn 8.137 0.084 0.010 ++ 0.000 7.n 112.673 0.346 0.010 PP 0.000 7.n 112.641 0.346 0.010 PP 0.000 0.60770 0.60770 0
+5 8.hn 8.140 0.026 0.010 ++ 0.000 8.n 121.050 0.355 0.010 PP 0.000 8.n 121.050 0.355 0.010 PP 0.000 0.56960 0.56960 0
+6 9.hn 8.924 0.028 0.010 ++ 0.000 9.n 113.323 0.319 0.010 +P 0.000 9.n 113.322 0.319 0.010 +P 0.000 0.46630 0.46630 0
+8 10.hn 7.663 0.021 0.010 ++ 0.000 10.n 118.341 0.324 0.010 +E 0.000 10.n 118.476 0.324 0.010 +E 0.000 0.49840 0.49840 0
Oops, something went wrong.

0 comments on commit 7bcfbce

Please sign in to comment.