version 1.3

cran · Jul 23, 2014 · a85b6e4 · a85b6e4
1 parent 28a87d6
commit a85b6e4
Show file tree

Hide file tree

Showing 52 changed files with 3,538 additions and 1,278 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,18 +3,14 @@ Type: Package
 Title: Detecting, combining and filtering isotope, adduct and homologue
         series relations in high-resolution mass spectrometry (HRMS)
         data
-Version: 1.2
-Date: 2012-10-15
+Version: 1.3
+Date: 2014-07-23
 Author: Martin Loos
 Maintainer: Martin Loos <Martin.Loos@eawag.ch>
-Description: Screening a HRMS data set for peaks related by (1) isotope
-        patterns, (2) different adducts of the same molecule and/or (3)
-        homologue series. The resulting isotopic pattern and adduct
-        groups can then be combined to so-called components, with
-        homologue series information attached. Also allows plotting and
-        filtering HRMS data for mass defects, frequent m/z distances
-        and components vs. non-components.
+Description: Screening a HRMS data set for peaks related by (1) isotope patterns, (2) different adducts of the same molecule and/or (3) homologue series. The resulting isotopic pattern and adduct groups can then be combined to so-called components, with homologue series information attached. Also allows plotting and filtering HRMS data for mass defects, frequent m/z distances and components vs. non-components.
 License: GPL-2
-Packaged: 2012-10-18 09:06:40 UTC; loosmart
+Depends: enviPat(>= 1.7), nontargetData(>= 1.1), mgcv(>= 1.7-22)
+Packaged: 2014-07-23 10:40:11 UTC; uchemadmin
+NeedsCompilation: yes
 Repository: CRAN
-Date/Publication: 2012-10-18 18:11:44
+Date/Publication: 2014-07-23 13:15:40
diff --git a/MD5 b/MD5
@@ -1,45 +1,46 @@
-70cfe11b07025ff86b550a19e4e5bd2a *DESCRIPTION
-df390c53434517b304ac5db487184641 *NAMESPACE
-c6bcfb9808f58e711d2fc603fed6cbc2 *R/adduct.search.R
+4b581855f0337ef51a8d983cb03f4604 *DESCRIPTION
+b3e3ed15978dd61558086d6842cf1a23 *NAMESPACE
+a33d0fabd405d186553ed6d8760a6038 *NEWS
+776df5ea4ac41b8b1a72f9d9aa65eae8 *R/adduct.search.R
 80b9c2f147a4d9cef81491c1b9d95d25 *R/combine.R
 2053804fc57a6fdb694416d680c5bb95 *R/deter.iso.R
-41405f255745478ca4662fe774ac68d1 *R/homol.search.R
-ec8797996668be63d3a0ecfe701100ab *R/make.isos.R
+cff0eb3c38d9beb31b7c1e474a94baf0 *R/homol.search.R
+f3d6c963d9ad4e432bc21682e0a212d1 *R/make.isos.R
 e9ac33bbec7b0ca7443f65e1bb4bddbd *R/ms.filter.R
-dce6444dc8fd5cbbe466df7632e24466 *R/nontarget-internal.R
-683c241820eec5ce926ffa27d5dcaa2f *R/nontarget.R
-c3697e5e9d73666db084adbe03080d1c *R/pattern.search.R
+7dce7b1cd09d25bae22c5e1d389e0fc5 *R/nontarget.R
+2e1ff48684adc67837a05e9ea705b0f8 *R/pattern.search.R
+ae4632d4bc7d2efd6bb192f2e8c5bed1 *R/pattern.search2.R
 d58307aa7a623d10412b51fb5fd6d48d *R/plotadduct.R
 94e2bf21892c52a1dc329995e852eeb5 *R/plotall.R
 faca91d002c38892c60e9132a3cbe349 *R/plotcomp.R
-384b902494f176e0c2a6446679cf7b5e *R/plotdefect.R
+c3842b0e1161ff48c0e6b4d2b7b2eef3 *R/plotdefect.R
 ad1c6ab0d4736672fca83ff8722eded0 *R/plotdiff.R
-015326b4fb3fda5e7eb1d18254e40c42 *R/plotgroup.R
-25f4f6d55fb82be99910b05e111a9b8b *R/plothomol.R
+17178087618d84df22f43e14d9020b6e *R/plotgroup.R
+56ae814c20986784a8e5ced14e584053 *R/plothomol.R
 01bdb350695c3f910be559128accf021 *R/plotisotopes.R
 aea23e170739fce51498cf0b53f3c707 *R/rm.sat.R
-f7f31b70e62c8967b83b8c28ed760ca2 *data/adduct_list.rda
-181d9ec48728e569c9b3692ca5bcca57 *data/iso_list.rda
 e6f1efb5ed75468cc251f234d2b663a1 *data/peaklist.rda
-ef490821f3aec95909a5c6ef7616ebb8 *man/adduct.search.Rd
-52e6e905c3e6d811f87a5ce8e3e2c97d *man/adduct_list.Rd
-be2f4fe791c1d1f241c195aed02e2eb0 *man/combine.Rd
-e9e7afc1bc2b2cf3cdbfa6e94ea3c144 *man/deter.iso.Rd
-fc2490de1e0f997fcec81e7bbecdabf2 *man/homol.search.Rd
-8370c4f2a5f451313eec8bec238fd91a *man/iso_list.Rd
-b7e423b815e5feee1c0735d4a7a259c0 *man/make.isos.Rd
-e59139a02e75467c10c14eb3b4118555 *man/ms.filter.Rd
-7ee636f3ed4d32eff5361863d80d7379 *man/nontarget-package.Rd
-921308307d868203fa1c803b44e37377 *man/pattern.search.Rd
+e973c2f1adaf26bc5a742809a04b6ede *man/adduct.search.Rd
+f22c96186ea98fe0eb25be65269dbb97 *man/combine.Rd
+88e8e4f0a8c97681ad947349f7d684e0 *man/deter.iso.Rd
+352c56ddca5c875775e84cbe9a86afea *man/homol.search.Rd
+e7232f60d6e2f7325653ad7980f15f15 *man/make.isos.Rd
+1dcbe4bbafdce4960e57c1d87647089c *man/ms.filter.Rd
+b68b7b152f39b2e6a4d182867b86ecd6 *man/nontarget-package.Rd
+3908a96bc7952343af381cb70a0975d0 *man/pattern.search.Rd
+9d3e0cbc2bf80f28cf40c5083889c10f *man/pattern.search2.Rd
 98afa2cfbc0238d2fc169c16f7288f81 *man/peaklist.Rd
 e1008b7237691b69e5d11034e2883f3f *man/plotadduct.Rd
 7d7547f0028451045db1143b07ceb503 *man/plotall.Rd
 db80295da663f0570824baad81f5c229 *man/plotcomp.Rd
-0b51997f72774d7028f231dfc70a3fa7 *man/plotdefect.Rd
-5003f21330f0409ed9c9b870213748cc *man/plotdiff.Rd
-59de92488ced0ac8dd52920250910560 *man/plotgroup.Rd
-6803ec3c28ec363cb8031f33677220fb *man/plothomol.Rd
+3706d5485ff21072e82360b49cac144d *man/plotdefect.Rd
+d901b5237252096dc1fbd3a2e4f38408 *man/plotdiff.Rd
+0595e639c597ca8cf33869d80833ec8c *man/plotgroup.Rd
+b139c1ffd8a08c61b750faececce91f4 *man/plothomol.Rd
 bc3f4eff3a70a155d144a38e0054b385 *man/plotisotopes.Rd
-9e5ecb95fef23842f9fbced149ff9354 *man/rm.sat.Rd
-7c88148ffd26c3aae6db0fb071410960 *src/adductCpp.cpp
-d36e4f6e765c9f6905730995141d01a4 *src/massCpp.cpp
+4d0ac0448b61ca0ee8b5d1868befb46a *man/rm.sat.Rd
+dc7a5173afbaaf36888f2f4ce17fdadb *src/adductCpp.cpp
+0438bad0fc74c7a44b815690433ae25e *src/homologues.cpp
+35b326b0393ca3bb794c07c68b263086 *src/massCpp.cpp
+6c112f852affd92c0d3801dca797538f *src/quantization.cpp
+ef0b6a860b904cd6f3e9816c50965d55 *src/searchTree.cpp
diff --git a/NAMESPACE b/NAMESPACE
@@ -1 +1,5 @@
-exportPattern("^[[:alpha:]]+")
+exportPattern(adduct.search,combine,make.isos,deter.iso,homol.search,ms.filter,nontarget,pattern.search,pattern.search2,plotadduct,plotall,plotcomp,plotdefect,plotdiff,plotgroup,plothomol,plotisotopes,rm.sat)
+useDynLib(nontarget,adduct,mass,homol_1,metagroup,peak_search,boxtree,kdtree4,search_boxtree,search_kdtree)
+importFrom("enviPat")
+importFrom("nontargetData")
+importFrom("mgcv")
diff --git a/NEWS b/NEWS
@@ -0,0 +1,7 @@
+nontarget News
+
+CHANGES in version 1.3:
+
+	(1) src, R, man - several bugs debugged
+	(2) R - pattern.search2 introduced
+	(3) R - homol.search upgraded
diff --git a/R/adduct.search.R b/R/adduct.search.R
@@ -1,17 +1,22 @@
 adduct.search <-
 function(
-                    peaklist,adduct_list,
-                    rttol=0,mztol=2,massfrac=0.1,ppm=TRUE,
-                    adducts=c("M+H","M+K","M+Na"),ion_mode="positive",
+                    peaklist,
+					adducts,
+                    rttol=0,
+					mztol=2,
+					massfrac=0.1,
+					ppm=TRUE,
+                    use_adducts=c("M+H","M+K","M+Na"),
+					ion_mode="positive",
                     entry=20
-                    ){
+        ){
 
     ############################################################################
     # (0) check inputs #########################################################
-    if(massfrac>1 || massfrac<=0){ stop("massfrac must be >0 and <=1") };
+    if( massfrac>1 || massfrac<=0 ){ stop("massfrac must be >0 and <=1") };
     if(ion_mode!="positive" & ion_mode!="negative"){stop("ion mode: positive or negative?")}
-    for(i in 1:length(adducts)){if(any(adduct_list[,1]==adducts[i])!=TRUE &  any(adduct_list[adduct_list[,1]==adducts[i],6]==ion_mode)){stop(paste("Adduct ",adducts[i]," not in adduct_list!",sep=""))}};
-    for(i in 1:length(adducts)){if((adduct_list[adduct_list[,1]==adducts[i],6]!=ion_mode)){stop(paste(adducts[i]," not in ion mode ",ion_mode,sep=""))}};
+    for(i in 1:length(use_adducts)){if(any(adducts[,1]==use_adducts[i])!=TRUE &  any(adducts[adducts[,1]==use_adducts[i],6]==ion_mode)){stop(paste("Adduct ",use_adducts[i]," not in adducts!",sep=""))}};
+    for(i in 1:length(use_adducts)){if((adducts[adducts[,1]==use_adducts[i],6]!=ion_mode)){stop(paste(use_adducts[i]," not in ion mode ",ion_mode,sep=""))}};
     if(length(peaklist)>3){stop("peaklist with > 3 columns not allowed")}
     ############################################################################
     cat("\n (1) Assemble lists...");
@@ -25,29 +30,29 @@ function(
     getit2<-rep("0",alls);        # (2) from which peak?
     getit4<-rep("0",alls);        # (3) to which peak?
     getit5<-rep("0",alls);        # (4) within [1] large or [2] small mass tolerance?
-    # (1.3) retrieve selected subset of adduct_list
-    adducts<-as.character(levels(as.factor(adducts)))
-    these<-match(adducts,adduct_list[,1]);
+    # (1.3) retrieve selected subset of adducts
+    use_adducts<-as.character(levels(as.factor(use_adducts)))
+    these<-match(use_adducts,adducts[,1]);
     these<-these[is.na(these)==FALSE];
-    add<-adduct_list[these,];
+    add<-adducts[these,];
     add<-add[add[,6]==ion_mode,];
-    if(length(add[,1])<1){stop("No selected adducts in list!")};
+    if(length(add[,1])<1){stop("No selected use_adducts among adducts ... abort!")};
     add2<-data.frame(0,0,0,0,0);
     add3<-c();
     names(add2)<-c("mult1","mass1","mult2","mass2","count")
     that<-c(2);
     for(i in 1:(length(add[,1]))){
-      for(j in 1:length(add[,1])){
-          if(i!=j){
-          add2<-rbind(add2,rep(0,5));
-          add2[that,1]<-add[i,4];
-          add2[that,2]<-add[i,5];
-          add2[that,3]<-add[j,4];
-          add2[that,4]<-add[j,5];
-          that<-c(that+1);
-          add3<-c(add3,paste(add[i,1],add[j,1],sep="<->"));
-          }
-       }; #j
+		for(j in 1:length(add[,1])){
+			if(i!=j){
+				add2<-rbind(add2,rep(0,5));
+				add2[that,1]<-add[i,4];
+				add2[that,2]<-add[i,5];
+				add2[that,3]<-add[j,4];
+				add2[that,4]<-add[j,5];
+				that<-c(that+1);
+				add3<-c(add3,paste(add[i,1],add[j,1],sep="<->"));
+			}
+		}; #j
     }; #i
     add2<-add2[-1,];
     #data.frame(add3,add2);
@@ -63,46 +68,83 @@ function(
     getit4a<-rep(0,alls*entry);
     getit5a<-rep(0,alls*entry);
     maxmass<-max(peaklist[,1]);
-    #dyn.load("C:\\Program Files\\R\\R-2.13.1\\bin\\i386\\adductCpp.dll");
-    #dyn.load(paste(.libPaths(),"/nontarget/temp/adductCpp.dll",sep=""));
     result<-.C("adduct",
-      as.double(samples[,1]),as.double(samples[,3]),as.integer(length(samples[,1])),  # 3
-      as.double(mztol*2),as.double(massfrac*2),as.double(rttol),    # 6
-      as.integer(length(add2[,1])),                                                               # 7
-      as.double(add2[,1]),as.double(add2[,2]),as.double(add2[,3]),as.double(add2[,4]),as.integer(add2[,5]),    # 12
-      as.integer(entry),as.integer(ppm2),                                       # 14
-      as.integer(getit1a),as.integer(getit2a),as.integer(getit4a),as.integer(getit5a) # 18
-      ,PACKAGE="nontarget"
+		as.double(samples[,1]),
+		as.double(samples[,3]),
+		as.integer(length(samples[,1])),  	# 3
+		as.double(mztol*2),
+		as.double(massfrac*2),
+		as.double(rttol),    				# 6
+		as.integer(length(add2[,1])),       # 7
+		as.double(add2[,1]),
+		as.double(add2[,2]),
+		as.double(add2[,3]),
+		as.double(add2[,4]),
+		as.integer(add2[,5]),    			# 12
+		as.integer(entry),
+		as.integer(ppm2),                   # 14
+		as.integer(getit1a),
+		as.integer(getit2a),
+		as.integer(getit4a),
+		as.integer(getit5a), 				# 18
+		PACKAGE="nontarget"
     );
     # (1) which adduct?
-    for(i in 1:(alls-1)){for(j in 1:entry){if(result[15][[1]][(i-1)*entry+j]!=0){getit1[i]<-paste(getit1[i],result[15][[1]][(i-1)*entry+j],sep="/")}}};
+    for(i in 1:(alls-1)){
+		for(j in 1:entry){
+			if(result[15][[1]][(i-1)*entry+j]!=0){
+				getit1[i]<-paste(getit1[i],result[15][[1]][(i-1)*entry+j],sep="/")
+			}
+		}
+	};
     # (2) from which peak?
-    for(i in 1:(alls-1)){for(j in 1:entry){if(result[16][[1]][(i-1)*entry+j]!=0){getit2[i]<-paste(getit2[i],result[16][[1]][(i-1)*entry+j],sep="/")}}};
+    for(i in 1:(alls-1)){
+		for(j in 1:entry){
+			if(result[16][[1]][(i-1)*entry+j]!=0){
+				getit2[i]<-paste(getit2[i],getback[result[16][[1]][(i-1)*entry+j]],sep="/")
+			}
+		}
+	};
     # (3) to which peak?
-    for(i in 1:(alls-1)){for(j in 1:entry){if(result[17][[1]][(i-1)*entry+j]!=0){getit4[i]<-paste(getit4[i],result[17][[1]][(i-1)*entry+j],sep="/")}}};
+    for(i in 1:(alls-1)){
+		for(j in 1:entry){
+			if(result[17][[1]][(i-1)*entry+j]!=0){
+				getit4[i]<-paste(getit4[i],getback[result[17][[1]][(i-1)*entry+j]],sep="/")
+			}
+		}
+	};
     # (4) tolerance: small or large?
-    for(i in 1:(alls-1)){for(j in 1:entry){
-      if(result[18][[1]][(i-1)*entry+j]==1){getit5[i]<-paste(getit5[i],"small",sep="/")};
-      if(result[18][[1]][(i-1)*entry+j]==2){getit5[i]<-paste(getit5[i],"large",sep="/")};
-    }};
-    if(result[13][[1]]!=entry){cat("WARNING: entry overflow -> links missing!")};
+    for(i in 1:(alls-1)){
+		for(j in 1:entry){
+			if(result[18][[1]][(i-1)*entry+j]==1){
+				getit5[i]<-paste(getit5[i],"small",sep="/")
+			};
+			if(result[18][[1]][(i-1)*entry+j]==2){
+				getit5[i]<-paste(getit5[i],"large",sep="/")
+			};
+		}
+	};
+    if(result[13][[1]]!=entry){
+		cat("WARNING: entry overflow -> links missing!")
+	};
     #data.frame(ID,getit4,getit2,getit1,getit5)
     rm(result);
-    #dyn.unload(paste(.libPaths(),"/nontarget/temp/adductCpp.dll",sep=""));
     ############################################################################
 
     ############################################################################
-    # correct outputs for missing adduct combis (only submatrix searched!)
+    # correct outputs for missing adduct combis (only submatrix searched!) #####
     for(i in 1:alls){
-      if(getit1[i]!="none"){
-        this12<-as.numeric(strsplit(as.character(getit1[i]),"/")[[1]][-1]);
-        if(length(this12)==1){
-          getit1[i]<-paste("none//",add3[this12],"//",sep="");
-        }else{
-          getit1[i]<-paste("none//",add3[this12[1]],"//",sep="");
-          for(j in 2:length(this12)){getit1[i]<-paste(getit1[i],add3[this12[j]],"//",sep="");}
-        };
-      };
+		if(getit1[i]!="none"){
+			this12<-as.numeric(strsplit(as.character(getit1[i]),"/")[[1]][-1]);
+			if(length(this12)==1){
+				getit1[i]<-paste("none//",add3[this12],"//",sep="");
+			}else{
+				getit1[i]<-paste("none//",add3[this12[1]],"//",sep="");
+				for(j in 2:length(this12)){
+					getit1[i]<-paste(getit1[i],add3[this12[j]],"//",sep="");
+				}
+			};
+		};
     };
     for(i in 1:alls){
       if(getit4[i]!="0"){
@@ -116,20 +158,20 @@ function(
         for(j in 1:length(this2a)){
               this2<-c(this2,strsplit(as.character(this2a[j]),"<->")[[1]][1]);
               this3<-c(this3,strsplit(as.character(this2a[j]),"<->")[[1]][2]);
-              };
+        };
         for(j in 1:length(this1)){
-          this10<-strsplit(as.character(getit4[as.numeric(this1[j])]),"/")[[1]]
-          this11<-strsplit(as.character(getit1[as.numeric(this1[j])]),"//")[[1]]
-          if((any(this10==as.character(i)) & any(this11==paste(this3[j],"<->",this2[j],sep=""))) == FALSE ){
-               getit4[as.numeric(this1[j])]<-paste(getit4[as.numeric(this1[j])],i,sep="/");
-               if(getit1[as.numeric(this1[j])]=="none"){
-                getit1[as.numeric(this1[j])]="none//"
-                getit1[as.numeric(this1[j])]<-paste(getit1[as.numeric(this1[j])],paste(this3[j],"<->",this2[j],"//",sep=""),sep="");
-               }else{
-                getit1[as.numeric(this1[j])]<-paste(getit1[as.numeric(this1[j])],paste(this3[j],"<->",this2[j],"//",sep=""),sep="");
-               }
-               getit5[as.numeric(this1[j])]<-paste(getit5[as.numeric(this1[j])],this5[j],sep="/");
-          };
+			this10<-strsplit(as.character(getit4[as.numeric(this1[j])]),"/")[[1]]
+			this11<-strsplit(as.character(getit1[as.numeric(this1[j])]),"//")[[1]]
+			if((any(this10==as.character(i)) & any(this11==paste(this3[j],"<->",this2[j],sep=""))) == FALSE ){
+				getit4[as.numeric(this1[j])]<-paste(getit4[as.numeric(this1[j])],i,sep="/");
+				if(getit1[as.numeric(this1[j])]=="none"){
+					getit1[as.numeric(this1[j])]="none//"
+					getit1[as.numeric(this1[j])]<-paste(getit1[as.numeric(this1[j])],paste(this3[j],"<->",this2[j],"//",sep=""),sep="");
+				}else{
+					getit1[as.numeric(this1[j])]<-paste(getit1[as.numeric(this1[j])],paste(this3[j],"<->",this2[j],"//",sep=""),sep="");
+				}
+				getit5[as.numeric(this1[j])]<-paste(getit5[as.numeric(this1[j])],this5[j],sep="/");
+			};
         };
       };
     };
@@ -139,11 +181,11 @@ function(
 
     ############################################################################
     # (4) group ################################################################
-    cat("\n (3) Group peaks...");
+    cat("\n (3) Group peaks ...");
     ############################################################################
     group1<-c(); # groupnumber?
     group2<-c(); # which peaks?
-    group3<-c(); # which adducts?
+    group3<-c(); # which use_adducts?
     group4<-rep(0,alls); # groupnumber? 1-alls
     groupnumber<-c(1);
     getit1b<-getit1;
@@ -154,9 +196,9 @@ function(
         this2a<-strsplit(as.character(getit1b[i]),"//")[[1]][-1];
         this2<-c();this3<-c();
         for(j in 1:length(this2a)){
-              this2<-c(this2,strsplit(as.character(this2a[j]),"<->")[[1]][1]);
-              this3<-c(this3,strsplit(as.character(this2a[j]),"<->")[[1]][2]);
-              };
+            this2<-c(this2,strsplit(as.character(this2a[j]),"<->")[[1]][1]);
+            this3<-c(this3,strsplit(as.character(this2a[j]),"<->")[[1]][2]);
+        };
         this4<-levels(as.factor(this2));
         for(j in 1:length(this4)){
           # assemble group information  ########################################
@@ -227,7 +269,7 @@ function(
     };
     ############################################################################
     # count hits !
-    hits<-data.frame(adducts,rep(0,length(adducts)));
+    hits<-data.frame(use_adducts,rep(0,length(use_adducts)));
     names(hits)<-c("names","counts");
     for(i in 1:length(group1)){
       this1<-strsplit(as.character(group3[i]),"/")[[1]];
@@ -245,12 +287,12 @@ function(
         group1[k]<-paste("/",group1[k],"/",sep="")
     }
     grouping<-data.frame(group1,group2,group3);
-    names(grouping)<-c("group ID","peak IDs","adducts");
+    names(grouping)<-c("group ID","peak IDs","use_adducts");
     ############################################################################
-    adducts<-data.frame(samples[,1:3],ID,group4,getit4,getit1,getit5);
-    names(adducts)<-c(names(samples)[1:3],"peak ID","group ID","to ID","adduct(s)","mass tolerance");
-    adduct<-list(adducts,parameters,grouping,hits,overlaps);
-    names(adduct)<-c("Adducts","Parameters","Peaks in adduct groups","Number of adducts","Number of peaks with grouped adducts overlapping");
+    list_adducts<-data.frame(peaklist[,1:3],ID,group4,getit4,getit1,getit5);
+    names(list_adducts)<-c(names(peaklist)[1:3],"peak ID","group ID","to ID","adduct(s)","mass tolerance");
+    adduct<-list(list_adducts,parameters,grouping,hits,overlaps);
+    names(adduct)<-c("adducts","Parameters","Peaks in adduct groups","Adduct counts","Number of peaks with grouped adducts overlapping");
     cat("done.\n\n");
     ############################################################################
 

diff --git a/R/combine.R b/R/combine.R
diff --git a/R/deter.iso.R b/R/deter.iso.R