diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100755 index 0000000..53ef924 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,13 @@ +Package: Kpart +Type: Package +Title: Spline Fitting +Version: 1.0 +Date: 2012-08-02 +Author: Eric Golinko +Depends: leaps +Maintainer: +Description: Kpart spline fitting +License: GPL (>= 2) +Packaged: 2012-08-06 12:20:32 UTC; tf2 +Repository: CRAN +Date/Publication: 2012-08-06 18:42:00 diff --git a/MD5 b/MD5 new file mode 100644 index 0000000..8ad88c4 --- /dev/null +++ b/MD5 @@ -0,0 +1,12 @@ +ae77ff834325c20eea4c679fd78a9b20 *DESCRIPTION +682aa6143db5599c48a55c2c627a6f6d *NAMESPACE +77f662926661ebeff90a3a1b413c7051 *R/Kpart.example.R +54417a55a4b1fcf5648257776d504863 *R/Kpart.knots.R +c11c45b66b5d93baaf3ba360327caba3 *R/Kpart.plot.R +24632d59a6ce136f0cb6360d6e25dd29 *R/lm.Kpart.R +9489e67ee4ef536779777b2ab4f2aecd *inst/extdata/example_data.txt +7964959f7a09cc72b6bd015cba55342c *man/Kpart-package.Rd +95f21b8d4f078bab3a39d96a1cc4cecf *man/Kpart.example.Rd +2ddd746c19ad688a680126ca1c47bf8a *man/Kpart.knots.Rd +2fd46e3f24d575946304fa11aa0de26a *man/Kpart.plot.Rd +7b95d60a5b2862bd5d04d562695f8227 *man/lm.Kpart.Rd diff --git a/NAMESPACE b/NAMESPACE new file mode 100755 index 0000000..c107872 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,7 @@ +export( +Kpart.knots, +lm.Kpart, +Kpart.plot, +Kpart.example +) +exportPattern("^[[:alpha:]]+") diff --git a/R/Kpart.example.R b/R/Kpart.example.R new file mode 100755 index 0000000..d9bfca8 --- /dev/null +++ b/R/Kpart.example.R @@ -0,0 +1,103 @@ +Kpart.example <- +function(data=NULL,K=4) +{ + + if(is.null(data)) { + p = paste(system.file("extdata", package="Kpart"), "/example_data.txt", sep="") + data = read.table(p, header=T) + } + + #variable assignment + + x=data[,2] + x.2=x^2 + x.3=x^3 + y=data[,1] + L=floor(length(y)/K) + + #Min/Max algorithm to find absolute maximum deviate in the kth partition + + m=matrix(nrow=K,ncol=1) + for (k in 1:K) m[k]=abs(max(y[(L*(k-1)+1):(k*L)])-mean(y[(L*(k-1)+1):(k*L)])) + + W=matrix(nrow=L, ncol=K) + for (l in 1:L) for (k in 1:K) W[l,k]=ifelse(m[k] != abs(y[(L*(k-1)+l)]-mean(y[(L*(k-1)+1):(k*L)])), 0, x[(L*(k-1)+l)]) + pops=colSums(W) + + #matrix of potential spline knots + + TT=matrix(nrow=length(x), ncol=K) + for (i in 1:length(x)) for (k in 1:K) TT[i,k]=(ifelse(0 > (x[i]-pops[k])^3 , 0 ,(x[i]-pops[k])^3)) + bigT=data.frame(TT) + for(i in 1:length(pops)) + colnames(bigT)[i]=paste("X",as.character(pops[i]),sep="") + + #model selection process + + X=cbind(x,x.2,x.3,bigT) + r=regsubsets(X,y,,method="exhaustive",nbest=1,nmax=(K+4)) + a=(summary(r)[c("bic")]) + AA=data.frame(a) + V=cbind(1:length(t(AA)),AA) + aa=matrix(nrow=length(t(AA)),ncol=1) + for (i in 1:length(t(AA))) aa[i,1]=ifelse(min(V[,2])==V[i,2],V[i,1],0) + aaa=data.frame(aa) + aaaa=colSums(aaa) + c=coef(r,id=aaaa) + cc=data.frame(c) + C=matrix(nrow=2,ncol=length(c)) + C[2,]=cc[,1] + C[1,]=names(c) + + #matrix of spline knots + + t=matrix(nrow=length(C[1,]),ncol=length(names(bigT))) + for (i in 1:length(C[1,])) + for (j in 1:length(names(bigT))) + t[i,j]=(ifelse(C[1,i]==(names(bigT)[j]),(names(bigT)[j]),NA)) + tt=na.omit(c(t)) + + P=matrix(nrow=K, ncol=1) + for (i in 1:length(tt)) P[i]=ifelse(tt[i]=="NA", NA, paste("bigT",tt[i], sep="$")) + PP=na.omit(as.character(P)) + + #matrix of x,x^2, and x^3 terms + + qq=matrix(nrow=3,ncol=3) + for (i in 2:4) qq[(i-1),1]=ifelse(C[1,i]=="x","x",NA) + for (i in 2:4) qq[(i-1),2]=ifelse(C[1,i]=="x.2","x.2",NA) + for (i in 2:4) qq[(i-1),3]=ifelse(C[1,i]=="x.3","x.3",NA) + QQ=na.omit(as.character(qq)) + Q=c(QQ) + + EE=c(Q,PP) + E=paste(EE,collapse="+") + + #paste of terms to be estimated by lm() + + FF=as.formula(paste("y~ ", E)) + f=lm(FF) + + #plot of x and y axis and fitted values with vertical lines at knots + + plot(x,y,xlab="your_x",ylab="your_y") + lines(x,f$fitted.values, col="red") + + lineyears=matrix(nrow=length(PP),ncol=length(colSums(W))) + for(i in 1:length(PP)) + for(j in 1:length(colSums(W))) + lineyears[i,j]=ifelse(data.frame(strsplit(PP[i],"X"))[2,1]==colSums(W)[j],colSums(W)[j],0) + + spline.knots=colSums(lineyears)[which(colSums(lineyears)!=0)] + + par(new=TRUE) + for (i in 1:length(spline.knots)) + abline(v=spline.knots[i], lty=2) + #prints spline knots used in model + + print(spline.knots) + #returns linear model which summary(lm(.)) can be used + + return(f) + +} diff --git a/R/Kpart.knots.R b/R/Kpart.knots.R new file mode 100755 index 0000000..d12f433 --- /dev/null +++ b/R/Kpart.knots.R @@ -0,0 +1,81 @@ +Kpart.knots <- +function(data,K) +{ + + #library(leaps) + + #variable assignment + x=data[,2] + x.2=x^2 + x.3=x^3 + y=data[,1] + L=floor(length(y)/K) + + #Min/Max algorithm to find absolute maximum deviate in the kth partition + m=matrix(nrow=K,ncol=1) + for (k in 1:K) m[k]=abs(max(y[(L*(k-1)+1):(k*L)])-mean(y[(L*(k-1)+1):(k*L)])) + + W=matrix(nrow=L, ncol=K) + for (l in 1:L) for (k in 1:K) W[l,k]=ifelse(m[k] != abs(y[(L*(k-1)+l)]-mean(y[(L*(k-1)+1):(k*L)])), 0, x[(L*(k-1)+l)]) + pops=colSums(W) + + #matrix of potential spline knots + TT=matrix(nrow=length(x), ncol=K) + for (i in 1:length(x)) for (k in 1:K) TT[i,k]=(ifelse(0 > (x[i]-pops[k])^3 , 0 ,(x[i]-pops[k])^3)) + bigT=data.frame(TT) + for(i in 1:length(pops)) + colnames(bigT)[i]=paste("X",as.character(pops[i]),sep="") + + #model selection process + + X=cbind(x,x.2,x.3,bigT) + r=regsubsets(X,y,,method="exhaustive",nbest=1,nmax=(K+4)) + a=(summary(r)[c("bic")]) + AA=data.frame(a) + V=cbind(1:length(t(AA)),AA) + aa=matrix(nrow=length(t(AA)),ncol=1) + for (i in 1:length(t(AA))) aa[i,1]=ifelse(min(V[,2])==V[i,2],V[i,1],0) + aaa=data.frame(aa) + aaaa=colSums(aaa) + c=coef(r,id=aaaa) + cc=data.frame(c) + C=matrix(nrow=2,ncol=length(c)) + C[2,]=cc[,1] + C[1,]=names(c) + + #matrix of spline knots + + t=matrix(nrow=length(C[1,]),ncol=length(names(bigT))) + for (i in 1:length(C[1,])) + for (j in 1:length(names(bigT))) + t[i,j]=(ifelse(C[1,i]==(names(bigT)[j]),(names(bigT)[j]),NA)) + tt=na.omit(c(t)) + + P=matrix(nrow=K, ncol=1) + for (i in 1:length(tt)) P[i]=ifelse(tt[i]=="NA", NA, paste("bigT",tt[i], sep="$")) + PP=na.omit(as.character(P)) + + #matrix of x,x^2, and x^3 terms + + qq=matrix(nrow=3,ncol=3) + for (i in 2:4) qq[(i-1),1]=ifelse(C[1,i]=="x","x",NA) + for (i in 2:4) qq[(i-1),2]=ifelse(C[1,i]=="x.2","x.2",NA) + for (i in 2:4) qq[(i-1),3]=ifelse(C[1,i]=="x.3","x.3",NA) + QQ=na.omit(as.character(qq)) + Q=c(QQ) + + EE=c(Q,PP) + E=paste(EE,collapse="+") + + #paste of terms to be estimated by lm() + + FF=as.formula(paste("y~ ", E)) + f=lm(FF) + #returns spline knots used in model + lineyears=matrix(nrow=length(PP),ncol=length(colSums(W))) + for(i in 1:length(PP)) + for(j in 1:length(colSums(W))) + lineyears[i,j]=ifelse(data.frame(strsplit(PP[i],"X"))[2,1]==colSums(W)[j],colSums(W)[j],0) + spline.knots=colSums(lineyears)[which(colSums(lineyears)!=0)] + return(spline.knots) +} diff --git a/R/Kpart.plot.R b/R/Kpart.plot.R new file mode 100755 index 0000000..5964a84 --- /dev/null +++ b/R/Kpart.plot.R @@ -0,0 +1,93 @@ +Kpart.plot <- +function(data,K) +{ + + #library(leaps) + + #variable assignment + + x=data[,2] + x.2=x^2 + x.3=x^3 + y=data[,1] + L=floor(length(y)/K) + + #Min/Max algorithm to find absolute maximum deviate in the kth partition + + m=matrix(nrow=K,ncol=1) + for (k in 1:K) m[k]=abs(max(y[(L*(k-1)+1):(k*L)])-mean(y[(L*(k-1)+1):(k*L)])) + + W=matrix(nrow=L, ncol=K) + for (l in 1:L) for (k in 1:K) W[l,k]=ifelse(m[k] != abs(y[(L*(k-1)+l)]-mean(y[(L*(k-1)+1):(k*L)])), 0, x[(L*(k-1)+l)]) + pops=colSums(W) + + #matrix of potential spline knots + + TT=matrix(nrow=length(x), ncol=K) + for (i in 1:length(x)) for (k in 1:K) TT[i,k]=(ifelse(0 > (x[i]-pops[k])^3 , 0 ,(x[i]-pops[k])^3)) + bigT=data.frame(TT) + for(i in 1:length(pops)) + colnames(bigT)[i]=paste("X",as.character(pops[i]),sep="") + + #model selection process + + X=cbind(x,x.2,x.3,bigT) + r=regsubsets(X,y,,method="exhaustive",nbest=1,nmax=(K+4)) + a=(summary(r)[c("bic")]) + AA=data.frame(a) + V=cbind(1:length(t(AA)),AA) + aa=matrix(nrow=length(t(AA)),ncol=1) + for (i in 1:length(t(AA))) aa[i,1]=ifelse(min(V[,2])==V[i,2],V[i,1],0) + aaa=data.frame(aa) + aaaa=colSums(aaa) + c=coef(r,id=aaaa) + cc=data.frame(c) + C=matrix(nrow=2,ncol=length(c)) + C[2,]=cc[,1] + C[1,]=names(c) + + #matrix of spline knots + + t=matrix(nrow=length(C[1,]),ncol=length(names(bigT))) + for (i in 1:length(C[1,])) + for (j in 1:length(names(bigT))) + t[i,j]=(ifelse(C[1,i]==(names(bigT)[j]),(names(bigT)[j]),NA)) + tt=na.omit(c(t)) + + P=matrix(nrow=K, ncol=1) + for (i in 1:length(tt)) P[i]=ifelse(tt[i]=="NA", NA, paste("bigT",tt[i], sep="$")) + PP=na.omit(as.character(P)) + + #matrix of x,x^2, and x^3 terms + + qq=matrix(nrow=3,ncol=3) + for (i in 2:4) qq[(i-1),1]=ifelse(C[1,i]=="x","x",NA) + for (i in 2:4) qq[(i-1),2]=ifelse(C[1,i]=="x.2","x.2",NA) + for (i in 2:4) qq[(i-1),3]=ifelse(C[1,i]=="x.3","x.3",NA) + QQ=na.omit(as.character(qq)) + Q=c(QQ) + + EE=c(Q,PP) + E=paste(EE,collapse="+") + + #paste of terms to be estimated by lm() + + FF=as.formula(paste("y~ ", E)) + f=lm(FF) + + #plot of x and y axis and fitted values with vertical lines at knots + + plot(x,y,xlab="your_x",ylab="your_y") + lines(x,f$fitted.values, col="red") + + lineyears=matrix(nrow=length(PP),ncol=length(colSums(W))) + for(i in 1:length(PP)) + for(j in 1:length(colSums(W))) + lineyears[i,j]=ifelse(data.frame(strsplit(PP[i],"X"))[2,1]==colSums(W)[j],colSums(W)[j],0) + + spline.knots=colSums(lineyears)[which(colSums(lineyears)!=0)] + + par(new=TRUE) + for (i in 1:length(spline.knots)) + abline(v=spline.knots[i], lty=2) +} diff --git a/R/lm.Kpart.R b/R/lm.Kpart.R new file mode 100755 index 0000000..f33840f --- /dev/null +++ b/R/lm.Kpart.R @@ -0,0 +1,80 @@ +lm.Kpart <- +function(data,K) +{ + + #library(leaps) + + #variable assignment + + x=data[,2] + x.2=x^2 + x.3=x^3 + y=data[,1] + L=floor(length(y)/K) + + #Min/Max algorithm to find absolute maximum deviate in the kth partition + + m=matrix(nrow=K,ncol=1) + for (k in 1:K) m[k]=abs(max(y[(L*(k-1)+1):(k*L)])-mean(y[(L*(k-1)+1):(k*L)])) + + W=matrix(nrow=L, ncol=K) + for (l in 1:L) for (k in 1:K) W[l,k]=ifelse(m[k] != abs(y[(L*(k-1)+l)]-mean(y[(L*(k-1)+1):(k*L)])), 0, x[(L*(k-1)+l)]) + pops=colSums(W) + + #matrix of potential spline knots + + TT=matrix(nrow=length(x), ncol=K) + for (i in 1:length(x)) for (k in 1:K) TT[i,k]=(ifelse(0 > (x[i]-pops[k])^3 , 0 ,(x[i]-pops[k])^3)) + bigT=data.frame(TT) + for(i in 1:length(pops)) + colnames(bigT)[i]=paste("X",as.character(pops[i]),sep="") + + #model selection process + + X=cbind(x,x.2,x.3,bigT) + r=regsubsets(X,y,,method="exhaustive",nbest=1,nmax=(K+4)) + a=(summary(r)[c("bic")]) + AA=data.frame(a) + V=cbind(1:length(t(AA)),AA) + aa=matrix(nrow=length(t(AA)),ncol=1) + for (i in 1:length(t(AA))) aa[i,1]=ifelse(min(V[,2])==V[i,2],V[i,1],0) + aaa=data.frame(aa) + aaaa=colSums(aaa) + c=coef(r,id=aaaa) + cc=data.frame(c) + C=matrix(nrow=2,ncol=length(c)) + C[2,]=cc[,1] + C[1,]=names(c) + + #matrix of spline knots + + t=matrix(nrow=length(C[1,]),ncol=length(names(bigT))) + for (i in 1:length(C[1,])) + for (j in 1:length(names(bigT))) + t[i,j]=(ifelse(C[1,i]==(names(bigT)[j]),(names(bigT)[j]),NA)) + tt=na.omit(c(t)) + + P=matrix(nrow=K, ncol=1) + for (i in 1:length(tt)) P[i]=ifelse(tt[i]=="NA", NA, paste("bigT",tt[i], sep="$")) + PP=na.omit(as.character(P)) + + #matrix of x,x^2, and x^3 terms + + qq=matrix(nrow=3,ncol=3) + for (i in 2:4) qq[(i-1),1]=ifelse(C[1,i]=="x","x",NA) + for (i in 2:4) qq[(i-1),2]=ifelse(C[1,i]=="x.2","x.2",NA) + for (i in 2:4) qq[(i-1),3]=ifelse(C[1,i]=="x.3","x.3",NA) + QQ=na.omit(as.character(qq)) + Q=c(QQ) + + EE=c(Q,PP) + E=paste(EE,collapse="+") + + #paste of terms to be estimated by lm() + + FF=as.formula(paste("y~ ", E)) + f=lm(FF) + #returns linear model which summary(lm(.)) can be used + + return(f) +} diff --git a/inst/extdata/example_data.txt b/inst/extdata/example_data.txt new file mode 100755 index 0000000..75af4f8 --- /dev/null +++ b/inst/extdata/example_data.txt @@ -0,0 +1,52 @@ +rate year +0.0018622 1960 +0.0016779 1961 +0.0015898 1962 +0.0018210 1963 +0.0021384 1964 +0.0020087 1965 +0.0023378 1966 +0.0024430 1967 +0.0024051 1968 +0.0025703 1969 +0.0029570 1970 +0.0030983 1971 +0.0031062 1972 +0.0034611 1973 +0.0036779 1974 +0.0038602 1975 +0.0038146 1976 +0.0040453 1977 +0.0040926 1978 +0.0040292 1979 +0.0044480 1980 +0.0047015 1981 +0.0044973 1982 +0.0041867 1983 +0.0043534 1984 +0.0046313 1985 +0.0056659 1986 +0.0056866 1987 +0.0057288 1988 +0.0060366 1989 +0.0070856 1990 +0.0084375 1991 +0.0087098 1992 +0.0077928 1993 +0.0068141 1994 +0.0063091 1995 +0.0056309 1996 +0.0056429 1997 +0.0051220 1998 +0.0049020 1999 +0.0048561 2000 +0.0043831 2001 +0.0044488 2002 +0.0042924 2003 +0.0042651 2004 +0.0043061 2005 +0.0042249 2006 +0.0044358 2007 +0.0044739 2008 +0.0044513 2009 +0.0037776 2010 diff --git a/man/Kpart-package.Rd b/man/Kpart-package.Rd new file mode 100755 index 0000000..36e46a8 --- /dev/null +++ b/man/Kpart-package.Rd @@ -0,0 +1,27 @@ +\name{Kpart-package} +\alias{Kpart-package} +\alias{Kpart} +\docType{package} +\title{ +Kpart +} +\description{ +Kpart spline fitting +} +\details{ +\tabular{ll}{ +Package: \tab Kpart\cr +Type: \tab Package\cr +Version: \tab 1.0\cr +Date: \tab 2012-08-02\cr +License: \tab Open Source\cr +} +~~ This package is intended for use with non-lineraly associated data. The function lm.Kpart firsts selects points for cubic spline knots using an alogorithm to find the abolute maximum deviate from the partiton mean, then fits a best fitting model by using the best subset method and minimum BIC. The function knots.Kpart returns the values selcted as knots in the model given by lm.Kpart, and plot.Kpart returns a plot with fitted values and vertical lines representing the spline points. Kpart(data,K) takes two arguments, data is a two column matrix where the first column is the repsonse and the second column is the predictor upon which knots are selected. K is a postive interger that indicates how many equally spaced partitions the user would like to produce.~~ +} +\author{ +Eric Golinko + +Maintainer: egolinko@gmail.com +} + +\keyword{ package } diff --git a/man/Kpart.example.Rd b/man/Kpart.example.Rd new file mode 100755 index 0000000..a87ec70 --- /dev/null +++ b/man/Kpart.example.Rd @@ -0,0 +1,38 @@ +\name{Kpart.example} +\alias{Kpart.example} +\title{ +An example of Kpart. +} +\description{ +Shows example usage for the Kpart method. +} +\usage{ +Kpart.example(data, K) +} +\arguments{ + \item{data}{ Two column matrix, the first column is respone, the second column is predictor in which to base knots. } + \item{K}{ A positive interger that will partition the data K times. } +} +\details{ +Example usage of Kpart package. +} +\value{ +Produces a plot. +} +\references{ +Tomas Fitzgerald +} +\author{ +Eric Golinko +} +\note{ +thank you. +} + +\examples{ +# example() +} + +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ manip } diff --git a/man/Kpart.knots.Rd b/man/Kpart.knots.Rd new file mode 100755 index 0000000..8ac9b48 --- /dev/null +++ b/man/Kpart.knots.Rd @@ -0,0 +1,38 @@ +\name{Kpart.knots} +\alias{Kpart.knots} +\title{ +Returns the spline knots selected by lm.Kpart. +} +\description{ +Displays selected knots by best subsets method after each absolute maximum deviate in partition mean is determined. +} +\usage{ +Kpart.knots(data, K) +} +\arguments{ + \item{data}{ Two column matrix, the first column is respone, the second column is predictor in which to base knots. } + \item{K}{ A positive interger that will partition the data K times. } +} +\details{ +Use of absolute maximum deviate from partition mean. +} +\value{ +Will return the values of the second column(predictor) in which the knots were selected from lm.Kpart. +} +\references{ +Tomas Fitzgerald +} +\author{ +Eric Golinko +} +\note{ +thank you. +} + +\examples{ +# example() +} + +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ manip } diff --git a/man/Kpart.plot.Rd b/man/Kpart.plot.Rd new file mode 100755 index 0000000..90aab22 --- /dev/null +++ b/man/Kpart.plot.Rd @@ -0,0 +1,38 @@ +\name{Kpart.plot} +\alias{Kpart.plot} +\title{ +A Kpart plot +} +\description{ +Displays plot of fitted values of lm.Kpart. +} +\usage{ +Kpart.plot(data, K) +} +\arguments{ + \item{data}{ Two column matrix, the first column is respone, the second column is predictor in which to base knots. } + \item{K}{ A positive interger that will partition the data K times. } +} +\details{ +Plot of fitted values, as well as vertical lines representing the values of knots selected. +} +\value{ +Plot. +} +\references{ +Tomas Fitzgerald +} +\author{ +Eric Golinko +} +\note{ +thank you. +} + +\examples{ +# example() +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ manpi } + diff --git a/man/lm.Kpart.Rd b/man/lm.Kpart.Rd new file mode 100755 index 0000000..391d9c5 --- /dev/null +++ b/man/lm.Kpart.Rd @@ -0,0 +1,39 @@ +\name{lm.Kpart} +\alias{lm.Kpart} +\title{ +something +} +\description{ +Offers overall cubic spline model model summary. +} +\usage{ +lm.Kpart(data, K) +} +\arguments{ + \item{data}{ Two column matrix, the first column is respone, the second column is predictor in which to base knots. } + \item{K}{ A positive interger that will partition the data K times. } +} +\details{ +For each kth partition potential knot selected, linear, quadratic, and cubic terms, use of the regsubsets() function from package(leaps) is implemented to determine the model whch has the lowest BIC. Then after identifying which predictors these are, fits an overall model using lm(). +} +\value{ +A lm() summary. +} +\references{ +Tomas Fitzgerald +} +\author{ +Eric Golinko +} +\note{ +thank you. +} + +\examples{ +# example() +} + +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ manpi } +