diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100755 index 0000000..3ef7586 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,17 @@ +Package: MAPLES +Type: Package +Title: Smoothed age profile estimation +Version: 1.0 +Date: 2011-04-18 +Author: Roberto Impicciatore +Maintainer: Roberto Impicciatore +Description: MAPLES is a general method for the estimation of age + profiles that uses standard micro-level demographic survey + data. The aim is to estimate smoothed age profiles and relative + risks for time-fixed and time-varying covariates. +Depends: mgcv +License: GPL (>= 2) +LazyData: yes +Packaged: 2011-04-26 16:50:02 UTC; roberto +Repository: CRAN +Date/Publication: 2011-04-26 17:36:50 diff --git a/NAMESPACE b/NAMESPACE new file mode 100755 index 0000000..aab2daa --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,2 @@ +export(epdata,splitter,ageprofile,plotap,tabx,tabm,listvar,mkdate) +importFrom(mgcv,gam) diff --git a/R/MAPLES.r b/R/MAPLES.r new file mode 100755 index 0000000..63e502e --- /dev/null +++ b/R/MAPLES.r @@ -0,0 +1,739 @@ + +#_______________________________________________________________________________ +# MAPLES library +# this file contains the following objects: +# Functions: ageprofile,epdata, splitter, plotap, tabx, tabm, mkdate, listvar +# +#_______________________________________________________________________________ + + + + +#_______________________________________________________________________________ +# ageprofile estimates age profiles +#_______________________________________________________________________________ + +ageprofile<-function(formula,epdata, + tr.name="Transition",win,method="car", + agelimits=c(0,100),outfile=FALSE, + tails=c(FALSE,FALSE),subset=TRUE, + weight,sig.eff=TRUE, sig.lev=0.05) +{ + + #_______________________________________________________________________________ + # aggreg aggregates micro data into a transition-specific matrix + aggreg<-function(e,method,covk) + { + ncat<-nlevels(factor(covk)) # number of levels of covariate + categ<- levels(factor(covk)) # levels of covariate + event<-matrix(rep(0,100*ncat),ncol=100) # initialize event counter + expos<-matrix(rep(0,100*ncat),ncol=100) # initialize exposure counter + + # cohort-age rates + if (method=="car"){ + for (k in 1:ncat) { + ds<-subset(e,covk==categ[k])#Selects temporary subset + N<-dim(ds)[1] + ti<-pmax(trunc(ds$Astart),0) + tf<-pmin(trunc(ds$Astop),100) + st<-ifelse(ds$status==1 | ds$status==2,1,0) + if (N>0) { + for (j in 1:N) { # selects j-th individual + expos[k,][ti[j]:tf[j]] <-expos[k,][ti[j]:tf[j]]+ ds$weight[j] + expos[k,][tf[j]] <-expos[k,][tf[j]] -(1-(ds$Astop[j]- + tf[j]))*ds$weight[j] + expos[k,][ti[j]]<-expos[k,][ti[j]]-((ds$Astart[j]-ti[j])*ds$weight[j]) + event[k,][tf[j]]<-event[k,][tf[j]]+st[j]*ds$weight[j] + }}}} + # cohort-period rates + if (method=="cpr"){ + for (k in 1:ncat) { + ds<-subset(e,covk==categ[k]) #Selects temporary subset + N<-dim(ds)[1] + ti<-pmax(trunc(trunc(ds$Wstart)-ds$birth)+1,0) + tf<-pmin(trunc(trunc(ds$Wstop)-ds$birth)+1,100) + st<-ifelse(ds$status==1 | ds$status==2,1,0) + if (N>0) { + for (j in 1:N) { # selects j-th individual + expos[k,][ti[j]:tf[j]] <-expos[k,][ti[j]:tf[j]]+ ds$weight[j] + expos[k,][tf[j]]<-expos[k,][tf[j]]- + ((1-(ds$Wstop[j]-trunc(ds$Wstop[j])))*ds$weight[j]) + expos[k,][ti[j]]<-expos[k,][ti[j]]- + (ds$Wstart[j]-trunc(ds$Wstart[j]))*ds$weight[j] + event[k,][tf[j]]<-event[k,][tf[j]]+st[j]*ds$weight[j] + }}}} + # Cohort-age probabilities + if (method=="cap"){ + for (k in 1:ncat) { + ds<-subset(e,covk==categ[k]) #Selects temporary subset + N<-dim(ds)[1] + ti<-pmax(trunc(ds$Astart),0) + tf<-pmin(trunc(ds$Astop),100) + st<-ifelse(ds$status==1 | ds$status==2,1,0) + if (N>0) { + for (j in 1:N) { # selects j-th individual + expos[k,][ti[j]:tf[j]] <-expos[k,][ti[j]:tf[j]]+ ds$weight[j] + event[k,][tf[j]]<-event[k,][tf[j]]+st[j]*ds$weight[j] + }}}} + # Recomposes data matrix + datr<-data.frame(matrix(rep(0,400*ncat),ncol=4)) + names(datr)<-c("age","event","expos","variab") + for (i in 1:100) { + for (k in 1:ncat) { + datr[ncat*(i-1)+k,1]<-i #age + datr[ncat*(i-1)+k,2]<-event[k,i] #events + datr[ncat*(i-1)+k,3]<-expos[k,i] #exposures + datr[ncat*(i-1)+k,4]<-categ[k] # variab + } + } + return(datr) + } + + #_____________________________________________________________________________ + # tailflat Tail flattening procedure (Baseline smoothing at the hedges) + tailflat<-function(age,profile,prop,tails) + { + smoo<-profile + if (tails[1]){ + lim<- min(age[which(prop>=0.10)]) + fdist<-which(age==lim)-1 + fw<-c(rep(0,fdist)) + r<-10/fdist + cc<-exp(((fdist+1)*r)/2) + fweight<-1/(1+cc*exp(-r*seq(1:fdist))) + if (lim>min(age)) smoo[age=0.90)]) + fdist<-which(age==max(age))-which(age==lim) + fw<-c(rep(0,fdist)) + r<-10/fdist + cc<-exp(((fdist+1)*r)/2) + fweight<-1/(1+cc*exp(-r*seq(1:fdist))) + if (limlim]<-profile[age>lim]*(1-fweight) + } + return(smoo) + } + + + # CHECK Arguments ____________________________________________________________ + if (missing(formula)) stop ("Formula is missing (try with formula=~1)" + ,call.=FALSE) + if (missing(epdata)) stop("Must have an epdata argument",call.=FALSE) + if (!is.data.frame(epdata)) stop("epdata must be a data.frame",call.=FALSE) + if (!all(names(epdata)[1:7]==c("id","Tstart","Tstop","status","agestart", + "agestop","birth"))) + stop ("epdata must have: Tstart,Tstop,status,agestart,agestop,birth",call.=FALSE) + if (!missing(weight)) {if (!is.numeric(weight)) stop ("weight must be numeric",call.=FALSE)} + if (!missing(formula) & formula!=~1) { + covar <- model.frame(formula=formula, data=epdata) + factors<-names(covar) + nlev<-0 + for (k in 1:length(factors)) { + lev.labels<- + nlev<-nlev+nlevels(covar[,k]) + } + } + if (!missing(win)) {if (any(is.na(win))) stop("NA win for one or more obs.",call.=FALSE)} + if (method!="cpr" & method!="car" & method!="cap") + stop("Method must be 'cpr','car', or 'cap'",call.=FALSE) + if (agelimits[1]>=agelimits[2] | agelimits[1]<0 ) + stop("Unconsistent agelimits for one or more obs",call.=FALSE) + + # weight + if (!missing(weight)) epdata$weight<-weight + if (missing(weight)) epdata$weight<-rep(1,dim(epdata)[1]) + + # Calendar Window of observation______________________________________________ + if (!missing(win)) { + epdata$Wstart<-pmax(win[,1],epdata$Tstart,na.rm=TRUE) + epdata$Wstop<-pmin(win[,2],epdata$Tstop, na.rm=TRUE) + chk<-ifelse(epdata$Wstop=agelimits[1] & a$age<=agelimits[2] & a$expos>0.001) + age0<-a$age + event0<-a$event + expos0<-a$expos + unsmoo<-cbind(age0,round(a$event/a$expos,4)) + colnames(unsmoo)<-c("age","base") + # GAM Model + m0<-gam(round(event0)~offset(log(expos0))+ s(age0),family=poisson) + baseline0<-as.vector(exp(predict.gam(m0,type="terms",terms="s(age0)")+m0$coeff[1])) + + # Check number of events + if (sum(event0)<=50) + warning("The number of events in the window of observation is lower than 50.", + immediate.=TRUE,call.=FALSE) + # Tail flattening + if (any(tails)) baseline0<-tailflat(age0,baseline0,cumsum(event0/sum(event0)),tails) + tab<-cbind(age0,round(baseline0,7)) + colnames(tab)<-c("age","baseline") + + + # MODEL WITH COVARIATES_______________________________________________________ + if (formula!=~1) { + # Initialize objects + catcnt<-1 + anovatest<-rep(NA,length(factors)) + # initialize (empty) arrays for final output + outKNOT<-matrix(NA,0,2) + outBETA<-matrix(NA,0,4) + outEVENT<-matrix(NA,0,4) + outSE<-matrix(NA,0,4) + outPVALUE<-matrix(NA,0,4) + + # select k-th covariate + for (k in 1:length(factors)) { # k-th covariate + # Aggregate data + ak<-aggreg(epdata,method,epdata[,factors[k]]) + #select age range and rows with non-zero exposure time + ak<-subset(ak,ak$age>=agelimits[1] & ak$age<=agelimits[2] & + ak$expos>0.001) + agek<-ak$age + eventk<-ak$event + exposk<-ak$expos + evexp<-eventk/exposk # UNSMOOTHED RATES + variab<-factor(ak$variab) + ncat<-nlevels(variab) # number of levels of covariate + categ<-levels(variab) # levels of covariate + if (k==1) lev.labels<-categ + if (k>1) lev.labels<-c(lev.labels,categ) + + for (z in 1:ncat) { # z-th level + rrisk<-rep(0,length(age0)) + # defining knots and age intervals (knots are at exact ages) + prop<-cumsum(evexp[variab==categ[z]]/sum(evexp[variab==categ[z]])) + knot<-c(0,0) + knot[1]<- min(agek[variab==categ[z]][which(prop>=0.33)]) + knot[2]<- min(agek[variab==categ[z]][which(prop>=0.667)]) + # mid-points between jumps and distances between mid-points + midpt<-rep(0,3) + midpt[1]<- min(agek[variab==categ[z]][which(prop>=0.15)]) + midpt[2]<- min(agek[variab==categ[z]][which(prop>=0.50)]) + midpt[3]<- min(agek[variab==categ[z]][which(prop>=0.85)]) + ddist<-c(0,0) + ddist[1]<-which(agek[variab==categ[z]]==midpt[2])- + which(agek[variab==categ[z]]==midpt[1]) + ddist[2]<-which(agek[variab==categ[z]]==midpt[3])- + which(agek[variab==categ[z]]==midpt[2]) + + int<-as.factor(ifelse(agek=knot[1] & agek<=knot[2],2,3))) + # 'deviation coding' dummies (for each level and age subint.) + dummy<-matrix(rep(0,3*(ncat-1)*dim(ak)[1]),ncol=3*(ncat-1)) + j<-0 + if (z=midpt[1] & age0midpt[2] & age0<=midpt[3]]<- + bc[2]*(1-w2)+bc[3]*w2 + rrisk[age0>midpt[3]]<-bc[3] + rrisk<-rrisk*baseline + if (any(tails)) rrisk<-tailflat(age0,rrisk,prop,tails) + + # Preparing output + tab<-cbind(tab,rrisk) # Age profiles + outKNOT<-rbind(outKNOT,knot) # knots + outEVENT<-rbind(outEVENT,c( # events + sum(eventk[variab==categ[z] & int==1]), + sum(eventk[variab==categ[z] & int==2]), + sum(eventk[variab==categ[z] & int==3]), + sum(eventk[variab==categ[z]]))) + outBETA<-rbind(outBETA,betax) # relative risks + outSE<-rbind(outSE,se) # Standard errors + outPVALUE<-rbind(outPVALUE,pvalue) # P-values + temp<-rep(0,length(age0)) + for (i in 1:length(age0)){ + if (any(agek==age0[i] & variab==categ[z])) + {temp[i]<-evexp[agek==age0[i] & variab==categ[z]]}} + unsmoo<-cbind(unsmoo,temp) + catcnt<-catcnt+1 # Counter + } # close iteration for z-th level + + # Total column (Model without age intervals) + contrasts(variab)<-'contr.sum' + m1<-gam(round(eventk)~offset(log(exposk))+ s(agek)+variab,family=poisson) + + # changes the order of levels + last<-factor(as.character(variab), levels=c(categ[ncat],categ[1:(ncat-1)])) + contrasts(last)<-'contr.sum' + m2<-gam(round(eventk)~offset(log(exposk))+ s(agek)+last,family=poisson) + + outBETA[(catcnt-ncat):(catcnt-1),4]<- + c(exp(m1$coeff[2:ncat]),exp(m2$coeff[2])) + outSE[(catcnt-ncat):(catcnt-1),4]<- + c(summary.gam(m1)$se[2:ncat],summary.gam(m2)$se[2]) + outPVALUE[(catcnt-ncat):(catcnt-1),4]<- + c(summary.gam(m1)$p.pv[2:ncat],summary.gam(m2)$p.pv[2]) + + # ANOVA test (comparison between base model and model with covariate__ + m0k<-gam(round(eventk)~offset(log(exposk))+ s(agek),family=poisson) + test<-anova.gam(m0k,m1,test="Chisq") + anovatest[k]<-round(test$P[2],3) # ANOVA test pvalue + + } # close iteration for k-th covariate + # FINAL OUTPUT + rownames(outBETA)<-lev.labels + rownames(outEVENT)<-lev.labels + rownames(outSE)<-lev.labels + rownames(outPVALUE)<-lev.labels + rownames(outKNOT)<-lev.labels + colnames(tab)<-c("age","baseline",lev.labels) + colnames(unsmoo)<-c("age","baseline",lev.labels) + colnames(outKNOT)<-c("knot1","knot2") + colnames(outBETA)<-c("int1","int2","int3","tot") + colnames(outEVENT)<-c("int1","int2","int3","tot") + colnames(outSE)<-c("int1","int2","int3","tot") + colnames(outPVALUE)<-c("int1","int2","int3","tot") + outlist<-list(tab,unsmoo,outKNOT, round(outEVENT),round(outBETA,4), + round(outSE,4),round(outPVALUE,3),factors,anovatest,tr.name,method, + agelimits,tails) + names(outlist)<-c("profiles","unsmoothed","knot","event","rrisk","se", + "pvalue","factors","ANOVAtest","tr.name","method","agelimits","tails") + } # close non-missing formula + else + { + outlist<-list(tab,unsmoo,tr.name,method,agelimits,tails) + names(outlist)<-c("profiles","unsmoothed","tr.name","method", + "agelimits","tails") + } + # WRITE TEXT FILE + if (outfile) { + sink(paste(tr.name,".txt",sep="")) + print(outlist) + sink() + } + if (any(outEVENT<5)) + warning("Few events (<5) for one or more levels of specified factors. Check the output carefully" + ,call.=FALSE) + class(outlist)<-"ap" + return(outlist) +} + + + + +#_______________________________________________________________________________ +# epdata creates episode data structure from dates +#_______________________________________________________________________________ + +epdata<-function(start, event, lcensor, rcensor,subset=TRUE, birth,id, addvar) +{ + # CHECK arguments + if (missing(start)) stop("Must have a start argument.",call.=FALSE) + if (missing(event)) stop("Must have a event argument.",call.=FALSE) + if (!is.logical(subset)) stop("subset is not logical.",call.=FALSE) + if (missing(id)) id<- seq(1,length(start)) + if (missing(lcensor)) lcensor<- rep(NA,length(start)) + if (missing(rcensor)) rcensor<- rep(NA,length(start)) + if (length(start)!=length(event) | length(start)!=length(id)) + stop ("Arguments must have same length.",call.=FALSE) + if (!missing(addvar)){ + if (dim(as.matrix(addvar))[1]!=length(start)) + stop ("Arguments must have same length.",call.=FALSE) + addvar<-subset(addvar,subset) + } + id<-id[subset] + if (any(is.na(id))) stop("NA id for one or more obs.",call.=FALSE) + start<-start[subset] + if (!is.numeric(start)) stop ("Start variable is not numeric.",call.=FALSE) + event<-event[subset] + if (!is.numeric(event)) stop ("Event variable is not numeric.",call.=FALSE) + birth<-birth[subset] + if (any(is.na(birth))) stop("NA date of birth for one or more obs.",call.=FALSE) + lcensor<-lcensor[subset] + rcensor<-rcensor[subset] + if (any(is.na(start) & is.na(lcensor))) + stop("Both start and lcensor are NA for some obs.",call.=FALSE) + if (any(is.na(event) & is.na(rcensor))) + stop("Both event and rcensor are NA for some obs.",call.=FALSE) + + # episode (start and stop) + Tstart<-pmax(lcensor,start,na.rm=TRUE) + Tstop<-pmin(rcensor,event,na.rm=TRUE) + # STATUS variable 0:right cens; 1:event; 2:left cens; 3:interval cens. + status<- ifelse(is.na(event), + ifelse(is.na(lcensor),0,3), + ifelse(is.na(lcensor),1,2)) + # Ages + agestart<-Tstart-birth # age at the beginning of the episode + agestop<-Tstop-birth # age at the end of the episode + + # Final dataframe + e<- cbind(id,Tstart,Tstop,status,agestart,agestop,birth) + if (!missing(addvar)) e<-cbind(e,addvar) + return (e) +} + + +#_______________________________________________________________________________ +# splitter creates time varying variables through episode splitting +#_______________________________________________________________________________ + + +splitter<-function(epdata,split,tvar.name="Tvar",tvar.lev) +{ + # checks arguments____________________________________________________________ + if (missing(epdata)) stop("Must have a epdata argument.",call.=FALSE) + if (!is.data.frame(epdata)) stop("epdata must be a data.frame.",call.=FALSE) +if (!all(names(epdata)[1:7]==c("id","Tstart","Tstop","status","agestart", + "agestop","birth"))) + stop ("epdata must have: Tstart,Tstop,status,agestart,agestop,birth",call.=FALSE) + if (missing(split)) stop("Must have a split argument.",call.=FALSE) + N<-ifelse(is.vector(split),1,dim(split)[2]) + len<-ifelse(is.vector(split),length(split),dim(split)[1]) + if (dim(epdata)[1]!=len) stop ("epdata and split have different length",call.=FALSE) + + if (missing(tvar.lev)) tvar.lev<-seq(1,N+1) + if (length(tvar.lev)!=N+1) stop("Unconsistent tvar.lev",call.=FALSE) + if (N>1) { + for (k in 1:(N-1)) { + if (any(!is.na(split[,k]) & !is.na(split[,k+1]) & split[,k]>split[,k+1])) + stop("Multiple split dates must be sequentials",call.=FALSE) + if (any(is.na(split[,k]) & !is.na(split[,k+1]))) + stop("In multiple split dates a split date cannot be followed by non-NA date" + ,call.=FALSE) + } + } + # Prepare wide format (subepisodes on the same record)________________________ + if (N==1) epdata[,"tvar"]<-split + if (N>1) {for (k in 1:(N)) {epdata[,paste("tvar",k,sep="")]<-split[,k]}} + co<-dim(epdata)[2] + epdata$Ts1<-epdata$Tstart # Tstart of 1st subepisode + for (k in 1:N) { + if (N>1) splitk<-split[,k] + if (N==1) splitk<-split + + # Tstop of k-th subepisode + epdata[,paste("Tf",k,sep="")]<- + ifelse(!is.na(splitk) & splitkepdata[,paste("Ts",k,sep="")], + splitk,ifelse(is.na(splitk) | splitkepdata[,paste("Ts",k,sep="")], + ifelse(epdata$status<=1,0,3),epdata$status) + # Tstart of (k+1)-th subepisode + epdata[,paste("Ts",k+1,sep="")]<-epdata[,paste("Tf",k,sep="")] + } + epdata[,paste("Tf",N+1,sep="")]<-epdata$Tstop # Tstop of (k+1)-th subepisode + epdata[,paste("st",N+1,sep="")]<-epdata$status # Status of (k+1)-th subepisode + + # create long format: one record for each subepisode__________________________ + elong<-reshape(epdata, direction="long", varying=names(epdata)[(co+1):(co+3+3*N)], + sep="",idvar="idt") + elong<-elong[order(elong$id),] # sort cases by id + elong<-subset(elong, elong$Ts=elong$tvar,tvar.lev[2],elong[,tvar.name]) + elong$tvar<-NULL + } + if (N>1){ + for (k in 1:N){ + elong[,tvar.name]<-ifelse(!is.na(elong[,paste("tvar",k,sep="")]) & + elong$Ts>=elong[,paste("tvar",k,sep="")],tvar.lev[k+1],elong[,tvar.name]) + elong[,paste("tvar",k,sep="")]<-NULL + } + } + # arranging final dataframe___________________________________________________ + elong$time<-NULL + elong$idt<-NULL + row.names(elong)<-NULL + elong$Tstart<-elong$Ts + elong$Tstop<-elong$Tf + elong$status<-elong$st + elong$Ts<-NULL + elong$Tf<-NULL + elong$st<-NULL + elong$agestart<-elong$Tstart-elong$birth + elong$agestop<-elong$Tstop-elong$birth + elong[,tvar.name]<-factor(elong[,tvar.name]) + return(elong) +} + + +#_______________________________________________________________________________ +# plotap plots ageprofile +#_______________________________________________________________________________ + +plotap<-function(x,lev.labels, baseline=TRUE,unsmoothed=FALSE, + xlim, ylim, title) +{ + # Check arguments ______________________________________________________________ + if (missing(x)) stop ("Must have an ap argument",call.=FALSE) + if (class(x)!="ap") + stop("Must have an ap argument",call.=FALSE) + if (missing(title)) title<-x$tr.name + age<-x$profiles[,1] + base<-x$profiles[,2] + if (!missing(lev.labels)) { + levsp<-strsplit(lev.labels,split="*",fixed=TRUE) + rr<-data.frame(matrix(1,length(age),length(lev.labels))) + urr<-data.frame(matrix(1,length(age),length(lev.labels))) + for (k in 1:length(lev.labels)) { + for (j in 1: length(levsp[[k]])) { + if (any(levsp[[k]][[j]]==colnames(x$profiles))) { + rr[,k]<-rr[,k]*x$profiles[,levsp[[k]][[j]]]/base + urr[,k]<-urr[,k]*x$unsmoothed[,levsp[[k]][[j]]]/base + } + else { + stop(paste(levsp[[k]][[j]],"not found in the dataframe."),call.=FALSE) + } + } + } + } + if (missing(ylim)) {ylim<-max(base)*2} + if (missing(xlim)) {xlim<-c(min(age),max(age))} + if (x$method=="cpr") ylabel<-"Cohort-period rates" + if (x$method=="car") ylabel<-"Cohort-age rates" + if (x$method=="cap") ylabel<-"Cohort-age probs" + # Open window and plot baseline_______________________________________________ + dev.new() + plot(c(xlim[1],xlim[2]),c(0,ylim), + xlab="Age",ylab=ylabel,type="n") + title(main = title, font.main=3) + if (baseline) lines(age+0.5,base,lty=1,lwd=2) + + # plot age profiles for each factor___________________________________________ + if (!missing(lev.labels)){ + names(rr)<-lev.labels + for (k in 1:length(lev.labels)) { + lines(age+0.5,rr[,k]*base,col=k,lty=k+1) + if (unsmoothed) {points(age+0.5,urr[,k]*base, col=k)} + } + # Plot legend + posx<-0.75* xlim[2]+0.25*xlim[1] + ce<-2-((30+max(nchar(lev.labels)))/37) + legend (posx,ylim,lev.labels,lty=2:(length(lev.labels)+1),col=1:length(lev.labels), + cex=ce,y.intersp=0.8) + } + if (missing(lev.labels) & unsmoothed) points(age+0.5,x$unsmoothed[,2]) +} + + +#_______________________________________________________________ +# tabx Print univariate and bivariate frequency distribution +#_______________________________________________________________ +tabx<-function(x,y,prow=FALSE,pcol=FALSE, chisq=FALSE) +{ + # uni-variate table + if (missing(y)) + { + n<-table(x,useNA="ifany") + N<-sum(n) + perc<-round(n/N,2) + + if (any(is.na(x))) # with NA values + { + valid<-c(round(table(x)/sum(table(x)),2),"") + Tot<-c(sum(n),1,1) + tabf<-rbind(cbind(n,perc,valid),Tot) + } else # no NA values + { + Tot<-c(sum(n),sum(perc)) + tabf<-rbind(cbind(n,perc),Tot) + } + print("Univariate frequency table",quote=FALSE) + print(tabf,quote=FALSE) + } + # bivariate table + if (!missing(y)) { + tabb<-table(x,y,useNA="ifany") + N<-sum(tabb) + # 1. Counts + Tot<-margin.table(tabb,1) + tot.c<-margin.table(tabb,2) + tabf<-rbind(cbind(tabb,Tot),c(tot.c,N)) + rownames(tabf)[dim(tabb)[1]+1]<-"Tot" + print("Bivariate frequency table",quote=FALSE) + print("Counts",quote=FALSE) + print(tabf,quote=FALSE) + if (chisq) print(summary(tabb)) + if (prow) { # row percentages + n<-margin.table(tabb,1) + tab1<-prop.table(tabb,1) + Tot<-margin.table(tab1,1) + tot.c<-round(margin.table(tabb,2)/N,2) + tabf<-rbind(cbind(tab1,Tot,n),c(tot.c,sum(tot.c),N)) + rownames(tabf)[dim(tabb)[1]+1]<-"Tot" + print("Row percentages",quote=FALSE) + print(round(tabf,2),quote=FALSE) + } + if (pcol) { # column percentages + n<-margin.table(tabb,2) + tab1<-prop.table(tabb,2) + Tot<-margin.table(tab1,2) + tot.r<-margin.table(tabb,1)/N + tabf<-cbind(rbind(tab1,Tot,n),c(tot.r,sum(tot.r),N)) + colnames(tabf)[dim(tabb)[2]+1]<-"Tot" + print("Column percentages",quote=FALSE) + print(round(tabf,2),quote=FALSE) + } + } + +} + +#_______________________________________________________________ +# tabm Print multiple regression and logit model estimations +#_______________________________________________________________ +tabm<-function(mod,pvalues=TRUE,digits=3) +{ + Beta<-round(coef(mod),digits) + sign1<-summary(mod)$coef[,4] + sign2<-as.character(cut(sign1, + breaks=c(0,0.01,0.05,0.1,1), + labels=c("***","**","*",""))) + if (attributes(mod)$class[1]=="glm"){ + model<-"Logistic regression." + fit<-paste("AIC:",round(AIC(mod),2)) + expBeta<-as.character(round(exp(Beta),digits)) + if (names(coef(mod))[1]=="(Intercept)") expBeta[1]<-"--" + if (pvalues){ + tabb<-cbind(Beta,expBeta,round(sign1,digits),sign2) + colnames(tabb)<-c("Beta","exp(Beta)","sign.","") + } else { + tabb<-cbind(Beta,expBeta,sign2) + colnames(tabb)<-c("Beta","exp(Beta)","sign.") + } + } + if (attributes(mod)$class[1]=="lm"){ + model<-"Multiple regression." + fit<-paste("AIC:",round(AIC(mod),2)," Adjusted R square:", + round(summary(mod)$adj.r.squared,2)) + if (pvalues){ + tabb<-cbind(Beta,round(sign1,digits),sign2) + colnames(tabb)<-c("Beta","sign.","") + } else { + tabb<-cbind(Beta,sign2) + colnames(tabb)<-c("Beta","sign.") + } + } + print (model,quote=FALSE) + print(tabb,quote=FALSE) + print("Sign.:'***' <0.01; '**' <0.05; '*' <0.1",quote=FALSE) + print(fit,quote=FALSE) +} + + + +#_______________________________________________________________________________ +# mkdate: creates dates in continuous years or cmc +#_______________________________________________________________________________ + +# Dates of event must be (year: XXXX format; month XX format); +# option: cmc=FALSE years continuous time - cmc=TRUE date in Century month code) + +mkdate<-function(year,month,cmc=FALSE) +{ + if (!cmc) mkdate<-year-1900+round((month-0.5)/12,2) + if (cmc) mkdate<-(year-1900)*12+month + return(mkdate) +} + + +#_______________________________________________________________________________ +# listvar: # Show variables in a dataframe and the associate number of columns +#_______________________________________________________________________________ + +listvar=function(df) +{ + temp1=names(df) + temp2=seq(1,length(temp1)) + temp3=data.frame(temp1,temp2) + names(temp3)=c("VAR","COL") + return(temp3) + rm(temp1,temp2,temp3) +} + + + + \ No newline at end of file diff --git a/data/demogr.RData b/data/demogr.RData new file mode 100755 index 0000000..59e5189 Binary files /dev/null and b/data/demogr.RData differ diff --git a/man/MAPLES-package.Rd b/man/MAPLES-package.Rd new file mode 100755 index 0000000..c768045 --- /dev/null +++ b/man/MAPLES-package.Rd @@ -0,0 +1,79 @@ +\name{MAPLES-package} +\alias{MAPLES-package} +\alias{MAPLES} +\docType{package} +\title{Smoothed age profile estimation.} +\description{ +MAPLES is a general method for the estimation of age profiles that uses standard +micro-level demographic survey data. The aim is to estimate smoothed age profiles +and relative risks for time-fixed and time-varying covariates. +} +\details{ +\tabular{ll}{ +Package: \tab MAPLES\cr +Type: \tab Package\cr +Version: \tab 1.0\cr +Date: \tab 2011-04-08\cr +License: \tab GPL-2\cr +LazyLoad: \tab yes\cr +LazyData: \tab yes\cr \cr +} +Main functions in the package:\cr +- epdata: prepare episode data for event history analysis \cr +- splitter: Creates a time-varying factor variables within a episode-data. \cr +- ageprofile: Computes smoothed transition rates by respondent's age (age profiles) \cr +- plotap: plots age profiles. \cr +Utilities: \cr +- tabx: Prints univariate or a bivariate frequency distribution table including marginal +distribution and total number of cases.\cr +- tabm: Print regression estimates for previously fitted linear and logit +regression models.\cr +- mkdate: computes dates in continuous years or CMC.\cr +- listvar: shows variables in a dataframe. +} +\author{ +Roberto Impicciatore +\email{roberto.impicciatore@unimi.it} +} +\references{Impicciatore R. and Billari F.C., (2010), MAPLES: a general method for the estimation +of age profiles from standard demographic surveys (with an application to fertility), +DEAS WP, \cr +http://ideas.repec.org/p/mil/wpdepa/2010-40.html} + + +\examples{ +# creates an episode-data structure relating to the transition +# childless-->first child +ep1<-with(demogr,epdata(start=dbirth, event=dch1, rcensor=dint, + birth=dbirth,id=id, + addvar=subset(demogr,select=c(-id,-dbirth)))) + +# creates a new episode-data structure with a time-varying factor +# variable relating to the status "never married" (not_marr) or +# "ever married" (marr) +ep2<-splitter(ep1,split=ep1$d1marr,tvar.lev=c("not_marr","marr"), + tvar.name="mar") + +# Estimates age profiles for the transition to the first birth +# according to the following factors: +# sex (respondent'sex w/2 levels: 'Male', 'Female'); +# edu ('Level of education w/3 levels: 'low_sec','upp_sec', 'tert'); +# mar (ever married w/2 levels: 'not_marr', 'marr') + +ch1.ap<-ageprofile(formula=~sex+edu+mar, epdata=ep2, + tr.name="First child", agelimits=c(15,50)) + +# Plot age profiles in three different graphs +plotap(ch1.ap,base=TRUE, unsmoo=TRUE, + lev=c("Male","Female"),title='first child by sex') +plotap(ch1.ap,base=TRUE, unsmoo=TRUE, + lev=c("low_sec","upp_sec","tert"),title='first child by education') +plotap(ch1.ap,base=TRUE, unsmoo=TRUE, + lev=c("not_marr","marr"),title='first child by marital status', + ylim=0.4) +} +\keyword{package} +\keyword{smooth} +\keyword{age profiles} +\keyword{transition rates} +\keyword{GAM models} diff --git a/man/ageprofile.Rd b/man/ageprofile.Rd new file mode 100755 index 0000000..03f5cd5 --- /dev/null +++ b/man/ageprofile.Rd @@ -0,0 +1,174 @@ +\name{ageprofile} +\alias{ageprofile} +\title{Computes smoothed transition rates by respondent's age (age profiles).} +\description{ +Computes age profiles for a specific transition between two states according to +a set of time-fixed or time-varying covariates. It needs an micro-level episode-data +structure, i.e. a longitudinal dataset containing the following variables:\cr \cr +start : starting date of observation;\cr +Tstop : ending date of observation;\cr +Status : is 0 if right censored; 1 if event occurred; 2 if left censored; + 3 if interval censored. Status is equal to 1 if and only if the date of the event + precedes the date of right censoring.\cr +Agestart - ages at starting date (Start)\cr +Agestop Age at the ending date (Stop)\cr +An episode-data structure can be obtained through the command epdata.} +\usage{ +ageprofile(formula, epdata, tr.name = "Transition", win, + method = "car", agelimits = c(0, 100), + outfile = FALSE, tails = c(FALSE, FALSE), + subset = TRUE, weight, + sig.eff = TRUE, sig.lev = 0.05) +} +\arguments{ + \item{formula}{ +a formula object specifying the ~ operator and factors variables +separated by '+' operators. Response variable on the left is not required. +The expression ~1 implies that only the baseline (age profile for the whole sample) +will be provided. Note that interaction terms are not recognized by the formula: +~v1:v2 or ~v1*v2 gives the same results as ~v1+v2. +} + \item{epdata}{ +a dataframe containing episode-data prepared by epdata command optionally with +time-varying factor variables created by splitter command. +} + \item{tr.name}{a string containing the name of the considered transition} + \item{win}{ +a matrix with two columns containing the initial and final calendar date specifiyng +a restricted window of observation. Only events and exposure times referring to +this window will be considered in the analysis. If win argument is not specified, +the whole episode is considered. +For example, let us consider an episode data structure coming from a retrospective +survey held on January 1, 2010. Since we want to compute transition rates according to +behaviours experienced in the last 10 years, we can restrict our window ob observation +to the decade January 1, 2000 - January 1, 2010. Thus, the win argument would be a +matrix with two columns, the first containing the exact data 2000 and the second the exact +date 2010.\cr +Window of observation may be also limited to specific ages or events. For example, +win=cbind(date_at_birth+15,date_at_birth+20) restricts observations within the +age group 15-19 (completed) years of age and\cr +win=cbind(date_at_marriage,date_at_the_interview) restricts observatins +between marriage and the interview. + +Note that in the win matrix no missing data are allowed. +} + \item{method}{specifies the type of rates to compute. There are three + alternatives:\cr +'cpr' for Cohort Period Rates +'car' for Cohort Age Rates +'cap' for Cohort Age Probabilities +} + \item{agelimits}{ + a couple of values indicating the lower and the upper limit of age interval + to be considered. It can be useful to restrict age interval when the transition + is never or almost never experienced outside a specific age interval. For example, + the transition rates for the first child birth may be limited to the age interval + (15-50).} + \item{outfile}{ + if TRUE writes the output on a file named 'trname.txt' where 'trname' is the + string specified in the tr.name argument. +} + \item{tails}{ + a vector of two logical elements indicating respectively if the left and the + right tails must be flattened. This option may be useful if we can assume + that the transition rate is approximately zero at the borders of the considered + age interval. +} + \item{subset}{ + an optional vector specifying a subset of observations to be used in the + estimation process. +} + \item{weight}{ + an optional vector containing (post-sampling) weights. +} + \item{sig.eff}{ + if TRUE the age profile for a specific subgroup defined by factor levels is + fixed as identical to the baseline if the relative risk (for aech specific + age-subinterval) is not statistically significant at the level specified by + sig.lev. In other words, the relative risk in a specific age subinterval is zero + if the pvalue is higher than sig.lev (see details). +} + \item{sig.lev}{ + specifies the maximum level significance under which the relative risk in a + specific age-subinterval is non-zero (if sig.eff=TRUE). The default value is 0.05. +} +} +\details{ +p-values for the null hypothesis that the corresponding parameter is zero is +calculated with reference to the t distribution with the estimated residual +degrees of freedom for the model fit if the dispersion parameter has been estimated, +and the standard normal if not. +} +\value{ +Gives a list of objects: +\item{profiles}{a matrix containing the smoothed age profiles for the whole sample (baseline) +and for each factor level considered} +\item{unsmoothed}{a matrix containing the unsmoothed transition rates, i.e. the ratio +between occurences and exposures for each age and factor level} +\item{knot}{a matrix with the two knots (in column) for each factor level (rows)} +\item{event}{a matrix with the number of event occured in the three age sub-intervals defined +by knots (columns) for each factor level (rows)} +\item{rrisk}{a matrix with the estimated relative risks in the three age sub-intervals + (columns) for each factor level (rows)} +\item{se}{a matrix with the standard error related to the relative risk computed +in the three age sub-intervals (columns) for each factor level (rows)} +\item{pvalue}{a matrix with the pvalues related to the hypothesis that the +relative risk in each age sub-interval is different from zero (columns) for each +factor level (rows)} +\item{factors}{a vector of names containing the factors specified in the formula argument} +\item{trname}{the name of the transition in analysisi as specified in the tr.name argument.} +\item{ANOVAtest}{pvalues anova test for each factor covariate} +\item{factors}{vector of string containing the names of factor covariates as specified in +the formula argument.} +\item{method}{the method used for the computaion of rates as speficied in the method argument.} +\item{agelimits}{the age interval as specified in the agelimits argument.} +\item{tails}{tails argument.} +} +\references{Impicciatore R. and Billari F.C., (2010), MAPLES: a general method for the estimation +of age profiles from standard demographic surveys (with an application to fertility),DEAS WP, \cr +http://ideas.repec.org/p/mil/wpdepa/2010-40.html} +\author{Roberto Impicciatore +\email{roberto.impicciatore@unimi.it} +} +\note{ +In order to have an unambiguosly output, it is strongly raccomended to label +each level of factor variables and to avoid using the same label for different +factors. +} + +\seealso{ +\code{\link{epdata}}, +\code{\link{splitter}}, +\code{\link{plotap}} +} +\examples{ +# creates an episode-data structure relating to the transition +# childless-->first child +ep1<-with(demogr,epdata(start=dbirth, event=dch1, rcensor=dint, + birth=dbirth,id=id, + addvar=subset(demogr,select=c(-id,-dbirth)))) + +# creates a new episode-data structure with a time-varying factor +# variable relating to the status "never married" (not_marr) or +# "ever married"(marr) +ep2<-splitter(ep1,split=ep1$d1marr,tvar.lev=c("not_marr","marr"), + tvar.name="mar") + +# Estimates age profiles for the transition to the first birth +# according to the following factors: +# sex (respondent'sex w/2 levels: 'Male', 'Female'); +# edu ('Level of education w/3 levels: 'low_sec','upp_sec', 'tert'); +# mar (ever married w/2 levels: 'not_marr', 'marr') +ch1.ap<-ageprofile(formula=~sex+edu+mar, epdata=ep2, + tr.name="First child", agelimits=c(15,50)) + +# The estimates are obtained under the hypothesis of independence among +# factors. We can relax this hp by considering the interaction between +# factors. The following commands add the interaction between sex and edu. +ep2$inter<-ep2$sex:ep2$edu +ch1.ap<-ageprofile(formula=~sex+edu+mar+inter, epdata=ep2, + tr.name="First child", agelimits=c(15,50)) +} +\keyword{smooth} +\keyword{age|profile} +\keyword{transition|rates} diff --git a/man/demogr.Rd b/man/demogr.Rd new file mode 100755 index 0000000..3225243 --- /dev/null +++ b/man/demogr.Rd @@ -0,0 +1,59 @@ +\name{demogr.RData} +\alias{demogr.RData} +\alias{demogr} +\docType{data} +\title{Longitudinal data on marriage and first child birth.} +\description{ +Longitudinal data on marriage and first child birth. Individuals interwieved +in March 2004. +} +\usage{demogr} +\format{ +A data frame with 2017 observations on 8 variables. + +[,1] id integer ID \cr +[,2] weight numeric Individual weight \cr +[,3] dbirth numeric respondent's date at birth \cr +[,4] dint numeric date at the interview \cr +[,5] d1marr numeric date at marriage \cr +[,6] dch1 numeric date at first child birth \cr +[,7] sex factor sex \cr +[,8] edu factor level od efucation \cr + +} +\details{ +Individuals were interviewed at the end of 2003 retrospectively on family and +fertility life trajectory. \cr + +id: individual identification number (ID) \cr +weight: individual post-sampling weight (with mean = 1) \cr +dbirth: respondent's date at birth in exact years since January 1, 1900 \cr +dint: date at the interview in exact years since January 1, 1900 (March 2004 +for all respondents) \cr +d1marr: date at marriage (if any) in exact years since January 1, 1900. If NA, +the individual has never been married before the interview. \cr +dch1: date at marriage (if any) in exact years since January 1, 1900. If NA, +the individual is childless at the interview. \cr +sex: respondent's sex (factor w/2 levels: "Male", "Female") \cr +edu: respondent's level of education (factor w/3 levels: "low_sec","upp_sec","tert") \cr + +Dates in exact years have been computed considering the midpoint of a specific month. +Thus, March 1995 means March 15, 1995 and the date in exact years is 95+2.5/12=95.21. +} +\source{ +Synthetic data based on the format of the Generation and Gender Survey (GGS) (Vikat et al. 2007). +} +\references{ +Vikat A., Speder Z., Beets G., Billari F.C., Buhler C., Desesquelles A., Fokkema T., +Hoem J., MacDonald A., Neyer G., Pailhe A., Pinnelli A., Solaz A. (2007). +"Generations and Gender Survey (GGS): Toward a better understanding of relationship +and processes in the life course", Demographic research, 17 (14): 389-440. +\cr +\cr +Impicciatore R. and Billari F.C., (2010), MAPLES: a general method for the estimation +of age profiles from standard demographic surveys (with an application to fertility), DEAS WP,\cr +http://ideas.repec.org/p/mil/wpdepa/2010-40.html} +\examples{ +str(demogr) +} +\keyword{datasets} diff --git a/man/epdata.Rd b/man/epdata.Rd new file mode 100755 index 0000000..6c6789a --- /dev/null +++ b/man/epdata.Rd @@ -0,0 +1,72 @@ +\name{epdata} +\alias{epdata} +\title{Prepares episode data for event history analysis.} +\description{Starting from a micro-level dataset containing dates for some +relevant events, it prepares a file with an episode-data format suitable for +event history analysis.} +\usage{epdata(start, event, lcensor, rcensor, + subset = TRUE, birth, id, addvar) +} +\arguments{ + \item{start}{a vector of dates at which the unit starts to be at risk to experience + the transition. It may be unknown in case of left-censoring. When the episode + is no left censored, start coincides with the beginning of the episode.} + \item{event}{a vector of containing the date of the event of interest. It may + be unknown in case of right-censoring.} + \item{lcensor}{an optional vector of left censoring dates.} + \item{rcensor}{an optional vector of right censoring dates.} + \item{subset}{a logical expression indicating elements or rows to keep. + It specifies the subset of cases at risk of experiencing the event of interest. + If TRUE, the whole sample will be considered.} + \item{birth}{a vector containing the respondent date of birth. + This information in useful in order to compute ages at various events (NA not allowed).} + \item{id}{a vector of identification numbers. If id remains unspecified, a + default id is implemented.} + \item{addvar}{a dataframe containing additional variables to add in the resulting output.} +} +\details{All the specified objects must have the same length (i.e. the number of + cases must be the same). For any observation at least one between start and + lcensor and one between event and rcensor must be known. In general, we have + start<=lcensor<= event<=rcensor.} +\value{A data.frame suitable for MAPLES and R survival package. Following the + 'counting process' formulation of survival analysis, each record ha the + following variables: +\item{Tstart}{episode starting date} +\item{Tstop}{episode ending date} +\item{status}{0: right censored: 1: event occurred: 2: left censored: 3: interval censored. + Is equal to 1 if and only if the date of the event precedes the date of right censoring.} +\item{Agestart}{Age at starting date (Tstart)} +\item{Agestop}{Age at the ending date (Tstop)} +} +\references{Impicciatore R. and Billari F.C., (2010), MAPLES: a general method for the +estimation of age profiles from standard demographic surveys (with an application to fertility), +DEAS WP, \cr +http://ideas.repec.org/p/mil/wpdepa/2010-40.html} +\author{Roberto Impicciatore +\email{roberto.impicciatore@unimi.it} +} +\seealso{ +\code{\link{splitter}}, +\code{\link{ageprofile}}, +\code{\link{plotap}} +} +\examples{ +# creates an episode-data structure relating +# to the transition childless-->first child +ep1<-epdata(start=demogr$dbirth, + event=demogr$dch1, + rcensor=demogr$dint, + birth=demogr$dbirth, + id=demogr$id, + addvar=subset(demogr,select=c(-id,-dbirth))) + +# or, using 'with' +ep1<-with(demogr,epdata(start=dbirth, event=dch1, + rcensor=dint, birth=dbirth,id=id, + addvar=subset(demogr,select=c(-id,-dbirth)))) +} +\keyword{datagen} +\keyword{episode data} +\keyword{event history analysis} +\keyword{survival} + diff --git a/man/listvar.Rd b/man/listvar.Rd new file mode 100755 index 0000000..3522d0c --- /dev/null +++ b/man/listvar.Rd @@ -0,0 +1,21 @@ +\name{listvar} +\alias{listvar} +\title{Shows variables in a dataframe.} +\description{Shows variables in a dataframe and the related number of column.} +\usage{listvar(df)} + +\arguments{ + \item{df}{is the dataframe.} +} + +\value{ +a dataframe with two columns: VAR containing the names of the variables and COL +with the number of related column in the original dataframe. +} + +\author{Roberto Impicciatore +\email{roberto.impicciatore@unimi.it} +} +\examples{ +listvar(demogr) +} diff --git a/man/mkdate.Rd b/man/mkdate.Rd new file mode 100755 index 0000000..8ff7f1a --- /dev/null +++ b/man/mkdate.Rd @@ -0,0 +1,36 @@ +\name{mkdate} +\alias{mkdate} +\title{Compute dates.} +\description{ +Computes dates in continuous years or CMC (Century Month Code) since January 1, 1900. +} +\usage{mkdate(year, month, cmc = FALSE)} +\arguments{ + \item{year}{ +a vector containing the year of a date(XXXX format) +} + \item{month}{ +a vector containing the month of a date (XX format) +} + \item{cmc}{ +if TRUE date in CMC from January 1, 1900 is computed. Otherwise, date in continuos time +since January 1, 1900 is computed. +} +} + +\value{a vector containing dates in continuous time or CMC.} +\references{Impicciatore R. and Billari F.C., (2010), MAPLES: a general method for the estimation +of age profiles from standard demographic surveys (with an application to fertility), +DEAS WP, \cr +http://ideas.repec.org/p/mil/wpdepa/2010-40.html} +\author{Roberto Impicciatore +\email{roberto.impicciatore@unimi.it} +} + +\examples{ +dy<-c(1996,2001,2005,2003) +dm<-c(3,9,5,12) +mkdate(year=dy,month=dm) +mkdate(year=dy,month=dm, cmc=TRUE) +} +\keyword{dates} diff --git a/man/plotap.Rd b/man/plotap.Rd new file mode 100755 index 0000000..df9078a --- /dev/null +++ b/man/plotap.Rd @@ -0,0 +1,109 @@ +\name{plotap} +\alias{plotap} +\title{Plots age profiles.} +\description{ +Plots age profiles estimated through command ageprofile. +} +\usage{ +plotap(x, lev.labels, baseline = TRUE, unsmoothed = FALSE, + xlim, ylim, title) +} + +\arguments{ + \item{x}{the resulting list given by the ageprofile command.} + \item{lev.labels}{ +a vector of strings specifying the factor levels to consider. For example: \cr +lev.labels=c("Male","Female","low_sec","upp_sec","tert") \cr +specifies that 5 curves will bedrawn, one for each level specified. +It is also possible to draw a combination of levels (under the hypothesis of +independence between factors)by inserting the symbol "*" between two or more levels. +For example: the string "Male*low_sec" draws a curve for the subgroups of men with +a lower secondary level of education; the string "Male*low_sec*worker" draws a curve for +the subgroup of lower educated men currently employed as a manual worker. +Even though it is possible to draw any number of curves in one graph, we raccomend +to consider no more than 5 levels (or combination of levels) in one graph.} + \item{baseline}{if TRUE the baseline will be drawn.} + \item{unsmoothed}{if TRUE the unsmoothed transition rates, i.e. the ratio +between occurences and exposures for each age and factor level, are plotted as points +in the graph.} + \item{xlim}{ +a vector of two values defining the limits of X axis measured in years of age +(default value: c(min(age), max(age))} + \item{ylim}{ +a value defining the upper limit of the Y axis (transition rates). +Given that transition rates a re strictly positive, the lower limit is always 0 +(default value: 2*max(baseline rates)). } + \item{title}{ +title of the graph (default: transition name as stored in the ageprofile argument. } +} +\references{Impicciatore R. and Billari F.C., (2010), MAPLES: a general method for the estimation +of age profiles from standard demographic surveys (with an application to fertility), +DEAS WP, \cr +http://ideas.repec.org/p/mil/wpdepa/2010-40.html} +\author{ +Roberto Impicciatore +\email{roberto.impicciatore@unimi.it} +} +\seealso{ +\code{\link{epdata}}, +\code{\link{splitter}}, +\code{\link{ageprofile}}, +} +\examples{ +# creates an episode-data structure relating to the +# transition childless-->first child +ep1<-with(demogr,epdata(start=dbirth, event=dch1, rcensor=dint, + birth=dbirth,id=id, + addvar=subset(demogr,select=c(-id,-dbirth)))) + +# creates a new episode-data structure with a time-varying factor variable +# relating to the status "never married"(not_marr) or "ever married"(marr) +ep2<-splitter(ep1,split=ep1$d1marr,tvar.lev=c("not_marr","marr"), + tvar.name="mar") + +# Estimates age profiles for the transition to the first birth +# according to the following factors: +# sex (respondent'sex w/2 levels: 'Male', 'Female'); +# edu ('Level of education w/3 levels: 'low_sec','upp_sec', 'tert'); +# mar (ever married w/2 levels: 'not_marr', 'marr') + +ch1.ap<-ageprofile(formula=~sex+edu+mar, epdata=ep2, + tr.name="First child", agelimits=c(15,50)) + +# Plot age profiles in three different graphs +plotap(ch1.ap,base=TRUE, unsmoo=TRUE, + lev=c("Male","Female"),title='first child by sex') +plotap(ch1.ap,base=TRUE, unsmoo=TRUE, + lev=c("low_sec","upp_sec","tert"),title='first child by education') +plotap(ch1.ap,base=TRUE, unsmoo=TRUE, + lev=c("not_marr","marr"),title='first child by marital status', + ylim=0.4) + +# Plot age profiles for the combined effect of sex and level of education +# under the independence hypothesis +plotap(ch1.ap,base=TRUE, unsmoo=TRUE, + lev=c("Female*low_sec","Female*upp_sec","Female*tert"), + title='first child by education - women (indep hp)') +plotap(ch1.ap,base=TRUE, unsmoo=TRUE, + lev=c("Male*low_sec","Male*upp_sec","Male*tert"), + title='first child by education - men (indep hp)') + +# The estimates are obtained under the hypothesis of independence among +# factors. We can relax this hp by considering the interaction between +# factors. The following commands add the interaction between sex and edu. +ep2$inter<-ep2$sex:ep2$edu +ch1.ap<-ageprofile(formula=~sex+edu+mar+inter, epdata=ep2, + tr.name="First child", agelimits=c(15,50)) + +# Plot age profile for the interaction between sex and level of education +plotap(ch1.ap,base=TRUE, unsmoo=TRUE, + lev=c("Female:low_sec","Female:upp_sec","Female:tert"), + title='first child by education - women') +plotap(ch1.ap,base=TRUE, unsmoo=TRUE, + lev=c("Male:low_sec","Male:upp_sec","Male:tert"), + title='first child by education - men') + +} +\keyword{smooth} +\keyword{age|profiles} +\keyword{transition|rates} diff --git a/man/splitter.Rd b/man/splitter.Rd new file mode 100755 index 0000000..d0f7a81 --- /dev/null +++ b/man/splitter.Rd @@ -0,0 +1,75 @@ +\name{splitter} +\alias{splitter} +\title{Creates a time-varying factor variables within a episode-data.} +\description{ +Splits episode in sub-episodes according to the dates specified as input. + The resulting dataframe contains a new time-varying factor variable with different + user-defined values in each subinterval. +} +\usage{splitter(epdata, split, tvar.name = "Tvar", tvar.lev)} +\arguments{ + \item{epdata}{a dataframe containing an episode-data file (obtained through the epdata command)} + \item{split}{an array containing the date(s) at one (or more) specific event(s) + that causes the change in the resulting time-varying variable. The original episode + will be split at this date(s). If n vectors of dates have been spcified, the original + episode will be splitted in n+1 subepisodes. For example, let us consider a matrix + with three columns containing the dates at the first, second, and third child birth. + The original episode may be split into (up to) four subepisodes according to non-NA + specified dates (see details).} + \item{tvar.name}{a string containing the name of the time-varying factor + variable in the resulting dataframe.} + \item{tvar.lev}{a vector of n+1 strings containing the level labels of the resulting + time-varying variable (where n is the number of columns in the split argument).} +} +\details{ +Split dates refer to event that can be experienced sequentially (first birth, +second birth, third birth, ecc or first marriage, second marriage, third marriage, etc.). +This implies that \cr +1. Split dates must be strictly sequential: date1<=date2<=date3<=etc. \cr +2. NA date cannot be followed by a non-NA date (e.g. for the j-th the sequence +of dates "date1,date2, NA" is allowed and it means that event3 has not been +experienced, whereas "NA, date1, date2" is not allowed because we do not have +any information about the first change ). \cr +The number of rows in the split argument must be the same as the number of rows +in the epdata argument. +} +\value{ +In the resulting dataframe each row is a subepisode obtained through the splitting +procedure. The columns will be the same as the epdata argument plus one time-varying +factor variable with levels specified through tvar.lev argument. \cr +In order to create several time-varying variables on the same dataframe, splitter +procedure should be applied repeatedly for each new time-varying variable. At any +step the input dataframe will be the resulting dataframe in the previous one. +} +\references{Impicciatore R. and Billari F.C., (2010), MAPLES: a general method for the estimation +of age profiles from standard demographic surveys (with an application to fertility), +DEAS WP, \cr +http://ideas.repec.org/p/mil/wpdepa/2010-40.html} + +\author{Roberto Impicciatore +\email{roberto.impicciatore@unimi.it} +} +\seealso{ +\code{\link{epdata}}, +\code{\link{ageprofile}}, +\code{\link{plotap}} +} + +\examples{ +# creates an episode-data structure relating to the transition +# childless-->first child +ep1<-with(demogr,epdata(start=dbirth, event=dch1, rcensor=dint, + birth=dbirth,id=id, + addvar=subset(demogr,select=c(-id,-dbirth)))) + +# creates a new episode-data structure with a time-varying factor +# variable relating to the status "never married" (not_marr) or +# "ever married" (marr) +ep2<-splitter(ep1,split=ep1$d1marr,tvar.lev=c("not_marr","marr"), + tvar.name="mar") +} +\keyword{datagen} +\keyword{episode|data} +\keyword{event|history|analysis} +\keyword{survival} +\keyword{episode|splitting} diff --git a/man/tabm.Rd b/man/tabm.Rd new file mode 100755 index 0000000..b7bcb32 --- /dev/null +++ b/man/tabm.Rd @@ -0,0 +1,31 @@ +\name{tabm} +\alias{tabm} +\title{Print regression estimates for previously fitted linear and logit +regression models.} +\description{ +For linear regression models, it prints some basic information about the model +estimates ( Beta, pvalues, AIC, adjusted R square); for logit models, it also prints +exp(Beta).} +\usage{tabm(mod, pvalues = TRUE, digits = 3)} +\arguments{ + \item{mod}{is an oject of class 'lm' or 'glm'.} + \item{pvalues}{if TRUE pvalues will be printed out.} + \item{digits}{specifies the number of digits in the output.} +} +\author{Roberto Impicciatore +\email{roberto.impicciatore@unimi.it} +} + +\seealso{ +\code{\link{summary.lm}}, +\code{\link{summary.glm}}, +} + +\examples{ +chi<-ifelse(!is.na(demogr$dch1),1,0) +logit.mod<- glm(chi ~ demogr$sex + demogr$edu, + family=binomial(link = "logit")) +tabm(logit.mod) +# for more details +summary(logit.mod) +} diff --git a/man/tabx.Rd b/man/tabx.Rd new file mode 100755 index 0000000..3f4360a --- /dev/null +++ b/man/tabx.Rd @@ -0,0 +1,28 @@ +\name{tabx} +\alias{tabx} +\title{Prints univariate and bivariate frequency table.} +\description{Prints univariate or a bivariate frequency distribution table including marginal +distribution and total number of cases.} +\usage{tabx(x, y, prow = FALSE, pcol = FALSE, chisq=FALSE )} +\arguments{ + \item{x}{a vector which can be interpred as factor.} + \item{y}{an optional second vector which can be interpreted as factor (columns + in the contingency table).} + \item{prow}{if TRUE it adds a bivariate table containing row percentages.} + \item{pcol}{if TRUE it adds a bivariate table containing column percentages.} + \item{chisq}{gives the results for a Chi square test for independence of all factors.} +} +\author{Roberto Impicciatore +\email{roberto.impicciatore@unimi.it} +} + +\seealso{ +\code{\link{table}}, +\code{\link{summary.table}}, +} +\examples{ +tabx(demogr$sex,demogr$edu,chisq=TRUE) +tabx(demogr$sex,demogr$edu,prow=TRUE,pcol=TRUE) +} +\keyword{univar} +\keyword{frquencies}