Skip to content

Commit

Permalink
version 0.1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
Wangjian Zhang authored and cran-robot committed Nov 22, 2023
1 parent 4725209 commit 1c7d66a
Show file tree
Hide file tree
Showing 18 changed files with 115 additions and 58 deletions.
31 changes: 23 additions & 8 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,15 +1,30 @@
Package: rSPARCS
Type: Package
Title: Sites, Population, and Records Cleaning Skills
Version: 0.1.0
Author: Wangjian Zhang, Bo Ye, Zhicheng Du, Ziqiang Lin, Jijin Yao, Yanan Jin, Wayne R. Lawrence
Maintainer: Wangjian Zhang <wzhang27@albany.edu>
Description: Data cleaning including 1) generating datasets for time-series and case-crossover analyses based on raw hospital records, 2)linking individuals to a areal map, 3)picking out cases living within a certain distance from a site, etc. For more information, please refer to Zhang W,etc. (2018) <doi: 10.1016/j.envpol.2018.08.030>.
Version: 0.1.1
Authors@R: c(
person("Wangjian", "Zhang", email="zhangwj227@mail.sysu.edu.cn",role = c("aut","cre")),
person("Zhicheng", "Du", email = "duzhch5@mail.sysu.edu.cn", role = c("aut")),
person('Xinlei', 'Deng', email = 'xinlei.deng@nih.gov', role = c('aut')),
person('Ziqiang', 'Lin', email = 'linziqiang0314@jnu.edu.cn', role = c('aut')),
person('Bo', 'Ye', email = 'yebowithyou2@gmail.com', role = c('aut')),
person('Jijin', 'Yao', email = 'yaojj23@mail.sysu.edu.cn', role = c('aut')),
person('Yanan', 'Jin', email = 'jinyn7@mail.sysu.edu.cn', role = c('aut')),
person('Wayne', 'Lawrence', email = 'wayne.lawrence@nih.gov', role = c('aut')))
Maintainer: Wangjian Zhang <zhangwj227@mail.sysu.edu.cn>
Description: Data cleaning including 1) generating datasets for time-series and case-crossover analyses based on raw hospital records, 2) linking individuals to an areal map, 3) picking out cases living within a buffer of a certain size surrounding a site, etc. For more information, please refer to Zhang W, et al. (2018) <doi:10.1016/j.envpol.2018.08.030>.
License: GPL-3
Encoding: UTF-8
LazyData: true
Imports: data.table,spatialEco,geosphere,tigris,raster,sp,plyr
Imports: data.table,sf,geosphere,tigris,raster,sp,plyr,dplyr,methods
NeedsCompilation: no
Packaged: 2020-11-15 21:06:41 UTC; LinLabAdmin
Packaged: 2023-11-21 02:14:21 UTC; wzhan
Author: Wangjian Zhang [aut, cre],
Zhicheng Du [aut],
Xinlei Deng [aut],
Ziqiang Lin [aut],
Bo Ye [aut],
Jijin Yao [aut],
Yanan Jin [aut],
Wayne Lawrence [aut]
Repository: CRAN
Date/Publication: 2020-11-15 21:30:02 UTC
Date/Publication: 2023-11-21 08:20:02 UTC
32 changes: 17 additions & 15 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
fdd1ac18c91ae46bcabff7c0be80e97d *DESCRIPTION
7fab5dacbcfdafd1c826ac1e1602cfd1 *NAMESPACE
919de63a75e34619c560b2afeacd6006 *DESCRIPTION
eab49ab3a8e010cb8a63859aa8227d06 *NAMESPACE
94ff5404d00104143f040a252df33a0d *R/CXover.data.R
ec506cc6ec38b021541e24ea28c2d779 *R/DBFgeocode.R
84892ff6b39bb87f1dca8712341e589d *R/FIPS.name.R
ab216426bc16c71407a1ab610681ce5a *R/case.series.R
90f75890fc9bf8a732db1cedc5f1b978 *R/desc.comp.R
0985ec7f024f15c0f8713ca884e059c6 *R/DBFgeocode.R
194acf0cb5c4a46491da896efb0962bd *R/FIPS.name.R
62779f4409aa6d24fbe743c673e1fcd4 *R/case.series.R
1b1c7ed31a388220dbef65be80c00909 *R/desc.comp.R
94753389fe82ef1c40c73e23ae5ac8ba *R/dupl.readm.R
52dedc7741863639b316d85b4a3a5dd3 *R/exposure_lag.R
57ad157b748a96f666592f99f952a0b1 *R/mediationking.R
ebfee48932401739b07fb0bc2e862341 *R/pick.cases.R
9c36851e2d19505224c38383b214d726 *R/raster_extract.R
b7a1c8ea040bb6c756d47fea6f3dc53d *man/CXover.data.Rd
ce7d624a49ae173b3ce7f29e1e8e1960 *R/raster_extract.R
be46ce83d6da518244b1bb790fbe09e3 *man/CXover.data.Rd
6e2eabbd39cd03cc539e3c9e5d92d069 *man/DBFgeocode.Rd
f959d33d6b112cd0ea7eee7960d59da1 *man/FIPS.name.Rd
01f074bdab28ca2eb634ed0f5a6008c6 *man/case.series.Rd
d55ad0b449e3eb41daa18d5e6ef3116e *man/desc.comp.Rd
4009cbd66cbf6593cc938c954f2c8c32 *man/dupl.readm.Rd
84f439996a3b6cd9e09a8d17189ab3c2 *man/mediationking.Rd
5dc0e3ba22311779a46f89d380e748e1 *man/pick.cases.Rd
f94537c9268a551cc054a61d5d3b9144 *man/raster_extract.Rd
8feac6ad96334d8f67890a2aa0928c8e *man/FIPS.name.Rd
12736c16b2ea0b7c6ce2611df4cf1200 *man/case.series.Rd
fa868574eba5c9faf2bd7b4e507bf281 *man/desc.comp.Rd
5bd65175178a8066cbf67d70ae9b1da7 *man/dupl.readm.Rd
3a67243c642d7ef5468b242d78fae94f *man/exposure_lag.Rd
a9f2f92b9b476ce74971f16824f6aecd *man/mediationking.Rd
43885c3c463d2e98b4973e1da02c2f50 *man/pick.cases.Rd
b30c80b7ef4abeb7cde23abeddeeee32 *man/raster_extract.Rd
6 changes: 4 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
exportPattern("^[[:alpha:]]+")
importFrom("data.table","as.data.table")
importFrom("data.table","as.data.table","setorderv")
importFrom("geosphere","distGeo")
importFrom("tigris","tracts")
importFrom("raster","crs","extent","mask","extract", "crop","projection")
importFrom("sp","coordinates","CRS","spTransform")
importFrom("spatialEco","point.in.poly")
importFrom("sf","st_as_sf","st_join")
importFrom("stats", "IQR", "aov","glm","as.formula","binomial","lm","quantile" ,"chisq.test", "kruskal.test","median", "sd", "shapiro.test", "t.test", "wilcox.test")
importFrom("plyr", "rbind.fill")
importFrom("dplyr", "arrange")
importFrom("methods", "as")
2 changes: 1 addition & 1 deletion R/DBFgeocode.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ DBFgeocode <- function(data,cityname,roadaddress,mailbox=NULL,ZIP){
cityzip=paste(data[,cityname],data[,ZIP])
data$singleline=paste(addr12,", ",cityzip,sep="")
return(data)
print("You may output the data as the dbf file using write.dbf ()")
message("You may output the data as the dbf file using write.dbf ()")
}

7 changes: 6 additions & 1 deletion R/FIPS.name.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@ FIPS.name<-function(data,ID.case,long.case,lat.case,map=NULL,state.map,level.map
if(is.null(map)&level.map=="tract") NYSmap=tigris::tracts(state=state.map) else NYSmap=map
}
raster::crs(data1)=raster::crs(NYSmap)
data1=spatialEco::point.in.poly(data1,NYSmap)
data1=sf::st_as_sf(data1)
NYSmap=sf::st_as_sf(NYSmap)
if (dim(data1)[2] == 1) data1$pt.ids <- 1:nrow(data1)
if (dim(NYSmap)[2] == 1) NYSmap$poly.ids <- 1:nrow(NYSmap)
data1=sf::st_join(data1, NYSmap,largest = TRUE)
data1=methods::as(data1, "Spatial")
data1=as.data.frame(data1)
data1=cbind(data1[,1:(a-2)],data1[,areaID])
names(data1)[ncol(data1)]="areaID"
Expand Down
4 changes: 2 additions & 2 deletions R/case.series.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ case.series <-function(data,ICD=NULL,diagnosis,date="ADMDT",start="2001/1/1",end
a=paste(unique(data[,by[k]]))
b=paste("Level for", by[k],":",a[1])
for(t in 2:length(a)) b=paste(b,a[t])
print(b)
message(b)
data$var.by=data[,by[k]]
names(data)[ncol(data)]=paste("var",k,sep="")
}
Expand Down Expand Up @@ -66,7 +66,7 @@ case.series <-function(data,ICD=NULL,diagnosis,date="ADMDT",start="2001/1/1",end
Output1=NULL
Output2=med
}
print(Output1)
message(Output1)
return(Output2)
}

Expand Down
10 changes: 7 additions & 3 deletions R/desc.comp.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ desc.comp <-function(data,variables,by=NULL,margin=2,avg.num="mean",test.num="me
test=NULL
try({test=tapply(data_med[,1],data_med[,2],shapiro.test)},silent=T)
shapiro=rep(0.5,length(level))
if(length(test)==0) print("Warning: The sample size for some groups may be too small") else {
if(length(test)==0) warning("The sample size for some groups may be too small") else {
for(k in 1:length(test)) shapiro[k]=test[[k]]$p.value
}
if(max(shapiro)>=0.05) result[1,1:length(level)]=paste(mean.in," (",sd.in,")",sep="") else result[1,1:length(level)]=paste(median.in," (",IQR.in,")",sep="")
Expand Down Expand Up @@ -55,6 +55,7 @@ desc.comp <-function(data,variables,by=NULL,margin=2,avg.num="mean",test.num="me
rownames(result)=names(data0)[i]
colnames(result)=c(level,"statistic","P")
} else {
if(!is.factor(data_med[,1])) data_med[,1]=as.factor(data_med[,1])
result=matrix(rep(NA,(length(levels(data_med[,1]))+1)*(length(level)+2)),nrow=length(levels(data_med[,1]))+1)
media=table(data_med[,1],data_med$group)
media1=round(prop.table(media,margin)*100,2)
Expand All @@ -66,10 +67,8 @@ desc.comp <-function(data,variables,by=NULL,margin=2,avg.num="mean",test.num="me
}
result[1,]=""
result[,(ncol(result)-1):ncol(result)]=""
set.seed(4715)
if(length(level)>1){
result[2,ncol(result)-1]=paste("Chisq =",sprintf("%.2f",chisq.test(as.matrix(media),simulate.p.value=T)$statistic))
set.seed(4715)
test=chisq.test(as.matrix(media),simulate.p.value=T)$p.value
test=ifelse(test<0.001,"P<0.001",sprintf("%.3f",test))
result[2,ncol(result)]=test
Expand All @@ -80,5 +79,10 @@ desc.comp <-function(data,variables,by=NULL,margin=2,avg.num="mean",test.num="me
for(k in 1:ncol(result)) result[,k]=as.character(result[,k])
Output=rbind(Output,result)
}
test=dimnames(Output)[[1]]
Output=as.data.frame(Output)
Output$name=test
Output=Output[,c(ncol(Output),1:(ncol(Output)-1))]
if(length(by)==0){Output=Output[,1:2];names(Output)[2]="Overall"}
return(Output)
}
14 changes: 14 additions & 0 deletions R/exposure_lag.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Add single-day and cumulative (moving-average) lagged versions of exposure
# variables, computed separately within each ID group.
#
# Arguments:
#   data        A data.frame or data.table; it is converted with
#               data.table::as.data.table().
#   var         Character vector naming the columns to lag.
#   maxlag      Largest lag to compute; lags 1, ..., maxlag are produced and
#               maxlag = 0 adds no columns.
#   ID          Name of the single grouping column.
#   Date        Name of the single column used to order rows within a group.
#   lag_suffix  Column-name suffixes: element 1 for cumulative lags, element 2
#               for single lags.
#
# Returns:
#   The data.table sorted by ID then Date, with two extra numeric columns per
#   variable and lag k:
#     <var><lag_suffix[2]>k  the value k rows earlier within the group;
#     <var><lag_suffix[1]>k  the mean of the current and k preceding rows.
#   Positions without enough preceding rows in their group are NA.
exposure_lag <- function(data, var, maxlag, ID, Date,
                         lag_suffix = c('_cu_lag', '_si_lag')) {
  stopifnot(
    is.character(var),
    is.character(ID), length(ID) == 1L,
    is.character(Date), length(Date) == 1L,
    is.numeric(maxlag), length(maxlag) == 1L, !is.na(maxlag), maxlag >= 0
  )

  data <- data.table::as.data.table(data)
  absent <- setdiff(c(var, ID, Date), names(data))
  if (length(absent) > 0L) {
    stop("Column(s) not found in data: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }
  data.table::setorderv(data, c(ID, Date), c(1, 1))

  # Row positions of every ID group, in order of appearance. Columns are read
  # as plain vectors on purpose: inside data.table's `j`/`by`, a column that
  # happens to be called "ID", "lag" or "i" would shadow this function's own
  # variables of the same name.
  id_values <- data[[ID]]
  rows_by_group <- split(
    seq_along(id_values),
    match(id_values, unique(id_values))
  )

  # Apply a one-sided convolution filter within each group. stats::filter()
  # stops when the filter is longer than the series, so groups that are too
  # short for the requested lag keep NA instead of aborting the whole call.
  filter_within_groups <- function(x, weights) {
    out <- rep(NA_real_, length(x))
    for (rows in rows_by_group) {
      if (length(rows) >= length(weights)) {
        out[rows] <- as.numeric(stats::filter(x[rows], weights, sides = 1))
      }
    }
    out
  }

  for (i in var) {
    x <- data[[i]]
    # seq_len() rather than 1:maxlag, which would run lags 1 and 0 for maxlag 0.
    for (lag in seq_len(as.integer(maxlag))) {
      # single lag: weight 1 on the value `lag` rows back, 0 elsewhere
      data[, paste0(i, lag_suffix[2], lag)] <-
        list(filter_within_groups(x, c(rep(0, lag), 1)))
      # cumulative lag: equal weights over the current and `lag` previous rows
      data[, paste0(i, lag_suffix[1], lag)] <-
        list(filter_within_groups(x, rep(1 / (lag + 1), lag + 1)))
    }
  }
  data
}

2 changes: 1 addition & 1 deletion R/raster_extract.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
raster_extract=function(rastermap,refmap,ID.var="FIPS",ID.code="ALL",cutpoint=NULL){
message("Wait! It may take some time to uniform the projection systems of the rastermap and refmap")
refmap=spTransform(refmap,CRS(projection(rastermap)))
refmap=sp::spTransform(refmap,CRS(projection(rastermap)))
areasnum=ifelse(ID.code=="ALL",length(refmap),length(ID.code))
refmap1=as.data.frame(refmap)
if(ID.code=="ALL") areasname=refmap1[,ID.var] else areasname=ID.code
Expand Down
3 changes: 0 additions & 3 deletions man/CXover.data.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@
\references{
Zhang W, Lin S, Hopke PK, et al. Triggering of cardiovascular hospital admissions by fine particle concentrations in New York state: Before, during, and after implementation of multiple environmental policies and a recession. Environ. Pollut. 2018;242:1404-1416.
}
\author{
Wang-Jian Zhang (wzhang27@albany.edu)
}
\examples{
# simulated data
set.seed(2018)
Expand Down
8 changes: 3 additions & 5 deletions man/FIPS.name.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@
\value{
\item{areaID}{The area unique ID such as FIPS code and ZIP code will be added to the original data.}
}
\author{
Wang-Jian Zhang (wzhang27@albany.edu)
}
\examples{
set.seed(2018)
dataset=data.frame(Patient=1:2,lat=rnorm(2,42,0.5),long=rnorm(2,-76,1))
#data.out=FIPS.name(data=dataset,ID.case="Patient",long.case="long",
#lat.case="lat",state.map="36",level.map="tract",areaID="GEOID")
\donttest{data.out=FIPS.name(data=dataset,ID.case="Patient",long.case="long",
lat.case="lat",state.map="36",level.map="tract",areaID="GEOID")
}
}
1 change: 0 additions & 1 deletion man/case.series.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ case.series(data,ICD,diagnosis,date,start,end,by1,by2,by3,by4,by5)
When applied to other medical data without ICD codes, users may set an arbitrary ICD code and assign that same code to the diagnosis variable in the data.
}
\examples{
# similated data
set.seed(2018)

data=data.frame(
Expand Down
3 changes: 0 additions & 3 deletions man/desc.comp.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@ desc.comp(data,variables,by,margin,avg.num,test.num)
\value{
A comprehensive descriptive table with statistics and P value for intergroup comparisons.
}
\author{
Wang-Jian Zhang (wzhang27@albany.edu)
}
\examples{
desc.comp(CO2,variables=2:5,by=1,margin=1)
}
3 changes: 0 additions & 3 deletions man/dupl.readm.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@
\item{Period}{the time period between the current visit and the previous one for a patient; 0 for the 1st visit; and NA for those with only one record.}
\item{Nadmission}{indicating the times of admission, e.g. 1st, 2nd admission; a patient may have more than one 1st admissions if some periods between two visits are greater than e.g. 365 days.}
}
\author{
Wang-Jian Zhang (wzhang27@albany.edu)
}
\examples{
dataset=data.frame(
ID=c(1,3,4,2,4,6,3,5,7,1),
Expand Down
35 changes: 35 additions & 0 deletions man/exposure_lag.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
\name{exposure_lag}
\alias{exposure_lag}
\title{
Calculate Individual and Cumulative Lags for Exposure
}
\description{
Calculate individual and cumulative lag exposure for specific variables. Cumulative lag exposure was calculated by using moving average.
}
\usage{
exposure_lag(data,var,maxlag,ID,Date,lag_suffix)
}
\arguments{
\item{data}{A dataframe.}
\item{var}{Variable names in the dataframe to specify variables to be used for the lag calculation.}
\item{maxlag}{A number. The max day for calculating the lag exposure.}
\item{ID}{A variable name. The exposure station ID.}
\item{Date}{A variable name. A variable indicating the date of exposure measurement.}
\item{lag_suffix}{A character vector of length two giving the column-name suffixes. The first element is the suffix for cumulative lag exposure; the second is the suffix for individual (single) lag exposure. Default: c('_cu_lag','_si_lag')}
}

\value{
It returns a dataframe with calculated individual and cumulative lag exposures. 'var_cu_lag5' means the moving average from lag 0 to lag 5 days. 'var_si_lag5' means the exposure 5 days ago.
}
\references{
Deng X, Friedman S, Ryan I, et al. The independent and synergistic impacts of power outages and floods on hospital admissions for multiple diseases [published online ahead of print, 2022 Mar 5]. Sci Total Environ. 2022;828:154305. doi:10.1016/j.scitotenv.2022.154305
}
\examples{
data=data.frame(
ID=rep(1:5,each=5),
Date=seq(as.Date('2022-01-01'),as.Date('2022-01-05'),by='1 day'),
x=rnorm(25)
)

exposure_lag(data,var='x',maxlag=3,ID='ID',Date='Date')
}
6 changes: 2 additions & 4 deletions man/mediationking.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,13 @@
\item{Direct effect}{The effect of the exposure on the outcome variable that is caused by factors other than the mediator.}
\item{Meditation.proportion}{The proportion of the mediation effect.}

}
\author{
Bo Ye (bye2@albany.edu)
}
\examples{
set.seed(1)
exposure<-rnorm(20,0,1)
mediator<-rnorm(20,10,1)
outcome<-rnorm(20,10,1)
dataset<-data.frame(outcome,mediator,exposure)
#mediationking(dataset,"outcome","mediator","exposure")
\donttest{mediationking(dataset,"outcome","mediator","exposure")
}
}
3 changes: 0 additions & 3 deletions man/pick.cases.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@
\references{
Zhang W, Lin S, Hopke PK, et al. Triggering of cardiovascular hospital admissions by fine particle concentrations in New York state: Before, during, and after implementation of multiple environmental policies and a recession. Environ. Pollut. [electronic article]. 2018;242:1404-1416.
}
\author{
Wang-Jian Zhang (wzhang27@albany.edu)
}
\examples{
set.seed(2018)
data=data.frame(Patient=1:100,lat=rnorm(100,41,0.5),long=rnorm(100,-76,1))
Expand Down
3 changes: 0 additions & 3 deletions man/raster_extract.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
\item{ID.code}{the column indicating the unique ID for each area, followed by the number of cells for each category/colour within that area.}
\item{Total cells}{the total number of cells within each area.}
}
\author{
Wang-Jian Zhang (wzhang27@albany.edu)
}
\examples{
library(raster)
set.seed(4715)
Expand Down

0 comments on commit 1c7d66a

Please sign in to comment.