version 0.0.2

cran · Dec 11, 2018 · d3b839d · d3b839d
1 parent 835059e
commit d3b839d
Show file tree

Hide file tree

Showing 10 changed files with 36 additions and 33 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,16 +1,15 @@
 Package: rSPARCS
 Type: Package
-Title: Data Management for the SPARCS
-Version: 0.0.1
-Author: Wangjian Zhang, Zhicheng Du, Ziqiang Lin, David Q. Rich, Sally W. Thurston, Jijin Yao, Xiaobo Xue, Shao Lin, Yuantao Hao
+Title: Statistical Package for Analysis Related Cleaning Support
+Version: 0.0.2
+Author: Wangjian Zhang, Zhicheng Du, Ziqiang Lin, Jijin Yao, Yanan Jin, Wayne R. Lawrence, Yuantao Hao
 Maintainer: Wangjian Zhang <wzhang27@albany.edu>
-Description: To clean and analyze the data from the Statewide Planning and Research Cooperative System (SPARCS), and generate sets for statistical modeling.
-    Additionally, other data with similar format or study objectives can also be handled.
+Description: To clean and analyze hospital data, and generate sets for statistical modeling.
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
 Imports: foreign,data.table,spatialEco,geosphere,tigris,raster,sp
 NeedsCompilation: no
-Packaged: 2018-12-02 17:48:38 UTC; zwjed
+Packaged: 2018-12-10 16:36:01 UTC; zwjed
 Repository: CRAN
-Date/Publication: 2018-12-09 15:10:09 UTC
+Date/Publication: 2018-12-11 08:50:03 UTC
diff --git a/MD5 b/MD5
@@ -1,16 +1,16 @@
-7eafabac3fade13122371ec8b28b244f *DESCRIPTION
+16776f82c445ddc514395a79ba6303c9 *DESCRIPTION
 8927c120eaece26e6d1cbe26de42eccb *NAMESPACE
 4db45197f2a5182d3b17a4b132879f5d *R/CXover.data.R
 8b16ea8364b938775656e504a8db1e3e *R/DBFgeocode.R
-1ad44bd688ec549851bd812303fb9366 *R/FIPS.name.R
+284056b5ab71b75ddd7420f09e6d91cf *R/FIPS.name.R
 ab216426bc16c71407a1ab610681ce5a *R/case.series.R
 bb1f81674e0697a39ec3dfa46083eb0a *R/desc.comp.R
 0ad93862fdb1744d2a96ee6e2cae2e85 *R/dupl.readm.R
 ebfee48932401739b07fb0bc2e862341 *R/pick.cases.R
-e5d9c776948025220385c40bddb03c56 *man/CXover.data.Rd
-2cbf6cbf0c16bc815d983275663ee5e3 *man/DBFgeocode.Rd
-a857a5e6e8a76c31de766535ec109960 *man/FIPS.name.Rd
-88701ad21e32eedd5af25b34b54a838f *man/case.series.Rd
-e78f4667dbd49360c0e78cf6f85f7634 *man/desc.comp.Rd
-f0d1129f746468134bb353c55f8d74ef *man/dupl.readm.Rd
-6fc07a7a448802a5e1a81d7e8d75f887 *man/pick.cases.Rd
+b44de1dbcae35d27e6d2b507737cd2c7 *man/CXover.data.Rd
+63fd428d52e7e506d33de563c670438e *man/DBFgeocode.Rd
+cbd5632e948e8f24ff31d54b298d7abf *man/FIPS.name.Rd
+01f074bdab28ca2eb634ed0f5a6008c6 *man/case.series.Rd
+d55ad0b449e3eb41daa18d5e6ef3116e *man/desc.comp.Rd
+4009cbd66cbf6593cc938c954f2c8c32 *man/dupl.readm.Rd
+5dc0e3ba22311779a46f89d380e748e1 *man/pick.cases.Rd
diff --git a/R/FIPS.name.R b/R/FIPS.name.R
@@ -1,4 +1,4 @@
-FIPS.name<-function(data,patco,level="county",add=c("name","FIPS"),state="36",county=NULL,map=NULL,long.case,lat.case,censusFIPS="GEOID"){
+FIPS.name<-function(data,patco,level="county",add=c("name","FIPS"),addfrom=NULL,state="36",county=NULL,map=NULL,long.case,lat.case,censusFIPS="GEOID"){
   COUNTY=data.frame(COUNTY=c("Albany","Allegany","Bronx","Broome",
                              "Cattaraugus","Cayuga","Chautauqua","Chemung","Chenango","Clinton","Columbia","Cortland","Delaware",
                              "Dutchess","Erie","Essex","Franklin","Fulton","Genesee","Greene","Hamilton","Herkimer","Jefferson",
@@ -10,11 +10,12 @@ FIPS.name<-function(data,patco,level="county",add=c("name","FIPS"),state="36",co
                            36031,36033,36035,36037,36039,36041,36043,36045,36047,36049,36051,36053,36055,36057,36059,36061,
                            36063,36065,36067,36069,36071,36073,36075,36077,36079,36081,36083,36085,36087,36091,36093,36095,
                            36097,36099,36089,36101,36103,36105,6107,36109,36111,36113,36115,36117,36119,36121,36123),
-                    SPARCS=c(1,2,58,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,59,23,24,25,26,27,28,60,29,
+                    CODE=c(1,2,58,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,59,23,24,25,26,27,28,60,29,
                              30,31,32,33,34,35,36,37,61,38,62,39,41,42,43,44,45,40,46,47,48,49,50,51,52,53,54,55,56,57))
+  if(length(nrow(addfrom))==0) COUNTY=COUNTY else COUNTY=addfrom
   if(level=="county"){
-    if("FIPS"%in%add) data$FIPS=COUNTY$FIPS[match(data[,patco],COUNTY$SPARCS)]
-    if("name"%in%add) data$county=COUNTY$COUNTY[match(data[,patco],COUNTY$SPARCS)]
+    if("FIPS"%in%add) data$FIPS=COUNTY$FIPS[match(data[,patco],COUNTY$CODE)]
+    if("name"%in%add) data$county=COUNTY$COUNTY[match(data[,patco],COUNTY$CODE)]
   }
   if(level=="census"){
     name=names(data)

diff --git a/man/CXover.data.Rd b/man/CXover.data.Rd
@@ -4,12 +4,12 @@
 \description{Generate the dataset for case crossover analysis.}
 \usage{CXover.data(data,date,ID,direction)}
 \arguments{
-  \item{data}{a data.frame containing the date of each case reported to the SPARCS}
+  \item{data}{a data.frame containing the date of each case.}
   \item{date}{the name of the variable in the data indicating the date of each case reported to the database.}
   \item{ID}{the name of the variable in the data indicating case ID; if not specified, it will be automatically generated starting from 1.}
   \item{direction}{"pre4" or "month4". With "pre4", each case day will be matched with the same weekdays in the previous 4 weeks. With "month4", each case day will be matched with the same weekdays in the same month.}
 }
-\details{Not limited to the SPARCS, but also applicable to other hospital admission data.}
+\details{Not limited to hospital data, but also applicable to other surveillance data.}
 \value{
   \item{dataset}{A data.frame ready for the case crossover analysis, with following variables:}
   \item{ID}{same ID represents the same patient.}

diff --git a/man/DBFgeocode.Rd b/man/DBFgeocode.Rd
@@ -22,5 +22,5 @@ datatest=data.frame(county=c("Albany","Albany","Albany"),
                 zip=12206
                 )
 DBFgeocode(data=datatest,cityname="county",roadaddress="address1",
-mailbox="address2",ZIP="zip",output="rSPARCSdat.csv")
+mailbox="address2",ZIP="zip",output="data_output.csv")
 }
diff --git a/man/FIPS.name.Rd b/man/FIPS.name.Rd
@@ -2,23 +2,26 @@
 \alias{FIPS.name}
 \title{Add county/census tract names or FIPS code}
 \description{Identify the residential census tracts for each case, and add county/census tract names or FIPS code.}
-\usage{FIPS.name(data,patco,level,add,state,county,map,long.case,lat.case,censusFIPS)}
+\usage{FIPS.name(data,patco,level,add,addfrom,state,county,map,long.case,lat.case,censusFIPS)}
 \arguments{
   \item{data}{a data.frame containing coordinates of cases for level="census"}
-  \item{patco}{the name of variable in the SPARCS data indicating the county code.}
+  \item{patco}{the name of variable in the data indicating the county code.}
   \item{level}{"county" or "census", indicating the study level.}
   \item{add}{"name" or "FIPS", or a vector containing both, to specify which variables are to be added.}
+  \item{addfrom}{a data.frame containing "COUNTY" (county names), "FIPS" (county FIPS code) and "CODE" (county code, should match those in the health data).}
   \item{state}{State FIPS code, e.g., "36" for New York State.}
   \item{county}{County FIPS code, e.g., "36001" for Albany; we use "001" for Albany here.}
   \item{map}{A map for a region outside the U.S. can also be imported as a "spatialpolygonsdataframe" object.}
   \item{long.case}{the name of variable in the data indicating the longitude of cases.}
   \item{lat.case}{the name of variable in the data indicating the latitude of cases.}
   \item{censusFIPS}{the name of variable in the map indicating the FIPS for census tracts. Use the default if the study is within the U.S.}
 }
-\details{Not limited to the SPARCS, but also applicable to other research data.}
+\note{If you are working with the NY SPARCS data, no "addfrom" is required, as this package already includes the public information from https://www.health.ny.gov/statistics/sparcs/sysdoc/appf.htm
+}
+\details{Not limited to hospital data, but also applicable to other surveillance data.}
 \value{
   \item{FIPS}{the FIPS code at county or census tract level depending on the argument "level".}
-  \item{county}{the name of counties for the SPARCS data.}
+  \item{county}{the name of counties.}
 }
 \author{
 Wang-Jian Zhang (wzhang27@albany.edu)

diff --git a/man/case.series.Rd b/man/case.series.Rd
@@ -13,7 +13,7 @@ case.series(data,ICD,diagnosis,date,start,end,by1,by2,by3,by4,by5)
   \item{start,end}{the start and end date for the case series to be generated.}
   \item{by1,by2,by3,by4,by5}{the name of the variable in the data used as grouping variables.}
 }
-\details{Not limited for the SPARCS, but also applicable to other hospital admission data.}
+\details{Not limited to hospital data, but also applicable to other surveillance data.}
 \value{
   \item{dataset}{A case series will be generated for time series analysis, trend analysis and displaying, with following variables:}
   \item{date}{from the start date to the end date as user specified, with 1 day bin.}

diff --git a/man/desc.comp.Rd b/man/desc.comp.Rd
@@ -13,7 +13,7 @@ desc.comp(data,variables,by,margin,avg.num,test.num)
   \item{avg.num}{"mean", describe continuous variables with mean and standard deviation; "median", describe continuous variables with median and interquartile range; otherwise, a normal distribution test will be conducted: for normally distributed variables, "mean" will be used; otherwise, "median" will be used.}
   \item{test.num}{"metric", t test or anova will be used for intergroup comparison; "nonmetric", Wilcoxon rank sum test or Kruskal-Wallis test will be used; otherwise, a normal distribution test will be conducted: for normally distributed variables, "metric" will be used; otherwise, "nonmetric" will be used.}
 }
-\details{Not limited to the SPARCS, but also applicable to other research data.}
+\details{Not limited to hospital data, but also applicable to other surveillance data.}
 \value{
 A comprehensive descriptive table with statistics and P value for intergroup comparisons.
 }

diff --git a/man/dupl.readm.Rd b/man/dupl.readm.Rd
@@ -1,17 +1,17 @@
 \name{dupl.readm}
 \alias{dupl.readm}
 \title{Identify Duplicates and Re-admissions}
-\description{Identify the duplicates and re-admissions in the SPARCS or other research data with subject identifications.}
+\description{Identify the duplicates and re-admissions in hospital data with subject identifications.}
 \usage{dupl.readm(data,UniqueID,date,period)}
 \arguments{
   \item{data}{a data.frame containing "UniqueID" and "date"}
   \item{UniqueID}{the name of the variable in the data indicating case ID.}
   \item{date}{the name of the variable in the data indicating the admission/onset date.}
   \item{period}{the time period used to define a re-admission; period=365 by default.}
 }
-\details{Not limited to the SPARCS, but also applicable to other research data with "UniqueID" and "date".}
+\details{Not limited to hospital data, but also applicable to other surveillance data with "UniqueID" and "date".}
 \value{
-  \item{id.dupl}{indicating whether it is a duplicated record with exactly the same "UniqueID" and "date" as a previous record. In the SPARCS data,some patients may be reported twice or even more due to insurance issues. For most studies, researchers may remove this kind of duplicates to avoid potential overcounting problems.}
+  \item{id.dupl}{indicating whether it is a duplicated record with exactly the same "UniqueID" and "date" as a previous record. In some hospital data, some patients may be reported twice or more due to insurance issues. For most studies, researchers may remove this kind of duplicate to avoid potential overcounting problems.}
   \item{onlyone}{indicating whether this is the only record with this ID.}
   \item{Period}{the time period between the current visit and the previous one for a patient; 0 for the 1st visit; and NA for those with only one record.}
   \item{Nadmission}{indicating the times of admission, e.g. 1st, 2nd admission; a patient may have more than one 1st admissions if some periods between two visits are greater than e.g. 365 days.}

diff --git a/man/pick.cases.Rd b/man/pick.cases.Rd
@@ -11,7 +11,7 @@
   \item{lat.sites}{a numeric vector containing the latitude of sites.}
   \item{radius}{radius of the buffer, e.g. "15 miles", "20 kms".}
 }
-\details{Not limited to the SPARCS, but also applicable to other research data.}
+\details{Not limited to hospital data, but also applicable to other surveillance data.}
 \value{
   \item{which.site}{the closest site to the case.}
   \item{minDIST}{the distance of the case to the closest site; in the same unit as "radius".}