imports per line, vignette, readme

frictionlessdata · Jan 28, 2018 · abccbf0 · abccbf0
1 parent 2221df0
commit abccbf0
Show file tree

Hide file tree

Showing 8 changed files with 150 additions and 21 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,31 +4,24 @@
 
 # Session Data files
 .RData
-
 # Example code in package build process
 *-Ex.R
-
 # Output files from R CMD build
 /*.tar.gz
-
 # Output files from R CMD check
 /*.Rcheck/
-
 # RStudio files
 .Rproj.user/
-
 # produced vignettes
 vignettes/*.html
 vignettes/*.pdf
-
 # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
 .httr-oauth
-
 # knitr and R markdown default cache directories
 /*_cache/
 /cache/
-
 # Temporary files created by R markdown
 *.utf8.md
 *.knit.md
-.Rproj.user
+.Rproj.user
+inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -10,7 +10,7 @@ Description: A library for working with Data Package (<http://frictionlessdata.i
 License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
-Imports: 
+Imports:
     config, 
     devtools, 
     future, 
@@ -29,7 +29,7 @@ Imports:
     urltools, 
     utils, 
     V8
-Remotes: 
+Remotes:
     okgreece/tableschema-r
 Suggests:
     covr,
@@ -38,8 +38,10 @@ Suggests:
     httptest,
     testthat,
     webmockr,
-    mockery
-Collate: 
+    mockery,
+    knitr,
+    rmarkdown
+Collate:
     'DataPackageError.R'
     'Package.R'
     'helpers.R'
@@ -52,3 +54,4 @@ Collate:
     'resource.R'
     'validate.R'
 RoxygenNote: 6.0.1
+VignetteBuilder: knitr
diff --git a/R/Package.R b/R/Package.R
@@ -98,7 +98,7 @@ Package <- R6::R6Class(
 
       # Profile
 
-      if (private$nextDescriptor_$profile == config::get("DEFAULT_DATA_PACKAGE_PROFILE")) {
+      if (isTRUE(private$nextDescriptor_$profile == config::get("DEFAULT_DATA_PACKAGE_PROFILE"))) {
         if (length(private$resources_) >= 1 && rlist::list.all(private$resources_, r ~ isTRUE(r$tabular))) {
 
           private$currentDescriptor_$profile = 'tabular-data-package'

diff --git a/R/helpers.R b/R/helpers.R
@@ -487,7 +487,12 @@ findFiles = function(pattern, path = getwd()) {
                               files,
                               fixed = FALSE,
                               ignore.case = FALSE)]
-  matched_files = matched_files[1]
+
+  matched_files = matched_files[grepl(stringr::str_c(".","csv"), 
+                                      matched_files, 
+                                      fixed = TRUE, 
+                                      ignore.case = FALSE)]
+
   return(matched_files)
 }
 

diff --git a/R/profile.R b/R/profile.R
@@ -26,8 +26,8 @@ Profile <- R6::R6Class(
 
       if (is.character(private$profile_)) {
 
-        private$profile_ = stringr::str_interp("inst/profiles/${private$profile_}.json")
-        # private$profile_ = system.file(stringr::str_interp("profiles/${private$profile_}.json"), package = "datapackage.r")
+        private$profile_ =system.file(stringr::str_interp("profiles/${private$profile_}.json"), package = "datapackage.r")
+        # private$profile_ =  stringr::str_interp("inst/profiles/${private$profile_}\.json")
 
         if(private$profile_ =="" | is.null(private$profile_)) {
 

diff --git a/README.Rmd b/README.Rmd
@@ -55,9 +55,9 @@ To install [RStudio][Rstudio], you can download [RStudio Desktop][Rstudiodown] w
 4. Select the appropriate file for your system
 5. Run installation file
 
-To install the `datapackage` library it is necessary to install first `devtools` library to make installation of github libraries available.
+To install the `datapackage` library, you first need to install the [`devtools` library](https://cran.r-project.org/package=devtools), which makes it possible to install libraries from GitHub.
 
-```{r, eval=FALSE, include=T}
+```{r, eval=FALSE, include=TRUE}
 # Install devtools package if not already
 install.packages("devtools")
 ```
@@ -67,7 +67,7 @@ Install `datapackage.r`
 
 ```{r, eval=FALSE, include=T}
 # And then install the development version from github
-devtools::install_github("okgreece/datapackage.r")
+devtools::install_github("frictionlessdata/datapackage.r")
 ```
 
 ## Load library
@@ -147,7 +147,7 @@ dataPackage = Package.load()
 Now we're ready to infer a data package descriptor based on the data files we have. Because we have two csv files we use the pattern `csv`:
 
 ```{r eval=FALSE, include=TRUE}
-dataPackage$infer('**.csv')
+dataPackage$infer('csv')
 dataPackage$descriptor
 ```
 

diff --git a/vignettes/using_data_packages_in_r.R b/vignettes/using_data_packages_in_r.R
@@ -0,0 +1,41 @@
+## ---- eval=FALSE, include=TRUE-------------------------------------------
+#  # Install devtools package if not already
+#  install.packages("devtools")
+
+## ---- eval=FALSE, include=TRUE-------------------------------------------
+#  devtools::install_github("frictionlessdata/datapackage.r")
+
+## ---- eval=FALSE, include=TRUE-------------------------------------------
+#  library(datapackage.r)
+
+## ------------------------------------------------------------------------
+# dataPackage = Package.load()
+# dataPackage$descriptor['name'] = 'period-table'
+# dataPackage$descriptor['title'] = 'Periodic Table'
+
+## ------------------------------------------------------------------------
+# import io
+# import csv
+# from jsontableschema import infer
+# 
+# filepath = './data.csv'
+# 
+# with io.open(filepath) as stream:
+#     headers = stream.readline().rstrip('\n').split(',')
+#     values = csv.reader(stream)
+#     schema = infer(headers, values)
+#     dp.descriptor['resources'] = [
+#         {
+#             'name': 'data',
+#             'path': filepath,
+#             'schema': schema
+#         }
+#     ]
+
+## ------------------------------------------------------------------------
+# with open('datapackage.json', 'w') as f:
+#   f.write(dp.to_json())
+
+## ------------------------------------------------------------------------
+# datapackage
+
diff --git a/vignettes/using_data_packages_in_r.Rmd b/vignettes/using_data_packages_in_r.Rmd
@@ -0,0 +1,87 @@
+---
+title: "Using Data Packages in R"
+author: "Kleanthis Koupidis"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Using Data Packages in R}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+This tutorial will show you how to install the R library for working with Data Packages and Table Schema, load a CSV file, infer its schema, and write a Tabular Data Package.
+
+
+# Setup
+
+For this tutorial, we will need the Data Package R library ([datapackage.r](https://github.com/frictionlessdata/datapackage-r)).
+
+To install the datapackage library, you first need to install the [devtools library](https://cran.r-project.org/package=devtools), which makes it possible to install libraries from GitHub.
+
+```{r, eval=FALSE, include=TRUE}
+# Install devtools package if not already
+install.packages("devtools")
+```
+
+Then install the development version of [datapackage.r](https://github.com/frictionlessdata/datapackage-r) from GitHub:
+
+```{r, eval=FALSE, include=TRUE}
+devtools::install_github("frictionlessdata/datapackage.r")
+```
+
+# Load
+
+```{r, eval=TRUE, include=TRUE}
+library(datapackage.r)
+```
+
+You can start using the library by loading `datapackage.r`. You can add useful metadata by adding keys to the package's `descriptor`. Below, we are adding the required `name` key as well as a human-readable `title` key. For the keys supported, please consult the full [Data Package spec](https://frictionlessdata.io/specs/data-package/#metadata). Note that we will be creating the required `resources` key further down.
+
+```{r}
+dataPackage = Package.load()
+dataPackage$descriptor['name'] = 'period-table'
+dataPackage$descriptor['title'] = 'Periodic Table'
+```
+
+# Infer a CSV Schema
+Let's say we have a file called data.csv ([download](https://github.com/frictionlessdata/example-data-packages/blob/master/periodic-table/data.csv)) in our working directory.
+
+We can guess at our CSV's [schema](https://frictionlessdata.io/guides/table-schema/) by using `infer` from the Table Schema library. We open the path as a stream, separating the headers from the rest of the file. We then pass the headers and values to `infer`. The result is an inferred schema. For example, if the processor detects only integers in a given column, it will assign `integer` as a column type.
+
+Once we have a schema, we are now ready to add a `resource` key to the Data Package which points to the resource path and its newly created schema.
+
+```{r}
+# import io
+# import csv
+# from jsontableschema import infer
+# # 
+# filepath = 'inst/data/data.csv'
+# # 
+# # with io.open(filepath) as stream:
+#     headers = read.csv(filepath,sep = ",")
+#     values = read.csv(filepath,sep = ",")
+# #     schema = infer(headers, values)
+#     dp.descriptor['resources'] = [
+#         {
+#             'name': 'data',
+#             'path': filepath,
+#             'schema': schema
+#         }
+#     ]
+```
+
+Now we are ready to write our `datapackage.json` file.
+
+```{r}
+# with open('datapackage.json', 'w') as f:
+#   f.write(dp.to_json())
+```
+
+The `datapackage.json` ([download](https://github.com/frictionlessdata/example-data-packages/blob/master/periodic-table/datapackage.json)) is inlined below. Note that atomic number has been correctly inferred as an `integer` and atomic mass as a `number` (float) while every other column is a `string`.
+```{r}
+# datapackage
+```
+
+# Publishing
+
+Now that you have created your Data Package, you might want to [publish your data online](https://frictionlessdata.io/guides/publish-online/) so that you can share it with others.