From 58c76b9d2d8933485e185d275a0c1a3cea85f9c8 Mon Sep 17 00:00:00 2001
From: Nicholas Walsh
Date: Thu, 24 May 2018 18:35:53 -0700
Subject: [PATCH] add R notebook (system level commands need to be tested)

---
 examples/R/.datmo/.config               |  22 ++++++
 examples/R/.datmo/database/config.json  |   1 +
 examples/R/snapshot-create-notebook.Rmd | 101 ++++++++++++++++++++++++
 3 files changed, 124 insertions(+)
 create mode 100644 examples/R/.datmo/.config
 create mode 100644 examples/R/.datmo/database/config.json
 create mode 100644 examples/R/snapshot-create-notebook.Rmd

diff --git a/examples/R/.datmo/.config b/examples/R/.datmo/.config
new file mode 100644
index 00000000..7de53863
--- /dev/null
+++ b/examples/R/.datmo/.config
@@ -0,0 +1,22 @@
+{
+    "controller.code.driver": {
+        "class_constructor": "datmo.core.controller.code.driver.git.GitCodeDriver",
+        "options": {
+            "execpath": "git",
+            "filepath": "/Users/nwalsh/Dev/datmo/examples/R"
+        }
+    },
+    "storage.driver": {
+        "class_constructor": "datmo.core.storage.driver.blitzdb_dal_driver.BlitzDBDALDriver",
+        "options": {
+            "connection_string": "/Users/nwalsh/Dev/datmo/examples/R/.datmo/database",
+            "driver_type": "file"
+        }
+    },
+    "storage.local": {
+        "class_constructor": "datmo.core.storage.local.dal.LocalDAL",
+        "options": {
+            "driver": "storage.driver"
+        }
+    }
+}
\ No newline at end of file
diff --git a/examples/R/.datmo/database/config.json b/examples/R/.datmo/database/config.json
new file mode 100644
index 00000000..ed6bf090
--- /dev/null
+++ b/examples/R/.datmo/database/config.json
@@ -0,0 +1 @@
+{"autocommit": false, "serializer_class": "json", "store_class": "transactional", "index_class": "transactional", "indexes": {"code": {"pk": {"id": "ffa1801b0c3e45e081be8f4ce3bcf101", "key": "pk"}}, "file_collection": {"pk": {"id": "6b27ceaecadf4f2eaf9b31ccddbcb09c", "key": "pk"}}, "task": {"pk": {"id": "84110dc049e8452596451b76c05833f4", "key": "pk"}}, "environment": {"pk": {"id": "9d26bd0b5f08454d9497864962b97fc2", "key": "pk"}}, "session": {"pk": {"id": "8f0ca64ddfc14abd89901e987cfdeeb0", "key": "pk"}}, "snapshot": {"pk": {"id": "0df27a73b2974aad9c0b163362adb923", "key": "pk"}}, "user": {"pk": {"id": "be646c50d4564f5cbf58fd1af17807d7", "key": "pk"}}, "model": {"pk": {"id": "45cd516f77f94921876cd86c4528b8d0", "key": "pk"}}}, "index_store_class": "basic", "version": "0.2.12"}
\ No newline at end of file
diff --git a/examples/R/snapshot-create-notebook.Rmd b/examples/R/snapshot-create-notebook.Rmd
new file mode 100644
index 00000000..08e5b2e0
--- /dev/null
+++ b/examples/R/snapshot-create-notebook.Rmd
@@ -0,0 +1,101 @@
+---
+title: "Log your experiments in R with Datmo"
+author: "Nick Walsh"
+output:
+  html_document:
+    df_print: paged
+    toc: yes
+  rmarkdown::html_vignette:
+    number_sections: yes
+    toc: yes
+---
+
+Setup
+=====
+
+First, we'll need to install a few packages for use today. They'll contain everything we'll need to model our data and create visualizations.
+
+```{r installPackages}
+# "datasets" (home of the Iris data) is a base package bundled with R, so it is not installed here
+install.packages("caret", dependencies = TRUE) # Model selection/tuning package
+install.packages("rpart.plot") # Visualization package
+```
+
+We're going to install a Python package called Datmo, which will enable us to log and track our experiments through the power of *snapshots*.
+If you don't already have pip, you can [find it here](https://pip.pypa.io/en/stable/installing/).
+
+```{bash}
+pip install datmo
+```
+
+Next, we're going to want to make sure we've set the proper working directory. We can do this easily through the
+RStudio file finder on the right, or with the following command.
+
+This will be necessary so that Datmo knows the proper directory to perform tracking in.
+
+```{r "setup", include=FALSE}
+library(knitr) # library() errors if knitr is missing; require() would only return FALSE
+opts_knit$set(root.dir = "~/Dev/datmo-R-example") # Replace with whatever your root directory for the project is
+```
+
+Now we're going to initialize a Datmo repository. This will enable us to create snapshots for logging our experiments.
+
+```{r initializeDatmo}
+system("datmo init", input = c("my new project", "test description"), timeout = 15) # input lines go to datmo's stdin (presumably its name/description prompts)
+```
+
+Example
+======
+
+Ok, time to start with loading in the Fisher Iris dataset.
+
+```{r loadData}
+library(datasets)
+
+df <- iris # Create dataframe from the Iris dataset
+head(df) # View first few rows of dataset
+```
+
+Now that our dataframe is loaded in, we can import the *caret* package to perform training.
+
+```{r fitModel}
+library(caret)
+
+modFit <- train(Species ~ ., method = "rpart", data = df) # Fit model
+print(modFit$finalModel) # Summarize model
+```
+
+Our model is built, but it's kind of hard to comprehend with just the metrics. Let's create a visualization to showcase the
+splits in our decision tree.
+
+```{r visualizeModel}
+library(rpart.plot)
+
+rpart.plot(modFit$finalModel) # Create decision tree visualization
+```
+
+Awesome! Since we're happy with our model results, we'll want to save our model and log configuration and stats sections in a snapshot.
+We can do this with the following syntax, where we're creating a *char* string of format "--PROPERTY key:value" that will be passed to
+the snapshot create code block.
+
+```{r defineSnapshot}
+config <- paste0( # "--config key:value" flags describing the model
+  " --config method:", modFit$method,
+  " --config modelType:", modFit$modelType)
+
+# Metrics to save from the model, as "--stats key:value" flags (first row of the results table)
+stats <- paste0(
+  " --stats Accuracy:", modFit$results$Accuracy[1],
+  " --stats Kappa:", modFit$results$Kappa[1])
+
+config
+stats
+```
+
+```{r snapshotCreate}
+system2("datmo", args = c("snapshot create", "-m", shQuote("Whoah, my first snapshot!"), config, stats), timeout = 30) # must be an R chunk: config/stats are R objects
+```
+
+```{bash}
+datmo snapshot ls
+```