diff --git a/server.js b/server.js index 3f2814431b..77c08b6166 100644 --- a/server.js +++ b/server.js @@ -55,6 +55,12 @@ app.prepare().then(() => { Location: req.url.replace('commands-reference', 'command-reference') }) res.end() + } else if (pathname == '/doc/tutorial') { + // path /doc/tutorial -> /doc/tutorials + res.writeHead(302, { + Location: req.url.replace('/doc/tutorial', '/doc/tutorials') + }) + res.end() } else if (/^\/doc.*/i.test(pathname)) { // path /doc* -> /doc let normalized_pathname = pathname.replace(/^\/doc[^?\/]*/i, '/doc') diff --git a/src/Documentation/sidebar.json b/src/Documentation/sidebar.json index 42d20ff3e4..a50bc90ddd 100644 --- a/src/Documentation/sidebar.json +++ b/src/Documentation/sidebar.json @@ -24,14 +24,42 @@ { "label": "Get Older Files", "slug": "older-versions" + } + ] + }, + { + "slug": "tutorials", + "source": "tutorials/index.md", + "children": [ + { + "slug": "interactive", + "label": "Interactive Tutorials" + }, + { + "slug": "versioning", + "label": "Data Versioning" + }, + { + "slug": "pipelines", + "label": "Stages and Pipelines" }, { - "label": "Example: Versioning", - "slug": "example-versioning" + "slug": "tutorial", + "label": "Longer Tutorial", + "source": "tutorial/index.md", + "children": [ + "preparation", + { + "label": "Define ML Pipeline", + "slug": "define-ml-pipeline" + }, + "reproducibility", + "sharing-data" + ] }, { - "label": "Example: Pipelines", - "slug": "example-pipeline" + "slug": "community", + "label": "Community Tutorials" } ] }, @@ -284,19 +312,6 @@ } ] }, - { - "slug": "tutorial", - "source": "tutorial/index.md", - "children": [ - "preparation", - { - "label": "Define ML Pipeline", - "slug": "define-ml-pipeline" - }, - "reproducibility", - "sharing-data" - ] - }, { "label": "Understanding DVC", "slug": "understanding-dvc", diff --git a/static/docs/command-reference/add.md b/static/docs/command-reference/add.md index d004651d65..ce6eeec316 100644 --- a/static/docs/command-reference/add.md 
+++ b/static/docs/command-reference/add.md @@ -241,7 +241,7 @@ $ dvc run -f train.dvc \ ``` To see this whole example go to -[Example: Versioning](/doc/get-started/example-versioning). +[Tutorial: Versioning](/doc/tutorials/versioning). Since no top-level DVC-file is generated with the `--recursive` option we cannot use the directory structure as a whole. diff --git a/static/docs/command-reference/checkout.md b/static/docs/command-reference/checkout.md index 49de3e7448..6199345cb6 100644 --- a/static/docs/command-reference/checkout.md +++ b/static/docs/command-reference/checkout.md @@ -125,7 +125,7 @@ $ cd example-get-started The workspace looks almost like in this -[pipeline setup](/doc/get-started/example-pipeline): +[pipeline setup](/doc/tutorials/pipelines): ```dvc . diff --git a/static/docs/command-reference/fetch.md b/static/docs/command-reference/fetch.md index 32044e6ef9..3085e7e263 100644 --- a/static/docs/command-reference/fetch.md +++ b/static/docs/command-reference/fetch.md @@ -136,7 +136,7 @@ $ cd example-get-started The workspace looks almost like in this -[pipeline setup](/doc/get-started/example-pipeline): +[pipeline setup](/doc/tutorials/pipelines): ```dvc . diff --git a/static/docs/command-reference/repro.md b/static/docs/command-reference/repro.md index c49b14ae0a..083d68216f 100644 --- a/static/docs/command-reference/repro.md +++ b/static/docs/command-reference/repro.md @@ -118,8 +118,7 @@ specified), and updates stage files with the new checksum information. For simplicity, let's build a pipeline defined below. (If you want get your hands-on something more real, see this shot -[pipeline tutorial](/doc/get-started/example-pipeline)). It takes this -`text.txt` file: +[pipeline tutorial](/doc/tutorials/pipelines)). 
It takes this `text.txt` file: ``` dvc diff --git a/static/docs/command-reference/run.md b/static/docs/command-reference/run.md index e49719ebb3..ad3058e755 100644 --- a/static/docs/command-reference/run.md +++ b/static/docs/command-reference/run.md @@ -46,11 +46,10 @@ creating a new stage. For example, for every output there should be only one stage that explicitly specifies it. There should be no cycles, etc. Note that `dvc repro` provides an interface to check state and reproduce this -graph (pipeline) later. This concept is similar to that of +graph (pipeline) later. This concept is similar to the one of the [Make](https://www.gnu.org/software/make/) in software build automation, but DVC captures data and caches data artifacts along the way. See this -[example](/doc/get-started/example-pipeline) to learn more and try to create a -pipeline. +[example](/doc/tutorials/pipelines) to learn more and try to create a pipeline. ## Options diff --git a/static/docs/get-started/connect-code-and-data.md b/static/docs/get-started/connect-code-and-data.md index 688052546b..7ce3cf110c 100644 --- a/static/docs/get-started/connect-code-and-data.md +++ b/static/docs/get-started/connect-code-and-data.md @@ -60,7 +60,7 @@ $ git commit -m "Add source code files to repo" Having installed the `src/prepare.py` script in your repo, the following command transforms it into a reproducible [stage](/doc/command-reference/run) for the ML pipeline we're building (described in the -[next chapter](/doc/get-started/example-pipeline)). +[next chapter](/doc/tutorials/pipelines)). ```dvc $ dvc run -f prepare.dvc \ diff --git a/static/docs/get-started/index.md b/static/docs/get-started/index.md index 8994ab2a91..81844da657 100644 --- a/static/docs/get-started/index.md +++ b/static/docs/get-started/index.md @@ -5,8 +5,8 @@ go into details much, but provides links and expandable sections to learn more. 
At the very end there are a few complete examples to give you more hands-on experience with real life scenarios. The first one is about model and dataset -[versioning](/doc/get-started/example-versioning), and the second one is focused -on [pipelines and reproducibility](/doc/get-started/example-pipeline). +[versioning](/doc/tutorials/versioning), and the second one is focused on +[pipelines and reproducibility](/doc/tutorials/pipelines). ✅ Please, join our [community](/chat) or see these [support](/support) options if you have any questions or need any help. We are very responsive ⚡. diff --git a/static/docs/get-started/older-versions.md b/static/docs/get-started/older-versions.md index d01bdf7c71..5371cc59af 100644 --- a/static/docs/get-started/older-versions.md +++ b/static/docs/get-started/older-versions.md @@ -50,4 +50,4 @@ $ dvc checkout ``` Read the `dvc checkout` command reference and a dedicated data versioning -[example](/doc/get-started/example-versioning) for more information. +[example](/doc/tutorials/versioning) for more information. diff --git a/static/docs/get-started/pipeline.md b/static/docs/get-started/pipeline.md index d24115f276..0011f410c0 100644 --- a/static/docs/get-started/pipeline.md +++ b/static/docs/get-started/pipeline.md @@ -36,7 +36,7 @@ $ dvc push ``` This example is simplified just to show you a basic pipeline, see a more -advanced [example](/doc/get-started/example-pipeline) or complete +advanced [example](/doc/tutorials/pipelines) or complete [tutorial](/doc/tutorial) to create a [NLP](https://en.wikipedia.org/wiki/Natural_language_processing) pipeline end-to-end. 
diff --git a/static/docs/tutorials/community.md b/static/docs/tutorials/community.md new file mode 100644 index 0000000000..31f8658ae1 --- /dev/null +++ b/static/docs/tutorials/community.md @@ -0,0 +1,25 @@ +# Community Tutorials + +Blog posts from the community which show how they use DVC in their ML scenarios: + +- [Data Version Control Tutorial](https://blog.dataversioncontrol.com/data-version-control-tutorial-9146715eda46) + +- [DVC tutorial: improving CNN model](https://medium.com/@fullstackml/aab8162f8e3f) + +- [Creating an awesome project using DVC and DAGsHub](https://dagshub.com/docs/overview/) + +- [Using DVC to create an efficient version control system for data projects](https://medium.com/qonto-engineering/using-dvc-to-create-an-efficient-version-control-system-for-data-projects-96efd94355fe) + +- [Introduction to using DVC to manage machine learning project datasets](https://techsparx.com/software-development/ai/dvc/simple-example.html) + +- [Managing versioned machine learning datasets in DVC, and easily share ML projects with colleagues](https://techsparx.com/software-development/ai/dvc/versioning-example.html) + +- [A walkthrough of DVC](https://blog.codecentric.de/en/2019/03/walkthrough-dvc/) + +- [DVC dependency management](https://blog.codecentric.de/en/2019/08/dvc-dependency-management/) + +- [How to use data version control (dvc) in a machine learning project](https://towardsdatascience.com/how-to-use-data-version-control-dvc-in-a-machine-learning-project-a78245c0185) + +- [My first try at DVC](https://stdiff.net/MB2019051301.html) + +- [Effective Management of your Machine Learning Laboratory](https://www.linkedin.com/pulse/effective-management-your-machine-learning-laboratory-ulaganathan/) diff --git a/static/docs/tutorials/index.md b/static/docs/tutorials/index.md new file mode 100644 index 0000000000..7b0291b839 --- /dev/null +++ b/static/docs/tutorials/index.md @@ -0,0 +1,18 @@ +# Tutorials + +- [Interactive 
Tutorials](/doc/tutorials/interactive)
Interactive lessons + and tutorials on [Katacoda](https://katacoda.com/dvc) that explain the basic + concepts of DVC and show how to use it in simple ML scenarios. + +- [Data Versioning](/doc/tutorials/versioning)
Using DVC commands to work + with multiple versions of datasets and ML models. + +- [Stages and Pipelines](/doc/tutorials/pipelines)
Using DVC commands to + build a simple ML pipeline. + +- [Comprehensive Tutorial](/doc/tutorials/tutorial)
Introduces DVC + step-by-step, while additionally explaining in great detail the motivation and + what's happening internally. + +- [Community Tutorials](/doc/tutorials/community)
Blog posts from the + community which show how they use DVC in their ML scenarios. diff --git a/static/docs/tutorials/interactive.md b/static/docs/tutorials/interactive.md new file mode 100644 index 0000000000..a3c89c7883 --- /dev/null +++ b/static/docs/tutorials/interactive.md @@ -0,0 +1,46 @@ +# Interactive Tutorials + +Interactive lessons and tutorials on [Katacoda](https://katacoda.com/dvc) that +explain the basic concepts of DVC and show how to use it in simple ML scenarios. + +## Basic Concepts + +Learn basic concepts and features of DVC with interactive lessons: + +1. [Data Management](https://katacoda.com/dvc/courses/basics/data)
The + core function of DVC is data tracking and management. Let's see how to do it. + +2. [Getting the Best Performance](https://katacoda.com/dvc/courses/basics/performance) +
It is important to optimize the DVC setup to get the best + performance when handling big data files. + +3. [Tracking Data Versions](https://katacoda.com/dvc/courses/basics/versioning) +
DVC takes advantage of Git's versioning features to keep track of the + data versions. + +4. [Sharing Data](https://katacoda.com/dvc/courses/basics/sharing)
DVC + facilitates sharing of data between different people that work on the same + project. + +5. [Stages And Pipelines](https://katacoda.com/dvc/courses/basics/pipelines) +
DVC has a built-in way to connect ML steps into a DAG and run the full + pipeline end-to-end. + +## Simple ML Scenarios + +Learn how DVC can be used in simple ML scenarios: + +- [Data Versioning](https://katacoda.com/dvc/courses/tutorials/versioning)
+ Using DVC commands to work with multiple versions of datasets and ML models. + +- [Stages and Pipelines](https://katacoda.com/dvc/courses/tutorials/pipelines) +
Using DVC commands to build a simple ML pipeline. + +## Examples + +Interactive examples about using DVC commands and other features of DVC. + +- [dvc fetch](https://katacoda.com/dvc/courses/examples/fetch)
We will use + an example project with some data, code, ML models, pipeline stages, as well + as a few Git tags. Then we will see what happens with dvc fetch as we switch + from tag to tag. diff --git a/static/docs/get-started/example-pipeline.md b/static/docs/tutorials/pipelines.md similarity index 99% rename from static/docs/get-started/example-pipeline.md rename to static/docs/tutorials/pipelines.md index 4ea17429d2..f362a34bb8 100644 --- a/static/docs/get-started/example-pipeline.md +++ b/static/docs/tutorials/pipelines.md @@ -1,4 +1,4 @@ -# Example: Pipelines +# Tutorial: Pipelines To show DVC in action, let's play with an actual machine learning scenario. Let's explore the natural language processing diff --git a/static/docs/tutorial/define-ml-pipeline.md b/static/docs/tutorials/tutorial/define-ml-pipeline.md similarity index 100% rename from static/docs/tutorial/define-ml-pipeline.md rename to static/docs/tutorials/tutorial/define-ml-pipeline.md diff --git a/static/docs/tutorial/index.md b/static/docs/tutorials/tutorial/index.md similarity index 100% rename from static/docs/tutorial/index.md rename to static/docs/tutorials/tutorial/index.md diff --git a/static/docs/tutorial/preparation.md b/static/docs/tutorials/tutorial/preparation.md similarity index 100% rename from static/docs/tutorial/preparation.md rename to static/docs/tutorials/tutorial/preparation.md diff --git a/static/docs/tutorial/reproducibility.md b/static/docs/tutorials/tutorial/reproducibility.md similarity index 100% rename from static/docs/tutorial/reproducibility.md rename to static/docs/tutorials/tutorial/reproducibility.md diff --git a/static/docs/tutorial/sharing-data.md b/static/docs/tutorials/tutorial/sharing-data.md similarity index 100% rename from static/docs/tutorial/sharing-data.md rename to static/docs/tutorials/tutorial/sharing-data.md diff --git a/static/docs/get-started/example-versioning.md b/static/docs/tutorials/versioning.md similarity index 98% rename from 
static/docs/get-started/example-versioning.md rename to static/docs/tutorials/versioning.md index c1d1d7da09..2fb0189b56 100644 --- a/static/docs/get-started/example-versioning.md +++ b/static/docs/tutorials/versioning.md @@ -1,4 +1,4 @@ -# Example: Versioning +# Tutorial: Versioning > Reading time is 10-13 minutes. Running the training is 30-40 minutes > (including downloading the dataset). Running the code is optional, and reading @@ -362,8 +362,8 @@ Here's where the [pipelines](/doc/command-reference/pipeline) feature of DVC comes very handy and was designed for. We touched it briefly when we described `dvc run` and `dvc repro` at the very end. The next step here would be splitting the script into two parts, and utilizing pipelines. See -[this example](/doc/get-started/example-pipeline) to get a hands-on experience -with pipelines and try to apply it here. Don't hesitate to join our +[this example](/doc/tutorials/pipelines) to get a hands-on experience with +pipelines and try to apply it here. Don't hesitate to join our [community](/chat) to ask any questions! Another detail we only brushed on here is the way we captured the `metrics.json` diff --git a/static/docs/use-cases/data-and-model-files-versioning.md b/static/docs/use-cases/data-and-model-files-versioning.md index ab1e142425..a0a8b8a275 100644 --- a/static/docs/use-cases/data-and-model-files-versioning.md +++ b/static/docs/use-cases/data-and-model-files-versioning.md @@ -2,7 +2,7 @@ > This document provides an overview the file versioning workflow with DVC. To > get more hands-on experience on this we recommend following along the -> [Versioning](/doc/get-started/example-versioning) example. +> [Versioning](/doc/tutorials/versioning) tutorial. DVC allows versioning data files and directories, intermediate results, and ML models using Git, but without storing the file contents in the repository. 
It's @@ -117,6 +117,5 @@ To share your data with others you need to setup a [Share Data And Model Files](/doc/use-cases/share-data-and-model-files) use case to get an overview on how to do this. -Please also don't forget to see the -[Versioning](/doc/get-started/example-versioning) example to get a hands-on -experience with datasets and models versioning. +Please also don't forget to see the [Versioning](/doc/tutorials/versioning) +tutorial to get hands-on experience with dataset and model versioning.