From 68a4a1bf3df8aaf70359a9f5bafd7a1306f4099f Mon Sep 17 00:00:00 2001
From: Cheng Li
Date: Fri, 25 Feb 2022 10:48:10 -0800
Subject: [PATCH 01/20] add inference post

---
 docs/_posts/2021-05-05-MoQ.md               | 2 +-
 docs/_posts/2021-05-14-inference-release.md | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 docs/_posts/2021-05-14-inference-release.md

diff --git a/docs/_posts/2021-05-05-MoQ.md b/docs/_posts/2021-05-05-MoQ.md
index 95dd8c7d1cb4..beecf3ab9ee6 100644
--- a/docs/_posts/2021-05-05-MoQ.md
+++ b/docs/_posts/2021-05-05-MoQ.md
@@ -4,7 +4,7 @@ title: "Mixture-of-Quantization: A novel quantization approach for reducing mode
 excerpt: ""
 categories: news
 new_post: false
-date: 2020-05-28 00:00:00
+date: 2021-05-05 00:00:00
 ---

 ## A unified suite for quantization-aware training and inference

diff --git a/docs/_posts/2021-05-14-inference-release.md b/docs/_posts/2021-05-14-inference-release.md
new file mode 100644
index 000000000000..5dc9fdc15c0a
--- /dev/null
+++ b/docs/_posts/2021-05-14-inference-release.md
@@ -0,0 +1,8 @@
+---
+layout: single
+title: "DeepSpeed: Accelerating large-scale model inference and training via system optimizations and compression"
+date: 2021-05-14
+link: https://www.microsoft.com/en-us/research/blog/deepspeed-accelerating-large-scale-model-inference-and-training-via-system-optimizations-and-compression/
+excerpt: "Developed by Microsoft AI & Research."
+categories: news
+---

From 8ba6415f2fc4758173025c063e6ac53cc9c80b0d Mon Sep 17 00:00:00 2001
From: Cheng Li
Date: Fri, 25 Feb 2022 13:24:59 -0800
Subject: [PATCH 02/20] fix format

---
 docs/_data/navigation.yml                       | 114 +++++++++---
 docs/_layouts/feed.html                         |  22 ++++
 docs/_layouts/news-home.html                    |  13 +-
 docs/_layouts/tags.html                         |  16 +++
 docs/_posts/2020-02-13-release.md               |   1 +
 docs/_posts/2020-02-13-turing-nlg.md            |   1 +
 docs/_posts/2020-05-19-bert-record.md           |   1 +
 docs/_posts/2020-05-19-press-release.md         |   1 +
 docs/_posts/2020-05-19-zero-stage2.md           |   1 +
 .../2020-05-28-fastest-bert-training.md         |   1 +
 docs/_posts/2020-07-24-deepspeed-webinar.md     |   1 +
 docs/_posts/2020-08-07-webinar-on-demand.md     |   1 +
 .../2020-09-08-sparse-attention-news.md         |   1 +
 docs/_posts/2020-09-09-ZeRO-Offload.md          |   1 +
 .../2020-09-09-onebit-adam-blog-post.md         |   1 +
 docs/_posts/2020-09-09-onebit-adam-news.md      |   1 +
 .../_posts/2020-09-09-pipeline-parallelism.md   |   1 +
 docs/_posts/2020-09-09-sparse-attention.md      |   1 +
 ...0-10-28-progressive-layer-dropping-news.md   |   1 +
 docs/_posts/2021-03-08-zero3-offload.md         |   1 +
 docs/_posts/2021-05-05-MoQ.md                   |   1 +
 ...021-05-05-inference-kernel-optimization.md   |   1 +
 docs/_posts/2021-05-14-inference-release.md     |   1 +
 docs/_posts/2021-08-18-deepspeed-moe.md         |   1 +
 docs/_posts/2021-11-15-autotuning.md            |   1 +
 docs/_posts/2021-12-09-deepspeed-moe-nlg.md     |   1 +
 docs/_posts/2022-01-19-moe-inference.md         |   1 +
 27 files changed, 129 insertions(+), 59 deletions(-)
 create mode 100644 docs/_layouts/feed.html
 create mode 100644 docs/_layouts/tags.html

diff --git a/docs/_data/navigation.yml b/docs/_data/navigation.yml
index afdc7e218705..00a3184b5b2e 100755
--- a/docs/_data/navigation.yml
+++ b/docs/_data/navigation.yml
@@ -1,120 +1,120 @@
 main:
-  - title: "Getting Started"
+  - title: 'Getting Started'
     url: /getting-started/
-  - title: "News"
+  - title: 'Blog'
     url: /news/
-  - title: "Tutorials"
+  - title: 'Tutorials'
     url: /tutorials/
-  - title: "Documentation"
+  - title: 'Documentation'
     url: https://deepspeed.readthedocs.io/
-  - title: "GitHub"
+  - title: 'GitHub'
     url: https://github.com/microsoft/DeepSpeed

 lnav:
title: "Feature Overview" + - title: 'Feature Overview' url: /features/ - - title: "Getting Started" + - title: 'Getting Started' url: /getting-started/ children: - - title: "Installation" + - title: 'Installation' url: /getting-started/#installation - - title: "Writing models" + - title: 'Writing models' url: /getting-started/#writing-deepspeed-models - - title: "Training" + - title: 'Training' url: /getting-started/#training - - title: "Launching" + - title: 'Launching' url: /getting-started/#launching-deepspeed-training - - title: "Configuration" + - title: 'Configuration' url: /docs/config-json/ children: - - title: "Autotuning" + - title: 'Autotuning' url: /docs/config-json/#autotuning - - title: "Batch size" + - title: 'Batch size' url: /docs/config-json/#batch-size-related-parameters - - title: "Optimizer" + - title: 'Optimizer' url: /docs/config-json/#optimizer-parameters - - title: "Scheduler" + - title: 'Scheduler' url: /docs/config-json/#scheduler-parameters - - title: "Communication" + - title: 'Communication' url: /docs/config-json/#communication-options - - title: "FP16" + - title: 'FP16' url: /docs/config-json/#fp16-training-options - - title: "BFOAT16" + - title: 'BFOAT16' url: /docs/config-json/#bfloat16-training-options - - title: "Gradient Clipping" + - title: 'Gradient Clipping' url: /docs/config-json/#gradient-clipping - - title: "ZeRO optimizations" + - title: 'ZeRO optimizations' url: /docs/config-json/#zero-optimizations-for-fp16-training - - title: "Parameter Offloading" + - title: 'Parameter Offloading' url: /docs/config-json/#parameter-offloading - - title: "Optimizer Offloading" + - title: 'Optimizer Offloading' url: /docs/config-json/#optimizer-offloading - - title: "Asynchronous I/O" + - title: 'Asynchronous I/O' url: /docs/config-json/#asynchronous-io - - title: "Logging" + - title: 'Logging' url: /docs/config-json/#logging - - title: "Flops Profiler" + - title: 'Flops Profiler' url: /docs/config-json/#flops-profiler - - title: "PyTorch Profiler" + - title: 'PyTorch Profiler' url: /docs/config-json/#pytorch-profiler - - title: "Activation checkpointing" + - title: 'Activation checkpointing' url: /docs/config-json/#activation-checkpointing - - title: "Sparse Attention" + - title: 'Sparse Attention' url: /docs/config-json/#sparse-attention - - title: "Logging to TensorBoard" + - title: 'Logging to TensorBoard' url: /docs/config-json/#tensorboard-options - - title: "Tutorials" + - title: 'Tutorials' url: /tutorials/ children: - - title: "Getting started" + - title: 'Getting started' url: /getting-started/ - - title: "Getting started on Azure" + - title: 'Getting started on Azure' url: /tutorials/azure/ - - title: "Autotuning" + - title: 'Autotuning' url: /tutorials/autotuning/ - - title: "BingBertSQuAD Fine-tuning" + - title: 'BingBertSQuAD Fine-tuning' url: /tutorials/bert-finetuning/ - - title: "BERT Pre-training" + - title: 'BERT Pre-training' url: /tutorials/bert-pretraining/ - - title: "CIFAR-10" + - title: 'CIFAR-10' url: /tutorials/cifar-10/ - - title: "Curriculum Learning" + - title: 'Curriculum Learning' url: /tutorials/curriculum-learning/ - - title: "Flops Profiler" + - title: 'Flops Profiler' url: /tutorials/flops-profiler/ - - title: "PyTorch Profiler" + - title: 'PyTorch Profiler' url: /tutorials/pytorch-profiler/ - - title: "GAN" + - title: 'GAN' url: /tutorials/gan/ - - title: "Inference" + - title: 'Inference' url: /tutorials/inference-tutorial/ - - title: "Learning Rate Range Test" + - title: 'Learning Rate Range Test' url: /tutorials/lrrt/ - - 
title: "Megatron-LM GPT2" + - title: 'Megatron-LM GPT2' url: /tutorials/megatron/ - - title: "Mixture-of-Experts (MoE)" + - title: 'Mixture-of-Experts (MoE)' url: /tutorials/mixture-of-experts/ - - title: "Mixture-of-Experts for NLG" + - title: 'Mixture-of-Experts for NLG' url: /tutorials/mixture-of-experts-nlg/ - - title: "Mixture-of-Quantization" + - title: 'Mixture-of-Quantization' url: /tutorials/MoQ-tutorial/ - - title: "One-Cycle Schedule" + - title: 'One-Cycle Schedule' url: /tutorials/one-cycle/ - - title: "One-Bit Adam" + - title: 'One-Bit Adam' url: /tutorials/onebit-adam/ - - title: "One-Bit LAMB" + - title: 'One-Bit LAMB' url: /tutorials/onebit-lamb/ - - title: "Pipeline Parallelism" + - title: 'Pipeline Parallelism' url: /tutorials/pipeline/ - - title: "Progressive Layer Dropping" + - title: 'Progressive Layer Dropping' url: /tutorials/progressive_layer_dropping/ - - title: "Sparse Attention" + - title: 'Sparse Attention' url: /tutorials/sparse-attention/ - - title: "Transformer Kernel" + - title: 'Transformer Kernel' url: /tutorials/transformer_kernel/ - - title: "ZeRO-Offload" + - title: 'ZeRO-Offload' url: /tutorials/zero-offload/ - - title: "ZeRO Redundancy Optimizer (ZeRO)" + - title: 'ZeRO Redundancy Optimizer (ZeRO)' url: /tutorials/zero/ - - title: "Contributing" + - title: 'Contributing' url: /contributing/ diff --git a/docs/_layouts/feed.html b/docs/_layouts/feed.html new file mode 100644 index 000000000000..83b137b6908e --- /dev/null +++ b/docs/_layouts/feed.html @@ -0,0 +1,22 @@ + + + {{ site.title }} + + + {{ site.time | date_to_xmlschema }} + {{ site.url }} + + {{ site.author.name }} + {{ site.author.email }} + + + {% for post in site.posts %} {% if post.tags contains page.tag-name %} + + {{ post.title | xml_escape }} + + {{ post.date | date_to_xmlschema }} + {{ site.url }}{{ post.id }} + {{ post.content | xml_escape }} + + {% endif %} {% endfor %} + diff --git a/docs/_layouts/news-home.html b/docs/_layouts/news-home.html index 8248eed5b551..116b4208e4fe 100644 --- a/docs/_layouts/news-home.html +++ b/docs/_layouts/news-home.html @@ -11,7 +11,7 @@ {% assign posts = site.posts %} {% endif %} - + + +{% for tag in site.tags %} +

{{ tag[0] }}

+ +{% endfor %} diff --git a/docs/_layouts/tags.html b/docs/_layouts/tags.html new file mode 100644 index 000000000000..43f34f6771f4 --- /dev/null +++ b/docs/_layouts/tags.html @@ -0,0 +1,16 @@ +--- +layout: default +--- + +
+

Articles tagged with ""

+
    + + {% for post in site.posts %} + {% if post.tags contains page.tag-name %} +
  • {{ post.title }}, published {{ post.date | date: "%Y-%m-%d" }}
  • + {% endif %} + {% endfor %} + +
+
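Editor's note: the tags.html layout above lists every post whose `tags` include the page's `tag-name`, and feed.html emits a matching per-tag feed, but neither hunk adds the per-tag pages themselves. A minimal sketch of what one such page's front matter might look like — the path and title here are hypothetical; only `layout` and `tag-name` are actually consumed by the layouts:

```yaml
---
layout: tags
title: 'Articles tagged with "training"'  # hypothetical title
tag-name: training                        # matched against each post's tags list
permalink: /tags/training/                # hypothetical path
---
```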
diff --git a/docs/_posts/2020-02-13-release.md b/docs/_posts/2020-02-13-release.md
index 968e97c9b420..6c5a063845ea 100644
--- a/docs/_posts/2020-02-13-release.md
+++ b/docs/_posts/2020-02-13-release.md
@@ -5,4 +5,5 @@ date: 2020-02-13
 link: https://www.microsoft.com/en-us/research/blog/zero-deepspeed-new-system-optimizations-enable-training-models-with-over-100-billion-parameters/
 excerpt: "Developed by Microsoft AI & Research."
 categories: news
+tags: training zero
 ---

diff --git a/docs/_posts/2020-02-13-turing-nlg.md b/docs/_posts/2020-02-13-turing-nlg.md
index 32ecd65379c1..05e99beca307 100644
--- a/docs/_posts/2020-02-13-turing-nlg.md
+++ b/docs/_posts/2020-02-13-turing-nlg.md
@@ -5,4 +5,5 @@ date: 2020-02-13
 link: https://www.microsoft.com/en-us/research/blog/turing-nlg-a-17-billion-parameter-language-model-by-microsoft/
 excerpt: "DeepSpeed was used to train the world's largest language model."
 categories: news
+tags: training
 ---

diff --git a/docs/_posts/2020-05-19-bert-record.md b/docs/_posts/2020-05-19-bert-record.md
index 4a00681cbfcd..db70c95cc1ab 100644
--- a/docs/_posts/2020-05-19-bert-record.md
+++ b/docs/_posts/2020-05-19-bert-record.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: true
 date: 2020-05-19 00:00:00
+tags: training
 ---

 We introduce new technology to accelerate single GPU performance via kernel

diff --git a/docs/_posts/2020-05-19-press-release.md b/docs/_posts/2020-05-19-press-release.md
index 0a247e253074..c84192b297c7 100644
--- a/docs/_posts/2020-05-19-press-release.md
+++ b/docs/_posts/2020-05-19-press-release.md
@@ -6,4 +6,5 @@ link: https://www.microsoft.com/en-us/research/blog/zero-2-deepspeed-shattering-
 categories: news
 new_post: false
 date: 2020-05-19 02:00:00
+tags: training zero
 ---

diff --git a/docs/_posts/2020-05-19-zero-stage2.md b/docs/_posts/2020-05-19-zero-stage2.md
index 5ce3ad252223..87b5214d203f 100644
--- a/docs/_posts/2020-05-19-zero-stage2.md
+++ b/docs/_posts/2020-05-19-zero-stage2.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: false
 date: 2020-05-19 01:00:00
+tags: training zero
 ---

 ZeRO-2 expands the scope of memory optimizations in the original ZeRO by

diff --git a/docs/_posts/2020-05-28-fastest-bert-training.md b/docs/_posts/2020-05-28-fastest-bert-training.md
index 923319449bbf..9abaf96ec354 100644
--- a/docs/_posts/2020-05-28-fastest-bert-training.md
+++ b/docs/_posts/2020-05-28-fastest-bert-training.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: false
 date: 2020-05-28 00:00:00
+tags: training
 ---

 Good news! **DeepSpeed obtains the fastest BERT training record: 44 minutes on

diff --git a/docs/_posts/2020-07-24-deepspeed-webinar.md b/docs/_posts/2020-07-24-deepspeed-webinar.md
index 276b97c9522e..57e67ba654c6 100644
--- a/docs/_posts/2020-07-24-deepspeed-webinar.md
+++ b/docs/_posts/2020-07-24-deepspeed-webinar.md
@@ -7,4 +7,5 @@ link: https://note.microsoft.com/MSR-Webinar-DeepSpeed-Registration-On-Demand.ht
 image: /assets/images/webinar-aug2020.png
 new_post: true
 date: 2020-07-24 00:00:00
+tags: presentations
 ---

diff --git a/docs/_posts/2020-08-07-webinar-on-demand.md b/docs/_posts/2020-08-07-webinar-on-demand.md
index 6d255520c0df..6ee2c77031ed 100644
--- a/docs/_posts/2020-08-07-webinar-on-demand.md
+++ b/docs/_posts/2020-08-07-webinar-on-demand.md
@@ -6,4 +6,5 @@ categories: news
 link: https://note.microsoft.com/MSR-Webinar-DeepSpeed-Registration-On-Demand.html
 new_post: true
 date: 2020-08-07 00:00:00
+tags: presentations
 ---

diff --git a/docs/_posts/2020-09-08-sparse-attention-news.md b/docs/_posts/2020-09-08-sparse-attention-news.md
index 6f235818c33f..2cb1d0007a91 100644
--- a/docs/_posts/2020-09-08-sparse-attention-news.md
+++ b/docs/_posts/2020-09-08-sparse-attention-news.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: true
 date: 2020-09-09 00:00:00
+tags: training
 ---

 DeepSpeed offers sparse attention kernels, an instrumental technology to support long sequences of model inputs, whether for text, image, or sound. Compared with the classic dense Transformers, it powers an order-of-magnitude longer input sequence and obtains up to 6x faster execution with comparable accuracy. It also outperforms state-of-the-art sparse implementations with 1.5-3x faster execution. Furthermore, our sparse kernels support efficient execution of flexible sparse format and empower users to innovate on their custom sparse structures.

diff --git a/docs/_posts/2020-09-09-ZeRO-Offload.md b/docs/_posts/2020-09-09-ZeRO-Offload.md
index 9a45ba8f244e..6c108d25156a 100755
--- a/docs/_posts/2020-09-09-ZeRO-Offload.md
+++ b/docs/_posts/2020-09-09-ZeRO-Offload.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: true
 date: 2020-09-09 00:00:00
+tags: training zero
 ---

 We introduce a new technology called ZeRO-Offload to enable **10X bigger model training on a single GPU**. ZeRO-Offload extends ZeRO-2 to leverage both CPU and GPU memory for training large models. Using a machine with **a single GPU**, our users now can run **models of up to 13 billion parameters** without running out of memory, 10x bigger than the existing approaches, while obtaining competitive throughput. This feature democratizes multi-billion-parameter model training and opens the window for many deep learning practitioners to explore bigger and better models.

diff --git a/docs/_posts/2020-09-09-onebit-adam-blog-post.md b/docs/_posts/2020-09-09-onebit-adam-blog-post.md
index b16a101578f0..6e8836a0e1f3 100644
--- a/docs/_posts/2020-09-09-onebit-adam-blog-post.md
+++ b/docs/_posts/2020-09-09-onebit-adam-blog-post.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: false
 date: 2020-09-09 00:00:00
+tags: training communication
 ---

 ## 1. Introduction

diff --git a/docs/_posts/2020-09-09-onebit-adam-news.md b/docs/_posts/2020-09-09-onebit-adam-news.md
index 5dc0f3bd2004..c0ffe748bad2 100644
--- a/docs/_posts/2020-09-09-onebit-adam-news.md
+++ b/docs/_posts/2020-09-09-onebit-adam-news.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: true
 date: 2020-09-09 00:00:00
+tags: training communication
 ---

diff --git a/docs/_posts/2020-09-09-pipeline-parallelism.md b/docs/_posts/2020-09-09-pipeline-parallelism.md
index d8aa20c1cee5..5b588e32f5d6 100644
--- a/docs/_posts/2020-09-09-pipeline-parallelism.md
+++ b/docs/_posts/2020-09-09-pipeline-parallelism.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: true
 date: 2020-09-09 00:00:00
+tags: training
 ---

 DeepSpeed includes new support for pipeline parallelism! DeepSpeed's training

diff --git a/docs/_posts/2020-09-09-sparse-attention.md b/docs/_posts/2020-09-09-sparse-attention.md
index 79032eeaa153..2dc0c368df82 100644
--- a/docs/_posts/2020-09-09-sparse-attention.md
+++ b/docs/_posts/2020-09-09-sparse-attention.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: true
 date: 2020-09-09 01:00:00
+tags: training inference
 ---

 Attention-based deep learning models such as the transformers are highly effective in capturing relationship between tokens in an input sequence, even across long distances. As a result, they are used with text, image, and sound-based inputs, where the sequence length can be in thousands of tokens. However, despite the effectiveness of attention modules to capture long term dependencies, in practice, their application to long sequence input is limited by compute and memory requirements of the attention computation that grow quadratically, `O(n^2)`, with the sequence length `n`.

diff --git a/docs/_posts/2020-10-28-progressive-layer-dropping-news.md b/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
index 5659cf818987..3e06f2dfe72c 100755
--- a/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
+++ b/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: true
 date: 2020-10-29 00:00:00
+tags: training
 ---

 We introduce a new technology called progressive layer dropping (PLD) to speedup the pre-training of Transformer-based networks through efficient and robust compressed training. The pre-training step of Transformer networks often suffer from unbearable overall computational expenses. We analyze the training dynamics and stability of Transformer networks and propose PLD to sparsely update Transformer blocks following a progressive dropping schedule, which smoothly increases the layer dropping rate for each mini-batch as training evolves along both the temporal and the model depth dimension. PLD is able to allow the pre-training to be **2.5X faster** to get similar accuracy on downstream tasks and allows the training to be **24% faster** when training the same number of samples, not at the cost of excessive hardware resources.

diff --git a/docs/_posts/2021-03-08-zero3-offload.md b/docs/_posts/2021-03-08-zero3-offload.md
index 8e5778afa0fc..e9c8cd470eff 100644
--- a/docs/_posts/2021-03-08-zero3-offload.md
+++ b/docs/_posts/2021-03-08-zero3-offload.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: true
 date: 2021-03-08 00:00:00
+tags: training zero
 ---

 Today we are announcing the release of ZeRO-3 Offload, a highly efficient and easy to use implementation of ZeRO Stage 3 and ZeRO Offload combined, geared towards our continued goal of democratizing AI by making efficient large-scale DL training available to everyone. The key benefits of ZeRO-3 Offload are:

diff --git a/docs/_posts/2021-05-05-MoQ.md b/docs/_posts/2021-05-05-MoQ.md
index beecf3ab9ee6..a9582c652ab4 100644
--- a/docs/_posts/2021-05-05-MoQ.md
+++ b/docs/_posts/2021-05-05-MoQ.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: false
 date: 2021-05-05 00:00:00
+tags: inference
 ---

 ## A unified suite for quantization-aware training and inference

diff --git a/docs/_posts/2021-05-05-inference-kernel-optimization.md b/docs/_posts/2021-05-05-inference-kernel-optimization.md
index 2042703f4729..218387c07f55 100644
--- a/docs/_posts/2021-05-05-inference-kernel-optimization.md
+++ b/docs/_posts/2021-05-05-inference-kernel-optimization.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: false
 date: 2021-03-16 00:00:00
+tags: inference
 ---

 While DeepSpeed supports training advanced large-scale models, using these trained models in the desired application scenarios is still challenging due to three major limitations in existing inference solutions: 1) lack of support for multi-GPU inference to fit large models and meet latency requirements, 2) limited GPU kernel performance when running inference with small batch sizes, and 3) difficulties in exploiting quantization, which includes both quantizing the model to reduce the model size and latency as well as supporting high-performance inference of quantized models without specialized hardware.

diff --git a/docs/_posts/2021-05-14-inference-release.md b/docs/_posts/2021-05-14-inference-release.md
index 5dc9fdc15c0a..658627e7115f 100644
--- a/docs/_posts/2021-05-14-inference-release.md
+++ b/docs/_posts/2021-05-14-inference-release.md
@@ -5,4 +5,5 @@ date: 2021-05-14
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-accelerating-large-scale-model-inference-and-training-via-system-optimizations-and-compression/
 excerpt: "Developed by Microsoft AI & Research."
 categories: news
+tags: inference
 ---

diff --git a/docs/_posts/2021-08-18-deepspeed-moe.md b/docs/_posts/2021-08-18-deepspeed-moe.md
index 3f21e26a74dd..0cc61f52d0fc 100644
--- a/docs/_posts/2021-08-18-deepspeed-moe.md
+++ b/docs/_posts/2021-08-18-deepspeed-moe.md
@@ -6,4 +6,5 @@ categories: news
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-powers-8x-larger-moe-model-training-with-high-performance/
 new_post: true
 date: 2021-08-18 00:00:00
+tags: training
 ---

diff --git a/docs/_posts/2021-11-15-autotuning.md b/docs/_posts/2021-11-15-autotuning.md
index dd8d94dcef02..00ea4f1aa27b 100644
--- a/docs/_posts/2021-11-15-autotuning.md
+++ b/docs/_posts/2021-11-15-autotuning.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: true
 date: 2021-11-16 00:00:00
+tags: training
 ---

 We introduce a new feature called Autotuning to automatically discover the optimal DeepSpeed configuration that delivers good training speed. One pain point in model training is to figure out good performance-relevant configurations such as micro-batch size to fully utilize the hardware and achieve a high throughput number. This configuration exploring process is commonly done manually but is important since model training is repeated many times and benefits from using a good configuration. Not only is the hand-tuning process time-consuming, but the outcome is hardware-dependent. This means that a good configuration on one hardware might not be the best on another different hardware. The user thus has to hand tune the configuration again. With DeepSpeed, there are more configuration parameters that could potentially affect the training speed, thus making it more tedious to manually tune the configuration.

diff --git a/docs/_posts/2021-12-09-deepspeed-moe-nlg.md b/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
index b6898f2313d6..ea92b791c449 100644
--- a/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
+++ b/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
@@ -5,6 +5,7 @@ excerpt: ""
 categories: news
 new_post: false
 date: 2021-12-09 22:00:00
+tags: training
 ---

 Published on December 9, 2021

diff --git a/docs/_posts/2022-01-19-moe-inference.md b/docs/_posts/2022-01-19-moe-inference.md
index 88046f72c6fa..1554f7bc5fc0 100644
--- a/docs/_posts/2022-01-19-moe-inference.md
+++ b/docs/_posts/2022-01-19-moe-inference.md
@@ -6,4 +6,5 @@ categories: news
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-advancing-moe-inference-and-training-to-power-next-generation-ai-scale/
 new_post: true
 date: 2022-01-19 00:00:00
+tags: inference
 ---
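Editor's note: the next patch moves each post's labels from `tags:` to `categories:`. Jekyll indexes both keys automatically, but `site.categories` is what the new news layout iterates, so a template can group posts per category without any custom filtering. A minimal sketch of that access pattern (markup simplified; not taken from the patch):

```html
{% assign categories = site.categories | sort %}
{% for category in categories %}
  {% assign name  = category | first %}  <!-- e.g. "training" -->
  {% assign posts = category | last %}   <!-- posts in that category, newest first -->
  <h2>{{ name }} ({{ posts | size }})</h2>
  <ul>
    {% for post in posts %}
      <li><a href="{{ post.url | relative_url }}">{{ post.title }}</a></li>
    {% endfor %}
  </ul>
{% endfor %}
```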
From 993fccd92c89258103cd86bb8fdef1a3f45dc056 Mon Sep 17 00:00:00 2001
From: Cheng Li
Date: Sat, 26 Feb 2022 10:46:03 +0500
Subject: [PATCH 03/20] use categories filtering

---
 docs/_layouts/news-home.html                    | 38 +++++++++++++-
 docs/_layouts/news.html                         | 52 +++++++++++++++++++
 docs/_posts/2020-02-13-release.md               |  3 +-
 docs/_posts/2020-02-13-turing-nlg.md            |  3 +-
 docs/_posts/2020-05-19-bert-record.md           |  3 +-
 docs/_posts/2020-05-19-press-release.md         |  3 +-
 docs/_posts/2020-05-19-zero-stage2.md           |  3 +-
 .../2020-05-28-fastest-bert-training.md         |  3 +-
 docs/_posts/2020-07-24-deepspeed-webinar.md     |  3 +-
 docs/_posts/2020-08-07-webinar-on-demand.md     |  3 +-
 .../2020-09-08-sparse-attention-news.md         |  3 +-
 docs/_posts/2020-09-09-ZeRO-Offload.md          |  3 +-
 .../2020-09-09-onebit-adam-blog-post.md         |  3 +-
 docs/_posts/2020-09-09-onebit-adam-news.md      |  3 +-
 .../_posts/2020-09-09-pipeline-parallelism.md   |  3 +-
 docs/_posts/2020-09-09-sparse-attention.md      |  3 +-
 ...0-10-28-progressive-layer-dropping-news.md   |  3 +-
 docs/_posts/2021-03-08-zero3-offload.md         |  3 +-
 docs/_posts/2021-05-05-MoQ.md                   |  3 +-
 ...021-05-05-inference-kernel-optimization.md   |  3 +-
 docs/_posts/2021-05-14-inference-release.md     |  3 +-
 docs/_posts/2021-08-18-deepspeed-moe.md         |  3 +-
 docs/_posts/2021-11-15-autotuning.md            |  3 +-
 docs/_posts/2021-12-09-deepspeed-moe-nlg.md     |  3 +-
 docs/_posts/2022-01-19-moe-inference.md         |  3 +-
 docs/news/index.html                            |  2 +-
 26 files changed, 112 insertions(+), 49 deletions(-)
 create mode 100644 docs/_layouts/news.html

diff --git a/docs/_layouts/news-home.html b/docs/_layouts/news-home.html
index 116b4208e4fe..0ebdf0ea609f 100644
--- a/docs/_layouts/news-home.html
+++ b/docs/_layouts/news-home.html
@@ -11,6 +11,40 @@
 {% assign posts = site.posts %}
 {% endif %}

+
+
+ {% for post in site.posts %}
+
+ + + +
+ {% endfor %} +
+ + + + + + + -{% for tag in site.tags %} + diff --git a/docs/_layouts/news.html b/docs/_layouts/news.html new file mode 100644 index 000000000000..d0d5f82787ac --- /dev/null +++ b/docs/_layouts/news.html @@ -0,0 +1,52 @@ +--- +layout: archive +post-content: +backsite: javascript:history.back() +--- + +{% if paginator %} + {% assign posts = paginator.posts %} +{% else %} + {% assign posts = site.posts %} +{% endif %} + + + +
+ + {% assign categories = site.categories | sort %} + {% for category in categories %} + {% assign cat = category | first %} + + {% endfor %} +
+
+ +
+ {% assign id = 0 %} + {% for post in site.posts %} + {% assign id = id | plus:1 %} +
+

+ {{ post.articletitle }}
+ · + {% include archive-single.html %} + {% if post.image %} + + {% endif %} +

+
+ {% endfor %} +
\ No newline at end of file
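Editor's note: the news.html layout above emits one button per category and one entry per post, but no filtering logic is visible in the hunk. If the buttons are meant to narrow the list client-side, one plausible wiring — assuming hypothetical entry wrappers carrying a `data-categories` attribute and buttons that call `filterSelection(...)`; none of these names are taken from the patch — would be:

```html
<script>
  // Show only entries whose data-categories contain the selected
  // category; "all" restores every entry.
  function filterSelection(category) {
    document.querySelectorAll('.archive__item').forEach(function (el) {
      var cats = (el.dataset.categories || '').split(' ');
      var show = category === 'all' || cats.indexOf(category) !== -1;
      el.style.display = show ? '' : 'none';
    });
  }
</script>
```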
diff --git a/docs/_posts/2020-02-13-release.md b/docs/_posts/2020-02-13-release.md
index 6c5a063845ea..d8482de7288b 100644
--- a/docs/_posts/2020-02-13-release.md
+++ b/docs/_posts/2020-02-13-release.md
@@ -4,6 +4,5 @@ title: "ZeRO & DeepSpeed: New system optimizations enable training models with o
 date: 2020-02-13
 link: https://www.microsoft.com/en-us/research/blog/zero-deepspeed-new-system-optimizations-enable-training-models-with-over-100-billion-parameters/
 excerpt: "Developed by Microsoft AI & Research."
-categories: news
-tags: training zero
+categories: training zero
 ---

diff --git a/docs/_posts/2020-02-13-turing-nlg.md b/docs/_posts/2020-02-13-turing-nlg.md
index 05e99beca307..cb9ee59e314f 100644
--- a/docs/_posts/2020-02-13-turing-nlg.md
+++ b/docs/_posts/2020-02-13-turing-nlg.md
@@ -4,6 +4,5 @@ title: "Turing-NLG: A 17-billion-parameter language model by Microsoft"
 date: 2020-02-13
 link: https://www.microsoft.com/en-us/research/blog/turing-nlg-a-17-billion-parameter-language-model-by-microsoft/
 excerpt: "DeepSpeed was used to train the world's largest language model."
-categories: news
-tags: training
+categories: training
 ---

diff --git a/docs/_posts/2020-05-19-bert-record.md b/docs/_posts/2020-05-19-bert-record.md
index db70c95cc1ab..ce2f58b231f1 100644
--- a/docs/_posts/2020-05-19-bert-record.md
+++ b/docs/_posts/2020-05-19-bert-record.md
@@ -2,10 +2,9 @@
 layout: single
 title: "The Fastest and Most Efficient BERT Training through Optimized Transformer Kernels"
 excerpt: ""
-categories: news
+categories: training
 new_post: true
 date: 2020-05-19 00:00:00
-tags: training
 ---

 We introduce new technology to accelerate single GPU performance via kernel

diff --git a/docs/_posts/2020-05-19-press-release.md b/docs/_posts/2020-05-19-press-release.md
index c84192b297c7..b8e2dd09b02f 100644
--- a/docs/_posts/2020-05-19-press-release.md
+++ b/docs/_posts/2020-05-19-press-release.md
@@ -3,8 +3,7 @@ layout: single
 title: "ZeRO-2 & DeepSpeed: Shattering Barriers of Deep Learning Speed & Scale"
 excerpt: ""
 link: https://www.microsoft.com/en-us/research/blog/zero-2-deepspeed-shattering-barriers-of-deep-learning-speed-scale/
-categories: news
+categories: training zero
 new_post: false
 date: 2020-05-19 02:00:00
-tags: training zero
 ---

diff --git a/docs/_posts/2020-05-19-zero-stage2.md b/docs/_posts/2020-05-19-zero-stage2.md
index 87b5214d203f..39504032d2ae 100644
--- a/docs/_posts/2020-05-19-zero-stage2.md
+++ b/docs/_posts/2020-05-19-zero-stage2.md
@@ -2,10 +2,9 @@
 layout: single
 title: "An Order-of-Magnitude Larger and Faster Training with ZeRO-2"
 excerpt: ""
-categories: news
+categories: training zero
 new_post: false
 date: 2020-05-19 01:00:00
-tags: training zero
 ---

 ZeRO-2 expands the scope of memory optimizations in the original ZeRO by

diff --git a/docs/_posts/2020-05-28-fastest-bert-training.md b/docs/_posts/2020-05-28-fastest-bert-training.md
index 9abaf96ec354..1eaf6afc4f1b 100644
--- a/docs/_posts/2020-05-28-fastest-bert-training.md
+++ b/docs/_posts/2020-05-28-fastest-bert-training.md
@@ -2,10 +2,9 @@
 layout: single
 title: "Microsoft DeepSpeed achieves the fastest BERT training time"
 excerpt: ""
-categories: news
+categories: training
 new_post: false
 date: 2020-05-28 00:00:00
-tags: training
 ---

 Good news! **DeepSpeed obtains the fastest BERT training record: 44 minutes on

diff --git a/docs/_posts/2020-07-24-deepspeed-webinar.md b/docs/_posts/2020-07-24-deepspeed-webinar.md
index 57e67ba654c6..58f3674845e3 100644
--- a/docs/_posts/2020-07-24-deepspeed-webinar.md
+++ b/docs/_posts/2020-07-24-deepspeed-webinar.md
@@ -2,10 +2,9 @@
 layout: single
 title: "DeepSpeed Microsoft Research Webinar on August 6th, 2020"
 excerpt: ""
-categories: news
+categories: presentations
 link: https://note.microsoft.com/MSR-Webinar-DeepSpeed-Registration-On-Demand.html
 image: /assets/images/webinar-aug2020.png
 new_post: true
 date: 2020-07-24 00:00:00
-tags: presentations
 ---

diff --git a/docs/_posts/2020-08-07-webinar-on-demand.md b/docs/_posts/2020-08-07-webinar-on-demand.md
index 6ee2c77031ed..3173df1f7881 100644
--- a/docs/_posts/2020-08-07-webinar-on-demand.md
+++ b/docs/_posts/2020-08-07-webinar-on-demand.md
@@ -2,9 +2,8 @@
 layout: single
 title: "DeepSpeed Microsoft Research Webinar is now on-demand"
 excerpt: ""
-categories: news
+categories: presentations
 link: https://note.microsoft.com/MSR-Webinar-DeepSpeed-Registration-On-Demand.html
 new_post: true
 date: 2020-08-07 00:00:00
-tags: presentations
 ---

diff --git a/docs/_posts/2020-09-08-sparse-attention-news.md b/docs/_posts/2020-09-08-sparse-attention-news.md
index 2cb1d0007a91..3d5c4cdcb5f9 100644
--- a/docs/_posts/2020-09-08-sparse-attention-news.md
+++ b/docs/_posts/2020-09-08-sparse-attention-news.md
@@ -2,10 +2,9 @@
 layout: single
 title: "Powering 10x longer sequences and 6x faster execution through DeepSpeed Sparse Attention"
 excerpt: ""
-categories: news
+categories: training
 new_post: true
 date: 2020-09-09 00:00:00
-tags: training
 ---

 DeepSpeed offers sparse attention kernels, an instrumental technology to support long sequences of model inputs, whether for text, image, or sound. Compared with the classic dense Transformers, it powers an order-of-magnitude longer input sequence and obtains up to 6x faster execution with comparable accuracy. It also outperforms state-of-the-art sparse implementations with 1.5-3x faster execution. Furthermore, our sparse kernels support efficient execution of flexible sparse format and empower users to innovate on their custom sparse structures.

diff --git a/docs/_posts/2020-09-09-ZeRO-Offload.md b/docs/_posts/2020-09-09-ZeRO-Offload.md
index 6c108d25156a..749683b25ec9 100755
--- a/docs/_posts/2020-09-09-ZeRO-Offload.md
+++ b/docs/_posts/2020-09-09-ZeRO-Offload.md
@@ -2,10 +2,9 @@
 layout: single
 title: "10x bigger model training on a single GPU with ZeRO-Offload"
 excerpt: ""
-categories: news
 new_post: true
 date: 2020-09-09 00:00:00
-tags: training zero
+categories: training zero
 ---

 We introduce a new technology called ZeRO-Offload to enable **10X bigger model training on a single GPU**. ZeRO-Offload extends ZeRO-2 to leverage both CPU and GPU memory for training large models. Using a machine with **a single GPU**, our users now can run **models of up to 13 billion parameters** without running out of memory, 10x bigger than the existing approaches, while obtaining competitive throughput. This feature democratizes multi-billion-parameter model training and opens the window for many deep learning practitioners to explore bigger and better models.
diff --git a/docs/_posts/2020-09-09-onebit-adam-blog-post.md b/docs/_posts/2020-09-09-onebit-adam-blog-post.md
index 6e8836a0e1f3..ffe0f69f317f 100644
--- a/docs/_posts/2020-09-09-onebit-adam-blog-post.md
+++ b/docs/_posts/2020-09-09-onebit-adam-blog-post.md
@@ -2,10 +2,9 @@
 layout: single
 title: "DeepSpeed with 1-bit Adam: 5x less communication and 3.4x faster training"
 excerpt: ""
-categories: news
 new_post: false
 date: 2020-09-09 00:00:00
-tags: training communication
+categories: training communication
 ---

 ## 1. Introduction

diff --git a/docs/_posts/2020-09-09-onebit-adam-news.md b/docs/_posts/2020-09-09-onebit-adam-news.md
index c0ffe748bad2..109631251962 100644
--- a/docs/_posts/2020-09-09-onebit-adam-news.md
+++ b/docs/_posts/2020-09-09-onebit-adam-news.md
@@ -2,10 +2,9 @@
 layout: single
 title: "Up to 5x less communication and 3.4x faster training through 1-bit Adam"
 excerpt: ""
-categories: news
 new_post: true
 date: 2020-09-09 00:00:00
-tags: training communication
+categories: training communication
 ---

diff --git a/docs/_posts/2020-09-09-pipeline-parallelism.md b/docs/_posts/2020-09-09-pipeline-parallelism.md
index 5b588e32f5d6..af537ffcd62c 100644
--- a/docs/_posts/2020-09-09-pipeline-parallelism.md
+++ b/docs/_posts/2020-09-09-pipeline-parallelism.md
@@ -2,10 +2,9 @@
 layout: single
 title: "Training a Trillion Parameters with Pipeline Parallelism"
 excerpt: ""
-categories: news
 new_post: true
 date: 2020-09-09 00:00:00
-tags: training
+categories: training
 ---

 DeepSpeed includes new support for pipeline parallelism! DeepSpeed's training

diff --git a/docs/_posts/2020-09-09-sparse-attention.md b/docs/_posts/2020-09-09-sparse-attention.md
index 2dc0c368df82..40fdfb449ef1 100644
--- a/docs/_posts/2020-09-09-sparse-attention.md
+++ b/docs/_posts/2020-09-09-sparse-attention.md
@@ -2,10 +2,9 @@
 layout: single
 title: "DeepSpeed Sparse Attention"
 excerpt: ""
-categories: news
 new_post: true
 date: 2020-09-09 01:00:00
-tags: training inference
+categories: training inference
 ---

 Attention-based deep learning models such as the transformers are highly effective in capturing relationship between tokens in an input sequence, even across long distances. As a result, they are used with text, image, and sound-based inputs, where the sequence length can be in thousands of tokens. However, despite the effectiveness of attention modules to capture long term dependencies, in practice, their application to long sequence input is limited by compute and memory requirements of the attention computation that grow quadratically, `O(n^2)`, with the sequence length `n`.

diff --git a/docs/_posts/2020-10-28-progressive-layer-dropping-news.md b/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
index 3e06f2dfe72c..53a995da93b6 100755
--- a/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
+++ b/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
@@ -2,10 +2,9 @@
 layout: single
 title: "Progressive Layer Dropping"
 excerpt: ""
-categories: news
 new_post: true
 date: 2020-10-29 00:00:00
-tags: training
+categories: training
 ---

 We introduce a new technology called progressive layer dropping (PLD) to speedup the pre-training of Transformer-based networks through efficient and robust compressed training. The pre-training step of Transformer networks often suffer from unbearable overall computational expenses. We analyze the training dynamics and stability of Transformer networks and propose PLD to sparsely update Transformer blocks following a progressive dropping schedule, which smoothly increases the layer dropping rate for each mini-batch as training evolves along both the temporal and the model depth dimension. PLD is able to allow the pre-training to be **2.5X faster** to get similar accuracy on downstream tasks and allows the training to be **24% faster** when training the same number of samples, not at the cost of excessive hardware resources.

diff --git a/docs/_posts/2021-03-08-zero3-offload.md b/docs/_posts/2021-03-08-zero3-offload.md
index e9c8cd470eff..ea394e46fc37 100644
--- a/docs/_posts/2021-03-08-zero3-offload.md
+++ b/docs/_posts/2021-03-08-zero3-offload.md
@@ -2,10 +2,9 @@
 layout: single
 title: "DeepSpeed ZeRO-3 Offload"
 excerpt: ""
-categories: news
 new_post: true
 date: 2021-03-08 00:00:00
-tags: training zero
+categories: training zero
 ---

 Today we are announcing the release of ZeRO-3 Offload, a highly efficient and easy to use implementation of ZeRO Stage 3 and ZeRO Offload combined, geared towards our continued goal of democratizing AI by making efficient large-scale DL training available to everyone. The key benefits of ZeRO-3 Offload are:

diff --git a/docs/_posts/2021-05-05-MoQ.md b/docs/_posts/2021-05-05-MoQ.md
index a9582c652ab4..8e2255293dbe 100644
--- a/docs/_posts/2021-05-05-MoQ.md
+++ b/docs/_posts/2021-05-05-MoQ.md
@@ -2,10 +2,9 @@
 layout: single
 title: "Mixture-of-Quantization: A novel quantization approach for reducing model size with minimal accuracy impact"
 excerpt: ""
-categories: news
 new_post: false
 date: 2021-05-05 00:00:00
-tags: inference
+categories: inference
 ---

 ## A unified suite for quantization-aware training and inference

diff --git a/docs/_posts/2021-05-05-inference-kernel-optimization.md b/docs/_posts/2021-05-05-inference-kernel-optimization.md
index 218387c07f55..933e00fa7449 100644
--- a/docs/_posts/2021-05-05-inference-kernel-optimization.md
+++ b/docs/_posts/2021-05-05-inference-kernel-optimization.md
@@ -2,10 +2,9 @@
 layout: single
 title: "DeepSpeed Inference: Multi-GPU inference with customized inference kernels and quantization support"
 excerpt: ""
-categories: news
 new_post: false
 date: 2021-03-16 00:00:00
-tags: inference
+categories: inference
 ---

 While DeepSpeed supports training advanced large-scale models, using these trained models in the desired application scenarios is still challenging due to three major limitations in existing inference solutions: 1) lack of support for multi-GPU inference to fit large models and meet latency requirements, 2) limited GPU kernel performance when running inference with small batch sizes, and 3) difficulties in exploiting quantization, which includes both quantizing the model to reduce the model size and latency as well as supporting high-performance inference of quantized models without specialized hardware.

diff --git a/docs/_posts/2021-05-14-inference-release.md b/docs/_posts/2021-05-14-inference-release.md
index 658627e7115f..2f6755d3679d 100644
--- a/docs/_posts/2021-05-14-inference-release.md
+++ b/docs/_posts/2021-05-14-inference-release.md
@@ -4,6 +4,5 @@ title: "DeepSpeed: Accelerating large-scale model inference and training via sys
 date: 2021-05-14
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-accelerating-large-scale-model-inference-and-training-via-system-optimizations-and-compression/
 excerpt: "Developed by Microsoft AI & Research."
-categories: news
-tags: inference
+categories: inference
 ---

diff --git a/docs/_posts/2021-08-18-deepspeed-moe.md b/docs/_posts/2021-08-18-deepspeed-moe.md
index 0cc61f52d0fc..86e27ff2edb7 100644
--- a/docs/_posts/2021-08-18-deepspeed-moe.md
+++ b/docs/_posts/2021-08-18-deepspeed-moe.md
@@ -2,9 +2,8 @@
 layout: single
 title: "DeepSpeed powers 8x larger MoE model training with high performance"
 excerpt: ""
-categories: news
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-powers-8x-larger-moe-model-training-with-high-performance/
 new_post: true
 date: 2021-08-18 00:00:00
-tags: training
+categories: training
 ---

diff --git a/docs/_posts/2021-11-15-autotuning.md b/docs/_posts/2021-11-15-autotuning.md
index 00ea4f1aa27b..509f691e57f1 100644
--- a/docs/_posts/2021-11-15-autotuning.md
+++ b/docs/_posts/2021-11-15-autotuning.md
@@ -2,10 +2,9 @@
 layout: single
 title: "Autotuning: Automatically discover the optimal DeepSpeed configuration that delivers good training speed"
 excerpt: ""
-categories: news
 new_post: true
 date: 2021-11-16 00:00:00
-tags: training
+categories: training
 ---

 We introduce a new feature called Autotuning to automatically discover the optimal DeepSpeed configuration that delivers good training speed. One pain point in model training is to figure out good performance-relevant configurations such as micro-batch size to fully utilize the hardware and achieve a high throughput number. This configuration exploring process is commonly done manually but is important since model training is repeated many times and benefits from using a good configuration. Not only is the hand-tuning process time-consuming, but the outcome is hardware-dependent. This means that a good configuration on one hardware might not be the best on another different hardware. The user thus has to hand tune the configuration again. With DeepSpeed, there are more configuration parameters that could potentially affect the training speed, thus making it more tedious to manually tune the configuration.
diff --git a/docs/_posts/2021-12-09-deepspeed-moe-nlg.md b/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
index ea92b791c449..2e2b19c9cc2e 100644
--- a/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
+++ b/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
@@ -2,10 +2,9 @@
 layout: single
 title: "DeepSpeed-MoE for NLG: Reducing the training cost of language models by 5 times"
 excerpt: ""
-categories: news
 new_post: false
 date: 2021-12-09 22:00:00
-tags: training
+categories: training
 ---

 Published on December 9, 2021

diff --git a/docs/_posts/2022-01-19-moe-inference.md b/docs/_posts/2022-01-19-moe-inference.md
index 1554f7bc5fc0..3cfab04b96cc 100644
--- a/docs/_posts/2022-01-19-moe-inference.md
+++ b/docs/_posts/2022-01-19-moe-inference.md
@@ -2,9 +2,8 @@
 layout: single
 title: "DeepSpeed: Advancing MoE inference and training to power next-generation AI scale"
 excerpt: ""
-categories: news
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-advancing-moe-inference-and-training-to-power-next-generation-ai-scale/
 new_post: true
 date: 2022-01-19 00:00:00
-tags: inference
+categories: inference
 ---

diff --git a/docs/news/index.html b/docs/news/index.html
index 95e7974b5050..6cfe0b7a14c1 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -1,3 +1,3 @@
 ---
-layout: news-home
+layout: news
 ---

From 234d7c2bf147053cb1af96438796cb2bfec4542c Mon Sep 17 00:00:00 2001
From: Cheng Li
Date: Sat, 26 Feb 2022 14:02:25 +0500
Subject: [PATCH 04/20] refactor posts layout and presentation

---
 docs/_config.yml                                |   9 +-
 docs/_data/navigation.yml                       |   2 +-
 docs/_includes/analytics.html                   |  14 +
 docs/_includes/archive-single.html              |  30 +++
 .../author-profile-custom-links.html            |   7 +
 docs/_includes/author-profile.html              | 252 ++++++++++++++++++
 docs/_includes/breadcrumbs.html                 |  39 +++
 docs/_includes/browser-upgrade.html             |   3 +
 docs/_includes/category-list.html               |  26 ++
 docs/_includes/comment.html                     |  22 ++
 docs/_includes/comments.html                    | 159 +++++++++++
 docs/_includes/documents-collection.html        |  21 ++
 docs/_includes/feature_row                      |  41 +++
 docs/_includes/figure                           |   9 +
 docs/_includes/footer.html                      |  19 ++
 docs/_includes/gallery                          |  35 +++
 docs/_includes/group-by-array                   |  47 ++++
 docs/_includes/head.html                        |  37 +++
 docs/_includes/masthead.html                    |  37 +++
 docs/_includes/nav_list                         |  26 ++
 docs/_includes/page__date.html                  |   5 +
 docs/_includes/page__hero.html                  |  51 ++++
 docs/_includes/page__hero_video.html            |   2 +
 docs/_includes/page__meta.html                  |  30 +++
 docs/_includes/page__taxonomy.html              |   7 +
 docs/_includes/paginator.html                   |  69 +++++
 docs/_includes/post_pagination.html             |  14 +
 docs/_includes/posts-category.html              |   5 +
 docs/_includes/posts-tag.html                   |   5 +
 docs/_includes/scripts.html                     |  28 ++
 docs/_includes/seo.html                         | 155 +++++++++++
 docs/_includes/sidebar.html                     |  19 ++
 docs/_includes/skip-links.html                  |   8 +
 docs/_includes/social-share.html                |  11 +
 docs/_includes/tag-list.html                    |  26 ++
 docs/_includes/toc                              |   7 +
 docs/_includes/toc.html                         |  96 +++++++
 docs/_includes/video                            |  24 ++
 docs/_layouts/feed.html                         |  22 --
 docs/_layouts/news-home.html                    |  67 -----
 docs/_layouts/single-full.html                  |  82 ++++++
 docs/_layouts/tags.html                         |  16 --
 docs/_posts/2020-02-13-release.md               |   3 +-
 docs/_posts/2020-02-13-turing-nlg.md            |   3 +-
 docs/_posts/2020-05-19-bert-record.md           |   4 +-
 docs/_posts/2020-05-19-press-release.md         |   4 +-
 docs/_posts/2020-05-19-zero-stage2.md           |   4 +-
 .../2020-05-28-fastest-bert-training.md         |   4 +-
 docs/_posts/2020-07-24-deepspeed-webinar.md     |   4 +-
 docs/_posts/2020-08-07-webinar-on-demand.md     |   4 +-
 .../2020-09-08-sparse-attention-news.md         |   4 +-
 docs/_posts/2020-09-09-ZeRO-Offload.md         |   4 +-
 .../2020-09-09-onebit-adam-blog-post.md         |   4 +-
 docs/_posts/2020-09-09-onebit-adam-news.md      |   4 +-
 .../_posts/2020-09-09-pipeline-parallelism.md   |   4 +-
 docs/_posts/2020-09-09-sparse-attention.md      |   4 +-
 ...0-10-28-progressive-layer-dropping-news.md   |   4 +-
 docs/_posts/2021-03-08-zero3-offload.md         |   4 +-
 docs/_posts/2021-05-05-MoQ.md                   |   4 +-
 ...021-05-05-inference-kernel-optimization.md   |   4 +-
 docs/_posts/2021-05-14-inference-release.md     |   3 +-
 docs/_posts/2021-08-18-deepspeed-moe.md         |   4 +-
 docs/_posts/2021-11-15-autotuning.md            |   6 +-
 docs/_posts/2021-12-09-deepspeed-moe-nlg.md     |   6 +-
 docs/_posts/2022-01-19-moe-inference.md         |   4 +-
 docs/_sass/minimal-mistakes.scss                |  65 +++++
 docs/assets/css/main.scss                       |  64 +++++
 docs/news/index.html                            |   3 -
 docs/{_layouts/news.html => tags.md}            |  30 ++-
 docs/tags_list.md                               |  29 ++
 70 files changed, 1675 insertions(+), 193 deletions(-)
 create mode 100644 docs/_includes/analytics.html
 create mode 100644 docs/_includes/archive-single.html
 create mode 100644 docs/_includes/author-profile-custom-links.html
 create mode 100644 docs/_includes/author-profile.html
 create mode 100644 docs/_includes/breadcrumbs.html
 create mode 100644 docs/_includes/browser-upgrade.html
 create mode 100644 docs/_includes/category-list.html
 create mode 100644 docs/_includes/comment.html
 create mode 100644 docs/_includes/comments.html
 create mode 100644 docs/_includes/documents-collection.html
 create mode 100644 docs/_includes/feature_row
 create mode 100644 docs/_includes/figure
 create mode 100644 docs/_includes/footer.html
 create mode 100644 docs/_includes/gallery
 create mode 100644 docs/_includes/group-by-array
 create mode 100644 docs/_includes/head.html
 create mode 100644 docs/_includes/masthead.html
 create mode 100644 docs/_includes/nav_list
 create mode 100644 docs/_includes/page__date.html
 create mode 100644 docs/_includes/page__hero.html
 create mode 100644 docs/_includes/page__hero_video.html
 create mode 100644 docs/_includes/page__meta.html
 create mode 100644 docs/_includes/page__taxonomy.html
 create mode 100644 docs/_includes/paginator.html
 create mode 100644 docs/_includes/post_pagination.html
 create mode 100644 docs/_includes/posts-category.html
 create mode 100644 docs/_includes/posts-tag.html
 create mode 100644 docs/_includes/scripts.html
 create mode 100644 docs/_includes/seo.html
 create mode 100644 docs/_includes/sidebar.html
 create mode 100644 docs/_includes/skip-links.html
 create mode 100644 docs/_includes/social-share.html
 create mode 100644 docs/_includes/tag-list.html
 create mode 100644 docs/_includes/toc
 create mode 100644 docs/_includes/toc.html
 create mode 100644 docs/_includes/video
 delete mode 100644 docs/_layouts/feed.html
 delete mode 100644 docs/_layouts/news-home.html
 create mode 100644 docs/_layouts/single-full.html
 delete mode 100644 docs/_layouts/tags.html
 create mode 100644 docs/_sass/minimal-mistakes.scss
 delete mode 100644 docs/news/index.html
 rename docs/{_layouts/news.html => tags.md} (63%)
 create mode 100644 docs/tags_list.md

diff --git a/docs/_config.yml b/docs/_config.yml
index 87cc953255fe..e5642cbfcded 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -78,8 +78,15 @@ defaults:
       path: ""
       type: posts
     values:
-      layout: single
+      layout: single-full
+      classes: wide2
+      author_profile: false
+      read_time: false
+      comments: false
       share: true
+      related: false
+      toc: false
+      show_date: true

 analytics:
   provider: "google-gtag"
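Editor's note: these `_config.yml` defaults apply to every file under `_posts`, so each post now gets the `single-full` layout and `wide2` class without declaring them, and the later hunks in this patch can drop the per-post keys. Any post can still override a default in its own front matter; a hypothetical example:

```yaml
---
layout: single   # override the single-full default for this post only
toc: true        # re-enable a feature the site-wide defaults turn off
---
```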
diff --git a/docs/_data/navigation.yml b/docs/_data/navigation.yml
index 00a3184b5b2e..9fa5e12b4d79 100755
--- a/docs/_data/navigation.yml
+++ b/docs/_data/navigation.yml
@@ -2,7 +2,7 @@ main:
   - title: 'Getting Started'
     url: /getting-started/
   - title: 'Blog'
-    url: /news/
+    url: /posts/
   - title: 'Tutorials'
     url: /tutorials/
   - title: 'Documentation'

diff --git a/docs/_includes/analytics.html b/docs/_includes/analytics.html
new file mode 100644
index 000000000000..371469f0af9b
--- /dev/null
+++ b/docs/_includes/analytics.html
@@ -0,0 +1,14 @@
+{% if jekyll.environment == 'production' and site.analytics.provider and page.analytics != false %}
+
+{% case site.analytics.provider %}
+{% when "google" %}
+  {% include /analytics-providers/google.html %}
+{% when "google-universal" %}
+  {% include /analytics-providers/google-universal.html %}
+{% when "google-gtag" %}
+  {% include /analytics-providers/google-gtag.html %}
+{% when "custom" %}
+  {% include /analytics-providers/custom.html %}
+{% endcase %}
+
+{% endif %}
\ No newline at end of file

diff --git a/docs/_includes/archive-single.html b/docs/_includes/archive-single.html
new file mode 100644
index 000000000000..68174807ef50
--- /dev/null
+++ b/docs/_includes/archive-single.html
@@ -0,0 +1,30 @@
+{% if post.header.teaser %}
+  {% capture teaser %}{{ post.header.teaser }}{% endcapture %}
+{% else %}
+  {% assign teaser = site.teaser %}
+{% endif %}
+
+{% if post.id %}
+  {% assign title = post.title | markdownify | remove: "<p>" | remove: "</p>" %}
+{% else %}
+  {% assign title = post.title %}
+{% endif %}
+
+
+ {% if include.type == "grid" and teaser %} +
+ +
+ {% endif %} +

+ {% if post.link %} + {{ title }} Permalink + {% else %} + {{ title }} + {% endif %} +

+ {% include page__meta.html type=include.type %} + {% if post.excerpt %}

{{ post.excerpt | markdownify | strip_html | truncate: 160 }}

{% endif %} +
+
diff --git a/docs/_includes/author-profile-custom-links.html b/docs/_includes/author-profile-custom-links.html new file mode 100644 index 000000000000..b89ffcb7df6c --- /dev/null +++ b/docs/_includes/author-profile-custom-links.html @@ -0,0 +1,7 @@ + \ No newline at end of file diff --git a/docs/_includes/author-profile.html b/docs/_includes/author-profile.html new file mode 100644 index 000000000000..d384ee734376 --- /dev/null +++ b/docs/_includes/author-profile.html @@ -0,0 +1,252 @@ +{% assign author = page.author | default: page.authors[0] | default: site.author %} +{% assign author = site.data.authors[author] | default: author %} + +
+ + {% if author.avatar %} +
+ {% if author.home %} + + {{ author.name }} + + {% else %} + {{ author.name }} + {% endif %} +
+ {% endif %} + +
+ {% if author.home %} +

{{ author.name }}

+ {% else %} +

{{ author.name }}

+ {% endif %} + {% if author.bio %} +
+ {{ author.bio | markdownify }} +
+ {% endif %} +
+ +
+ + +
+
diff --git a/docs/_includes/breadcrumbs.html b/docs/_includes/breadcrumbs.html new file mode 100644 index 000000000000..cba3d415fa4f --- /dev/null +++ b/docs/_includes/breadcrumbs.html @@ -0,0 +1,39 @@ +{% case site.category_archive.type %} + {% when "liquid" %} + {% assign path_type = "#" %} + {% when "jekyll-archives" %} + {% assign path_type = nil %} +{% endcase %} + +{% if page.collection != 'posts' %} + {% assign path_type = nil %} + {% assign crumb_path = '/' %} +{% else %} + {% assign crumb_path = site.category_archive.path %} +{% endif %} + + diff --git a/docs/_includes/browser-upgrade.html b/docs/_includes/browser-upgrade.html new file mode 100644 index 000000000000..ec6ad0acc566 --- /dev/null +++ b/docs/_includes/browser-upgrade.html @@ -0,0 +1,3 @@ + diff --git a/docs/_includes/category-list.html b/docs/_includes/category-list.html new file mode 100644 index 000000000000..d684a2829f3b --- /dev/null +++ b/docs/_includes/category-list.html @@ -0,0 +1,26 @@ +{% case site.category_archive.type %} + {% when "liquid" %} + {% assign path_type = "#" %} + {% when "jekyll-archives" %} + {% assign path_type = nil %} +{% endcase %} + +{% if site.category_archive.path %} + {% comment %} + + + {% endcomment %} + {% capture page_categories %}{% for category in page.categories %}{{ category | downcase }}|{{ category }}{% unless forloop.last %},{% endunless %}{% endfor %}{% endcapture %} + {% assign category_hashes = page_categories | split: ',' | sort %} + +

+ {{ site.data.ui-text[site.locale].categories_label | default: "Categories:" }} + + {% for hash in category_hashes %} + {% assign keyValue = hash | split: '|' %} + {% capture category_word %}{{ keyValue[1] | strip_newlines }}{% endcapture %} + {% unless forloop.last %}, {% endunless %} + {% endfor %} + +

+{% endif %} \ No newline at end of file diff --git a/docs/_includes/comment.html b/docs/_includes/comment.html new file mode 100644 index 000000000000..2e3013ee2a76 --- /dev/null +++ b/docs/_includes/comment.html @@ -0,0 +1,22 @@ + diff --git a/docs/_includes/comments.html b/docs/_includes/comments.html new file mode 100644 index 000000000000..dbb90d1e3a0f --- /dev/null +++ b/docs/_includes/comments.html @@ -0,0 +1,159 @@ +
+ {% capture comments_label %}{{ site.data.ui-text[site.locale].comments_label | default: "Comments" }}{% endcapture %} + {% case site.comments.provider %} + {% when "discourse" %} +

{{ comments_label }}

+
+ {% when "disqus" %} +

{{ comments_label }}

+
+ {% when "facebook" %} +

{{ comments_label }}

+
+ {% when "staticman_v2" %} +
+ {% if site.repository and site.comments.staticman.branch %} + +
+ {% if site.data.comments[page.slug] %} +

{{ site.data.ui-text[site.locale].comments_title | default: "Comments" }}

+ {% assign comments = site.data.comments[page.slug] | sort %} + + {% for comment in comments %} + {% assign email = comment[1].email %} + {% assign name = comment[1].name %} + {% assign url = comment[1].url %} + {% assign date = comment[1].date %} + {% assign message = comment[1].message %} + {% include comment.html index=forloop.index email=email name=name url=url date=date message=message %} + {% endfor %} + {% endif %} +
+ + + +
+

{{ site.data.ui-text[site.locale].comments_label | default: "Leave a Comment" }}

+

{{ site.data.ui-text[site.locale].comment_form_info | default: "Your email address will not be published. Required fields are marked" }} *

+
+
+ + {{ site.data.ui-text[site.locale].loading_label | default: "Loading..." }} +
+ +
+ + + +
+
+ + +
+
+ + +
+
+ + +
+ + + + + {% if site.reCaptcha.siteKey %} +
+
+
+ {% endif %} +
+ +
+
+
+ + {% if site.reCaptcha.siteKey %}{% endif %} + {% endif %} +
+ {% when "staticman" %} +
+ {% if site.repository and site.staticman.branch %} + +
+ {% if site.data.comments[page.slug] %} +

{{ site.data.ui-text[site.locale].comments_title | default: "Comments" }}

+ {% assign comments = site.data.comments[page.slug] | sort %} + + {% for comment in comments %} + {% assign email = comment[1].email %} + {% assign name = comment[1].name %} + {% assign url = comment[1].url %} + {% assign date = comment[1].date %} + {% assign message = comment[1].message %} + {% include comment.html index=forloop.index email=email name=name url=url date=date message=message %} + {% endfor %} + {% endif %} +
+ + + +
+

{{ site.data.ui-text[site.locale].comments_label | default: "Leave a Comment" }}

+

{{ site.data.ui-text[site.locale].comment_form_info | default: "Your email address will not be published. Required fields are marked" }} *

+
+
+ + {{ site.data.ui-text[site.locale].loading_label | default: "Loading..." }} +
+ +
+ + + +
+
+ + +
+
+ + +
+
+ + +
+ + + + +
+ +
+
+
+ + {% endif %} +
+ {% when "utterances" %} +

{{ comments_label }}

+
+ {% when "custom" %} + {% include /comments-providers/custom.html %} + {% endcase %} +
diff --git a/docs/_includes/documents-collection.html b/docs/_includes/documents-collection.html new file mode 100644 index 000000000000..376a509e3e49 --- /dev/null +++ b/docs/_includes/documents-collection.html @@ -0,0 +1,21 @@ +{% assign entries = site[include.collection] %} + +{% if include.sort_by == 'title' %} + {% if include.sort_order == 'reverse' %} + {% assign entries = entries | sort: 'title' | reverse %} + {% else %} + {% assign entries = entries | sort: 'title' %} + {% endif %} +{% elsif include.sort_by == 'date' %} + {% if include.sort_order == 'reverse' %} + {% assign entries = entries | sort: 'date' | reverse %} + {% else %} + {% assign entries = entries | sort: 'date' %} + {% endif %} +{% endif %} + +{%- for post in entries -%} + {%- unless post.hidden -%} + {% include archive-single.html %} + {%- endunless -%} +{%- endfor -%} diff --git a/docs/_includes/feature_row b/docs/_includes/feature_row new file mode 100644 index 000000000000..03f09c15cf0c --- /dev/null +++ b/docs/_includes/feature_row @@ -0,0 +1,41 @@ +{% if include.id %} + {% assign feature_row = page[include.id] %} +{% else %} + {% assign feature_row = page.feature_row %} +{% endif %} + +
+ + {% for f in feature_row %} +
+
+ {% if f.image_path %} +
+ {% if f.alt %}{{ f.alt }}{% endif %} + {% if f.image_caption %} + {{ f.image_caption | markdownify | remove: "

" | remove: "

" }}
+ {% endif %} +
+ {% endif %} + +
+ {% if f.title %} +

{{ f.title }}

+ {% endif %} + + {% if f.excerpt %} +
+ {{ f.excerpt | markdownify }} +
+ {% endif %} + + {% if f.url %} +

{{ f.btn_label | default: site.data.ui-text[site.locale].more_label | default: "Learn More" }}

+ {% endif %} +
+
+
+ {% endfor %} + +
diff --git a/docs/_includes/figure b/docs/_includes/figure new file mode 100644 index 000000000000..f1ce1ebcc110 --- /dev/null +++ b/docs/_includes/figure @@ -0,0 +1,9 @@ +
+ {% if include.alt %}{{ include.alt }}{% endif %} + {% if include.caption %} +
+ {{ include.caption | markdownify | remove: "

" | remove: "

" }} +
+ {% endif %} +
diff --git a/docs/_includes/footer.html b/docs/_includes/footer.html new file mode 100644 index 000000000000..2bc78963ce39 --- /dev/null +++ b/docs/_includes/footer.html @@ -0,0 +1,19 @@ + + + diff --git a/docs/_includes/gallery b/docs/_includes/gallery new file mode 100644 index 000000000000..71a9e1e1b3d1 --- /dev/null +++ b/docs/_includes/gallery @@ -0,0 +1,35 @@ +{% if include.id %} + {% assign gallery = page[include.id] %} +{% else %} + {% assign gallery = page.gallery %} +{% endif %} + +{% if include.layout %} + {% assign gallery_layout = include.layout %} +{% else %} + {% if gallery.size == 2 %} + {% assign gallery_layout = 'half' %} + {% elsif gallery.size >= 3 %} + {% assign gallery_layout = 'third' %} + {% else %} + {% assign gallery_layout = '' %} + {% endif %} +{% endif %} + + diff --git a/docs/_includes/group-by-array b/docs/_includes/group-by-array new file mode 100644 index 000000000000..708de41ae3f8 --- /dev/null +++ b/docs/_includes/group-by-array @@ -0,0 +1,47 @@ + + + +{% assign __empty_array = '' | split: ',' %} +{% assign group_names = __empty_array %} +{% assign group_items = __empty_array %} + + +{% assign __names = include.collection | map: include.field %} + + +{% assign __names = __names | join: ',' | join: ',' | split: ',' %} + + +{% assign __names = __names | sort %} +{% for name in __names %} + + +{% unless name == previous %} + + +{% assign group_names = group_names | push: name %} +{% endunless %} + +{% assign previous = name %} +{% endfor %} + + + +{% for name in group_names %} + + +{% assign __item = __empty_array %} +{% for __element in include.collection %} +{% if __element[include.field] contains name %} +{% assign __item = __item | push: __element %} +{% endif %} +{% endfor %} + + +{% assign group_items = group_items | push: __item %} +{% endfor %} \ No newline at end of file diff --git a/docs/_includes/head.html b/docs/_includes/head.html new file mode 100644 index 000000000000..3b99471f5ec5 --- /dev/null +++ b/docs/_includes/head.html @@ -0,0 +1,37 @@ + + +{% include seo.html %} + + + + + + + + + + + + + + +{% if site.head_scripts %} + {% for script in site.head_scripts %} + + {% endfor %} +{% endif %} diff --git a/docs/_includes/masthead.html b/docs/_includes/masthead.html new file mode 100644 index 000000000000..47cce0a3545e --- /dev/null +++ b/docs/_includes/masthead.html @@ -0,0 +1,37 @@ +{% capture logo_path %}{{ site.logo }}{% endcapture %} + +
+
+
+ +
+
+
diff --git a/docs/_includes/nav_list b/docs/_includes/nav_list new file mode 100644 index 000000000000..a035a5bd7b15 --- /dev/null +++ b/docs/_includes/nav_list @@ -0,0 +1,26 @@ +{% assign navigation = site.data.navigation[include.nav] %} + + diff --git a/docs/_includes/page__date.html b/docs/_includes/page__date.html new file mode 100644 index 000000000000..e663f9b9c7f0 --- /dev/null +++ b/docs/_includes/page__date.html @@ -0,0 +1,5 @@ +{% if page.last_modified_at %} +

{{ site.data.ui-text[site.locale].date_label | default: "Updated:" }}

+{% elsif page.date %} +

{{ site.data.ui-text[site.locale].date_label | default: "Updated:" }}

+{% endif %} diff --git a/docs/_includes/page__hero.html b/docs/_includes/page__hero.html new file mode 100644 index 000000000000..3f55aaa60ac5 --- /dev/null +++ b/docs/_includes/page__hero.html @@ -0,0 +1,51 @@ +{% capture overlay_img_path %}{{ page.header.overlay_image | relative_url }}{% endcapture %} + +{% if page.header.overlay_filter contains "rgba" %} + {% capture overlay_filter %}{{ page.header.overlay_filter }}{% endcapture %} +{% elsif page.header.overlay_filter %} + {% capture overlay_filter %}rgba(0, 0, 0, {{ page.header.overlay_filter }}){% endcapture %} +{% endif %} + +{% if page.header.image_description %} + {% assign image_description = page.header.image_description %} +{% else %} + {% assign image_description = page.title %} +{% endif %} + +{% assign image_description = image_description | markdownify | strip_html | strip_newlines | escape_once %} + +
+ {% if page.header.overlay_color or page.header.overlay_image %} +
+

+ {% if paginator and site.paginate_show_page_num %} + {{ site.title }}{% unless paginator.page == 1 %} {{ site.data.ui-text[site.locale].page | default: "Page" }} {{ paginator.page }}{% endunless %} + {% else %} + {{ page.title | default: site.title | markdownify | remove: "

" | remove: "

" }} + {% endif %} +

+ {% if page.tagline %} +

{{ page.tagline | markdownify | remove: "

" | remove: "

" }}

+ {% elsif page.header.show_overlay_excerpt != false and page.excerpt %} +

{{ page.excerpt | markdownify | remove: "

" | remove: "

" }}

+ {% endif %} + {% include page__meta.html %} + {% if page.header.cta_url %} +

{{ page.header.cta_label | default: site.data.ui-text[site.locale].more_label | default: "Learn More" }}

+ {% endif %} + {% if page.header.actions %} +

+ {% for action in page.header.actions %} + {{ action.label | default: site.data.ui-text[site.locale].more_label | default: "Learn More" }} + {% endfor %} + {% endif %} +

+ {% else %} + {{ image_description }} + {% endif %} + {% if page.header.caption %} + {{ page.header.caption | markdownify | remove: "

" | remove: "

" }}
+ {% endif %} +
diff --git a/docs/_includes/page__hero_video.html b/docs/_includes/page__hero_video.html new file mode 100644 index 000000000000..a313a23d45b9 --- /dev/null +++ b/docs/_includes/page__hero_video.html @@ -0,0 +1,2 @@ +{% assign video = page.header.video %} +{% include video id=video.id provider=video.provider danmaku=video.danmaku %} diff --git a/docs/_includes/page__meta.html b/docs/_includes/page__meta.html new file mode 100644 index 000000000000..1afc3d8f85eb --- /dev/null +++ b/docs/_includes/page__meta.html @@ -0,0 +1,30 @@ +{% assign document = post | default: page %} +{% if document.read_time or document.show_date %} +

+ {% if document.show_date and document.date %} + {% assign date = document.date %} + + + + + {% endif %} + + {% if document.read_time and document.show_date %}{% endif %} + + {% if document.read_time %} + {% assign words_per_minute = document.words_per_minute | default: site.words_per_minute | default: 200 %} + {% assign words = document.content | strip_html | number_of_words %} + + + + {% if words < words_per_minute %} + {{ site.data.ui-text[site.locale].less_than | default: "less than" }} 1 {{ site.data.ui-text[site.locale].minute_read | default: "minute read" }} + {% elsif words == words_per_minute %} + 1 {{ site.data.ui-text[site.locale].minute_read | default: "minute read" }} + {% else %} + {{ words | divided_by: words_per_minute }} {{ site.data.ui-text[site.locale].minute_read | default: "minute read" }} + {% endif %} + + {% endif %} +
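+  {% comment %}
+    Worked example of the read-time arithmetic above: with the default
+    words_per_minute of 200, a 1,100-word post computes 1100 / 200 = 5
+    under integer division and renders as "5 minute read"; a post under
+    200 words renders as "less than 1 minute read".
+  {% endcomment %}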

+{% endif %} diff --git a/docs/_includes/page__taxonomy.html b/docs/_includes/page__taxonomy.html new file mode 100644 index 000000000000..75c76c81ddf2 --- /dev/null +++ b/docs/_includes/page__taxonomy.html @@ -0,0 +1,7 @@ +{% if site.tag_archive.type and page.tags[0] %} + {% include tag-list.html %} +{% endif %} + +{% if site.category_archive.type and page.categories[0] %} + {% include category-list.html %} +{% endif %} \ No newline at end of file diff --git a/docs/_includes/paginator.html b/docs/_includes/paginator.html new file mode 100644 index 000000000000..bffa0794678e --- /dev/null +++ b/docs/_includes/paginator.html @@ -0,0 +1,69 @@ +{% if paginator.total_pages > 1 %} + +{% endif %} diff --git a/docs/_includes/post_pagination.html b/docs/_includes/post_pagination.html new file mode 100644 index 000000000000..a93c6279763b --- /dev/null +++ b/docs/_includes/post_pagination.html @@ -0,0 +1,14 @@ +{% if page.previous or page.next %} + +{% endif %} \ No newline at end of file diff --git a/docs/_includes/posts-category.html b/docs/_includes/posts-category.html new file mode 100644 index 000000000000..b364f30e94e5 --- /dev/null +++ b/docs/_includes/posts-category.html @@ -0,0 +1,5 @@ +{%- for post in site.categories[include.taxonomy] -%} + {%- unless post.hidden -%} + {% include archive-single.html %} + {%- endunless -%} +{%- endfor -%} diff --git a/docs/_includes/posts-tag.html b/docs/_includes/posts-tag.html new file mode 100644 index 000000000000..46fade02a01f --- /dev/null +++ b/docs/_includes/posts-tag.html @@ -0,0 +1,5 @@ +{%- for post in site.tags[include.taxonomy] -%} + {%- unless post.hidden -%} + {% include archive-single.html %} + {%- endunless -%} +{%- endfor -%} diff --git a/docs/_includes/scripts.html b/docs/_includes/scripts.html new file mode 100644 index 000000000000..bbdaddff0bab --- /dev/null +++ b/docs/_includes/scripts.html @@ -0,0 +1,28 @@ +{% if site.footer_scripts %} + {% for script in site.footer_scripts %} + + {% endfor %} +{% else %} + +{% endif %} + +{% if site.search == true or page.layout == "search" %} + {%- assign search_provider = site.search_provider | default: "lunr" -%} + {%- case search_provider -%} + {%- when "lunr" -%} + {% include_cached search/lunr-search-scripts.html %} + {%- when "google" -%} + {% include_cached search/google-search-scripts.html %} + {%- when "algolia" -%} + {% include_cached search/algolia-search-scripts.html %} + {%- endcase -%} +{% endif %} + +{% include analytics.html %} +{% include /comments-providers/scripts.html %} + +{% if site.after_footer_scripts %} + {% for script in site.after_footer_scripts %} + + {% endfor %} +{% endif %} diff --git a/docs/_includes/seo.html b/docs/_includes/seo.html new file mode 100644 index 000000000000..7df1253b7f85 --- /dev/null +++ b/docs/_includes/seo.html @@ -0,0 +1,155 @@ + +{%- if site.url -%} + {%- assign seo_url = site.url | append: site.baseurl -%} +{%- endif -%} +{%- assign seo_url = seo_url | default: site.github.url -%} + +{% assign title_separator = site.title_separator | default: '-' | replace: '|', '|' %} + +{%- if page.title -%} + {%- assign seo_title = page.title | append: " " | append: title_separator | append: " " | append: site.title -%} +{%- endif -%} + +{%- if seo_title -%} + {%- assign seo_title = seo_title | markdownify | strip_html | strip_newlines | escape_once -%} +{%- endif -%} + +{% if page.canonical_url %} + {%- assign canonical_url = page.canonical_url %} +{% else %} + {%- assign canonical_url = page.url | replace: "index.html", "" | absolute_url %} +{% endif %} + 
+{%- assign seo_description = page.description | default: page.excerpt | default: site.description -%} +{%- if seo_description -%} + {%- assign seo_description = seo_description | markdownify | strip_html | newline_to_br | strip_newlines | replace: '
', ' ' | escape_once | strip -%} +{%- endif -%} + +{%- assign author = page.author | default: page.authors[0] | default: site.author -%} +{%- assign author = site.data.authors[author] | default: author -%} + +{%- if author.twitter -%} + {%- assign author_twitter = author.twitter | replace: "@", "" -%} +{%- endif -%} + +{%- assign page_large_image = page.header.og_image | default: page.header.overlay_image | default: page.header.image | absolute_url -%} +{%- assign page_large_image = page_large_image | escape -%} + +{%- assign page_teaser_image = page.header.teaser | default: site.og_image | absolute_url -%} +{%- assign page_teaser_image = page_teaser_image | escape -%} + +{%- assign site_og_image = site.og_image | absolute_url -%} +{%- assign site_og_image = site_og_image | escape -%} + +{%- if page.date -%} + {%- assign og_type = "article" -%} +{%- else -%} + {%- assign og_type = "website" -%} +{%- endif -%} + +{{ seo_title | default: site.title }}{% if paginator %}{% unless paginator.page == 1 %} {{ title_separator }} {{ site.data.ui-text[site.locale].page | default: "Page" }} {{ paginator.page }}{% endunless %}{% endif %} + + +{% if author.name %} + + {% if og_type == "article" %} + + {% endif %} +{% endif %} + + + + + + + +{% if seo_description %} + +{% endif %} + +{% if page_large_image %} + +{% elsif page_teaser_image %} + +{% endif %} + +{% if site.twitter.username %} + + + + + + {% if page_large_image %} + + + {% else %} + + {% if page_teaser_image %} + + {% endif %} + {% endif %} + + {% if author_twitter %} + + {% endif %} +{% endif %} + +{% if page.date %} + +{% endif %} + +{% if og_type == "article" and page.last_modified_at %} + +{% endif %} + +{% if site.facebook %} + {% if site.facebook.publisher %} + + {% endif %} + + {% if site.facebook.app_id %} + + {% endif %} +{% endif %} + + + +{% if paginator.previous_page %} + +{% endif %} +{% if paginator.next_page %} + +{% endif %} + + + +{% if site.google_site_verification %} + +{% endif %} +{% if site.bing_site_verification %} + +{% endif %} +{% if site.alexa_site_verification %} + +{% endif %} +{% if site.yandex_site_verification %} + +{% endif %} +{% if site.naver_site_verification %} + +{% endif %} + diff --git a/docs/_includes/sidebar.html b/docs/_includes/sidebar.html new file mode 100644 index 000000000000..a4ca1ca78151 --- /dev/null +++ b/docs/_includes/sidebar.html @@ -0,0 +1,19 @@ +{% if page.author_profile or layout.author_profile or page.sidebar %} + +{% endif %} diff --git a/docs/_includes/skip-links.html b/docs/_includes/skip-links.html new file mode 100644 index 000000000000..2cd9f17d814a --- /dev/null +++ b/docs/_includes/skip-links.html @@ -0,0 +1,8 @@ + diff --git a/docs/_includes/social-share.html b/docs/_includes/social-share.html new file mode 100644 index 000000000000..0b377982b268 --- /dev/null +++ b/docs/_includes/social-share.html @@ -0,0 +1,11 @@ + diff --git a/docs/_includes/tag-list.html b/docs/_includes/tag-list.html new file mode 100644 index 000000000000..e0d02bfa561a --- /dev/null +++ b/docs/_includes/tag-list.html @@ -0,0 +1,26 @@ +{% case site.tag_archive.type %} + {% when "liquid" %} + {% assign path_type = "#" %} + {% when "jekyll-archives" %} + {% assign path_type = nil %} +{% endcase %} + +{% if site.tag_archive.path %} + {% comment %} + + + {% endcomment %} + {% capture page_tags %}{% for tag in page.tags %}{{ tag | downcase }}|{{ tag }}{% unless forloop.last %},{% endunless %}{% endfor %}{% endcapture %} + {% assign tag_hashes = page_tags | split: ',' | sort %} + +

+ {{ site.data.ui-text[site.locale].tags_label | default: "Tags:" }} + + {% for hash in tag_hashes %} + {% assign keyValue = hash | split: '|' %} + {% capture tag_word %}{{ keyValue[1] | strip_newlines }}{% endcapture %} + {% unless forloop.last %}, {% endunless %} + {% endfor %} + +

+{% endif %} \ No newline at end of file diff --git a/docs/_includes/toc b/docs/_includes/toc new file mode 100644 index 000000000000..6423ccdc72e1 --- /dev/null +++ b/docs/_includes/toc @@ -0,0 +1,7 @@ + \ No newline at end of file diff --git a/docs/_includes/toc.html b/docs/_includes/toc.html new file mode 100644 index 000000000000..25b9f6a382b0 --- /dev/null +++ b/docs/_includes/toc.html @@ -0,0 +1,96 @@ +{% capture tocWorkspace %} + {% comment %} + Version 1.0.8 + https://github.com/allejo/jekyll-toc + + "...like all things liquid - where there's a will, and ~36 hours to spare, there's usually a/some way" ~jaybe + + Usage: + {% include toc.html html=content sanitize=true class="inline_toc" id="my_toc" h_min=2 h_max=3 %} + + Parameters: + * html (string) - the HTML of compiled markdown generated by kramdown in Jekyll + + Optional Parameters: + * sanitize (bool) : false - when set to true, the headers will be stripped of any HTML in the TOC + * class (string) : '' - a CSS class assigned to the TOC + * id (string) : '' - an ID to assigned to the TOC + * h_min (int) : 1 - the minimum TOC header level to use; any header lower than this value will be ignored + * h_max (int) : 6 - the maximum TOC header level to use; any header greater than this value will be ignored + * ordered (bool) : false - when set to true, an ordered list will be outputted instead of an unordered list + * item_class (string) : '' - add custom class(es) for each list item; has support for '%level%' placeholder, which is the current heading level + * baseurl (string) : '' - add a base url to the TOC links for when your TOC is on another page than the actual content + * anchor_class (string) : '' - add custom class(es) for each anchor element + + Output: + An ordered or unordered list representing the table of contents of a markdown block. 
This snippet will only + generate the table of contents and will NOT output the markdown given to it + {% endcomment %} + + {% capture my_toc %}{% endcapture %} + {% assign orderedList = include.ordered | default: false %} + {% assign minHeader = include.h_min | default: 1 %} + {% assign maxHeader = include.h_max | default: 6 %} + {% assign nodes = include.html | split: ' maxHeader %} + {% continue %} + {% endif %} + + {% if firstHeader %} + {% assign firstHeader = false %} + {% assign minHeader = headerLevel %} + {% endif %} + + {% assign indentAmount = headerLevel | minus: minHeader %} + {% assign _workspace = node | split: '' | first }}>{% endcapture %} + {% assign header = _workspace[0] | replace: _hAttrToStrip, '' %} + + {% assign space = '' %} + {% for i in (1..indentAmount) %} + {% assign space = space | prepend: ' ' %} + {% endfor %} + + {% unless include.item_class == blank %} + {% capture listItemClass %}{:.{{ include.item_class | replace: '%level%', headerLevel }}}{% endcapture %} + {% endunless %} + + {% capture heading_body %}{% if include.sanitize %}{{ header | strip_html }}{% else %}{{ header }}{% endif %}{% endcapture %} + {% capture my_toc %}{{ my_toc }} +{{ space }}{{ listModifier }} {{ listItemClass }} [{{ heading_body | replace: "|", "\|" }}]({% if include.baseurl %}{{ include.baseurl }}{% endif %}#{{ html_id }}){% if include.anchor_class %}{:.{{ include.anchor_class }}}{% endif %}{% endcapture %} + {% endfor %} + + {% if include.class %} + {% capture my_toc %}{:.{{ include.class }}} +{{ my_toc | lstrip }}{% endcapture %} + {% endif %} + + {% if include.id %} + {% capture my_toc %}{: #{{ include.id }}} +{{ my_toc | lstrip }}{% endcapture %} + {% endif %} +{% endcapture %}{% assign tocWorkspace = '' %}{{ my_toc | markdownify | strip }} diff --git a/docs/_includes/video b/docs/_includes/video new file mode 100644 index 000000000000..d653fd641df6 --- /dev/null +++ b/docs/_includes/video @@ -0,0 +1,24 @@ +{% capture video_id %}{{ include.id }}{% endcapture %} +{% capture video_provider %}{{ include.provider }}{% endcapture %} +{% capture video_danmaku %}{{ include.danmaku | default: 0 }}{% endcapture %} + +{% capture video_src %} + {% case video_provider %} + {% when "vimeo" %} + https://player.vimeo.com/video/{{ video_id }}?dnt=true + {% when "youtube" %} + https://www.youtube-nocookie.com/embed/{{ video_id }} + {% when "google-drive" %} + https://drive.google.com/file/d/{{ video_id }}/preview + {% when "bilibili" %} + https://player.bilibili.com/player.html?bvid={{ video_id }}&page=1&as_wide=1&high_quality=1&danmaku={{ video_danmaku }} + {% endcase %} +{% endcapture %} +{% assign video_src = video_src | strip %} + + +{% unless video_src == "" %} +
+ +
+{% endunless %} diff --git a/docs/_layouts/feed.html b/docs/_layouts/feed.html deleted file mode 100644 index 83b137b6908e..000000000000 --- a/docs/_layouts/feed.html +++ /dev/null @@ -1,22 +0,0 @@ - - - {{ site.title }} - - - {{ site.time | date_to_xmlschema }} - {{ site.url }} - - {{ site.author.name }} - {{ site.author.email }} - - - {% for post in site.posts %} {% if post.tags contains page.tag-name %} - - {{ post.title | xml_escape }} - - {{ post.date | date_to_xmlschema }} - {{ site.url }}{{ post.id }} - {{ post.content | xml_escape }} - - {% endif %} {% endfor %} - diff --git a/docs/_layouts/news-home.html b/docs/_layouts/news-home.html deleted file mode 100644 index 0ebdf0ea609f..000000000000 --- a/docs/_layouts/news-home.html +++ /dev/null @@ -1,67 +0,0 @@ ---- -layout: archive ---- - -{{ content }} - - -{% if paginator %} - {% assign posts = paginator.posts %} -{% else %} - {% assign posts = site.posts %} -{% endif %} - - - -
- {% for post in site.posts %} -
- - - -
- {% endfor %} -
- - - - - - - - - - diff --git a/docs/_layouts/single-full.html b/docs/_layouts/single-full.html new file mode 100644 index 000000000000..8dce93ad491b --- /dev/null +++ b/docs/_layouts/single-full.html @@ -0,0 +1,82 @@ +--- +layout: default +--- + +{% if page.header.overlay_color or page.header.overlay_image or page.header.image %} + {% include page__hero.html %} +{% elsif page.header.video.id and page.header.video.provider %} + {% include page__hero_video.html %} +{% endif %} + +
+ +
+ {% if page.title %}{% endif %} + {% if page.excerpt %}{% endif %} + {% if page.date %}{% endif %} + {% if page.last_modified_at %}{% endif %} + +
+ {% unless page.header.overlay_color or page.header.overlay_image %} +
+ {% if page.title %}

{{ page.title | markdownify | remove: "

" | remove: "

" }}

{% endif %} + {% include page__meta.html %} +
+ {% endunless %} + +
+ {% if page.toc %} + + {% endif %} + {{ content }} + {% if page.link %}{% endif %} +
+ +
+ {% if site.data.ui-text[site.locale].meta_label %} +

{{ site.data.ui-text[site.locale].meta_label }}

+ {% endif %} + {% include page__taxonomy.html %} + {% include page__date.html %} +
+ + {% if page.share %}{% include social-share.html %}{% endif %} + + {% include post_pagination.html %} +
+ + {% if jekyll.environment == 'production' and site.comments.provider and page.comments %} + {% include comments.html %} + {% endif %} +
+ + {% comment %}{% endcomment %} + {% if page.id and page.related and site.related_posts.size > 0 %} + + {% comment %}{% endcomment %} + {% elsif page.id and page.related %} + + {% endif %} +
diff --git a/docs/_layouts/tags.html b/docs/_layouts/tags.html deleted file mode 100644 index 43f34f6771f4..000000000000 --- a/docs/_layouts/tags.html +++ /dev/null @@ -1,16 +0,0 @@ ---- -layout: default ---- - -
-

Articles tagged with ""

-
    - - {% for post in site.posts %} - {% if post.tags contains page.tag-name %} -
  • {{ post.title }}, published {{ post.date | date: "%Y-%m-%d" }}
  • - {% endif %} - {% endfor %} - -
-
diff --git a/docs/_posts/2020-02-13-release.md b/docs/_posts/2020-02-13-release.md index d8482de7288b..50581ca0c102 100644 --- a/docs/_posts/2020-02-13-release.md +++ b/docs/_posts/2020-02-13-release.md @@ -1,8 +1,7 @@ --- -layout: single title: "ZeRO & DeepSpeed: New system optimizations enable training models with over 100 billion parameters" date: 2020-02-13 link: https://www.microsoft.com/en-us/research/blog/zero-deepspeed-new-system-optimizations-enable-training-models-with-over-100-billion-parameters/ excerpt: "Developed by Microsoft AI & Research." -categories: training zero +tags: training zero --- diff --git a/docs/_posts/2020-02-13-turing-nlg.md b/docs/_posts/2020-02-13-turing-nlg.md index cb9ee59e314f..0da59aa8fee3 100644 --- a/docs/_posts/2020-02-13-turing-nlg.md +++ b/docs/_posts/2020-02-13-turing-nlg.md @@ -1,8 +1,7 @@ --- -layout: single title: "Turing-NLG: A 17-billion-parameter language model by Microsoft" date: 2020-02-13 link: https://www.microsoft.com/en-us/research/blog/turing-nlg-a-17-billion-parameter-language-model-by-microsoft/ excerpt: "DeepSpeed was used to train the world's largest language model." -categories: training +tags: training --- diff --git a/docs/_posts/2020-05-19-bert-record.md b/docs/_posts/2020-05-19-bert-record.md index ce2f58b231f1..47599a5a8aed 100644 --- a/docs/_posts/2020-05-19-bert-record.md +++ b/docs/_posts/2020-05-19-bert-record.md @@ -1,9 +1,7 @@ --- -layout: single title: "The Fastest and Most Efficient BERT Training through Optimized Transformer Kernels" excerpt: "" -categories: training -new_post: true +tags: training date: 2020-05-19 00:00:00 --- diff --git a/docs/_posts/2020-05-19-press-release.md b/docs/_posts/2020-05-19-press-release.md index b8e2dd09b02f..a4da131f06a1 100644 --- a/docs/_posts/2020-05-19-press-release.md +++ b/docs/_posts/2020-05-19-press-release.md @@ -1,9 +1,7 @@ --- -layout: single title: "ZeRO-2 & DeepSpeed: Shattering Barriers of Deep Learning Speed & Scale" excerpt: "" link: https://www.microsoft.com/en-us/research/blog/zero-2-deepspeed-shattering-barriers-of-deep-learning-speed-scale/ -categories: training zero -new_post: false +tags: training zero date: 2020-05-19 02:00:00 --- diff --git a/docs/_posts/2020-05-19-zero-stage2.md b/docs/_posts/2020-05-19-zero-stage2.md index 39504032d2ae..7c5a1e912a17 100644 --- a/docs/_posts/2020-05-19-zero-stage2.md +++ b/docs/_posts/2020-05-19-zero-stage2.md @@ -1,9 +1,7 @@ --- -layout: single title: "An Order-of-Magnitude Larger and Faster Training with ZeRO-2" excerpt: "" -categories: training zero -new_post: false +tags: training zero date: 2020-05-19 01:00:00 --- diff --git a/docs/_posts/2020-05-28-fastest-bert-training.md b/docs/_posts/2020-05-28-fastest-bert-training.md index 1eaf6afc4f1b..99d132c1e53d 100644 --- a/docs/_posts/2020-05-28-fastest-bert-training.md +++ b/docs/_posts/2020-05-28-fastest-bert-training.md @@ -1,9 +1,7 @@ --- -layout: single title: "Microsoft DeepSpeed achieves the fastest BERT training time" excerpt: "" -categories: training -new_post: false +tags: training date: 2020-05-28 00:00:00 --- diff --git a/docs/_posts/2020-07-24-deepspeed-webinar.md b/docs/_posts/2020-07-24-deepspeed-webinar.md index 58f3674845e3..be4ee777ed61 100644 --- a/docs/_posts/2020-07-24-deepspeed-webinar.md +++ b/docs/_posts/2020-07-24-deepspeed-webinar.md @@ -1,10 +1,8 @@ --- -layout: single title: "DeepSpeed Microsoft Research Webinar on August 6th, 2020" excerpt: "" -categories: presentations +tags: presentations link: 
https://note.microsoft.com/MSR-Webinar-DeepSpeed-Registration-On-Demand.html image: /assets/images/webinar-aug2020.png -new_post: true date: 2020-07-24 00:00:00 --- diff --git a/docs/_posts/2020-08-07-webinar-on-demand.md b/docs/_posts/2020-08-07-webinar-on-demand.md index 3173df1f7881..983e17eca36b 100644 --- a/docs/_posts/2020-08-07-webinar-on-demand.md +++ b/docs/_posts/2020-08-07-webinar-on-demand.md @@ -1,9 +1,7 @@ --- -layout: single title: "DeepSpeed Microsoft Research Webinar is now on-demand" excerpt: "" -categories: presentations +tags: presentations link: https://note.microsoft.com/MSR-Webinar-DeepSpeed-Registration-On-Demand.html -new_post: true date: 2020-08-07 00:00:00 --- diff --git a/docs/_posts/2020-09-08-sparse-attention-news.md b/docs/_posts/2020-09-08-sparse-attention-news.md index 3d5c4cdcb5f9..2ae615c45c82 100644 --- a/docs/_posts/2020-09-08-sparse-attention-news.md +++ b/docs/_posts/2020-09-08-sparse-attention-news.md @@ -1,9 +1,7 @@ --- -layout: single title: "Powering 10x longer sequences and 6x faster execution through DeepSpeed Sparse Attention" excerpt: "" -categories: training -new_post: true +tags: training date: 2020-09-09 00:00:00 --- diff --git a/docs/_posts/2020-09-09-ZeRO-Offload.md b/docs/_posts/2020-09-09-ZeRO-Offload.md index 749683b25ec9..353b8b50f701 100755 --- a/docs/_posts/2020-09-09-ZeRO-Offload.md +++ b/docs/_posts/2020-09-09-ZeRO-Offload.md @@ -1,10 +1,8 @@ --- -layout: single title: "10x bigger model training on a single GPU with ZeRO-Offload" excerpt: "" -new_post: true date: 2020-09-09 00:00:00 -categories: training zero +tags: training zero --- We introduce a new technology called ZeRO-Offload to enable **10X bigger model training on a single GPU**. ZeRO-Offload extends ZeRO-2 to leverage both CPU and GPU memory for training large models. Using a machine with **a single GPU**, our users now can run **models of up to 13 billion parameters** without running out of memory, 10x bigger than the existing approaches, while obtaining competitive throughput. This feature democratizes multi-billion-parameter model training and opens the window for many deep learning practitioners to explore bigger and better models. diff --git a/docs/_posts/2020-09-09-onebit-adam-blog-post.md b/docs/_posts/2020-09-09-onebit-adam-blog-post.md index ffe0f69f317f..ee25a73efa1c 100644 --- a/docs/_posts/2020-09-09-onebit-adam-blog-post.md +++ b/docs/_posts/2020-09-09-onebit-adam-blog-post.md @@ -1,10 +1,8 @@ --- -layout: single title: "DeepSpeed with 1-bit Adam: 5x less communication and 3.4x faster training" excerpt: "" -new_post: false date: 2020-09-09 00:00:00 -categories: training communication +tags: training communication --- ## 1. 
Introduction

diff --git a/docs/_posts/2020-09-09-onebit-adam-news.md b/docs/_posts/2020-09-09-onebit-adam-news.md
index 109631251962..19253ac76b00 100644
--- a/docs/_posts/2020-09-09-onebit-adam-news.md
+++ b/docs/_posts/2020-09-09-onebit-adam-news.md
@@ -1,10 +1,8 @@
 ---
-layout: single
 title: "Up to 5x less communication and 3.4x faster training through 1-bit Adam"
 excerpt: ""
-new_post: true
 date: 2020-09-09 00:00:00
-categories: training communication
+tags: training communication
 ---
diff --git a/docs/_posts/2020-09-09-pipeline-parallelism.md b/docs/_posts/2020-09-09-pipeline-parallelism.md
index af537ffcd62c..4f2e53ed80ee 100644
--- a/docs/_posts/2020-09-09-pipeline-parallelism.md
+++ b/docs/_posts/2020-09-09-pipeline-parallelism.md
@@ -1,10 +1,8 @@
 ---
-layout: single
 title: "Training a Trillion Parameters with Pipeline Parallelism"
 excerpt: ""
-new_post: true
 date: 2020-09-09 00:00:00
-categories: training
+tags: training
 ---

 DeepSpeed includes new support for pipeline parallelism! DeepSpeed's training
diff --git a/docs/_posts/2020-09-09-sparse-attention.md b/docs/_posts/2020-09-09-sparse-attention.md
index 40fdfb449ef1..aa0fa0bb60d4 100644
--- a/docs/_posts/2020-09-09-sparse-attention.md
+++ b/docs/_posts/2020-09-09-sparse-attention.md
@@ -1,10 +1,8 @@
 ---
-layout: single
 title: "DeepSpeed Sparse Attention"
 excerpt: ""
-new_post: true
 date: 2020-09-09 01:00:00
-categories: training inference
+tags: training inference
 ---

 Attention-based deep learning models such as transformers are highly effective in capturing relationships between tokens in an input sequence, even across long distances. As a result, they are used with text, image, and sound-based inputs, where the sequence length can be in thousands of tokens. However, despite the effectiveness of attention modules in capturing long-term dependencies, in practice their application to long-sequence inputs is limited by the compute and memory requirements of the attention computation, which grow quadratically, `O(n^2)`, with the sequence length `n`.
diff --git a/docs/_posts/2020-10-28-progressive-layer-dropping-news.md b/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
index 53a995da93b6..b55d1cb7d8cf 100755
--- a/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
+++ b/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
@@ -1,10 +1,8 @@
 ---
-layout: single
 title: "Progressive Layer Dropping"
 excerpt: ""
-new_post: true
 date: 2020-10-29 00:00:00
-categories: training
+tags: training
 ---

 We introduce a new technology called progressive layer dropping (PLD) to speed up the pre-training of Transformer-based networks through efficient and robust compressed training. The pre-training step of Transformer networks often suffers from unbearable overall computational expense. We analyze the training dynamics and stability of Transformer networks and propose PLD to sparsely update Transformer blocks following a progressive dropping schedule, which smoothly increases the layer dropping rate for each mini-batch as training evolves along both the temporal and the model-depth dimensions. PLD allows pre-training to reach similar accuracy on downstream tasks **2.5X faster**, and training on the same number of samples to be **24% faster**, without the cost of excessive hardware resources.
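As a rough sketch of what such a progressive schedule can look like (the function name, decay form, and constants below are illustrative assumptions, not DeepSpeed's actual implementation):

```python
import math

def keep_probability(step, gamma=0.001, theta_bar=0.5):
    """Illustrative progressive schedule: the probability of keeping a
    layer starts at 1.0 and decays smoothly toward the floor theta_bar
    as training advances, so later mini-batches drop more layers."""
    return (1.0 - theta_bar) * math.exp(-gamma * step) + theta_bar

print(keep_probability(0))               # 1.0 -> every block is updated
print(round(keep_probability(5000), 3))  # ~0.503 -> roughly half are dropped
```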
diff --git a/docs/_posts/2021-03-08-zero3-offload.md b/docs/_posts/2021-03-08-zero3-offload.md index ea394e46fc37..2ce5ef919a0d 100644 --- a/docs/_posts/2021-03-08-zero3-offload.md +++ b/docs/_posts/2021-03-08-zero3-offload.md @@ -1,10 +1,8 @@ --- -layout: single title: "DeepSpeed ZeRO-3 Offload" excerpt: "" -new_post: true date: 2021-03-08 00:00:00 -categories: training zero +tags: training zero --- Today we are announcing the release of ZeRO-3 Offload, a highly efficient and easy to use implementation of ZeRO Stage 3 and ZeRO Offload combined, geared towards our continued goal of democratizing AI by making efficient large-scale DL training available to everyone. The key benefits of ZeRO-3 Offload are: diff --git a/docs/_posts/2021-05-05-MoQ.md b/docs/_posts/2021-05-05-MoQ.md index 8e2255293dbe..e6f7872a4007 100644 --- a/docs/_posts/2021-05-05-MoQ.md +++ b/docs/_posts/2021-05-05-MoQ.md @@ -1,10 +1,8 @@ --- -layout: single title: "Mixture-of-Quantization: A novel quantization approach for reducing model size with minimal accuracy impact" excerpt: "" -new_post: false date: 2021-05-05 00:00:00 -categories: inference +tags: inference --- ## A unified suite for quantization-aware training and inference diff --git a/docs/_posts/2021-05-05-inference-kernel-optimization.md b/docs/_posts/2021-05-05-inference-kernel-optimization.md index 933e00fa7449..63e3ac669e22 100644 --- a/docs/_posts/2021-05-05-inference-kernel-optimization.md +++ b/docs/_posts/2021-05-05-inference-kernel-optimization.md @@ -1,10 +1,8 @@ --- -layout: single title: "DeepSpeed Inference: Multi-GPU inference with customized inference kernels and quantization support" excerpt: "" -new_post: false date: 2021-03-16 00:00:00 -categories: inference +tags: inference --- While DeepSpeed supports training advanced large-scale models, using these trained models in the desired application scenarios is still challenging due to three major limitations in existing inference solutions: 1) lack of support for multi-GPU inference to fit large models and meet latency requirements, 2) limited GPU kernel performance when running inference with small batch sizes, and 3) difficulties in exploiting quantization, which includes both quantizing the model to reduce the model size and latency as well as supporting high-performance inference of quantized models without specialized hardware. diff --git a/docs/_posts/2021-05-14-inference-release.md b/docs/_posts/2021-05-14-inference-release.md index 2f6755d3679d..94e69503c40c 100644 --- a/docs/_posts/2021-05-14-inference-release.md +++ b/docs/_posts/2021-05-14-inference-release.md @@ -1,8 +1,7 @@ --- -layout: single title: "DeepSpeed: Accelerating large-scale model inference and training via system optimizations and compression" date: 2021-05-14 link: https://www.microsoft.com/en-us/research/blog/deepspeed-accelerating-large-scale-model-inference-and-training-via-system-optimizations-and-compression/ excerpt: "Developed by Microsoft AI & Research." 
-categories: inference
+tags: inference
 ---
diff --git a/docs/_posts/2021-08-18-deepspeed-moe.md b/docs/_posts/2021-08-18-deepspeed-moe.md
index 86e27ff2edb7..5bd9667f2a7f 100644
--- a/docs/_posts/2021-08-18-deepspeed-moe.md
+++ b/docs/_posts/2021-08-18-deepspeed-moe.md
@@ -1,9 +1,7 @@
 ---
-layout: single
 title: "DeepSpeed powers 8x larger MoE model training with high performance"
 excerpt: ""
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-powers-8x-larger-moe-model-training-with-high-performance/
-new_post: true
 date: 2021-08-18 00:00:00
-categories: training
+tags: training
 ---
diff --git a/docs/_posts/2021-11-15-autotuning.md b/docs/_posts/2021-11-15-autotuning.md
index 509f691e57f1..650f099ecb29 100644
--- a/docs/_posts/2021-11-15-autotuning.md
+++ b/docs/_posts/2021-11-15-autotuning.md
@@ -1,10 +1,8 @@
 ---
-layout: single
 title: "Autotuning: Automatically discover the optimal DeepSpeed configuration that delivers good training speed"
 excerpt: ""
-new_post: true
-date: 2021-11-16 00:00:00
-categories: training
+date: 2021-11-16 10:00:00
+tags: training
 ---

 We introduce a new feature called Autotuning to automatically discover the optimal DeepSpeed configuration that delivers good training speed. One pain point in model training is figuring out good performance-relevant configurations, such as micro-batch size, to fully utilize the hardware and achieve a high throughput number. This configuration exploration process is commonly done manually, but it is important since model training is repeated many times and benefits from using a good configuration. Not only is the hand-tuning process time-consuming, but the outcome is hardware-dependent. This means that a good configuration on one hardware setup might not be the best on different hardware, and the user thus has to hand-tune the configuration again. With DeepSpeed, there are more configuration parameters that could potentially affect the training speed, making it even more tedious to tune the configuration manually.
diff --git a/docs/_posts/2021-12-09-deepspeed-moe-nlg.md b/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
index 2e2b19c9cc2e..99bc86cbd5d2 100644
--- a/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
+++ b/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
@@ -1,14 +1,10 @@
 ---
-layout: single
 title: "DeepSpeed-MoE for NLG: Reducing the training cost of language models by 5 times"
 excerpt: ""
-new_post: false
 date: 2021-12-09 22:00:00
-categories: training
+tags: training
 ---

-Published on December 9, 2021
-
 [By DeepSpeed Team](https://www.microsoft.com/en-us/research/project/deepspeed/people/)

 Autoregressive transformer-based natural language generation (referred to as
diff --git a/docs/_posts/2022-01-19-moe-inference.md b/docs/_posts/2022-01-19-moe-inference.md
index 3cfab04b96cc..f2ac1c6de2e1 100644
--- a/docs/_posts/2022-01-19-moe-inference.md
+++ b/docs/_posts/2022-01-19-moe-inference.md
@@ -1,9 +1,7 @@
 ---
-layout: single
 title: "DeepSpeed: Advancing MoE inference and training to power next-generation AI scale"
 excerpt: ""
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-advancing-moe-inference-and-training-to-power-next-generation-ai-scale/
-new_post: true
 date: 2022-01-19 00:00:00
-categories: inference
+tags: inference
 ---
diff --git a/docs/_sass/minimal-mistakes.scss b/docs/_sass/minimal-mistakes.scss
new file mode 100644
index 000000000000..7f25575784dd
--- /dev/null
+++ b/docs/_sass/minimal-mistakes.scss
@@ -0,0 +1,65 @@
+/*!
+ * Minimal Mistakes Jekyll Theme 4.20.2 by Michael Rose + * Copyright 2013-2020 Michael Rose - mademistakes.com | @mmistakes + * Licensed under MIT (https://github.com/mmistakes/minimal-mistakes/blob/master/LICENSE) +*/ + +/* Variables */ +@import "minimal-mistakes/variables"; + +/* Mixins and functions */ +@import "minimal-mistakes/vendor/breakpoint/breakpoint"; +@include breakpoint-set("to ems", true); +@import "minimal-mistakes/vendor/magnific-popup/magnific-popup"; // Magnific Popup +@import "minimal-mistakes/vendor/susy/susy"; +@import "minimal-mistakes/mixins"; + +/* Core CSS */ +@import "minimal-mistakes/reset"; +@import "minimal-mistakes/base"; +@import "minimal-mistakes/forms"; +@import "minimal-mistakes/tables"; +@import "minimal-mistakes/animations"; + +/* Components */ +@import "minimal-mistakes/buttons"; +@import "minimal-mistakes/notices"; +@import "minimal-mistakes/masthead"; +@import "minimal-mistakes/navigation"; +@import "minimal-mistakes/footer"; +@import "minimal-mistakes/search"; +@import "minimal-mistakes/syntax"; + +/* Utility classes */ +@import "minimal-mistakes/utilities"; + +/* Layout specific */ +@import "minimal-mistakes/page"; +@import "minimal-mistakes/archive"; +@import "minimal-mistakes/sidebar"; +@import "minimal-mistakes/print"; + + +.wide2 { + .page { + float: left; + width: 115%; + @include breakpoint($large) { + padding-left: 15%; + } + + @include breakpoint($x-large) { + padding-left: 15%; + } + } + + .page__related { + @include breakpoint($large) { + padding-left: 15%; + } + + @include breakpoint($x-large) { + padding-left: 15%; + } + } + } diff --git a/docs/assets/css/main.scss b/docs/assets/css/main.scss index 26a771784d01..4f31bd44dd40 100644 --- a/docs/assets/css/main.scss +++ b/docs/assets/css/main.scss @@ -45,3 +45,67 @@ @include yiq-contrasted($active-color); } } + +/* CSS */ +.button-15 { + background-image: linear-gradient(#42A1EC, #0070C9); + border: 1px solid #0077CC; + border-radius: 4px; + box-sizing: border-box; + color: #FFFFFF; + cursor: pointer; + direction: ltr; + display: block; + font-family: "SF Pro Text","SF Pro Icons","AOS Icons","Helvetica Neue",Helvetica,Arial,sans-serif; + font-size: 17px; + font-weight: 400; + letter-spacing: -.022em; + line-height: 1.47059; + min-width: 30px; + overflow: visible; + padding: 4px 15px; + text-align: center; + vertical-align: baseline; + user-select: none; + -webkit-user-select: none; + touch-action: manipulation; + white-space: nowrap; +} + +.button-15:disabled { + cursor: default; + opacity: .3; +} + +.button-15:hover { + background-image: linear-gradient(#51A9EE, #147BCD); + border-color: #1482D0; + text-decoration: none; +} + +.button-15:active { + background-image: linear-gradient(#3D94D9, #0067B9); + border-color: #006DBC; + outline: none; +} + +.button-15:focus { + box-shadow: rgba(131, 192, 253, 0.5) 0 0 0 3px; + outline: none; +} + +ul.tag-box li { + display: inline-block; + list-style: none; + list-style-image: none; + margin-bottom: 7px; +} +ul.tag-box li a { + background: #e6e6e6; + padding: 4px 8px; + border-radius: 3px; + color: #f76b48; +} +ul.tag-box li span.size { + font-weight: 300; +} \ No newline at end of file diff --git a/docs/news/index.html b/docs/news/index.html deleted file mode 100644 index 6cfe0b7a14c1..000000000000 --- a/docs/news/index.html +++ /dev/null @@ -1,3 +0,0 @@ ---- -layout: news ---- diff --git a/docs/_layouts/news.html b/docs/tags.md similarity index 63% rename from docs/_layouts/news.html rename to docs/tags.md index d0d5f82787ac..54c3f9c3cf69 100644 
--- a/docs/_layouts/news.html +++ b/docs/tags.md @@ -1,7 +1,6 @@ --- layout: archive -post-content: -backsite: javascript:history.back() +permalink: /posts/ --- {% if paginator %} @@ -13,8 +12,8 @@ + -
- +
+ {% assign tags = site.tags | sort %} {% for category in tags %} {% assign cat = category | first %} - + {% endfor %}
diff --git a/docs/tags_list.md b/docs/tags_list.md index c56491ae1550..a9ec54c8fe86 100644 --- a/docs/tags_list.md +++ b/docs/tags_list.md @@ -16,7 +16,9 @@ permalink: /tags/ {% assign t = tag | first %} {% assign posts = tag | last %} +

{{ t }}

+
    {% for post in posts %} {% if post.tags contains t %} From da303ff2a3150e5d0bab06a5602880449d61001c Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Mon, 28 Feb 2022 23:57:30 +0500 Subject: [PATCH 06/20] add tagging list view --- docs/tags.md | 13 ++++++------- docs/tags_list.md | 12 +++++++++--- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/docs/tags.md b/docs/tags.md index ec983b5115b8..5600b08d00ce 100644 --- a/docs/tags.md +++ b/docs/tags.md @@ -39,13 +39,12 @@ permalink: /posts/ {% assign id = id | plus:1 %}

    - {{ post.articletitle }} - {%- unless post.hidden -%} - {% include archive-single.html %} - {% if post.image %} - - {% endif %} - {%- endunless -%} + {%- unless post.hidden -%} + {% include archive-single.html %} + {% if post.image %} + + {% endif %} + {%- endunless -%}

    {% endfor %} diff --git a/docs/tags_list.md b/docs/tags_list.md index a9ec54c8fe86..2309f3981392 100644 --- a/docs/tags_list.md +++ b/docs/tags_list.md @@ -22,9 +22,15 @@ permalink: /tags/
      {% for post in posts %} {% if post.tags contains t %} -
    • - {{ post.date | date: '%d %b %y' }}: {{ post.title }} -
    • + {% if post.link %} +
    • + {{ post.date | date: '%d %b %y' }}: {{ post.title }} +
    • + {% else %} +
    • + {{ post.date | date: '%d %b %y' }}: {{ post.title }} +
    • + {% endif %} {% endif %} {% endfor %}
    From 718362a6082a7f3f5d75af276bb2dc04d2535982 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Tue, 1 Mar 2022 00:52:52 +0500 Subject: [PATCH 07/20] add count to tags --- docs/tags.md | 4 ++-- docs/tags_list.md | 39 +++++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/docs/tags.md b/docs/tags.md index 5600b08d00ce..015f1a4bcb44 100644 --- a/docs/tags.md +++ b/docs/tags.md @@ -24,11 +24,11 @@ permalink: /posts/
    - + {% assign tags = site.tags | sort %} {% for category in tags %} {% assign cat = category | first %} - + {% endfor %}
    diff --git a/docs/tags_list.md b/docs/tags_list.md index 2309f3981392..532cd0c62ea6 100644 --- a/docs/tags_list.md +++ b/docs/tags_list.md @@ -7,31 +7,30 @@ permalink: /tags/ {% for tag in sorted_tags %} {% assign t = tag | first %} {% assign posts = tag | last %} - -
    -

    {{ t }}

    -
    -
      -{% for post in posts %} - {% if post.tags contains t %} - {% if post.link %} -
    • - {{ post.date | date: '%d %b %y' }}: {{ post.title }} -
    • - {% else %} -
    • - {{ post.date | date: '%d %b %y' }}: {{ post.title }} -
    • +
      +

      {{ t }}

      +
      +
        + {% for post in posts %} + {% if post.tags contains t %} + {% if post.link %} +
      • + {{ post.date | date: '%d %b %y' }}: {{ post.title }} +
      • + {% else %} +
      • + {{ post.date | date: '%d %b %y' }}: {{ post.title }} +
      • + {% endif %} {% endif %} - {% endif %} -{% endfor %} -
      + {% endfor %} +
    {% endfor %} From 7d3c3fe149f8e0d436a6f922918e96ee7b4a7793 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Tue, 1 Mar 2022 00:59:25 +0500 Subject: [PATCH 08/20] add moe inference tutorial --- ...{moe-inference-tutorial.md => mixture-of-experts-inference.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/_tutorials/{moe-inference-tutorial.md => mixture-of-experts-inference.md} (100%) diff --git a/docs/_tutorials/moe-inference-tutorial.md b/docs/_tutorials/mixture-of-experts-inference.md similarity index 100% rename from docs/_tutorials/moe-inference-tutorial.md rename to docs/_tutorials/mixture-of-experts-inference.md From 4d14f0c1862d2511d21e332e62a2279fe591bd61 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Tue, 1 Mar 2022 01:01:52 +0500 Subject: [PATCH 09/20] fix typo --- docs/_config.yml | 1 + docs/_data/navigation.yml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docs/_config.yml b/docs/_config.yml index e5642cbfcded..bb545716cec6 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -45,6 +45,7 @@ collections: - megatron.md - mixture-of-experts.md - mixture-of-experts-nlg.md + - moe-inference.md - one-cycle.md - onebit-adam.md - onebit-lamb.md diff --git a/docs/_data/navigation.yml b/docs/_data/navigation.yml index 6f117f710aca..21bf5fc14a20 100755 --- a/docs/_data/navigation.yml +++ b/docs/_data/navigation.yml @@ -96,6 +96,8 @@ lnav: url: /tutorials/mixture-of-experts/ - title: 'Mixture-of-Experts for NLG' url: /tutorials/mixture-of-experts-nlg/ + - title: 'Mixture-of-Experts Inference' + url: /tutorials/mixture-of-experts-inference/ - title: 'Mixture-of-Quantization' url: /tutorials/MoQ-tutorial/ - title: 'One-Cycle Schedule' From 632453c852df44a04d461becdd88ce84e5ac090a Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Tue, 1 Mar 2022 03:28:20 +0500 Subject: [PATCH 10/20] fix format --- docs/_includes/analytics.html | 2 +- docs/_includes/author-profile-custom-links.html | 2 +- docs/_includes/category-list.html | 2 +- docs/_includes/group-by-array | 2 +- docs/_includes/page__taxonomy.html | 2 +- docs/_includes/post_pagination.html | 2 +- docs/_includes/tag-list.html | 2 +- docs/_includes/toc | 2 +- docs/tags.md | 2 -- 9 files changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/_includes/analytics.html b/docs/_includes/analytics.html index 371469f0af9b..5c85236124c2 100644 --- a/docs/_includes/analytics.html +++ b/docs/_includes/analytics.html @@ -11,4 +11,4 @@ {% include /analytics-providers/custom.html %} {% endcase %} -{% endif %} \ No newline at end of file +{% endif %} diff --git a/docs/_includes/author-profile-custom-links.html b/docs/_includes/author-profile-custom-links.html index b89ffcb7df6c..06e0b630f688 100644 --- a/docs/_includes/author-profile-custom-links.html +++ b/docs/_includes/author-profile-custom-links.html @@ -4,4 +4,4 @@ Custom Social Profile Link ---> \ No newline at end of file +--> diff --git a/docs/_includes/category-list.html b/docs/_includes/category-list.html index d684a2829f3b..4f38f11b51e8 100644 --- a/docs/_includes/category-list.html +++ b/docs/_includes/category-list.html @@ -23,4 +23,4 @@ {% endfor %}

    -{% endif %} \ No newline at end of file +{% endif %} diff --git a/docs/_includes/group-by-array b/docs/_includes/group-by-array index 708de41ae3f8..528e40b106c6 100644 --- a/docs/_includes/group-by-array +++ b/docs/_includes/group-by-array @@ -44,4 +44,4 @@ {% assign group_items = group_items | push: __item %} -{% endfor %} \ No newline at end of file +{% endfor %} diff --git a/docs/_includes/page__taxonomy.html b/docs/_includes/page__taxonomy.html index 75c76c81ddf2..f10b2026a22b 100644 --- a/docs/_includes/page__taxonomy.html +++ b/docs/_includes/page__taxonomy.html @@ -4,4 +4,4 @@ {% if site.category_archive.type and page.categories[0] %} {% include category-list.html %} -{% endif %} \ No newline at end of file +{% endif %} diff --git a/docs/_includes/post_pagination.html b/docs/_includes/post_pagination.html index a93c6279763b..c09dd29f0345 100644 --- a/docs/_includes/post_pagination.html +++ b/docs/_includes/post_pagination.html @@ -11,4 +11,4 @@ {{ site.data.ui-text[site.locale].pagination_next | default: "Next" }} {% endif %} -{% endif %} \ No newline at end of file +{% endif %} diff --git a/docs/_includes/tag-list.html b/docs/_includes/tag-list.html index e0d02bfa561a..18ee8bdf1d00 100644 --- a/docs/_includes/tag-list.html +++ b/docs/_includes/tag-list.html @@ -23,4 +23,4 @@ {% endfor %}

    -{% endif %} \ No newline at end of file +{% endif %} diff --git a/docs/_includes/toc b/docs/_includes/toc index 6423ccdc72e1..a234afafad40 100644 --- a/docs/_includes/toc +++ b/docs/_includes/toc @@ -4,4 +4,4 @@ * Auto generated table of contents {:toc .toc__menu} - \ No newline at end of file + diff --git a/docs/tags.md b/docs/tags.md index 015f1a4bcb44..035f6cfe153d 100644 --- a/docs/tags.md +++ b/docs/tags.md @@ -49,5 +49,3 @@ permalink: /posts/
{% endfor %} - - From 921ed10fed884b592a4d9a0ea79692d49afbf30a Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Tue, 1 Mar 2022 07:25:25 +0500 Subject: [PATCH 11/20] tagging tutorials --- docs/_pages/tutorials-landing.md | 45 ++++++++++++++++++- docs/_sass/button-group.scss | 4 +- docs/_tutorials/MoQ-tutorial.md | 1 + docs/_tutorials/advanced-install.md | 1 + docs/_tutorials/autotuning.md | 1 + docs/_tutorials/azure.md | 1 + docs/_tutorials/bert-finetuning.md | 1 + docs/_tutorials/bert-pretraining.md | 1 + docs/_tutorials/cifar-10.md | 1 + docs/_tutorials/curriculum-learning.md | 1 + docs/_tutorials/flops-profiler.md | 1 + docs/_tutorials/gan.md | 1 + docs/_tutorials/getting-started.md | 2 +- docs/_tutorials/inference-tutorial.md | 1 + docs/_tutorials/large-models-w-deepspeed.md | 1 + docs/_tutorials/lrrt.md | 1 + docs/_tutorials/megatron.md | 1 + .../mixture-of-experts-inference.md | 1 + docs/_tutorials/mixture-of-experts-nlg.md | 1 + docs/_tutorials/mixture-of-experts.md | 1 + docs/_tutorials/one-cycle.md | 1 + docs/_tutorials/onebit-adam.md | 1 + docs/_tutorials/onebit-lamb.md | 1 + docs/_tutorials/pipeline.md | 1 + docs/_tutorials/progressive_layer_dropping.md | 2 +- docs/_tutorials/pytorch-profiler.md | 1 + docs/_tutorials/sparse-attention.md | 1 + docs/_tutorials/transformer_kernel.md | 3 +- docs/_tutorials/zero-offload.md | 1 + docs/_tutorials/zero.md | 1 + docs/tags.md | 7 +-- 31 files changed, 77 insertions(+), 11 deletions(-) diff --git a/docs/_pages/tutorials-landing.md b/docs/_pages/tutorials-landing.md index 5447e137c9fd..c7d6af66f411 100644 --- a/docs/_pages/tutorials-landing.md +++ b/docs/_pages/tutorials-landing.md @@ -1,6 +1,49 @@ --- title: "Tutorials" -layout: collection +layout: archive collection: tutorials permalink: /tutorials/ --- + + +{% if paginator %} + {% assign tutorials = paginator.tutorials %} +{% else %} + {% assign tutorials = site.tutorials %} +{% endif %} + + + +
+ + {% assign tags = site.tutorials | map: 'tags' | join: ',' | split: ',' | group_by: tag %} + {% for cat in tags %} + + {% endfor %} +
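+  {% comment %}
+    The filter chain above flattens every tutorial's tag list into one array
+    of tag strings before grouping, so each `cat` corresponds to one distinct
+    tag (the behavior of `group_by` over plain strings is assumed here).
+    Illustrative sketch with assumed data: tutorials tagged "training,MoE"
+    and "training" yield a training group with two items and an MoE group
+    with one.
+  {% endcomment %}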
+
+
+ {% for tutorial in tutorials %} + {% assign post = tutorial %} +
+

+ {%- unless tutorial.hidden -%} + {% include archive-single.html %} + {% if tutorial.image %} + + {% endif %} + {%- endunless -%} +

+
+ {% endfor %} +
diff --git a/docs/_sass/button-group.scss b/docs/_sass/button-group.scss
index c3eb17fe70da..ced944f51db0 100644
--- a/docs/_sass/button-group.scss
+++ b/docs/_sass/button-group.scss
@@ -19,10 +19,10 @@
   cursor: pointer;
   display: inline-block;
   font-family: system-ui,-apple-system,system-ui,"Segoe UI",Roboto,Ubuntu,"Helvetica Neue",sans-serif;
-  font-size: 18px;
+  font-size: 16px;
   font-weight: 600;
   outline: 0;
-  padding: 16px 21px;
+  padding: 16px 16px;
   position: relative;
   text-align: center;
   text-decoration: none;
diff --git a/docs/_tutorials/MoQ-tutorial.md b/docs/_tutorials/MoQ-tutorial.md
index d2abb3f11619..8776ee6f3775 100644
--- a/docs/_tutorials/MoQ-tutorial.md
+++ b/docs/_tutorials/MoQ-tutorial.md
@@ -1,5 +1,6 @@
 ---
 title: "DeepSpeed Mixture-of-Quantization (MoQ)"
+tags: training quantization
 ---

 DeepSpeed introduces new support for model compression using quantization, called Mixture-of-Quantization (MoQ). MoQ is designed on top of QAT (Quantization-Aware Training), with the difference that it schedules various data precisions across the training process. It starts by quantizing the model with a high precision, such as FP16 or 16-bit quantization, and reduces the precision through a pre-defined schedule until reaching the target quantization bits (like 8-bit). Moreover, we use second-order information of the model parameters to dynamically adjust the quantization schedule for each layer of the network separately. We have seen that by adding such a schedule and using various data precisions in the training process, we can quantize the model with better quality and preserve accuracy. For a better understanding of the MoQ methodology, please refer to the MoQ deep-dive [here](https://www.deepspeed.ai/posts/2021-05-05-MoQ/).
diff --git a/docs/_tutorials/advanced-install.md b/docs/_tutorials/advanced-install.md
index e8242c734341..fe950d0ff5f1 100755
--- a/docs/_tutorials/advanced-install.md
+++ b/docs/_tutorials/advanced-install.md
@@ -1,6 +1,7 @@
 ---
 title: "Installation Details"
 date: 2020-10-28
+tags: getting-started
 ---

 The quickest way to get started with DeepSpeed is via pip; this will install
diff --git a/docs/_tutorials/autotuning.md b/docs/_tutorials/autotuning.md
index 1354a7571396..303087d298a7 100644
--- a/docs/_tutorials/autotuning.md
+++ b/docs/_tutorials/autotuning.md
@@ -1,6 +1,7 @@
 ---
 title: "Autotuning"
 excerpt: "Automatically discover the optimal DeepSpeed configuration that delivers good training speed"
+tags: training performance-tuning
 ---

 Make sure you've read the DeepSpeed tutorials on [Getting Started](https://www.deepspeed.ai/getting-started/) and [Zero Redundancy Optimizer](https://www.deepspeed.ai/tutorials/zero/) before stepping through this tutorial.
diff --git a/docs/_tutorials/azure.md b/docs/_tutorials/azure.md
index ee6962b3ba99..1016aeafd007 100644
--- a/docs/_tutorials/azure.md
+++ b/docs/_tutorials/azure.md
@@ -1,5 +1,6 @@
 ---
 title: "Getting Started with DeepSpeed on Azure"
+tags: getting-started
 ---

 This tutorial will help you get started running DeepSpeed on [Azure virtual
diff --git a/docs/_tutorials/bert-finetuning.md b/docs/_tutorials/bert-finetuning.md
index 24e4fa167ddc..f7ea8226022e 100755
--- a/docs/_tutorials/bert-finetuning.md
+++ b/docs/_tutorials/bert-finetuning.md
@@ -1,6 +1,7 @@
 ---
 title: "BingBertSQuAD Fine-tuning"
 excerpt: ""
+tags: training fine-tuning
 ---

 In this tutorial we will be adding DeepSpeed to the BingBert model for the SQuAD fine-tuning task, called "BingBertSquad" henceforth. We will also demonstrate performance gains.
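A minimal sketch of the integration pattern this tutorial walks through (the surrounding variables `args`, `model`, and `batch`, and the JSON config referenced by `args.deepspeed_config`, are assumed to come from the existing fine-tuning script):

```python
import deepspeed

# Wrap the existing PyTorch model; batch size, fp16 and ZeRO settings
# live in the JSON config referenced by args.deepspeed_config.
model_engine, optimizer, _, _ = deepspeed.initialize(
    args=args,
    model=model,
    model_parameters=model.parameters(),
)

loss = model_engine(batch)   # forward pass as before (model-specific)
model_engine.backward(loss)  # replaces loss.backward()
model_engine.step()          # replaces optimizer.step()
```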
diff --git a/docs/_tutorials/bert-pretraining.md b/docs/_tutorials/bert-pretraining.md index 0791fb3308fe..e3771b7fdad2 100755 --- a/docs/_tutorials/bert-pretraining.md +++ b/docs/_tutorials/bert-pretraining.md @@ -1,6 +1,7 @@ --- title: "BERT Pre-training" excerpt: "" +tags: training pre-training --- In this tutorial we will apply DeepSpeed to pre-train the BERT diff --git a/docs/_tutorials/cifar-10.md b/docs/_tutorials/cifar-10.md index c7b53e58357a..11a05a78a749 100644 --- a/docs/_tutorials/cifar-10.md +++ b/docs/_tutorials/cifar-10.md @@ -1,6 +1,7 @@ --- title: "CIFAR-10 Tutorial" excerpt: "Train your first model with DeepSpeed!" +tags: getting-started --- If you haven't already, we advise you to first read through the diff --git a/docs/_tutorials/curriculum-learning.md b/docs/_tutorials/curriculum-learning.md index 577ec5c66031..938955ab57cc 100644 --- a/docs/_tutorials/curriculum-learning.md +++ b/docs/_tutorials/curriculum-learning.md @@ -1,5 +1,6 @@ --- title: "Curriculum Learning: A Regularization Method for Efficient and Stable Billion-Scale GPT Model Pre-Training" +tags: training pre-training --- **Note:** diff --git a/docs/_tutorials/flops-profiler.md b/docs/_tutorials/flops-profiler.md index 504ee9222fd1..b90a55efcada 100644 --- a/docs/_tutorials/flops-profiler.md +++ b/docs/_tutorials/flops-profiler.md @@ -1,6 +1,7 @@ --- title: "Flops Profiler" excerpt: "Measure the parameters, latency, and floating-point operations of your model" +tags: profiling performance-tuning --- In this tutorial, we introduce the DeepSpeed Flops Profiler and provide examples of its usage. diff --git a/docs/_tutorials/gan.md b/docs/_tutorials/gan.md index d880f48db28e..1389c91617dd 100755 --- a/docs/_tutorials/gan.md +++ b/docs/_tutorials/gan.md @@ -1,6 +1,7 @@ --- title: "DCGAN Tutorial" excerpt: "Train your first GAN model with DeepSpeed!" +tags: getting-started training --- If you haven't already, we advise you to first read through the [Getting Started](/getting-started/) guide before stepping through this diff --git a/docs/_tutorials/getting-started.md b/docs/_tutorials/getting-started.md index 1e45babd569b..fe765d2713b9 100644 --- a/docs/_tutorials/getting-started.md +++ b/docs/_tutorials/getting-started.md @@ -2,7 +2,7 @@ title: 'Getting Started' permalink: /getting-started/ excerpt: 'First steps with DeepSpeed' -date: 2020-05-15 +tags: getting-started --- ## Installation diff --git a/docs/_tutorials/inference-tutorial.md b/docs/_tutorials/inference-tutorial.md index 43aa45dd8bf7..253bb7092467 100644 --- a/docs/_tutorials/inference-tutorial.md +++ b/docs/_tutorials/inference-tutorial.md @@ -1,5 +1,6 @@ --- title: "Getting Started with DeepSpeed for Inferencing Transformer based Models" +tags: inference --- DeepSpeed-Inference introduces several features to efficiently serve transformer-based PyTorch models. It supports model parallelism (MP) to fit large models that would otherwise not fit in GPU memory. Even for smaller models, MP can be used to reduce latency for inference. To further reduce latency and cost, we introduce inference-customized kernels. Finally, we propose a novel approach to quantize models, called MoQ, to both shrink the model and reduce the inference cost at production. For more details on the inference related optimizations in DeepSpeed, please refer to our [blog post](https://www.microsoft.com/en-us/research/blog/deepspeed-accelerating-large-scale-model-inference-and-training-via-system-optimizations-and-compression/). 
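A minimal sketch of serving a model through these optimizations (the model choice is a placeholder, and the exact keyword set of `init_inference` at this version is an assumption to verify against the tutorial):

```python
import torch
import deepspeed
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")     # placeholder model
model = AutoModelForCausalLM.from_pretrained("gpt2")

# mp_size > 1 splits the model across GPUs; replace_method="auto" swaps in
# DeepSpeed's inference-customized transformer kernels. Assumes a CUDA device.
ds_engine = deepspeed.init_inference(
    model,
    mp_size=1,
    dtype=torch.half,
    replace_method="auto",
)

inputs = tokenizer("DeepSpeed is", return_tensors="pt").input_ids.cuda()
print(tokenizer.decode(ds_engine.module.generate(inputs, max_length=20)[0]))
```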
diff --git a/docs/_tutorials/large-models-w-deepspeed.md b/docs/_tutorials/large-models-w-deepspeed.md index a7a3d66579c1..177199304a5c 100644 --- a/docs/_tutorials/large-models-w-deepspeed.md +++ b/docs/_tutorials/large-models-w-deepspeed.md @@ -1,5 +1,6 @@ --- title: "Training your large model with DeepSpeed" +tags: training --- ## Overview diff --git a/docs/_tutorials/lrrt.md b/docs/_tutorials/lrrt.md index d2e1e4051934..1659ab5bbd4d 100644 --- a/docs/_tutorials/lrrt.md +++ b/docs/_tutorials/lrrt.md @@ -1,5 +1,6 @@ --- title: "Learning Rate Range Test" +tags: training learning-rate --- This tutorial shows how to perform Learning Rate range tests in PyTorch. diff --git a/docs/_tutorials/megatron.md b/docs/_tutorials/megatron.md index 4d9846c0e329..7d81ecdcd28d 100644 --- a/docs/_tutorials/megatron.md +++ b/docs/_tutorials/megatron.md @@ -1,5 +1,6 @@ --- title: "Megatron-LM GPT2" +tags: training --- If you haven't already, we advise you to first read through the [Getting diff --git a/docs/_tutorials/mixture-of-experts-inference.md b/docs/_tutorials/mixture-of-experts-inference.md index 7a170eddd113..42df78dd0cfc 100644 --- a/docs/_tutorials/mixture-of-experts-inference.md +++ b/docs/_tutorials/mixture-of-experts-inference.md @@ -1,5 +1,6 @@ --- title: "Getting Started with DeepSpeed-MoE for Inferencing Large-Scale MoE Models" +tags: MoE inference --- DeepSpeed-MoE Inference introduces several important features on top of the inference optimizations for dense models ([DeepSpeed-Inference blog post](https://www.microsoft.com/en-us/research/blog/deepspeed-accelerating-large-scale-model-inference-and-training-via-system-optimizations-and-compression/)). It embraces several different types of parallelism, i.e. data-parallelism and tensor-slicing for the non-expert parameters, and expert-parallelism and expert-slicing for the expert parameters. To maximize the aggregate memory bandwidth, we provide communication scheduling with parallelism coordination to effectively group and route tokens with the same critical data path. Moreover, we propose new modeling optimizations, PR-MoE and MoS, to reduce MoE model size while maintaining accuracy. For more information on the DeepSpeed MoE inference optimizations, please refer to our [blog post]({{ site.press_release_v6 }}). diff --git a/docs/_tutorials/mixture-of-experts-nlg.md b/docs/_tutorials/mixture-of-experts-nlg.md index f384ab645660..e9239a7156a2 100644 --- a/docs/_tutorials/mixture-of-experts-nlg.md +++ b/docs/_tutorials/mixture-of-experts-nlg.md @@ -1,5 +1,6 @@ --- title: "Mixture of Experts for NLG models" +tags: MoE training --- In this tutorial, we introduce how to apply DeepSpeed Mixture of Experts (MoE) to NLG models, which reduces the training cost by 5 times and reduces the MoE model size by 3 times (details in our [Blog]({{ site.press_release_v6 }})). We use GPT-3-like models in the Megatron-LM framework as the example. Before reading this tutorial, we recommend first reading the tutorials on [Mixture of Experts](/tutorials/mixture-of-experts/) and [Megatron-LM GPT pre-training](/tutorials/megatron/). diff --git a/docs/_tutorials/mixture-of-experts.md b/docs/_tutorials/mixture-of-experts.md index 8485f1547c6a..23d807ab3eb1 100644 --- a/docs/_tutorials/mixture-of-experts.md +++ b/docs/_tutorials/mixture-of-experts.md @@ -1,5 +1,6 @@ --- title: "Mixture of Experts" +tags: MoE training --- DeepSpeed v0.5 introduces new support for training Mixture of Experts (MoE) models.
MoE models are an emerging class of sparsely activated models that have sublinear compute costs with respect to their parameters. For example, the [Switch Transformer](https://arxiv.org/abs/2101.03961) consists of over 1.6 trillion parameters, while the compute required to train it is approximately equal to that of a 10-billion-parameter dense model. This increase in model size offers tremendous accuracy gains for a constant compute budget. diff --git a/docs/_tutorials/one-cycle.md b/docs/_tutorials/one-cycle.md index 560f637b4763..12967ad56ad5 100644 --- a/docs/_tutorials/one-cycle.md +++ b/docs/_tutorials/one-cycle.md @@ -1,5 +1,6 @@ --- title: "1-Cycle Schedule" +tags: training learning-rate --- This tutorial shows how to implement 1Cycle schedules for learning rate and diff --git a/docs/_tutorials/onebit-adam.md b/docs/_tutorials/onebit-adam.md index df3a7ba926c9..20df2b99d9fb 100644 --- a/docs/_tutorials/onebit-adam.md +++ b/docs/_tutorials/onebit-adam.md @@ -1,5 +1,6 @@ --- title: "1-bit Adam: Up to 5x less communication volume and up to 3.4x faster training" +tags: training IO --- **Note:** diff --git a/docs/_tutorials/onebit-lamb.md b/docs/_tutorials/onebit-lamb.md index f6d9341d9095..822f79e61740 100644 --- a/docs/_tutorials/onebit-lamb.md +++ b/docs/_tutorials/onebit-lamb.md @@ -1,5 +1,6 @@ --- title: "1-bit LAMB: Communication Efficient Large-Scale Large-Batch Training with LAMB's Convergence Speed" +tags: training IO --- **Watch out!** diff --git a/docs/_tutorials/pipeline.md b/docs/_tutorials/pipeline.md index 1751846830ef..4454de00038f 100644 --- a/docs/_tutorials/pipeline.md +++ b/docs/_tutorials/pipeline.md @@ -1,5 +1,6 @@ --- title: "Pipeline Parallelism" +tags: training --- DeepSpeed v0.3 includes new support for pipeline parallelism! Pipeline diff --git a/docs/_tutorials/progressive_layer_dropping.md b/docs/_tutorials/progressive_layer_dropping.md index 8c184dfc6d21..b7b868bf29d3 100755 --- a/docs/_tutorials/progressive_layer_dropping.md +++ b/docs/_tutorials/progressive_layer_dropping.md @@ -1,6 +1,6 @@ --- title: "Accelerating Training of Transformer-Based Language Models with Progressive Layer Dropping" - +tags: training --- In this tutorial, we introduce progressive layer dropping (PLD) in DeepSpeed and provide examples of how to use it. PLD allows Transformer networks such as BERT to be trained 24% faster with the same number of samples, and 2.5 times faster to reach similar accuracy on downstream tasks. A detailed description of PLD and the experimental results are available in our [technical report](https://arxiv.org/pdf/2010.13369.pdf). diff --git a/docs/_tutorials/pytorch-profiler.md b/docs/_tutorials/pytorch-profiler.md index 7389f9b1e61f..a9a9f58d6e32 100644 --- a/docs/_tutorials/pytorch-profiler.md +++ b/docs/_tutorials/pytorch-profiler.md @@ -1,5 +1,6 @@ --- title: "Using PyTorch Profiler with DeepSpeed for performance debugging" +tags: profiling performance-tuning --- This tutorial describes how to use [PyTorch Profiler](https://pytorch.org/blog/introducing-pytorch-profiler-the-new-and-improved-performance-tool/) with DeepSpeed. diff --git a/docs/_tutorials/sparse-attention.md b/docs/_tutorials/sparse-attention.md index 8905b38debbc..bad6bf627d90 100644 --- a/docs/_tutorials/sparse-attention.md +++ b/docs/_tutorials/sparse-attention.md @@ -1,5 +1,6 @@ --- title: "DeepSpeed Sparse Attention" +tags: training --- In this tutorial we describe how to use DeepSpeed Sparse Attention (SA) and its building-block kernels.
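Concretely, SA is enabled through a `sparse_attention` block in the DeepSpeed config. The snippet below mirrors the fixed-mode example from this tutorial as best we recall it; treat the field names as an illustration to be checked against the tutorial itself, not an authoritative schema:

```python
# Illustrative "fixed" sparsity pattern for DeepSpeed Sparse Attention.
# Field names are assumptions based on the SA tutorial; verify before use.
ds_config = {
    "sparse_attention": {
        "mode": "fixed",               # fixed local/global block pattern
        "block": 16,                   # block size of the block-sparse layout
        "different_layout_per_head": True,
        "num_local_blocks": 4,         # blocks in each local attention window
        "num_global_blocks": 1,        # blocks promoted to global attention
        "attention": "bidirectional",  # BERT-style; "unidirectional" for GPT-style
        "horizontal_global_attention": False,
        "num_different_global_patterns": 4,
    }
}
```

Passing a config like this through the launcher is exactly the path the next paragraph describes.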
The easiest way to use SA is through the DeepSpeed launcher. We will describe this through an example in the [How to use sparse attention with DeepSpeed launcher](#how-to-use-sparse-attention-with-deepspeed-launcher) section. Before that, we introduce the modules provided by DeepSpeed SA in the [next](#sparse-attention-modules) section. diff --git a/docs/_tutorials/transformer_kernel.md b/docs/_tutorials/transformer_kernel.md index 9dbcf26e2a12..915117fc3af9 100755 --- a/docs/_tutorials/transformer_kernel.md +++ b/docs/_tutorials/transformer_kernel.md @@ -1,5 +1,6 @@ --- -title: "DeepSpeed Transformer Kernel" +title: "DeepSpeed Transformer Kernel" +tags: training --- This tutorial shows how to enable the DeepSpeed transformer kernel and set its different configuration parameters. diff --git a/docs/_tutorials/zero-offload.md b/docs/_tutorials/zero-offload.md index a1b05451f96d..7102f0667477 100644 --- a/docs/_tutorials/zero-offload.md +++ b/docs/_tutorials/zero-offload.md @@ -1,5 +1,6 @@ --- title: "ZeRO-Offload" +tags: training IO --- ZeRO-3 Offload consists of a subset of features in our newly released ZeRO-Infinity. Read our [ZeRO-Infinity blog](https://www.microsoft.com/en-us/research/blog/zero-infinity-and-deepspeed-unlocking-unprecedented-model-scale-for-deep-learning-training/) to learn more! diff --git a/docs/_tutorials/zero.md b/docs/_tutorials/zero.md index 7721f45ece4f..9f858a067162 100644 --- a/docs/_tutorials/zero.md +++ b/docs/_tutorials/zero.md @@ -1,5 +1,6 @@ --- title: "Zero Redundancy Optimizer (ZeRO)" +tags: training --- If you have not done so already, we advise that you read the DeepSpeed tutorials on [Getting Started](/getting-started/) and [Megatron-LM GPT-2](/tutorials/megatron/) before stepping through this tutorial. diff --git a/docs/tags.md b/docs/tags.md index 035f6cfe153d..fb704ffaa808 100644 --- a/docs/tags.md +++ b/docs/tags.md @@ -11,11 +11,10 @@ permalink: /posts/
+<ul class="tag-box inline">
+  {% assign tags = site.tags | sort %}
+  {% for category in tags %}
+  {% assign cat = category | first %}
+  <li><a href="#{{ cat }}">{{ cat }} <span class="size">{{ site.tags[cat].size }}</span></a></li>
+  {% endfor %}
+</ul>
+
+{% for post in posts %}
+  {%- unless post.hidden -%}
+  {% include archive-single.html %}
+  {% if post.image %}
+  <img src="{{ post.image }}" alt="">
+  {% endif %}
+  {%- endunless -%}
+{% endfor %}
diff --git a/docs/_pages/posts_list_landing.md b/docs/_pages/posts_list_landing.md new file mode 100644 index 000000000000..207d48a259f2 --- /dev/null +++ b/docs/_pages/posts_list_landing.md @@ -0,0 +1,36 @@
+---
+layout: archive
+permalink: /posts-list/
+---
+
+{% assign sorted_tags = (site.tags | sort:0) %}
+<ul>
+  {% for tag in sorted_tags %}
+  {% assign t = tag | first %}
+  {% assign ps = tag | last %}
+  <li><a href="#{{ t }}">{{ t }} ({{ ps.size }})</a></li>
+  {% endfor %}
+</ul>
+
+{% for tag in sorted_tags %}
+  {% assign t = tag | first %}
+  {% assign posts = tag | last %}
+  <h2 id="{{ t }}">{{ t }}</h2>
+  <ul>
+    {% for post in posts %}
+    {% if post.tags contains t %}
+    {% if post.link %}
+    <li>{{ post.date | date: '%d %b %y' }}: <a href="{{ post.link }}">{{ post.title }}</a></li>
+    {% else %}
+    <li>{{ post.date | date: '%d %b %y' }}: <a href="{{ post.url }}">{{ post.title }}</a></li>
+    {% endif %}
+    {% endif %}
+    {% endfor %}
+  </ul>
+{% endfor %} diff --git a/docs/_sass/minimal-mistakes/_page.scss b/docs/_sass/minimal-mistakes/_page.scss new file mode 100644 index 000000000000..06fef675dbb5 --- /dev/null +++ b/docs/_sass/minimal-mistakes/_page.scss @@ -0,0 +1,560 @@ +/* ========================================================================== + SINGLE PAGE/POST + ========================================================================== */ + +#main { + @include clearfix; + margin-left: auto; + margin-right: auto; + padding-left: 1em; + padding-right: 1em; + -webkit-animation: $intro-transition; + animation: $intro-transition; + max-width: 100%; + -webkit-animation-delay: 0.15s; + animation-delay: 0.15s; + + @include breakpoint($x-large) { + max-width: $max-width; + } +} + +body { + display: -webkit-box; + display: -ms-flexbox; + display: flex; + min-height: 100vh; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -ms-flex-direction: column; + flex-direction: column; +} + +.initial-content, +.search-content { + flex: 1 0 auto; +} + +.page { + @include breakpoint($large) { + float: right; + width: calc(100% - #{$right-sidebar-width-narrow}); + padding-right: $right-sidebar-width-narrow; + } + + @include breakpoint($x-large) { + width: calc(100% - #{$right-sidebar-width}); + padding-right: $right-sidebar-width; + } + + .page__inner-wrap { + float: left; + margin-top: 1em; + margin-left: 0; + margin-right: 0; + width: 100%; + clear: both; + + .page__content, + .page__meta, + .page__share { + position: relative; + float: left; + margin-left: 0; + margin-right: 0; + width: 100%; + clear: both; + } + } +} + +.page__title { + margin-top: 0; + line-height: 1.5; + + & + .page__meta { + margin-top: -0.5em; + } +} + +.page__lead { + font-family: $global-font-family; + font-size: $type-size-4; +} + +.page__content { + h2 { + padding-bottom: 0.5em; + border-bottom: 1px solid $border-color; + } + + h1, h2, h3, h4, h5, h6 { + .header-link { + position: relative; + left: 0.5em; + opacity: 0; + font-size: 0.8em; + -webkit-transition: opacity 0.2s ease-in-out 0.1s; + -moz-transition: opacity 0.2s ease-in-out 0.1s; + -o-transition: opacity 0.2s ease-in-out 0.1s; + transition: opacity 0.2s ease-in-out 0.1s; + } + + &:hover .header-link { + opacity: 1; + } + } + + p, + li, + dl { + font-size: 1em; + line-height: 1.7777778; + } + + /* paragraph indents */ + p { + margin: 0 0 $indent-var; + + /* sibling indentation*/ + @if $paragraph-indent == true { + & + p { + text-indent: $indent-var; + margin-top: -($indent-var); + } + } + } + + a:not(.btn) { + &:hover { + text-decoration: underline; + + img { + box-shadow: 0 0 10px rgba(#000, 0.25); + } + } + } + + dt { + margin-top: 1em; + font-family: $sans-serif; + font-weight: bold; + } + + dd { + margin-left: 1em; + font-family: $sans-serif; + font-size: $type-size-6; + } + + .small { + font-size: $type-size-6; + } + + /* blockquote citations */ + blockquote + .small { + margin-top: -1.5em; + padding-left: 1.25rem; + } +} + +.page__hero { + position: relative; + margin-bottom: 2em; + @include clearfix; + -webkit-animation: $intro-transition; + animation: $intro-transition; + -webkit-animation-delay: 0.25s; + animation-delay: 0.25s; + + &--overlay { + position: relative; + margin-bottom: 2em; + padding: 3em 0; + @include clearfix; + background-size: cover; + background-repeat: no-repeat; + background-position: center; + -webkit-animation: $intro-transition; + animation: $intro-transition; + -webkit-animation-delay: 0.25s; + animation-delay: 0.25s; + + a { + color: #fff; + } + + 
.wrapper { + padding-left: 1em; + padding-right: 1em; + + @include breakpoint($x-large) { + max-width: $x-large; + } + } + + .page__title, + .page__meta, + .page__lead, + .btn { + color: #fff; + text-shadow: 1px 1px 4px rgba(#000, 0.5); + } + + .page__lead { + max-width: $medium; + } + + .page__title { + font-size: $type-size-2; + + @include breakpoint($small) { + font-size: $type-size-1; + } + } + } +} + +.page__hero-image { + width: 100%; + height: auto; + -ms-interpolation-mode: bicubic; +} + +.page__hero-caption { + position: absolute; + bottom: 0; + right: 0; + margin: 0 auto; + padding: 2px 5px; + color: #fff; + font-family: $caption-font-family; + font-size: $type-size-7; + background: #000; + text-align: right; + z-index: 5; + opacity: 0.5; + border-radius: $border-radius 0 0 0; + + @include breakpoint($large) { + padding: 5px 10px; + } + + a { + color: #fff; + text-decoration: none; + } +} + +/* + Social sharing + ========================================================================== */ + +.page__share { + margin-top: 2em; + padding-top: 1em; + border-top: 1px solid $border-color; + + @include breakpoint(max-width $small) { + .btn span { + border: 0; + clip: rect(0 0 0 0); + height: 1px; + margin: -1px; + overflow: hidden; + padding: 0; + position: absolute; + width: 1px; + } + } +} + +.page__share-title { + margin-bottom: 10px; + font-size: $type-size-6; + text-transform: uppercase; +} + +/* + Page meta + ========================================================================== */ + +.page__meta { + margin-top: 2em; + color: $muted-text-color; + font-family: $sans-serif; + font-size: $type-size-6; + + p { + margin: 0; + } + + a { + color: inherit; + } +} + +.page__meta-title { + margin-bottom: 10px; + font-size: $type-size-6; + text-transform: uppercase; +} + +.page__meta-sep::before { + content: "\2022"; + padding-left: 0.5em; + padding-right: 0.5em; +} + +/* + Page taxonomy + ========================================================================== */ + +.page__taxonomy { + .sep { + display: none; + } + + strong { + margin-right: 10px; + } +} + +.page__taxonomy-item { + display: inline-block; + margin-right: 5px; + margin-bottom: 8px; + padding: 5px 10px; + text-decoration: none; + border: 1px solid mix(#000, $border-color, 25%); + border-radius: $border-radius; + + &:hover { + text-decoration: none; + color: $link-color-hover; + } +} + +.taxonomy__section { + margin-bottom: 2em; + padding-bottom: 1em; + + &:not(:last-child) { + border-bottom: solid 1px $border-color; + } + + .archive__item-title { + margin-top: 0; + } + + .archive__subtitle { + clear: both; + border: 0; + } + + + .taxonomy__section { + margin-top: 2em; + } +} + +.taxonomy__title { + margin-bottom: 0.5em; + color: $muted-text-color; +} + +.taxonomy__count { + color: $muted-text-color; +} + +.taxonomy__index { + display: grid; + grid-column-gap: 2em; + grid-template-columns: repeat(2, 1fr); + margin: 1.414em 0; + padding: 0; + font-size: 0.75em; + list-style: none; + + @include breakpoint($large) { + grid-template-columns: repeat(3, 1fr); + } + + a { + display: -webkit-box; + display: -ms-flexbox; + display: flex; + padding: 0.25em 0; + -webkit-box-pack: justify; + -ms-flex-pack: justify; + justify-content: space-between; + color: inherit; + text-decoration: none; + border-bottom: 1px solid $border-color; + } +} + +.back-to-top { + display: block; + clear: both; + color: $muted-text-color; + font-size: 0.6em; + text-transform: uppercase; + text-align: right; + text-decoration: none; +} + +/* + Comments + 
========================================================================== */ + +.page__comments { + float: left; + margin-left: 0; + margin-right: 0; + width: 100%; + clear: both; +} + +.page__comments-title { + margin-top: 2rem; + margin-bottom: 10px; + padding-top: 2rem; + font-size: $type-size-6; + border-top: 1px solid $border-color; + text-transform: uppercase; +} + +.page__comments-form { + -webkit-transition: $global-transition; + transition: $global-transition; + + &.disabled { + input, + button, + textarea, + label { + pointer-events: none; + cursor: not-allowed; + filter: alpha(opacity=65); + box-shadow: none; + opacity: 0.65; + } + } +} + +.comment { + @include clearfix(); + margin: 1em 0; + + &:not(:last-child) { + border-bottom: 1px solid $border-color; + } +} + +.comment__avatar-wrapper { + float: left; + width: 60px; + height: 60px; + + @include breakpoint($large) { + width: 100px; + height: 100px; + } +} + +.comment__avatar { + width: 40px; + height: 40px; + border-radius: 50%; + + @include breakpoint($large) { + width: 80px; + height: 80px; + padding: 5px; + border: 1px solid $border-color; + } +} + +.comment__content-wrapper { + float: right; + width: calc(100% - 60px); + + @include breakpoint($large) { + width: calc(100% - 100px); + } +} + +.comment__author { + margin: 0; + + a { + text-decoration: none; + } +} + +.comment__date { + @extend .page__meta; + margin: 0; + + a { + text-decoration: none; + } +} + +/* + Related + ========================================================================== */ + +.page__related { + @include clearfix(); + float: left; + margin-top: 2em; + padding-top: 1em; + border-top: 1px solid $border-color; + + @include breakpoint($large) { + float: right; + width: calc(100% - #{$right-sidebar-width-narrow}); + } + + @include breakpoint($x-large) { + width: calc(100% - #{$right-sidebar-width}); + } + + a { + color: inherit; + text-decoration: none; + } +} + +.page__related-title { + margin-bottom: 10px; + font-size: $type-size-6; + text-transform: uppercase; +} + +/* + Wide Pages + ========================================================================== */ + +.wide { + .page { + @include breakpoint($large) { + padding-right: 0; + } + + @include breakpoint($x-large) { + padding-right: 0; + } + } + + .page__related { + @include breakpoint($large) { + padding-right: 0; + } + + @include breakpoint($x-large) { + padding-right: 0; + } + } +} diff --git a/docs/_sass/minimal-mistakes/_variables.scss b/docs/_sass/minimal-mistakes/_variables.scss new file mode 100644 index 000000000000..f7143ca81364 --- /dev/null +++ b/docs/_sass/minimal-mistakes/_variables.scss @@ -0,0 +1,169 @@ +/* ========================================================================== + Variables + ========================================================================== */ + +/* + Typography + ========================================================================== */ + +$doc-font-size: 12 !default; + +/* paragraph indention */ +$paragraph-indent: false !default; // true, false (default) +$indent-var: 1.3em !default; + +/* system typefaces */ +$serif: Georgia, Times, serif !default; +$sans-serif: -apple-system, BlinkMacSystemFont, "Roboto", "Segoe UI", + "Helvetica Neue", "Lucida Grande", Arial, sans-serif !default; +$monospace: Monaco, Consolas, "Lucida Console", monospace !default; + +/* sans serif typefaces */ +$sans-serif-narrow: $sans-serif !default; +$helvetica: Helvetica, "Helvetica Neue", Arial, sans-serif !default; + +/* serif typefaces */ +$georgia: Georgia, serif 
!default; +$times: Times, serif !default; +$bodoni: "Bodoni MT", serif !default; +$calisto: "Calisto MT", serif !default; +$garamond: Garamond, serif !default; + +$global-font-family: $sans-serif !default; +$header-font-family: $sans-serif !default; +$caption-font-family: $serif !default; + +/* type scale */ +$type-size-1: 2.441em !default; // ~39.056px +$type-size-2: 1.953em !default; // ~31.248px +$type-size-3: 1.563em !default; // ~25.008px +$type-size-4: 1.25em !default; // ~20px +$type-size-5: 1em !default; // ~16px +$type-size-6: 0.75em !default; // ~12px +$type-size-7: 0.6875em !default; // ~11px +$type-size-8: 0.625em !default; // ~10px + +/* headline scale */ +$h-size-1: 1.563em !default; // ~25.008px +$h-size-2: 1.25em !default; // ~20px +$h-size-3: 1.125em !default; // ~18px +$h-size-4: 1.0625em !default; // ~17px +$h-size-5: 1.03125em !default; // ~16.5px +$h-size-6: 1em !default; // ~16px + +/* + Colors + ========================================================================== */ + +$gray: #7a8288 !default; +$dark-gray: mix(#000, $gray, 50%) !default; +$darker-gray: mix(#000, $gray, 60%) !default; +$light-gray: mix(#fff, $gray, 50%) !default; +$lighter-gray: mix(#fff, $gray, 90%) !default; + +$background-color: #fff !default; +$code-background-color: #fafafa !default; +$code-background-color-dark: $light-gray !default; +$text-color: $dark-gray !default; +$muted-text-color: mix(#fff, $text-color, 20%) !default; +$border-color: $lighter-gray !default; +$form-background-color: $lighter-gray !default; +$footer-background-color: $lighter-gray !default; + +$primary-color: #6f777d !default; +$success-color: #3fa63f !default; +$warning-color: #d67f05 !default; +$danger-color: #ee5f5b !default; +$info-color: #3b9cba !default; +$focus-color: $primary-color !default; +$active-color: mix(#fff, $primary-color, 80%) !default; + +/* YIQ color contrast */ +$yiq-contrasted-dark-default: $dark-gray !default; +$yiq-contrasted-light-default: #fff !default; +$yiq-contrasted-threshold: 175 !default; +$yiq-debug: false !default; + +/* brands */ +$behance-color: #1769ff !default; +$bitbucket-color: #205081 !default; +$dribbble-color: #ea4c89 !default; +$facebook-color: #3b5998 !default; +$flickr-color: #ff0084 !default; +$foursquare-color: #0072b1 !default; +$github-color: #171516 !default; +$gitlab-color: #e24329 !default; +$instagram-color: #517fa4 !default; +$keybase-color: #ef7639 !default; +$lastfm-color: #d51007 !default; +$linkedin-color: #007bb6 !default; +$mastodon-color: #2b90d9 !default; +$pinterest-color: #cb2027 !default; +$reddit-color: #ff4500 !default; +$rss-color: #fa9b39 !default; +$soundcloud-color: #ff3300 !default; +$stackoverflow-color: #fe7a15 !default; +$tumblr-color: #32506d !default; +$twitter-color: #55acee !default; +$vimeo-color: #1ab7ea !default; +$vine-color: #00bf8f !default; +$youtube-color: #bb0000 !default; +$xing-color: #006567 !default; + +/* links */ +$link-color: mix(#000, $info-color, 20%) !default; +$link-color-hover: mix(#000, $link-color, 25%) !default; +$link-color-visited: mix(#fff, $link-color, 15%) !default; +$masthead-link-color: $primary-color !default; +$masthead-link-color-hover: mix(#000, $primary-color, 25%) !default; +$navicon-link-color-hover: mix(#fff, $primary-color, 75%) !default; + +/* syntax highlighting (base16) */ +$base00: #263238 !default; +$base01: #2e3c43 !default; +$base02: #314549 !default; +$base03: #546e7a !default; +$base04: #b2ccd6 !default; +$base05: #eeffff !default; +$base06: #eeffff !default; +$base07: #ffffff !default; 
+$base08: #f07178 !default; +$base09: #f78c6c !default; +$base0a: #ffcb6b !default; +$base0b: #c3e88d !default; +$base0c: #89ddff !default; +$base0d: #82aaff !default; +$base0e: #c792ea !default; +$base0f: #ff5370 !default; + +/* + Breakpoints + ========================================================================== */ + +$small: 600px !default; +$medium: 768px !default; +$medium-wide: 900px !default; +$large: 1024px !default; +$x-large: 1280px !default; +$max-width: $x-large !default; + +/* + Grid + ========================================================================== */ + +$right-sidebar-width-narrow: 200px !default; +$right-sidebar-width: 300px !default; +$right-sidebar-width-wide: 400px !default; + +/* + Other + ========================================================================== */ + +$border-radius: 4px !default; +$box-shadow: 0 1px 1px rgba(0, 0, 0, 0.125) !default; +$nav-height: 2em !default; +$nav-toggle-height: 2rem !default; +$navicon-width: 1.5rem !default; +$navicon-height: 0.25rem !default; +$global-transition: all 0.2s ease-in-out !default; +$intro-transition: intro 0.3s both !default; diff --git a/docs/_sass/minimal-mistakes/skins/_air.scss b/docs/_sass/minimal-mistakes/skins/_air.scss new file mode 100644 index 000000000000..662e1451926e --- /dev/null +++ b/docs/_sass/minimal-mistakes/skins/_air.scss @@ -0,0 +1,23 @@ +/* ========================================================================== + Air skin + ========================================================================== */ + +/* Colors */ +$background-color: white !default; +$text-color: #222831 !default; +$muted-text-color: #393e46 !default; +$primary-color: #0092ca !default; +$border-color: mix(#fff, #393e46, 75%) !default; +$footer-background-color: $primary-color !default; +$link-color: #393e46 !default; +$masthead-link-color: $text-color !default; +$masthead-link-color-hover: $text-color !default; +$navicon-link-color-hover: mix(#fff, $text-color, 80%) !default; + +.page__footer { + color: #fff !important; // override +} + +.page__footer-follow .social-icons .svg-inline--fa { + color: inherit; +} diff --git a/docs/assets/css/main.scss b/docs/assets/css/main.scss index 5739da82947e..7fccdd65bd30 100644 --- a/docs/assets/css/main.scss +++ b/docs/assets/css/main.scss @@ -65,3 +65,5 @@ ul.tag-box li a { ul.tag-box li span.size { font-weight: bold; } + +.site-logo img { max-height: 200%; width: auto; } diff --git a/docs/assets/images/deepspeed-logo-uppercase-bold-white-1.15.svg b/docs/assets/images/deepspeed-logo-uppercase-bold-white-1.15.svg new file mode 100644 index 000000000000..1cb9f7575a00 --- /dev/null +++ b/docs/assets/images/deepspeed-logo-uppercase-bold-white-1.15.svg @@ -0,0 +1,24 @@ [24 lines of SVG markup not recoverable] \ No newline at end of file diff --git a/docs/assets/images/deepspeed-logo-uppercase-bold-white.svg b/docs/assets/images/deepspeed-logo-uppercase-bold-white.svg new file mode 100644 index 000000000000..32395acc8dc8 --- /dev/null +++ b/docs/assets/images/deepspeed-logo-uppercase-bold-white.svg @@ -0,0 +1,19 @@ [19 lines of SVG markup not recoverable] diff --git a/docs/assets/images/deepspeed-logo-uppercase-white.svg b/docs/assets/images/deepspeed-logo-uppercase-white.svg new file mode 100644 index 000000000000..1fc99199f8d6 --- /dev/null +++ b/docs/assets/images/deepspeed-logo-uppercase-white.svg @@ -0,0 +1,19 @@ [19 lines of SVG markup not recoverable] From 04c5f5a4c7ed253e8fb4d7b5513a04c3040ccaf9 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Wed, 2 Mar 2022 02:38:43
+0500 Subject: [PATCH 13/20] fix format --- docs/Gemfile.lock | 8 ++-- docs/_config.yml | 12 ++++- docs/_includes/masthead.html | 4 +- docs/_layouts/single-full.html | 2 +- docs/_posts/2020-02-13-release.md | 6 +-- docs/_posts/2020-05-19-bert-record.md | 4 +- docs/_posts/2020-05-19-press-release.md | 4 +- docs/_posts/2020-05-19-zero-stage2.md | 5 +- .../2020-09-08-sparse-attention-news.md | 1 + docs/_posts/2020-09-09-ZeRO-Offload.md | 5 +- ...0-10-28-progressive-layer-dropping-news.md | 1 + docs/_posts/2021-03-08-zero3-offload.md | 10 ++-- docs/_posts/2021-05-14-inference-release.md | 2 +- docs/_posts/2021-11-15-autotuning.md | 1 + docs/_posts/2021-12-09-deepspeed-moe-nlg.md | 6 +-- docs/_tutorials/zero.md | 2 + ...epspeed-logo-uppercase-bold-white-1.15.svg | 2 +- docs/index.md | 7 +-- docs/tags.md | 48 ------------------- docs/tags_list.md | 36 -------------- 20 files changed, 47 insertions(+), 119 deletions(-) delete mode 100644 docs/tags.md delete mode 100644 docs/tags_list.md diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 657c3228370a..592d5adbced7 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -231,7 +231,7 @@ GEM jekyll (>= 3.5, < 5.0) jekyll-feed (~> 0.9) jekyll-seo-tag (~> 2.1) - minimal-mistakes-jekyll (4.20.2) + minimal-mistakes-jekyll (4.24.0) jekyll (>= 3.7, < 5.0) jekyll-feed (~> 0.1) jekyll-gist (~> 1.5) @@ -248,9 +248,9 @@ GEM sawyer (~> 0.8.0, >= 0.5.3) pathutil (0.16.2) forwardable-extended (~> 2.6) - public_suffix (3.1.1) + public_suffix (4.0.6) racc (1.6.0) - rb-fsevent (0.10.4) + rb-fsevent (0.11.1) rb-inotify (0.10.1) ffi (~> 1.0) rexml (3.2.5) @@ -277,7 +277,7 @@ GEM ethon (>= 0.9.0) tzinfo (1.2.9) thread_safe (~> 0.1) - tzinfo-data (1.2020.3) + tzinfo-data (1.2021.5) tzinfo (>= 1.0.0) unf (0.1.4) unf_ext diff --git a/docs/_config.yml b/docs/_config.yml index bb545716cec6..963e8734a8f2 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -5,6 +5,7 @@ description: >- training easy, efficient, and effective. locale : "en-US" +logo: /assets/images/deepspeed-logo-uppercase-bold-white-1.15.svg repository: microsoft/DeepSpeed baseurl: "/" # the subpath of your site, e.g. /blog @@ -80,14 +81,21 @@ defaults: type: posts values: layout: single-full - classes: wide2 + classes: wide author_profile: false read_time: false comments: false share: true related: false - toc: false + toc: true + toc_label: "Contents" show_date: true + - scope: + path: "" + type: tutorials + values: + classes: wide + layout: single analytics: provider: "google-gtag" diff --git a/docs/_includes/masthead.html b/docs/_includes/masthead.html index 47cce0a3545e..0a14c23ebde0 100644 --- a/docs/_includes/masthead.html +++ b/docs/_includes/masthead.html @@ -7,10 +7,10 @@ {% unless logo_path == empty %} {% endunless %} - +