diff --git a/README.md b/README.md index 5b967b6a94..71ab91121e 100644 --- a/README.md +++ b/README.md @@ -39,8 +39,6 @@ Please leave us [your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c [Automatic Spelling Correction](http://docs.deeppavlov.ai/en/master/features/models/spelling_correction.html) | [ELMo training and fine-tuning](http://docs.deeppavlov.ai/en/master/apiref/models/elmo.html) -[Speech recognition and synthesis (ASR and TTS)](http://docs.deeppavlov.ai/en/master/features/models/nemo.html) based on [NVIDIA NeMo](https://nvidia.github.io/NeMo/index.html) - [Entity Linking](http://docs.deeppavlov.ai/en/master/features/models/entity_linking.html) | [Multitask BERT](http://docs.deeppavlov.ai/en/master/features/models/multitask_bert.html) **Skills** diff --git a/deeppavlov/configs/nemo/asr.json b/deeppavlov/configs/nemo/asr.json deleted file mode 100644 index 410e0ac560..0000000000 --- a/deeppavlov/configs/nemo/asr.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "chainer": { - "in": "speech", - "pipe": [ - { - "class_name": "nemo_asr", - "nemo_params_path": "{NEMO_PATH}/quartznet15x5/quartznet15x5.yaml", - "load_path": "{NEMO_PATH}/quartznet15x5", - "in": ["speech"], - "out": ["text"] - } - ], - "out": ["text"] - }, - "metadata": { - "variables": { - "NEMO_PATH": "~/.deeppavlov/models/nemo" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/quartznet15x5.tar.gz", - "subdir": "{NEMO_PATH}" - } - ] - } -} diff --git a/deeppavlov/configs/nemo/asr_tts.json b/deeppavlov/configs/nemo/asr_tts.json deleted file mode 100644 index 8ecc10c304..0000000000 --- a/deeppavlov/configs/nemo/asr_tts.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "chainer": { - "in": "speech_in_encoded", - "pipe": [ - { - "class_name": "base64_decode_bytesIO", - "in": ["speech_in_encoded"], - "out": ["speech_in"] - }, - { - "class_name": "nemo_asr", - "nemo_params_path": "{NEMO_PATH}/quartznet15x5/quartznet15x5.yaml", - "load_path": "{NEMO_PATH}/quartznet15x5", - "in": ["speech_in"], - "out": ["text"] - }, - { - "class_name": "nemo_tts", - "nemo_params_path": "{TTS_PATH}/tacotron2_waveglow.yaml", - "load_path": "{TTS_PATH}", - "in": ["text"], - "out": ["speech_out"] - }, - { - "class_name": "bytesIO_encode_base64", - "in": ["speech_out"], - "out": ["speech_out_encoded"] - } - ], - "out": ["text", "speech_out_encoded"] - }, - "metadata": { - "variables": { - "NEMO_PATH": "~/.deeppavlov/models/nemo", - "TTS_PATH": "{NEMO_PATH}/tacotron2_waveglow" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/quartznet15x5.tar.gz", - "subdir": "{NEMO_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/tacotron2_waveglow.tar.gz", - "subdir": "{NEMO_PATH}" - } - ] - } -} diff --git a/deeppavlov/configs/nemo/tts.json b/deeppavlov/configs/nemo/tts.json deleted file mode 100644 index 6cbac9a043..0000000000 --- a/deeppavlov/configs/nemo/tts.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "chainer": { - "in": ["text", "filepath"], - "pipe": [ - { - "class_name": "nemo_tts", - "nemo_params_path": "{TTS_PATH}/tacotron2_waveglow.yaml", - "load_path": "{TTS_PATH}", - "in": ["text", "filepath"], - "out": ["saved_path"] - } - ], - "out": ["saved_path"] - }, - "metadata": { - "variables": { - "NEMO_PATH": "~/.deeppavlov/models/nemo", - "TTS_PATH": "{NEMO_PATH}/tacotron2_waveglow" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/tacotron2_waveglow.tar.gz", - "subdir": "{NEMO_PATH}" - } - ] - } -} diff --git 
a/deeppavlov/core/common/base.py b/deeppavlov/core/common/base.py index 91067cae29..e18d548d05 100644 --- a/deeppavlov/core/common/base.py +++ b/deeppavlov/core/common/base.py @@ -55,21 +55,6 @@ def __init__(self, x: Optional[Union[str, list]] = None, out: Names of pipeline inference outputs. y: Names of additional inputs (targets) for pipeline training and evaluation. pipe: List of pipeline elements. - - - Example: - .. code:: python - - >>> from deeppavlov.models.nemo.asr import NeMoASR - >>> from deeppavlov import Element, Model - >>> asr = NeMoASR(nemo_params_path="~/.deeppavlov/models/nemo/quartznet15x5/quartznet15x5.yaml", - load_path="~/.deeppavlov/models/nemo/quartznet15x5") - >>> upper = lambda batch: list(map(str.upper, batch)) - >>> model = Model(x=["speech"], - out=["upper_text"], - pipe=[Element(asr, "speech", "text"), Element(upper, "text", "upper_text")]) - >>> model(["8088-284756-0037.wav"]) - ['I WALKED ALONG BRISKLY FOR PERHAPS FIVE MINUTES'] """ super().__init__(in_x=x, out_params=out, in_y=y) if pipe is not None: diff --git a/deeppavlov/core/common/registry.json b/deeppavlov/core/common/registry.json index a870e00a75..d30baf5da7 100644 --- a/deeppavlov/core/common/registry.json +++ b/deeppavlov/core/common/registry.json @@ -2,7 +2,6 @@ "UD_pymorphy_lemmatizer": "deeppavlov.models.morpho_tagger.lemmatizer:UDPymorphyLemmatizer", "api_requester": "deeppavlov.models.api_requester.api_requester:ApiRequester", "api_router": "deeppavlov.models.api_requester.api_router:ApiRouter", - "base64_decode_bytesIO": "deeppavlov.models.nemo.common:ascii_to_bytes_io", "basic_classification_iterator": "deeppavlov.dataset_iterators.basic_classification_iterator:BasicClassificationDatasetIterator", "basic_classification_reader": "deeppavlov.dataset_readers.basic_classification_reader:BasicClassificationDatasetReader", "bert_classifier": "deeppavlov.models.bert.bert_classifier:BertClassifierModel", @@ -20,7 +19,6 @@ "bilstm_nn": "deeppavlov.models.ranking.bilstm_siamese_network:BiLSTMSiameseNetwork", "boolqa_reader": "deeppavlov.dataset_readers.boolqa_reader:BoolqaReader", "bow": "deeppavlov.models.embedders.bow_embedder:BoWEmbedder", - "bytesIO_encode_base64": "deeppavlov.models.nemo.common:bytes_io_to_ascii", "capitalization_featurizer": "deeppavlov.models.preprocessors.capitalization:CapitalizationPreprocessor", "char_splitter": "deeppavlov.models.preprocessors.char_splitter:CharSplitter", "char_splitting_lowercase_preprocessor": "deeppavlov.models.preprocessors.capitalization:CharSplittingLowercasePreprocessor", @@ -89,8 +87,6 @@ "multi_squad_retr_iterator": "deeppavlov.dataset_iterators.squad_iterator:MultiSquadRetrIterator", "multitask_iterator": "deeppavlov.dataset_iterators.multitask_iterator:MultiTaskIterator", "multitask_reader": "deeppavlov.dataset_readers.multitask_reader:MultiTaskReader", - "nemo_asr": "deeppavlov.models.nemo.asr:NeMoASR", - "nemo_tts": "deeppavlov.models.nemo.tts:NeMoTTS", "ner": "deeppavlov.models.ner.network:NerNetwork", "ner_bio_converter": "deeppavlov.models.ner.bio:BIOMarkupRestorer", "ner_chunker": "deeppavlov.models.kbqa.entity_linking:NerChunker", diff --git a/deeppavlov/core/common/requirements_registry.json b/deeppavlov/core/common/requirements_registry.json index 05b2350d75..6d092e721f 100644 --- a/deeppavlov/core/common/requirements_registry.json +++ b/deeppavlov/core/common/requirements_registry.json @@ -167,18 +167,12 @@ "static_dictionary": [ "{DEEPPAVLOV_PATH}/requirements/lxml.txt" ], - "base64_decode_bytesIO": [ - 
"{DEEPPAVLOV_PATH}/requirements/nemo.txt" - ], "wikitionary_100K_vocab": [ "{DEEPPAVLOV_PATH}/requirements/lxml.txt" ], "huggingface_dataset_iterator": [ "{DEEPPAVLOV_PATH}/requirements/datasets.txt" ], - "bytesIO_encode_base64": [ - "{DEEPPAVLOV_PATH}/requirements/nemo.txt" - ], "typos_custom_reader": [ "{DEEPPAVLOV_PATH}/requirements/lxml.txt" ], @@ -243,18 +237,6 @@ "typos_kartaslov_reader": [ "{DEEPPAVLOV_PATH}/requirements/lxml.txt" ], - "nemo_asr": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch14.txt", - "{DEEPPAVLOV_PATH}/requirements/nemo.txt", - "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt" - ], - "nemo_tts": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch14.txt", - "{DEEPPAVLOV_PATH}/requirements/nemo.txt", - "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers28.txt", - "{DEEPPAVLOV_PATH}/requirements/nemo-tts.txt" - ], "spelling_error_model": [ "{DEEPPAVLOV_PATH}/requirements/lxml.txt" ], diff --git a/deeppavlov/models/nemo/__init__.py b/deeppavlov/models/nemo/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/deeppavlov/models/nemo/asr.py b/deeppavlov/models/nemo/asr.py deleted file mode 100644 index 70527adea3..0000000000 --- a/deeppavlov/models/nemo/asr.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright 2020 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -from io import BytesIO -from pathlib import Path -from typing import List, Optional, Tuple, Union, Dict - -import torch -from nemo.collections.asr import AudioToMelSpectrogramPreprocessor, JasperEncoder, JasperDecoderForCTC, GreedyCTCDecoder -from nemo.collections.asr.helpers import post_process_predictions -from nemo.collections.asr.parts.features import WaveformFeaturizer -from nemo.core.neural_types import AudioSignal, NeuralType, LengthsType -from nemo.utils.decorators import add_port_docs -from torch import Tensor -from torch.utils.data import Dataset, DataLoader - -from deeppavlov.core.common.registry import register -from deeppavlov.models.nemo.common import CustomDataLayerBase, NeMoBase - -log = logging.getLogger(__name__) - - -class AudioInferDataset(Dataset): - def __init__(self, audio_batch: List[Union[str, BytesIO]], sample_rate: int, int_values: bool, trim=False) -> None: - """Dataset reader for AudioInferDataLayer. - - Args: - audio_batch: Batch to be read. Elements could be either paths to audio files or Binary I/O objects. - sample_rate: Audio files sample rate. - int_values: If true, load samples as 32-bit integers. - trim: Trim leading and trailing silence from an audio signal if True. - - """ - self.audio_batch = audio_batch - self.featurizer = WaveformFeaturizer(sample_rate=sample_rate, int_values=int_values) - self.trim = trim - - def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: - """Processes audio batch item and extracts features. - - Args: - index: Audio batch item index. - - Returns: - features: Audio file's extracted features tensor. 
- features_length: Features length tensor. - - """ - sample = self.audio_batch[index] - features = self.featurizer.process(sample, trim=self.trim) - features_length = torch.tensor(features.shape[0]).long() - - return features, features_length - - def __len__(self) -> int: - return len(self.audio_batch) - - -class AudioInferDataLayer(CustomDataLayerBase): - """Data Layer for ASR pipeline inference.""" - - @property - @add_port_docs() - def output_ports(self) -> Dict[str, NeuralType]: - return { - "audio_signal": NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), - "a_sig_length": NeuralType(tuple('B'), LengthsType()) - } - - def __init__(self, *, - audio_batch: List[Union[str, BytesIO]], - batch_size: int = 32, - sample_rate: int = 16000, - int_values: bool = False, - trim_silence: bool = False, - **kwargs) -> None: - """Initializes Data Loader. - - Args: - audio_batch: Batch to be read. Elements could be either paths to audio files or Binary I/O objects. - batch_size: How many samples per batch to load. - sample_rate: Target sampling rate for data. Audio files will be resampled to sample_rate if - it is not already. - int_values: If true, load data as 32-bit integers. - trim_silence: Trim leading and trailing silence from an audio signal if True. - - """ - self._sample_rate = sample_rate - - dataset = AudioInferDataset(audio_batch=audio_batch, sample_rate=sample_rate, int_values=int_values, - trim=trim_silence) - - dataloader = DataLoader(dataset=dataset, batch_size=batch_size, collate_fn=self.seq_collate_fn) - super(AudioInferDataLayer, self).__init__(dataset, dataloader, **kwargs) - - @staticmethod - def seq_collate_fn(batch: Tuple[Tuple[Tensor], Tuple[Tensor]]) -> Tuple[Optional[Tensor], Optional[Tensor]]: - """Collates batch of audio signal and audio length, zero pads audio signal. - - Args: - batch: A tuple of tuples of audio signals and signal lengths. This collate function assumes the signals - are 1d torch tensors (i.e. mono audio). - - Returns: - audio_signal: Zero padded audio signal tensor. - audio_length: Audio signal length tensor. - - """ - _, audio_lengths = zip(*batch) - max_audio_len = 0 - has_audio = audio_lengths[0] is not None - if has_audio: - max_audio_len = max(audio_lengths).item() - - audio_signal = [] - for sig, sig_len in batch: - if has_audio: - sig_len = sig_len.item() - if sig_len < max_audio_len: - pad = (0, max_audio_len - sig_len) - sig = torch.nn.functional.pad(sig, pad) - audio_signal.append(sig) - - if has_audio: - audio_signal = torch.stack(audio_signal) - audio_lengths = torch.stack(audio_lengths) - else: - audio_signal, audio_lengths = None, None - - return audio_signal, audio_lengths - - -@register('nemo_asr') -class NeMoASR(NeMoBase): - """ASR model on NeMo modules.""" - - def __init__(self, load_path: Union[str, Path], nemo_params_path: Union[str, Path], **kwargs) -> None: - """Initializes NeuralModules for ASR. - - Args: - load_path: Path to a directory with pretrained checkpoints for JasperEncoder and JasperDecoderForCTC. - nemo_params_path: Path to a file containing labels and params for AudioToMelSpectrogramPreprocessor, - JasperEncoder, JasperDecoderForCTC and AudioInferDataLayer. 
- - """ - super(NeMoASR, self).__init__(load_path=load_path, nemo_params_path=nemo_params_path, **kwargs) - - self.labels = self.nemo_params['labels'] - - self.data_preprocessor = AudioToMelSpectrogramPreprocessor( - **self.nemo_params['AudioToMelSpectrogramPreprocessor'] - ) - self.jasper_encoder = JasperEncoder(**self.nemo_params['JasperEncoder']) - self.jasper_decoder = JasperDecoderForCTC(num_classes=len(self.labels), **self.nemo_params['JasperDecoder']) - self.greedy_decoder = GreedyCTCDecoder() - self.modules_to_restore = [self.jasper_encoder, self.jasper_decoder] - - self.load() - - def __call__(self, audio_batch: List[Union[str, BytesIO]]) -> List[str]: - """Transcripts audio batch to text. - - Args: - audio_batch: Batch to be transcribed. Elements could be either paths to audio files or Binary I/O objects. - - Returns: - text_batch: Batch of transcripts. - - """ - data_layer = AudioInferDataLayer(audio_batch=audio_batch, **self.nemo_params['AudioToTextDataLayer']) - audio_signal, audio_signal_len = data_layer() - processed_signal, processed_signal_len = self.data_preprocessor(input_signal=audio_signal, - length=audio_signal_len) - encoded, encoded_len = self.jasper_encoder(audio_signal=processed_signal, length=processed_signal_len) - log_probs = self.jasper_decoder(encoder_output=encoded) - predictions = self.greedy_decoder(log_probs=log_probs) - eval_tensors = [predictions] - tensors = self.neural_factory.infer(tensors=eval_tensors) - text_batch = post_process_predictions(tensors[0], self.labels) - - return text_batch diff --git a/deeppavlov/models/nemo/common.py b/deeppavlov/models/nemo/common.py deleted file mode 100644 index 883483c5d6..0000000000 --- a/deeppavlov/models/nemo/common.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright 2020 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import base64 -from io import BytesIO -from logging import getLogger -from pathlib import Path -from typing import Union - -import nemo -import torch -from nemo.backends.pytorch import DataLayerNM -from torch.utils.data import Dataset, DataLoader - -from deeppavlov.core.commands.utils import expand_path -from deeppavlov.core.common.file import read_yaml -from deeppavlov.core.common.registry import register -from deeppavlov.core.models.component import Component -from deeppavlov.core.models.serializable import Serializable - -log = getLogger(__name__) - - -@register('base64_decode_bytesIO') -def ascii_to_bytes_io(batch: Union[str, list]) -> Union[BytesIO, list]: - """Recursively searches for strings in the input batch and converts them into the base64-encoded bytes wrapped in - Binary I/O objects. - - Args: - batch: A string or an iterable container with strings at some level of nesting. - - Returns: - The same structure where all strings are converted into the base64-encoded bytes wrapped in Binary I/O objects. 
- - """ - if isinstance(batch, str): - return BytesIO(base64.decodebytes(batch.encode())) - - return list(map(ascii_to_bytes_io, batch)) - - -@register('bytesIO_encode_base64') -def bytes_io_to_ascii(batch: Union[BytesIO, list]) -> Union[str, list]: - """Recursively searches for Binary I/O objects in the input batch and converts them into ASCII-strings. - - Args: - batch: A BinaryIO object or an iterable container with BinaryIO objects at some level of nesting. - - Returns: - The same structure where all BinaryIO objects are converted into strings. - - """ - if isinstance(batch, BytesIO): - return base64.encodebytes(batch.read()).decode('ascii') - - return list(map(bytes_io_to_ascii, batch)) - - -class NeMoBase(Component, Serializable): - """Base class for NeMo Chainer's pipeline components.""" - - def __init__(self, load_path: Union[str, Path], nemo_params_path: Union[str, Path], **kwargs) -> None: - """Initializes NeuralModuleFactory on CPU or GPU and reads nemo modules params from yaml. - - Args: - load_path: Path to a directory with pretrained checkpoints for NeMo modules. - nemo_params_path: Path to a file containig NeMo modules params. - - """ - super(NeMoBase, self).__init__(save_path=None, load_path=load_path, **kwargs) - placement = nemo.core.DeviceType.GPU if torch.cuda.is_available() else nemo.core.DeviceType.CPU - self.neural_factory = nemo.core.NeuralModuleFactory(placement=placement) - self.modules_to_restore = [] - self.nemo_params = read_yaml(expand_path(nemo_params_path)) - - def __call__(self, *args, **kwargs): - raise NotImplementedError - - def load(self) -> None: - """Loads pretrained checkpoints for modules from self.modules_to_restore list.""" - module_names = [str(module) for module in self.modules_to_restore] - checkpoints = nemo.utils.get_checkpoint_from_dir(module_names, self.load_path) - for module, checkpoint in zip(self.modules_to_restore, checkpoints): - log.info(f'Restoring {module} from {checkpoint}') - module.restore_from(checkpoint) - - def save(self, *args, **kwargs) -> None: - pass - - -class CustomDataLayerBase(DataLayerNM): - def __init__(self, dataset: Dataset, dataloader: DataLoader, **kwargs) -> None: - super(CustomDataLayerBase, self).__init__() - self._dataset = dataset - self._dataloader = dataloader - - def __len__(self) -> int: - return len(self._dataset) - - @property - def dataset(self) -> None: - return None - - @property - def data_iterator(self) -> torch.utils.data.DataLoader: - return self._dataloader diff --git a/deeppavlov/models/nemo/tts.py b/deeppavlov/models/nemo/tts.py deleted file mode 100644 index d31fa0bcfb..0000000000 --- a/deeppavlov/models/nemo/tts.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright 2020 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from functools import partial -from io import BytesIO -from logging import getLogger -from pathlib import Path -from typing import List, Optional, Tuple, Union, Dict - -import torch -from nemo.collections.asr.parts import collections, parsers -from nemo.collections.asr.parts.dataset import TranscriptDataset -from nemo.collections.tts import TextEmbedding, Tacotron2Encoder, Tacotron2DecoderInfer, Tacotron2Postnet -from nemo.core.neural_types import NeuralType, LabelsType, LengthsType -from nemo.utils.decorators import add_port_docs -from nemo.utils.misc import pad_to -from scipy.io import wavfile -from torch import Tensor - -from deeppavlov.core.commands.utils import expand_path -from deeppavlov.core.common.registry import register -from deeppavlov.models.nemo.common import CustomDataLayerBase, NeMoBase -from deeppavlov.models.nemo.vocoder import WaveGlow, GriffinLim - -log = getLogger(__name__) - - -class TextDataset(TranscriptDataset): - def __init__(self, - text_batch: List[str], - labels: List[str], - bos_id: Optional[int] = None, - eos_id: Optional[int] = None, - lowercase: bool = True) -> None: - """Text dataset reader for TextDataLayer. - - Args: - text_batch: Texts to be used for speech synthesis. - labels: List of string labels to use for str2int translation. - bos_id: Label position of beginning of string symbol. - eos_id: Label position of end of string symbol. - lowercase: Whether to convert all uppercase characters in a text batch into lowercase characters. - - """ - parser = parsers.make_parser(labels, do_lowercase=lowercase) - self.texts = collections.Text(text_batch, parser) - self.bos_id = bos_id - self.eos_id = eos_id - - -class TextDataLayer(CustomDataLayerBase): - @property - @add_port_docs() - def output_ports(self) -> Dict[str, NeuralType]: - return { - 'texts': NeuralType(('B', 'T'), LabelsType()), - "texts_length": NeuralType(tuple('B'), LengthsType()) - } - - def __init__(self, *, - text_batch: List[str], - labels: List[str], - batch_size: int = 32, - bos_id: Optional[int] = None, - eos_id: Optional[int] = None, - pad_id: Optional[int] = None, - **kwargs) -> None: - """A simple Neural Module for loading text data. - - Args: - text_batch: Texts to be used for speech synthesis. - labels: List of string labels to use for str2int translation. - batch_size: How many strings per batch to load. - bos_id: Label position of beginning of string symbol. If None, initialized as `len(labels)`. - eos_id: Label position of end of string symbol. If None, initialized as `len(labels) + 1`. - pad_id: Label position of pad symbol. If None, initialized as `len(labels) + 2`. - - """ - len_labels = len(labels) - if bos_id is None: - bos_id = len_labels - if eos_id is None: - eos_id = len_labels + 1 - if pad_id is None: - pad_id = len_labels + 2 - - dataset = TextDataset(text_batch=text_batch, labels=labels, bos_id=bos_id, eos_id=eos_id) - - dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, - collate_fn=partial(self._collate_fn, pad_id=pad_id)) - super(TextDataLayer, self).__init__(dataset, dataloader, **kwargs) - - @staticmethod - def _collate_fn(batch: Tuple[Tuple[Tensor], Tuple[Tensor]], pad_id: int) -> Tuple[Tensor, Tensor]: - """Collates batch of texts. - - Args: - batch: A tuple of tuples of texts and text lengths. - pad_id: Label position of pad symbol. - - Returns: - texts: Padded texts tensor. - texts_len: Text lengths tensor. 
- - """ - texts_list, texts_len = zip(*batch) - max_len = max(texts_len) - max_len = pad_to(max_len, 8) - - texts = torch.empty(len(texts_list), max_len, dtype=torch.long) - texts.fill_(pad_id) - - for i, text in enumerate(texts_list): - texts[i].narrow(0, 0, text.size(0)).copy_(text) - - if len(texts.shape) != 2: - raise ValueError(f'Texts in collate function have shape {texts.shape}, should have 2 dimensions.') - - return texts, torch.stack(texts_len) - - -@register('nemo_tts') -class NeMoTTS(NeMoBase): - """TTS model on NeMo modules.""" - def __init__(self, - load_path: Union[str, Path], - nemo_params_path: Union[str, Path], - vocoder: str = 'waveglow', - **kwargs) -> None: - """Initializes NeuralModules for TTS. - - Args: - load_path: Path to a directory with pretrained checkpoints for TextEmbedding, Tacotron2Encoder, - Tacotron2DecoderInfer, Tacotron2Postnet and, if Waveglow vocoder is selected, WaveGlowInferNM. - nemo_params_path: Path to a file containig sample_rate, labels and params for TextEmbedding, - Tacotron2Encoder, Tacotron2Decoder, Tacotron2Postnet and TranscriptDataLayer. - vocoder: Vocoder used to convert from spectrograms to audio. Available options: `waveglow` (needs pretrained - checkpoint) and `griffin-lim`. - - """ - super(NeMoTTS, self).__init__(load_path=load_path, nemo_params_path=nemo_params_path, **kwargs) - - self.sample_rate = self.nemo_params['sample_rate'] - self.text_embedding = TextEmbedding( - len(self.nemo_params['labels']) + 3, # + 3 special chars - **self.nemo_params['TextEmbedding'] - ) - self.t2_enc = Tacotron2Encoder(**self.nemo_params['Tacotron2Encoder']) - self.t2_dec = Tacotron2DecoderInfer(**self.nemo_params['Tacotron2Decoder']) - self.t2_postnet = Tacotron2Postnet(**self.nemo_params['Tacotron2Postnet']) - self.modules_to_restore = [self.text_embedding, self.t2_enc, self.t2_dec, self.t2_postnet] - - if vocoder == 'waveglow': - self.vocoder = WaveGlow(**self.nemo_params['WaveGlowNM']) - self.modules_to_restore.append(self.vocoder) - elif vocoder == 'griffin-lim': - self.vocoder = GriffinLim(**self.nemo_params['GriffinLim']) - else: - raise ValueError(f'{vocoder} vocoder is not supported.') - - self.load() - - def __call__(self, - text_batch: List[str], - path_batch: Optional[List[str]] = None) -> Union[List[BytesIO], List[str]]: - """Creates wav files or file objects with speech. - - Args: - text_batch: Text from which human audible speech should be generated. - path_batch: i-th element of `path_batch` is the path to save i-th generated speech file. If argument isn't - specified, the synthesized speech will be stored to Binary I/O objects. - - Returns: - List of Binary I/O objects with generated speech if `path_batch` was not specified, list of paths to files - with synthesized speech otherwise. 
- - """ - if path_batch is None: - path_batch = [BytesIO() for _ in text_batch] - elif len(text_batch) != len(path_batch): - raise ValueError('Text batch length differs from path batch length.') - else: - path_batch = [expand_path(path) for path in path_batch] - - data_layer = TextDataLayer(text_batch=text_batch, **self.nemo_params['TranscriptDataLayer']) - transcript, transcript_len = data_layer() - transcript_embedded = self.text_embedding(char_phone=transcript) - transcript_encoded = self.t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len) - mel_decoder, gate, alignments, mel_len = self.t2_dec(char_phone_encoded=transcript_encoded, - encoded_length=transcript_len) - mel_postnet = self.t2_postnet(mel_input=mel_decoder) - infer_tensors = [self.vocoder(mel_postnet), mel_len] - evaluated_tensors = self.neural_factory.infer(tensors=infer_tensors) - synthesized_batch = self.vocoder.get_audio(*evaluated_tensors) - - for fout, synthesized_audio in zip(path_batch, synthesized_batch): - wavfile.write(fout, self.sample_rate, synthesized_audio) - - return path_batch diff --git a/deeppavlov/models/nemo/vocoder.py b/deeppavlov/models/nemo/vocoder.py deleted file mode 100644 index 3ec918d266..0000000000 --- a/deeppavlov/models/nemo/vocoder.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2020 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from logging import getLogger -from typing import List - -import librosa -import numpy as np -from nemo.core.neural_types import NmTensor -from nemo.collections.tts import WaveGlowInferNM -from numpy import ndarray - -log = getLogger(__name__) - - -class BaseVocoder: - """Class is used to maintain consistency in the construction of the TTS pipeline based on NeMo modules.""" - - def __call__(self, tensor: NmTensor) -> NmTensor: - """Should return the tensor after the evaluation of which speech could be synthesized with `get_audio` method""" - raise NotImplementedError - - def get_audio(self, evaluated_tensor: list, mel_len: list): - """Synthesizes audio from the evaluated tensor constructed by `__call__` method.""" - raise NotImplementedError - - -class WaveGlow(BaseVocoder): - def __init__(self, *, denoiser_strength: float = 0.0, n_window_stride: int = 160, **kwargs) -> None: - """Wraps WaveGlowInferNM module. - - Args: - denoiser_strength: Denoiser strength for waveglow. - n_window_stride: Stride of window for FFT in samples used in model training. - kwargs: Named arguments for WaveGlowInferNM constructor. 
- - """ - self.waveglow = WaveGlowInferNM(**kwargs) - self.denoiser_strength = denoiser_strength - self.n_window_stride = n_window_stride - - def __call__(self, mel_postnet: NmTensor) -> NmTensor: - return self.waveglow(mel_spectrogram=mel_postnet) - - def __str__(self): - return str(self.waveglow) - - def restore_from(self, path: str) -> None: - """Wraps WaveGlowInferNM restore_from method.""" - self.waveglow.restore_from(path) - if self.denoiser_strength > 0: - log.info('Setup denoiser for WaveGlow') - self.waveglow.setup_denoiser() - - def get_audio(self, evaluated_audio: list, mel_len: list) -> List[ndarray]: - """Unpacks audio data from evaluated tensor and denoises it if `denoiser_strength` > 0.""" - audios = [] - for i, batch in enumerate(evaluated_audio): - audio = batch.cpu().numpy() - for j, sample in enumerate(audio): - sample_len = mel_len[i][j] * self.n_window_stride - sample = sample[:sample_len] - if self.denoiser_strength > 0: - sample, _ = self.waveglow.denoise(sample, strength=self.denoiser_strength) - audios.append(sample) - return audios - - -class GriffinLim(BaseVocoder): - def __init__(self, *, - sample_rate: float = 16000.0, - n_fft: int = 1024, - mag_scale: float = 2048.0, - power: float = 1.2, - n_iters: int = 50, - **kwargs) -> None: - """Uses Griffin Lim algorithm to generate speech from spectrograms. - - Args: - sample_rate: Generated audio data sample rate. - n_fft: The number of points to use for the FFT. - mag_scale: Multiplied with the linear spectrogram to avoid audio sounding muted due to mel filter - normalization. - power: The linear spectrogram is raised to this power prior to running the Griffin Lim algorithm. A power - of greater than 1 has been shown to improve audio quality. - n_iters: Number of iterations of convertion magnitude spectrograms to audio signal. 
- - """ - self.mag_scale = mag_scale - self.power = power - self.n_iters = n_iters - self.n_fft = n_fft - self.filterbank = librosa.filters.mel(sr=sample_rate, n_fft=n_fft, **kwargs) - - def __call__(self, mel_postnet: NmTensor) -> NmTensor: - return mel_postnet - - def get_audio(self, mel_spec: list, mel_len: list) -> List[ndarray]: - audios = [] - for i, batch in enumerate(mel_spec): - log_mel = batch.cpu().numpy().transpose(0, 2, 1) - mel = np.exp(log_mel) - magnitudes = np.dot(mel, self.filterbank) * self.mag_scale - for j, sample in enumerate(magnitudes): - sample = sample[:mel_len[i][j], :] - audio = self.griffin_lim(sample.T ** self.power) - audios.append(audio) - return audios - - def griffin_lim(self, magnitudes): - """Griffin-Lim algorithm to convert magnitude spectrograms to audio signals.""" - phase = np.exp(2j * np.pi * np.random.rand(*magnitudes.shape)) - complex_spec = magnitudes * phase - signal = librosa.istft(complex_spec) - - for _ in range(self.n_iters): - _, phase = librosa.magphase(librosa.stft(signal, n_fft=self.n_fft)) - complex_spec = magnitudes * phase - signal = librosa.istft(complex_spec) - return signal diff --git a/deeppavlov/requirements/nemo-asr.txt b/deeppavlov/requirements/nemo-asr.txt deleted file mode 100644 index 1a072b36b7..0000000000 --- a/deeppavlov/requirements/nemo-asr.txt +++ /dev/null @@ -1,7 +0,0 @@ -frozendict==1.2 -kaldi-io==0.9.4 -inflect==4.1.0 -unidecode==1.1.1 -librosa==0.7.2 -torch-stft==0.1.4 -numba==0.48 \ No newline at end of file diff --git a/deeppavlov/requirements/nemo-tts.txt b/deeppavlov/requirements/nemo-tts.txt deleted file mode 100644 index a0f3139b34..0000000000 --- a/deeppavlov/requirements/nemo-tts.txt +++ /dev/null @@ -1,3 +0,0 @@ -matplotlib==3.2.1 -sentencepiece==0.1.85 -youtokentome==1.0.6 \ No newline at end of file diff --git a/deeppavlov/requirements/nemo.txt b/deeppavlov/requirements/nemo.txt deleted file mode 100644 index e6f8ff402a..0000000000 --- a/deeppavlov/requirements/nemo.txt +++ /dev/null @@ -1 +0,0 @@ -nemo-toolkit==0.10.0 \ No newline at end of file diff --git a/deeppavlov/requirements/pytorch14.txt b/deeppavlov/requirements/pytorch14.txt deleted file mode 100644 index f940e921a8..0000000000 --- a/deeppavlov/requirements/pytorch14.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch==1.4.0 -torchvision==0.5.0 \ No newline at end of file diff --git a/docs/apiref/models/nemo.rst b/docs/apiref/models/nemo.rst deleted file mode 100644 index 27c2054336..0000000000 --- a/docs/apiref/models/nemo.rst +++ /dev/null @@ -1,32 +0,0 @@ -deeppavlov.models.nemo -====================== - -.. autoclass:: deeppavlov.models.nemo.asr.NeMoASR - - .. automethod:: __init__ - .. automethod:: __call__ - -.. autoclass:: deeppavlov.models.nemo.tts.NeMoTTS - - .. automethod:: __init__ - .. automethod:: __call__ - -.. autofunction:: deeppavlov.models.nemo.common.ascii_to_bytes_io - -.. autofunction:: deeppavlov.models.nemo.common.bytes_io_to_ascii - -.. autoclass:: deeppavlov.models.nemo.asr.AudioInferDataLayer - - .. automethod:: __init__ - -.. autoclass:: deeppavlov.models.nemo.tts.TextDataLayer - - .. automethod:: __init__ - -.. autoclass:: deeppavlov.models.nemo.vocoder.WaveGlow - - .. automethod:: __init__ - -.. autoclass:: deeppavlov.models.nemo.vocoder.GriffinLim - - .. 
automethod:: __init__ diff --git a/docs/conf.py b/docs/conf.py index b3a4f11237..bf2c5039b1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -190,10 +190,10 @@ # -- Extension configuration ------------------------------------------------- -autodoc_mock_imports = ['bert_dp', 'bs4', 'faiss', 'fastText', 'fasttext', 'gensim', 'hdt', 'kenlm', 'librosa', - 'lxml', 'nemo', 'nemo_asr', 'nemo_tts', 'nltk', 'opt_einsum', 'rapidfuzz', 'rasa', - 'russian_tagsets', 'sacremoses', 'sortedcontainers', 'spacy', 'tensorflow', 'tensorflow_hub', - 'torch', 'transformers', 'udapi', 'ufal_udpipe', 'whapi', 'xeger'] +autodoc_mock_imports = ['bert_dp', 'bs4', 'faiss', 'fastText', 'fasttext', 'gensim', 'hdt', 'kenlm', 'librosa', 'lxml', + 'nltk', 'opt_einsum', 'rapidfuzz', 'rasa', 'russian_tagsets', 'sacremoses', 'sortedcontainers', + 'spacy', 'tensorflow', 'tensorflow_hub', 'torch', 'transformers', 'udapi', 'ufal_udpipe', + 'whapi', 'xeger'] extlinks = { 'config': (f'https://github.com/deepmipt/DeepPavlov/blob/{release}/deeppavlov/configs/%s', None) diff --git a/docs/features/models/nemo.rst b/docs/features/models/nemo.rst deleted file mode 100644 index bfa3bd4421..0000000000 --- a/docs/features/models/nemo.rst +++ /dev/null @@ -1,164 +0,0 @@ -Speech recognition and synthesis (ASR and TTS) -============================================== - -DeepPavlov contains models for automatic speech recognition (ASR) and text synthesis (TTS) based on pre-built modules -from `NeMo <https://nvidia.github.io/NeMo/index.html>`__ (v0.10.0), NVIDIA's toolkit for defining and building -Conversational AI applications. Named arguments for module initialization are taken from the NeMo config file (not -to be confused with the DeepPavlov config file that defines the model pipeline). - -Speech recognition ------------------- - -The ASR pipeline is based on Jasper, a CTC-based end-to-end model. The model transcribes speech samples without -any additional alignment information. :class:`~deeppavlov.models.nemo.asr.NeMoASR` contains the following modules: - -- `AudioToMelSpectrogramPreprocessor `_ - uses arguments from ``AudioToMelSpectrogramPreprocessor`` section of the NeMo config file. -- `JasperEncoder `__ - uses arguments from ``JasperEncoder`` section of the NeMo config file. Needs pretrained checkpoint. -- `JasperDecoderForCTC `__ - uses arguments from ``JasperDecoder`` section of the NeMo config file. Needs pretrained checkpoint. -- `GreedyCTCDecoder `__ - doesn't use any arguments. -- :class:`~deeppavlov.models.nemo.asr.AudioInferDataLayer` - uses arguments from ``AudioToTextDataLayer`` section of the NeMo config file. - -The NeMo config file for ASR should contain a ``labels`` argument in addition to the named arguments for the modules above. ``labels`` is -the list of characters that can be output by the ASR model, as used in model training. - -Speech synthesis ---------------- - -The TTS pipeline that creates human-audible speech from text is based on Tacotron 2 and WaveGlow models. -:class:`~deeppavlov.models.nemo.tts.NeMoTTS` contains the following modules: - -- `TextEmbedding `__ - uses arguments from ``TextEmbedding`` section of the NeMo config file. Needs pretrained checkpoint. -- `Tacotron2Encoder `__ - uses arguments from ``Tacotron2Encoder`` section of the NeMo config file. Needs pretrained checkpoint. -- `Tacotron2DecoderInfer `__ - uses arguments from ``Tacotron2Decoder`` section of the NeMo config file. Needs pretrained checkpoint. -- `Tacotron2Postnet `__ - uses arguments from ``Tacotron2Postnet`` section of the NeMo config file. Needs pretrained checkpoint. 
-- :class:`~deeppavlov.models.nemo.vocoder.WaveGlow` - uses arguments from ``WaveGlowNM`` section of the NeMo config file. Needs pretrained checkpoint. -- :class:`~deeppavlov.models.nemo.vocoder.GriffinLim` - uses arguments from ``GriffinLim`` section of the NeMo config file. -- :class:`~deeppavlov.models.nemo.tts.TextDataLayer` - uses arguments from ``TranscriptDataLayer`` section of the NeMo config file. - -The NeMo config file for TTS should contain ``labels`` and ``sample_rate`` arguments in addition to the named arguments for the modules -above. ``labels`` is a list of characters used in TTS model training. - -Audio encoding and decoding ---------------------------- - -:func:`~deeppavlov.models.nemo.common.ascii_to_bytes_io` and :func:`~deeppavlov.models.nemo.common.bytes_io_to_ascii` -were added to the library to achieve uniformity when working with both text and audio data. These components can be used to encode -binary data to an ASCII string and decode it back. - -Quick Start ----------- - -Preparation -~~~~~~~~~~~ - -Install requirements and download model files. - -.. code:: bash - - python -m deeppavlov install asr_tts - python -m deeppavlov download asr_tts - -The examples below use the `sounddevice `_ library. Install -it with ``pip install sounddevice==0.3.15``. You may need to install the ``libportaudio2`` package with -``sudo apt-get install libportaudio2`` to make ``sounddevice`` work. - -.. note:: - ASR reads and TTS generates single-channel WAV files. Files transferred to ASR are resampled to the frequency - specified in the NeMo config file (16 kHz for models from DeepPavlov configs). - -Speech recognition -~~~~~~~~~~~~~~~~~~ - -The DeepPavlov :config:`asr <nemo/asr.json>` config contains a minimal pipeline for English speech recognition using the -`QuartzNet15x5En `_ pretrained model. -To record speech on your computer and print its transcription, run the following script: - -.. code:: python - - from io import BytesIO - - import sounddevice as sd - from scipy.io.wavfile import write - - from deeppavlov import build_model, configs - - sr = 16000 - duration = 3 - - print('Recording...') - myrecording = sd.rec(duration*sr, samplerate=sr, channels=1) - sd.wait() - print('done') - - out = BytesIO() - write(out, sr, myrecording) - - model = build_model(configs.nemo.asr) - text_batch = model([out]) - - print(text_batch[0]) - -Speech synthesis -~~~~~~~~~~~~~~~~ - -The DeepPavlov :config:`tts <nemo/tts.json>` config contains a minimal pipeline for speech synthesis using the -`Tacotron2 `_ and -`WaveGlow `_ pretrained models. -To generate an audio file and save it to disk, run the following script: - -.. code:: python - - from deeppavlov import build_model, configs - - model = build_model(configs.nemo.tts) - filepath_batch = model(['Hello world'], ['~/hello_world.wav']) - - print(f'Generated speech has been successfully saved at {filepath_batch[0]}') - -Speech to speech -~~~~~~~~~~~~~~~~ - -The previous examples assume that the files with speech to recognize and the files to be generated are on the same system where -DeepPavlov is running. The DeepPavlov :config:`asr_tts <nemo/asr_tts.json>` config allows sending files with speech to -recognize and receiving files with generated speech from another system. This config recognizes received speech and -synthesizes it back. - -Run ``asr_tts`` in REST API mode: - -.. code:: bash - - python -m deeppavlov riseapi asr_tts - -The following Python script assumes that you already have a file with speech to recognize. You can use code from the speech -recognition example to record speech on your system. ``127.0.0.1`` should be replaced with the address of the system where -DeepPavlov is running. - -.. 
code:: python - - from base64 import encodebytes, decodebytes - - from requests import post - - with open('/path/to/wav/file/with/speech', 'rb') as fin: - input_speech = fin.read() - - input_ascii = encodebytes(input_speech).decode('ascii') - - resp = post('http://127.0.0.1:5000/model', json={"speech_in_encoded": [input_ascii]}) - text, generated_speech_ascii = resp.json()[0] - generated_speech = decodebytes(generated_speech_ascii.encode()) - - with open('/path/where/to/save/generated/wav/file', 'wb') as fout: - fout.write(generated_speech) - - print(f'Speech transcription is: {text}') - -.. warning:: - The NeMo library v0.10.0 doesn't allow inference on batches of more than one sample without a compatible NVIDIA GPU. - -Model training --------------- - -To get your own pre-trained checkpoints for NeMo modules, see the `Speech recognition `_ -and `Speech Synthesis `_ tutorials. A list of pre-trained models can be found -`here `_. \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index d6ca9489b3..2557de9120 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -35,7 +35,6 @@ Welcome to DeepPavlov's documentation! Morphological Tagger Named Entity Recognition Neural Ranking - Speech recognition and synthesis Spelling Correction Syntactic Parser TF-IDF Ranking diff --git a/tests/test_configs/nemo/tts2asr_test.json b/tests/test_configs/nemo/tts2asr_test.json deleted file mode 100644 index fbc46bd6f0..0000000000 --- a/tests/test_configs/nemo/tts2asr_test.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "chainer": { - "in": ["text"], - "pipe": [ - { - "class_name": "nemo_tts", - "nemo_params_path": "{TTS_PATH}/tacotron2_waveglow.yaml", - "load_path": "{TTS_PATH}", - "in": ["text"], - "out": ["speech"] - }, - { - "class_name": "bytesIO_encode_base64", - "in": ["speech"], - "out": ["ascii"] - }, - { - "class_name": "base64_decode_bytesIO", - "in": ["ascii"], - "out": ["speech_restored"] - }, - { - "class_name": "nemo_asr", - "nemo_params_path": "{NEMO_PATH}/quartznet15x5/quartznet15x5.yaml", - "load_path": "{NEMO_PATH}/quartznet15x5", - "in": ["speech_restored"], - "out": ["transcription"] - } - ], - "out": ["transcription"] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "NEMO_PATH": "{ROOT_PATH}/models/nemo", - "TTS_PATH": "{NEMO_PATH}/tacotron2_waveglow" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/tacotron2_waveglow.tar.gz", - "subdir": "{NEMO_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/quartznet15x5.tar.gz", - "subdir": "{NEMO_PATH}" - } - ] - } -} \ No newline at end of file diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py index 4f5315bb09..d6be223549 100644 --- a/tests/test_quick_start.py +++ b/tests/test_quick_start.py @@ -271,9 +271,6 @@ "syntax_tagger": { ("syntax/syntax_ru_syntagrus_bert.json", "syntax_ru_bert", ('IP', 'TI')): [ONE_ARGUMENT_INFER_CHECK], ("syntax/ru_syntagrus_joint_parsing.json", "syntax_ru_bert", ('IP',)): [ONE_ARGUMENT_INFER_CHECK] - }, - "nemo": { - ("nemo/tts2asr_test.json", "nemo", ('IP',)): [ONE_ARGUMENT_INFER_CHECK] } }
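For clients that talk to the removed ``asr_tts`` HTTP endpoint, the base64 round trip that the deleted ``base64_decode_bytesIO`` and ``bytesIO_encode_base64`` components performed can be kept client-side with the Python standard library alone. The sketch below copies the helper bodies from the deleted ``deeppavlov/models/nemo/common.py``; the sample payload bytes are a placeholder for illustration, not a playable WAV file.

.. code:: python

    import base64
    from io import BytesIO
    from typing import Union


    def ascii_to_bytes_io(batch: Union[str, list]) -> Union[BytesIO, list]:
        # Recursively decode base64 ASCII strings into Binary I/O objects.
        if isinstance(batch, str):
            return BytesIO(base64.decodebytes(batch.encode()))
        return list(map(ascii_to_bytes_io, batch))


    def bytes_io_to_ascii(batch: Union[BytesIO, list]) -> Union[str, list]:
        # Recursively encode Binary I/O objects into base64 ASCII strings.
        if isinstance(batch, BytesIO):
            return base64.encodebytes(batch.read()).decode('ascii')
        return list(map(bytes_io_to_ascii, batch))


    # Round trip: raw WAV bytes -> ASCII-safe payload -> raw bytes again.
    wav_bytes = b'RIFF$\x00\x00\x00WAVEfmt '  # placeholder bytes
    payload = bytes_io_to_ascii([BytesIO(wav_bytes)])
    restored = ascii_to_bytes_io(payload)
    assert restored[0].read() == wav_bytes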