diff --git a/CHANGELOG.md b/CHANGELOG.md
index 49a264a7e..20250d672 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,10 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 ### Added
-- 
-- 
-- 
-- 
+- Add `TS2VecEmbeddingModel` ([#253](https://github.com/etna-team/etna/pull/253))
+- Add `EmbeddingSegmentTransform` ([#265](https://github.com/etna-team/etna/pull/265))
+- Add `EmbeddingWindowTransform` ([#265](https://github.com/etna-team/etna/pull/265))
+- Add `TSTCCEmbeddingModel` ([#294](https://github.com/etna-team/etna/pull/294))
+- Add `210-embedding_models` example notebook ([#304](https://github.com/etna-team/etna/pull/304))
 - 
 - 
 - 
diff --git a/README.md b/README.md
index 0b92a0f48..e186b74c7 100644
--- a/README.md
+++ b/README.md
@@ -175,27 +175,28 @@ To set up a configuration for your project you should create a `.etna` file at t
 
 We have also prepared a set of tutorials for an easy introduction:
 
-| Notebook                                                                                                                    |                                                                                                                                        Interactive launch |
-|:----------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------:|
-| [Get started](https://github.com/etna-team/etna/tree/master/examples/101-get_started.ipynb)                                 |                [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/101-get_started.ipynb) |
-| [Backtest](https://github.com/etna-team/etna/tree/master/examples/102-backtest.ipynb)                                       |                   [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/102-backtest.ipynb) |
-| [EDA](https://github.com/etna-team/etna/tree/master/examples/103-EDA.ipynb)                                                 |                        [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/103-EDA.ipynb) |
-| [Regressors and exogenous data](https://github.com/etna-team/etna/tree/master/examples/201-exogenous_data.ipynb)            |             [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/201-exogenous_data.ipynb) |
-| [Deep learning models](https://github.com/etna-team/etna/tree/master/examples/202-NN_examples.ipynb)                        |                [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/202-NN_examples.ipynb) |
-| [Ensembles](https://github.com/etna-team/etna/tree/master/examples/303-ensembles.ipynb)                                     |                  [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/203-ensembles.ipynb) |
-| [Outliers](https://github.com/etna-team/etna/tree/master/examples/204-outliers.ipynb)                                       |                   [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/204-outliers.ipynb) |
-| [AutoML](https://github.com/etna-team/etna/tree/master/examples/205-automl.ipynb)                                           |                     [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/205-automl.ipynb) |
-| [Clustering](https://github.com/etna-team/etna/tree/master/examples/206-clustering.ipynb)                                   |                 [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/206-clustering.ipynb) |
-| [Feature selection](https://github.com/etna-team/etna/blob/master/examples/207-feature_selection.ipynb)                     |          [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/207-feature_selection.ipynb) |
-| [Forecasting strategies](https://github.com/etna-team/etna/tree/master/examples/208-forecasting_strategies.ipynb)           |     [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/208-forecasting_strategies.ipynb) |
-| [Mechanics of forecasting](https://github.com/etna-team/etna/blob/master/examples/209-mechanics_of_forecasting.ipynb)       |   [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/209-mechanics_of_forecasting.ipynb) |
-| [Custom model and transform](https://github.com/etna-team/etna/tree/master/examples/301-custom_transform_and_model.ipynb)   | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/301-custom_transform_and_model.ipynb) |
-| [Inference: using saved pipeline on a new data](https://github.com/etna-team/etna/tree/master/examples/302-inference.ipynb) |                  [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/302-inference.ipynb) |
-| [Hierarchical time series](https://github.com/etna-team/etna/blob/master/examples/303-hierarchical_pipeline.ipynb)          |      [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/303-hierarchical_pipeline.ipynb) |
-| [Forecast interpretation](https://github.com/etna-team/etna/tree/master/examples/304-forecasting_interpretation.ipynb)      |    [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/304-forecasting_interpretation.ipynb) |
-| [Classification](https://github.com/etna-team/etna/blob/master/examples/305-classification.ipynb)                           |             [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/305-classification.ipynb) |
-| [Prediction intervals](https://github.com/etna-team/etna/tree/master/examples/306-prediction_intervals.ipynb)               |       [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/306-prediction_intervals.ipynb) |
-| [Working with misaligned data](https://github.com/etna-team/etna/tree/master/examples/307-working_with_misaligned_data.ipynb)       |       [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/307-working_with_misaligned_data.ipynb) |
+| Notebook                                                                                                                      |                                                                                                                                          Interactive launch |
+|:------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------:|
+| [Get started](https://github.com/etna-team/etna/tree/master/examples/101-get_started.ipynb)                                   |                  [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/101-get_started.ipynb) |
+| [Backtest](https://github.com/etna-team/etna/tree/master/examples/102-backtest.ipynb)                                         |                     [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/102-backtest.ipynb) |
+| [EDA](https://github.com/etna-team/etna/tree/master/examples/103-EDA.ipynb)                                                   |                          [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/103-EDA.ipynb) |
+| [Regressors and exogenous data](https://github.com/etna-team/etna/tree/master/examples/201-exogenous_data.ipynb)              |               [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/201-exogenous_data.ipynb) |
+| [Deep learning models](https://github.com/etna-team/etna/tree/master/examples/202-NN_examples.ipynb)                          |                  [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/202-NN_examples.ipynb) |
+| [Ensembles](https://github.com/etna-team/etna/tree/master/examples/203-ensembles.ipynb)                                       |                    [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/203-ensembles.ipynb) |
+| [Outliers](https://github.com/etna-team/etna/tree/master/examples/204-outliers.ipynb)                                         |                     [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/204-outliers.ipynb) |
+| [AutoML](https://github.com/etna-team/etna/tree/master/examples/205-automl.ipynb)                                             |                       [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/205-automl.ipynb) |
+| [Clustering](https://github.com/etna-team/etna/tree/master/examples/206-clustering.ipynb)                                     |                   [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/206-clustering.ipynb) |
+| [Feature selection](https://github.com/etna-team/etna/blob/master/examples/207-feature_selection.ipynb)                       |            [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/207-feature_selection.ipynb) |
+| [Forecasting strategies](https://github.com/etna-team/etna/tree/master/examples/208-forecasting_strategies.ipynb)             |       [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/208-forecasting_strategies.ipynb) |
+| [Mechanics of forecasting](https://github.com/etna-team/etna/blob/master/examples/209-mechanics_of_forecasting.ipynb)         |     [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/209-mechanics_of_forecasting.ipynb) |
+| [Embedding models](https://github.com/etna-team/etna/blob/master/examples/210-embedding_models.ipynb)                         |             [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/210-embedding_models.ipynb) |
+| [Custom model and transform](https://github.com/etna-team/etna/tree/master/examples/301-custom_transform_and_model.ipynb)     |   [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/301-custom_transform_and_model.ipynb) |
+| [Inference: using saved pipeline on a new data](https://github.com/etna-team/etna/tree/master/examples/302-inference.ipynb)   |                    [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/302-inference.ipynb) |
+| [Hierarchical time series](https://github.com/etna-team/etna/blob/master/examples/303-hierarchical_pipeline.ipynb)            |        [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/303-hierarchical_pipeline.ipynb) |
+| [Forecast interpretation](https://github.com/etna-team/etna/tree/master/examples/304-forecasting_interpretation.ipynb)        |   [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/304-forecasting_interpretation.ipynb) |
+| [Classification](https://github.com/etna-team/etna/blob/master/examples/305-classification.ipynb)                             |               [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/305-classification.ipynb) |
+| [Prediction intervals](https://github.com/etna-team/etna/tree/master/examples/306-prediction_intervals.ipynb)                 |         [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/306-prediction_intervals.ipynb) |
+| [Working with misaligned data](https://github.com/etna-team/etna/tree/master/examples/307-working_with_misaligned_data.ipynb) | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/307-working_with_misaligned_data.ipynb) |
 
 ## Documentation
 
diff --git a/docs/source/api_reference/transforms.rst b/docs/source/api_reference/transforms.rst
index 75a487c7d..2ae54ed23 100644
--- a/docs/source/api_reference/transforms.rst
+++ b/docs/source/api_reference/transforms.rst
@@ -56,6 +56,19 @@ Categorical encoding transforms:
    LabelEncoderTransform
    OneHotEncoderTransform
 
+.. _embeddings:
+
+Embedding transforms and their utilities:
+
+.. autosummary::
+   :toctree: api/
+   :template: class.rst
+
+   EmbeddingSegmentTransform
+   EmbeddingWindowTransform
+   embeddings.models.TS2VecEmbeddingModel
+   embeddings.models.TSTCCEmbeddingModel
+
 Feature selection transforms:
 
 .. autosummary::
diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst
index 80122dba5..aab745ea9 100644
--- a/docs/source/tutorials.rst
+++ b/docs/source/tutorials.rst
@@ -125,6 +125,15 @@ Intermediate
       ^^^
       How pipelines are making forecasts under the hood
 
+   .. grid-item-card:: Embedding models
+      :text-align: center
+      :link: tutorials/210-embedding_models
+      :link-type: doc
+      :class-header: card-tutorial-intermediate
+
+      ^^^
+      How to use embedding models
+
 Advanced
 --------
 
diff --git a/etna/libs/ts2vec/__init__.py b/etna/libs/ts2vec/__init__.py
new file mode 100644
index 000000000..7438a82db
--- /dev/null
+++ b/etna/libs/ts2vec/__init__.py
@@ -0,0 +1 @@
+from etna.libs.ts2vec.ts2vec import TS2Vec
diff --git a/etna/libs/ts2vec/dilated_conv.py b/etna/libs/ts2vec/dilated_conv.py
new file mode 100644
index 000000000..eb09b89bb
--- /dev/null
+++ b/etna/libs/ts2vec/dilated_conv.py
@@ -0,0 +1,83 @@
+"""
+MIT License
+
+Copyright (c) 2022 Zhihan Yue
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts2vec repository (https://github.com/yuezhihan/ts2vec/tree/main)
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+import numpy as np
+
+
+class SamePadConv(nn.Module):  # Conv1d wrapper that pads both ends and trims the surplus output step
+    def __init__(self, in_channels, out_channels, kernel_size, dilation=1, groups=1):
+        super().__init__()
+        self.receptive_field = (kernel_size - 1) * dilation + 1  # input span covered by one dilated kernel
+        padding = self.receptive_field // 2  # nn.Conv1d applies this amount to both ends
+        self.conv = nn.Conv1d(
+            in_channels, out_channels, kernel_size,
+            padding=padding,
+            dilation=dilation,
+            groups=groups
+        )
+        self.remove = 1 if self.receptive_field % 2 == 0 else 0  # even receptive field => padding yields one extra output step
+
+    def forward(self, x):
+        out = self.conv(x)
+        if self.remove > 0:
+            out = out[:, :, : -self.remove]  # drop the extra trailing step along the last axis
+        return out
+
+
+class ConvBlock(nn.Module):  # residual block: two GELU-activated SamePadConv layers plus a skip connection
+    def __init__(self, in_channels, out_channels, kernel_size, dilation, final=False):
+        super().__init__()
+        self.conv1 = SamePadConv(in_channels, out_channels, kernel_size, dilation=dilation)
+        self.conv2 = SamePadConv(out_channels, out_channels, kernel_size, dilation=dilation)
+        self.projector = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels or final else None  # 1x1 conv on the skip path when channel counts differ or `final` is set
+
+    def forward(self, x):
+        residual = x if self.projector is None else self.projector(x)  # skip path, projected when a projector exists
+        x = F.gelu(x)  # activation is applied before each convolution
+        x = self.conv1(x)
+        x = F.gelu(x)
+        x = self.conv2(x)
+        return x + residual
+
+
+class DilatedConvEncoder(nn.Module):  # sequential stack of ConvBlocks, one per entry of `channels`
+    def __init__(self, in_channels, channels, kernel_size):
+        super().__init__()
+        self.net = nn.Sequential(*[
+            ConvBlock(
+                channels[i - 1] if i > 0 else in_channels,  # input width is the previous block's output width
+                channels[i],
+                kernel_size=kernel_size,
+                dilation=2 ** i,  # dilation doubles with each successive block
+                final=(i == len(channels) - 1)  # only the last block is flagged as final
+            )
+            for i in range(len(channels))
+        ])
+
+    def forward(self, x):
+        return self.net(x)  # blocks are applied in order
diff --git a/etna/libs/ts2vec/encoder.py b/etna/libs/ts2vec/encoder.py
new file mode 100644
index 000000000..983a4c38b
--- /dev/null
+++ b/etna/libs/ts2vec/encoder.py
@@ -0,0 +1,101 @@
+"""
+MIT License
+
+Copyright (c) 2022 Zhihan Yue
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts2vec repository (https://github.com/yuezhihan/ts2vec/tree/main)
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+import numpy as np
+from etna.libs.ts2vec.dilated_conv import DilatedConvEncoder
+
+
+def generate_continuous_mask(B, T, n=5, l=0.1):  # (B, T) bool mask: True except for n random runs of length l per row
+    res = torch.full((B, T), True, dtype=torch.bool)
+    if isinstance(n, float):
+        n = int(n * T)  # a float n is read as a fraction of T
+    n = max(min(n, T // 2), 1)  # cap the run count at T // 2 but always keep at least one run
+
+    if isinstance(l, float):
+        l = int(l * T)  # a float l is read as a fraction of T
+    l = max(l, 1)  # run length is at least one step
+
+    for i in range(B):
+        for _ in range(n):
+            t = np.random.randint(T - l + 1)  # random start chosen so the run fits inside the row
+            res[i, t:t + l] = False  # runs may overlap, so fewer than n * l entries can end up False
+    return res
+
+
+def generate_binomial_mask(B, T, p=0.5):  # (B, T) bool mask; each entry is independently True with probability p
+    return torch.from_numpy(np.random.binomial(1, p, size=(B, T))).to(torch.bool)
+
+
+class TSEncoder(nn.Module):  # linear input projection -> timestamp masking -> dilated conv encoder -> dropout
+    def __init__(self, input_dims, output_dims, hidden_dims=64, depth=10, mask_mode='binomial'):
+        super().__init__()
+        self.input_dims = input_dims
+        self.output_dims = output_dims
+        self.hidden_dims = hidden_dims
+        self.mask_mode = mask_mode  # mask name used by forward() in training mode when no mask is passed
+        self.input_fc = nn.Linear(input_dims, hidden_dims)
+        self.feature_extractor = DilatedConvEncoder(
+            hidden_dims,
+            [hidden_dims] * depth + [output_dims],  # `depth` hidden-width blocks followed by one output-width block
+            kernel_size=3
+        )
+        self.repr_dropout = nn.Dropout(p=0.1)
+
+    def forward(self, x, mask=None):  # x: B x T x input_dims
+        nan_mask = ~x.isnan().any(axis=-1)  # True where a timestamp has no NaN feature
+        x[~nan_mask] = 0  # NOTE: in-place write, so the caller's tensor is modified as well
+        x = self.input_fc(x)  # B x T x Ch
+
+        # generate & apply mask
+        if mask is None:
+            if self.training:
+                mask = self.mask_mode  # training mode falls back to the configured mask mode
+            else:
+                mask = 'all_true'  # evaluation mode falls back to no masking
+
+        if mask == 'binomial':
+            mask = generate_binomial_mask(x.size(0), x.size(1)).to(x.device)
+        elif mask == 'continuous':
+            mask = generate_continuous_mask(x.size(0), x.size(1)).to(x.device)
+        elif mask == 'all_true':
+            mask = x.new_full((x.size(0), x.size(1)), True, dtype=torch.bool)
+        elif mask == 'all_false':
+            mask = x.new_full((x.size(0), x.size(1)), False, dtype=torch.bool)
+        elif mask == 'mask_last':
+            mask = x.new_full((x.size(0), x.size(1)), True, dtype=torch.bool)
+            mask[:, -1] = False  # hide only the final timestamp of every series
+
+        mask &= nan_mask  # timestamps that contained NaNs are always masked out
+        x[~mask] = 0  # zero the projected features at masked timestamps
+
+        # conv encoder
+        x = x.transpose(1, 2)  # B x Ch x T
+        x = self.repr_dropout(self.feature_extractor(x))  # B x Co x T
+        x = x.transpose(1, 2)  # B x T x Co
+
+        return x
diff --git a/etna/libs/ts2vec/losses.py b/etna/libs/ts2vec/losses.py
new file mode 100644
index 000000000..bcc0d23f4
--- /dev/null
+++ b/etna/libs/ts2vec/losses.py
@@ -0,0 +1,78 @@
+"""
+MIT License
+
+Copyright (c) 2022 Zhihan Yue
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts2vec repository (https://github.com/yuezhihan/ts2vec/tree/main)
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+
+def hierarchical_contrastive_loss(z1, z2, alpha=0.5, temporal_unit=0):  # instance/temporal contrastive loss averaged over pooled time scales
+    loss = torch.tensor(0., device=z1.device)
+    d = 0  # number of scales accumulated so far
+    while z1.size(1) > 1:
+        if alpha != 0:
+            loss += alpha * instance_contrastive_loss(z1, z2)
+        if d >= temporal_unit:  # the temporal term is skipped for the first `temporal_unit` scales
+            if 1 - alpha != 0:
+                loss += (1 - alpha) * temporal_contrastive_loss(z1, z2)
+        d += 1
+        z1 = F.max_pool1d(z1.transpose(1, 2), kernel_size=2).transpose(1, 2)  # max-pool dim 1 with window 2 for the next scale
+        z2 = F.max_pool1d(z2.transpose(1, 2), kernel_size=2).transpose(1, 2)
+    if z1.size(1) == 1:
+        if alpha != 0:
+            loss += alpha * instance_contrastive_loss(z1, z2)  # only the instance term is added at the final single-step scale
+        d += 1
+    return loss / d  # NOTE(review): d stays 0 if z1.size(1) == 0 on entry -- confirm callers never pass that
+
+
+def instance_contrastive_loss(z1, z2):  # contrasts the batch entries of z1 and z2 against each other at every timestamp
+    B, T = z1.size(0), z1.size(1)
+    if B == 1:
+        return z1.new_tensor(0.)  # a single batch entry has nothing to be contrasted with
+    z = torch.cat([z1, z2], dim=0)  # 2B x T x C
+    z = z.transpose(0, 1)  # T x 2B x C
+    sim = torch.matmul(z, z.transpose(1, 2))  # T x 2B x 2B
+    logits = torch.tril(sim, diagonal=-1)[:, :, :-1]  # T x 2B x (2B-1)
+    logits += torch.triu(sim, diagonal=1)[:, :, 1:]  # lower + shifted upper part = sim with its diagonal removed
+    logits = -F.log_softmax(logits, dim=-1)
+
+    i = torch.arange(B, device=z1.device)
+    loss = (logits[:, i, B + i - 1].mean() + logits[:, B + i, i].mean()) / 2  # positives pair row i with B + i; the -1 offsets the removed diagonal
+    return loss
+
+
+def temporal_contrastive_loss(z1, z2):  # contrasts the timestamps of z1 and z2 against each other within every batch entry
+    B, T = z1.size(0), z1.size(1)
+    if T == 1:
+        return z1.new_tensor(0.)  # a single timestamp has nothing to be contrasted with
+    z = torch.cat([z1, z2], dim=1)  # B x 2T x C
+    sim = torch.matmul(z, z.transpose(1, 2))  # B x 2T x 2T
+    logits = torch.tril(sim, diagonal=-1)[:, :, :-1]  # B x 2T x (2T-1)
+    logits += torch.triu(sim, diagonal=1)[:, :, 1:]  # lower + shifted upper part = sim with its diagonal removed
+    logits = -F.log_softmax(logits, dim=-1)
+
+    t = torch.arange(T, device=z1.device)
+    loss = (logits[:, t, T + t - 1].mean() + logits[:, T + t, t].mean()) / 2  # positives pair row t with T + t; the -1 offsets the removed diagonal
+    return loss
diff --git a/etna/libs/ts2vec/ts2vec.py b/etna/libs/ts2vec/ts2vec.py
new file mode 100644
index 000000000..0a222efef
--- /dev/null
+++ b/etna/libs/ts2vec/ts2vec.py
@@ -0,0 +1,337 @@
+"""
+MIT License
+
+Copyright (c) 2022 Zhihan Yue
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts2vec repository (https://github.com/yuezhihan/ts2vec/tree/main)
+# Removed skipping training loop when model is already pretrained. Removed "multiscale" encode option.
+# Moved lr parameter to fit method
+
+import torch
+import torch.nn.functional as F
+from torch.utils.data import TensorDataset, DataLoader
+import numpy as np
+from etna.libs.ts2vec.encoder import TSEncoder
+from etna.loggers import tslogger, ConsoleLogger
+from etna.libs.ts2vec.losses import  hierarchical_contrastive_loss
+from etna.libs.ts2vec.utils import take_per_row, split_with_nan, centerize_vary_length_series, torch_pad_nan, AveragedModel
+import math
+
+
+class TS2Vec:
+    '''The TS2Vec model: trains a TSEncoder with a hierarchical contrastive loss and encodes with its averaged copy.'''
+
+    def __init__(
+            self,
+            input_dims,
+            output_dims=320,
+            hidden_dims=64,
+            depth=10,
+            device='cuda',
+            batch_size=16,
+            max_train_length=None,
+            temporal_unit=0,
+            after_iter_callback=None,
+            after_epoch_callback=None
+    ):
+        ''' Initialize a TS2Vec model.
+
+        Args:
+            input_dims (int): The input dimension. For a univariate time series, this should be set to 1.
+            output_dims (int): The representation dimension.
+            hidden_dims (int): The hidden dimension of the encoder.
+            depth (int): The number of hidden residual blocks in the encoder.
+            device (str): The device (e.g. 'cuda' or 'cpu') used for training and inference.
+            batch_size (int): The batch size.
+            max_train_length (Union[int, NoneType]): The maximum allowed sequence length for training. For sequence with a length greater than <max_train_length>, it would be cropped into some sequences, each of which has a length less than <max_train_length>.
+            temporal_unit (int): The minimum unit to perform temporal contrast. When training on a very long sequence, this param helps to reduce the cost of time and memory.
+            after_iter_callback (Union[Callable, NoneType]): A callback function called as callback(model, loss) after each iteration.
+            after_epoch_callback (Union[Callable, NoneType]): A callback function called as callback(model, mean_loss) after each epoch.
+        '''
+
+        super().__init__()
+        self.device = device
+        self.batch_size = batch_size
+        self.max_train_length = max_train_length
+        self.temporal_unit = temporal_unit
+
+        self._net = TSEncoder(input_dims=input_dims, output_dims=output_dims, hidden_dims=hidden_dims, depth=depth).to(self.device)
+        self.net = AveragedModel(self._net)  # averaged copy of the trained encoder; this is what encode/save/load use
+        self.net.update_parameters(self._net)
+
+        self.after_iter_callback = after_iter_callback
+        self.after_epoch_callback = after_epoch_callback
+
+        self.n_epochs = 0
+        self.n_iters = 0
+
+    def fit(self, train_data, lr=0.001, n_epochs=None, n_iters=None, verbose=False):
+        ''' Training the TS2Vec model.
+
+        Args:
+            train_data (numpy.ndarray): The training data. It should have a shape of (n_instance, n_timestamps, n_features). All missing data should be set to NaN.
+            lr (float): The learning rate.
+            n_epochs (Union[int, NoneType]): The number of epochs. When this reaches, the training stops.
+            n_iters (Union[int, NoneType]): The number of iterations. When this reaches, the training stops. If both n_epochs and n_iters are not specified, a default setting would be used that sets n_iters to 200 for a dataset with size <= 100000, 600 otherwise.
+            verbose (bool): Whether to log the training loss (via tslogger) after each epoch.
+
+        Returns:
+            loss_log: a list containing the mean training loss of each completed epoch.
+        '''
+
+        assert train_data.ndim == 3
+
+        if n_iters is None and n_epochs is None:
+            n_iters = 200 if train_data.size <= 100000 else 600  # default param for n_iters
+
+        if self.max_train_length is not None:
+            sections = train_data.shape[1] // self.max_train_length
+            if sections >= 2:
+                train_data = np.concatenate(split_with_nan(train_data, sections, axis=1), axis=0)
+
+        temporal_missing = np.isnan(train_data).all(axis=-1).any(axis=0)
+        if temporal_missing[0] or temporal_missing[-1]:
+            train_data = centerize_vary_length_series(train_data)
+
+        train_data = train_data[~np.isnan(train_data).all(axis=2).all(axis=1)]  # drop series that are entirely NaN
+
+        train_dataset = TensorDataset(torch.from_numpy(train_data).to(torch.float))
+        train_loader = DataLoader(train_dataset, batch_size=min(self.batch_size, len(train_dataset)), shuffle=True, drop_last=True)
+
+        optimizer = torch.optim.AdamW(self._net.parameters(), lr=lr)
+
+        loss_log = []
+
+        cur_epoch = 0
+        cur_iter = 0
+        while True:
+            if n_epochs is not None and cur_epoch >= n_epochs:
+                break
+
+            cum_loss = 0
+            n_epoch_iters = 0
+
+            interrupted = False
+            for batch in train_loader:
+                if n_iters is not None and cur_iter >= n_iters:
+                    interrupted = True
+                    break
+
+                x = batch[0]
+                if self.max_train_length is not None and x.size(1) > self.max_train_length:
+                    window_offset = np.random.randint(x.size(1) - self.max_train_length + 1)
+                    x = x[:, window_offset: window_offset + self.max_train_length]
+                x = x.to(self.device)
+
+                ts_l = x.size(1)
+                crop_l = np.random.randint(low=2 ** (self.temporal_unit + 1), high=ts_l + 1)  # length of the segment shared by both views
+                crop_left = np.random.randint(ts_l - crop_l + 1)
+                crop_right = crop_left + crop_l
+                crop_eleft = np.random.randint(crop_left + 1)  # view 1 starts at or before the shared segment
+                crop_eright = np.random.randint(low=crop_right, high=ts_l + 1)  # view 2 ends at or after the shared segment
+                crop_offset = np.random.randint(low=-crop_eleft, high=ts_l - crop_eright + 1, size=x.size(0))  # per-series shift
+
+                optimizer.zero_grad()
+
+                out1 = self._net(take_per_row(x, crop_offset + crop_eleft, crop_right - crop_eleft))
+                out1 = out1[:, -crop_l:]  # shared segment is the tail of view 1
+
+                out2 = self._net(take_per_row(x, crop_offset + crop_left, crop_eright - crop_left))
+                out2 = out2[:, :crop_l]  # shared segment is the head of view 2
+
+                loss = hierarchical_contrastive_loss(
+                    out1,
+                    out2,
+                    temporal_unit=self.temporal_unit
+                )
+
+                loss.backward()
+                optimizer.step()
+                self.net.update_parameters(self._net)  # fold the new weights into the averaged copy
+
+                cum_loss += loss.item()
+                n_epoch_iters += 1
+
+                cur_iter += 1
+
+                if self.after_iter_callback is not None:
+                    self.after_iter_callback(self, loss.item())
+
+            if interrupted:
+                break
+
+            cum_loss /= n_epoch_iters
+            loss_log.append(cum_loss)
+            if verbose:
+                tslogger.log(f"Epoch {cur_epoch}: loss={cum_loss:.4f}")
+            cur_epoch += 1
+
+            if self.after_epoch_callback is not None:
+                self.after_epoch_callback(self, cum_loss)
+
+        return loss_log
+
+    def _eval_with_pooling(self, x, mask=None, slicing=None, encoding_window=None):
+        '''Run the averaged encoder on x, apply the encoding_window max pooling and the optional time slicing, return a CPU tensor.'''
+        out = self.net(x.to(self.device, non_blocking=True), mask)
+        if encoding_window == 'full_series':
+            if slicing is not None:
+                out = out[:, slicing]
+            out = F.max_pool1d(
+                out.transpose(1, 2),
+                kernel_size=out.size(1),
+            ).transpose(1, 2)
+
+        elif isinstance(encoding_window, int):
+            out = F.max_pool1d(
+                out.transpose(1, 2),
+                kernel_size=encoding_window,
+                stride=1,
+                padding=encoding_window // 2
+            ).transpose(1, 2)
+            if encoding_window % 2 == 0:
+                out = out[:, :-1]
+            if slicing is not None:
+                out = out[:, slicing]
+
+        else:
+            if slicing is not None:
+                out = out[:, slicing]
+
+        return out.cpu()
+
+    def encode(self, data, mask=None, encoding_window=None, causal=False, sliding_length=None, sliding_padding=0, batch_size=None):
+        ''' Compute representations using the model.
+
+        Args:
+            data (numpy.ndarray): This should have a shape of (n_instance, n_timestamps, n_features). All missing data should be set to NaN.
+            mask (str): The mask used by encoder can be specified with this parameter. This can be set to 'binomial', 'continuous', 'all_true', 'all_false' or 'mask_last'.
+            encoding_window (Union[str, int]): When this param is specified, the computed representation would be the max pooling over this window. This can be set to 'full_series' or an integer specifying the pooling kernel size.
+            causal (bool): When this param is set to True, the future information would not be encoded into representation of each timestamp.
+            sliding_length (Union[int, NoneType]): The length of sliding window. When this param is specified, a sliding inference would be applied on the time series.
+            sliding_padding (int): This param specifies the contextual data length used for inference every sliding windows.
+            batch_size (Union[int, NoneType]): The batch size used for inference. If not specified, this would be the same batch size as training.
+
+        Returns:
+            repr: The representations for data: one vector per series for encoding_window='full_series', otherwise one per timestamp.
+        '''
+        assert self.net is not None, 'please train or load a net first'
+        assert data.ndim == 3
+        if batch_size is None:
+            batch_size = self.batch_size
+        n_samples, ts_l, _ = data.shape
+
+        org_training = self.net.training
+        self.net.eval()
+
+        dataset = TensorDataset(torch.from_numpy(data).to(torch.float))
+        loader = DataLoader(dataset, batch_size=batch_size)
+
+        with torch.no_grad():
+            output = []
+            for batch in loader:
+                x = batch[0]
+                if sliding_length is not None:
+                    reprs = []
+                    if n_samples < batch_size:
+                        calc_buffer = []
+                        calc_buffer_l = 0
+                    for i in range(0, ts_l, sliding_length):
+                        l = i - sliding_padding
+                        r = i + sliding_length + (sliding_padding if not causal else 0)
+                        x_sliding = torch_pad_nan(
+                            x[:, max(l, 0): min(r, ts_l)],
+                            left=-l if l < 0 else 0,
+                            right=r - ts_l if r > ts_l else 0,
+                            dim=1
+                        )
+                        if n_samples < batch_size:
+                            if calc_buffer_l + n_samples > batch_size:
+                                out = self._eval_with_pooling(
+                                    torch.cat(calc_buffer, dim=0),
+                                    mask,
+                                    slicing=slice(sliding_padding, sliding_padding + sliding_length),
+                                    encoding_window=encoding_window
+                                )
+                                reprs += torch.split(out, n_samples)
+                                calc_buffer = []
+                                calc_buffer_l = 0
+                            calc_buffer.append(x_sliding)
+                            calc_buffer_l += n_samples
+                        else:
+                            out = self._eval_with_pooling(
+                                x_sliding,
+                                mask,
+                                slicing=slice(sliding_padding, sliding_padding + sliding_length),
+                                encoding_window=encoding_window
+                            )
+                            reprs.append(out)
+
+                    if n_samples < batch_size:
+                        if calc_buffer_l > 0:
+                            out = self._eval_with_pooling(
+                                torch.cat(calc_buffer, dim=0),
+                                mask,
+                                slicing=slice(sliding_padding, sliding_padding + sliding_length),
+                                encoding_window=encoding_window
+                            )
+                            reprs += torch.split(out, n_samples)
+                            calc_buffer = []
+                            calc_buffer_l = 0
+
+                    out = torch.cat(reprs, dim=1)
+                    if encoding_window == 'full_series':
+                        # The pooled tensor is (batch, channels, 1): drop the LAST axis so the result is
+                        # (batch, channels), the same shape the non-sliding branch below returns.
+                        out = F.max_pool1d(
+                            out.transpose(1, 2).contiguous(),
+                            kernel_size=out.size(1),
+                        ).squeeze(-1)
+                else:
+                    out = self._eval_with_pooling(x, mask, encoding_window=encoding_window)
+                    if encoding_window == 'full_series':
+                        out = out.squeeze(1)
+
+                output.append(out)
+
+            output = torch.cat(output, dim=0)
+
+        self.net.train(org_training)
+        if encoding_window == 'full_series':
+            return output.numpy()
+        return output.numpy()[:, :ts_l, :]
+
+    def save(self, fn):
+        ''' Save the model to a file.
+
+        Args:
+            fn (str): filename.
+        '''
+        torch.save(self.net.state_dict(), fn)
+
+    def load(self, fn):
+        ''' Load the model from a file.
+
+        Args:
+            fn (str): filename.
+        '''
+        state_dict = torch.load(fn, map_location=self.device)  # NOTE: torch.load unpickles data; only load trusted files
+        self.net.load_state_dict(state_dict)
diff --git a/etna/libs/ts2vec/utils.py b/etna/libs/ts2vec/utils.py
new file mode 100644
index 000000000..578fabd78
--- /dev/null
+++ b/etna/libs/ts2vec/utils.py
@@ -0,0 +1,100 @@
+"""
+MIT License
+
+Copyright (c) 2022 Zhihan Yue
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts2vec repository (https://github.com/yuezhihan/ts2vec/tree/main)
+
+import numpy as np
+import pickle
+import torch
+import random
+from datetime import datetime
+from copy import deepcopy
+
+
+def torch_pad_nan(arr, left=0, right=0, dim=0):
+    """Pad `arr` with `left` leading and `right` trailing NaN entries along `dim`; `arr` itself is returned if both are <= 0."""
+    for width, leading in ((left, True), (right, False)):
+        if width > 0:
+            padshape = list(arr.shape)
+            padshape[dim] = width
+            # Allocate the padding on arr's device: a default-device tensor cannot be concatenated with e.g. a CUDA one.
+            pad = torch.full(padshape, np.nan, device=arr.device)
+            arr = torch.cat((pad, arr) if leading else (arr, pad), dim=dim)
+    return arr
+
+
+def pad_nan_to_target(array, target_length, axis=0, both_side=False):
+    """NaN-pad `array` along `axis` up to `target_length`; the input is returned as-is if it is already long enough."""
+    assert array.dtype in [np.float16, np.float32, np.float64]
+    missing = target_length - array.shape[axis]
+    if missing <= 0:
+        return array
+    # With both_side the padding is split around the data (the odd element goes last); otherwise all of it is appended.
+    before = missing // 2 if both_side else 0
+    widths = [(0, 0)] * array.ndim
+    widths[axis] = (before, missing - before)
+    return np.pad(array, pad_width=widths, mode='constant', constant_values=np.nan)
+
+
+def split_with_nan(x, sections, axis=0):
+    """Split `x` into `sections` chunks along `axis` and NaN-pad every chunk to the length of the first one."""
+    assert x.dtype in [np.float16, np.float32, np.float64]
+    chunks = np.array_split(x, sections, axis=axis)
+    # np.array_split places the longer chunks first, so chunk 0 has the maximum length.
+    width = chunks[0].shape[axis]
+    return [pad_nan_to_target(chunk, width, axis=axis) for chunk in chunks]
+
+
+def take_per_row(A, indx, num_elem):
+    """For each row i of `A`, gather the `num_elem` consecutive entries that start at column `indx[i]`."""
+    return A[torch.arange(indx.shape[0])[:, None], indx[:, None] + np.arange(num_elem)]
+
+
+def centerize_vary_length_series(x):
+    """Roll each series along axis 1 so that its leading and trailing all-NaN runs become (nearly) equal in length."""
+    observed = ~np.isnan(x).all(axis=-1)  # True where a timestamp has at least one non-NaN feature
+    lead = np.argmax(observed, axis=1)  # index of the first observed timestamp
+    trail = np.argmax(observed[:, ::-1], axis=1)  # same, counted from the end
+    shift = ((lead + trail) // 2 - lead) % x.shape[1]  # circular shift per series, wrapped to be non-negative
+    rows, cols = np.ogrid[:x.shape[0], :x.shape[1]]
+    return x[rows, cols - shift[:, np.newaxis]]
+
+
+class AveragedModel(torch.optim.swa_utils.AveragedModel):
+    """torch AveragedModel subclass that sets its attributes itself and defaults `avg_fn` to the static incremental mean `avg_fn_impl`."""
+    def __init__(self, model, device=None, avg_fn=None, use_buffers=False):
+        super(torch.optim.swa_utils.AveragedModel, self).__init__()  # skips torch's AveragedModel.__init__; runs the next __init__ in the MRO
+        self.module = deepcopy(model)  # the averaged weights live in a private copy of `model`
+        if device is not None:
+            self.module = self.module.to(device)
+        self.register_buffer('n_averaged',
+                             torch.tensor(0, dtype=torch.long, device=device))
+        if avg_fn is None:
+            avg_fn = AveragedModel.avg_fn_impl  # NOTE(review): presumably a named function so the object stays picklable — confirm
+        self.avg_fn = avg_fn
+        self.use_buffers = use_buffers
+
+    @staticmethod
+    def avg_fn_impl(averaged_model_parameter, model_parameter, num_averaged):
+        return averaged_model_parameter + \
+           (model_parameter - averaged_model_parameter) / (num_averaged + 1)
diff --git a/etna/libs/tstcc/__init__.py b/etna/libs/tstcc/__init__.py
new file mode 100644
index 000000000..b6544e1ab
--- /dev/null
+++ b/etna/libs/tstcc/__init__.py
@@ -0,0 +1 @@
+from etna.libs.tstcc.tstcc import TSTCC
diff --git a/etna/libs/tstcc/attention.py b/etna/libs/tstcc/attention.py
new file mode 100644
index 000000000..e27d0163e
--- /dev/null
+++ b/etna/libs/tstcc/attention.py
@@ -0,0 +1,133 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange, repeat
+
+
+class Residual(nn.Module):  # skip connection: adds the input back onto the output of the wrapped callable
+    def __init__(self, fn):
+        super().__init__()
+        self.fn = fn  # wrapped callable; its output must be addable to its input
+
+    def forward(self, x, **kwargs):
+        return self.fn(x, **kwargs) + x  # kwargs are passed through to fn unchanged
+
+
+class PreNorm(nn.Module):  # applies LayerNorm to the input before calling the wrapped callable
+    def __init__(self, dim, fn):
+        super().__init__()
+        self.norm = nn.LayerNorm(dim)  # normalizes over the last axis, which must have size `dim`
+        self.fn = fn
+
+    def forward(self, x, **kwargs):
+        return self.fn(self.norm(x), **kwargs)  # kwargs go to fn only, not to the norm
+
+
+class FeedForward(nn.Module):
+    """MLP on the last axis: Linear -> ReLU -> Dropout -> Linear -> Dropout, mapping `dim` back to `dim`."""
+
+    def __init__(self, dim, hidden_dim, dropout=0.):
+        super().__init__()
+        stages = [
+            nn.Linear(dim, hidden_dim), nn.ReLU(), nn.Dropout(dropout),  # expand + non-linearity
+            nn.Linear(hidden_dim, dim), nn.Dropout(dropout),  # project back to `dim`
+        ]
+        self.net = nn.Sequential(*stages)
+
+    def forward(self, x):
+        return self.net(x)
+
+
+class Attention(nn.Module):
+    """Multi-head scaled dot-product self-attention for inputs of shape (batch, tokens, dim)."""
+    def __init__(self, dim, heads=8, dropout=0.):
+        super().__init__()
+        self.heads = heads
+        self.scale = dim ** -0.5
+
+        self.to_qkv = nn.Linear(dim, dim * 3, bias=False)
+        self.to_out = nn.Sequential(nn.Linear(dim, dim), nn.Dropout(dropout))
+
+    def forward(self, x, mask=None):
+        """Attend over the token axis; optional boolean `mask` marks valid tokens, excluding the first (always kept) one."""
+        b, n, _, h = x.shape[0], x.shape[1], x.shape[2], self.heads
+        qkv = self.to_qkv(x).chunk(3, dim=-1)
+        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), qkv)
+
+        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
+
+        if mask is not None:
+            mask = F.pad(mask.flatten(1), (1, 0), value=True)
+            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
+            # Build the pairwise mask as (b, 1, n, n) so it broadcasts over the head axis of `dots`
+            # (b, h, n, n); a (b, n, n) mask would have its batch axis aligned with the head axis.
+            mask = mask[:, None, :, None] * mask[:, None, None, :]
+            dots.masked_fill_(~mask, float('-inf'))
+            del mask
+
+        attn = dots.softmax(dim=-1)
+
+        out = torch.einsum('bhij,bhjd->bhid', attn, v)
+        out = rearrange(out, 'b h n d -> b n (h d)')
+        return self.to_out(out)
+
+
+class Transformer(nn.Module):
+    """Stack of `depth` blocks, each a pre-norm residual attention layer followed by a pre-norm residual feed-forward layer."""
+    def __init__(self, dim, depth, heads, mlp_dim, dropout):
+        super().__init__()
+        def make_block():
+            attention = Residual(PreNorm(dim, Attention(dim, heads=heads, dropout=dropout)))
+            feed_forward = Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)))
+            return nn.ModuleList([attention, feed_forward])
+        self.layers = nn.ModuleList([make_block() for _ in range(depth)])
+
+    def forward(self, x, mask=None):
+        for attention, feed_forward in self.layers:
+            x = attention(x, mask=mask)  # the mask is only consumed by the attention sub-layer
+            x = feed_forward(x)
+        return x
+
+
+class Seq_Transformer(nn.Module):
+    """Embed a sequence, prepend a learned context token, run the transformer and return that token's output."""
+    def __init__(self, *, patch_size, dim, depth, heads, mlp_dim, channels=1, dropout=0.1):
+        super().__init__()
+        self.patch_to_embedding = nn.Linear(channels * patch_size, dim)
+        self.c_token = nn.Parameter(torch.randn(1, 1, dim))
+        self.transformer = Transformer(dim, depth, heads, mlp_dim, dropout)
+        self.to_c_token = nn.Identity()
+
+    def forward(self, forward_seq):
+        embedded = self.patch_to_embedding(forward_seq)
+        batch, _, _ = embedded.shape
+        # One copy of the context token per batch element, placed at sequence position 0.
+        context = repeat(self.c_token, '() n d -> b n d', b=batch)
+        tokens = torch.cat((context, embedded), dim=1)
+        encoded = self.transformer(tokens)
+        return self.to_c_token(encoded[:, 0])
diff --git a/etna/libs/tstcc/augmentations.py b/etna/libs/tstcc/augmentations.py
new file mode 100644
index 000000000..de7de2840
--- /dev/null
+++ b/etna/libs/tstcc/augmentations.py
@@ -0,0 +1,73 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+# Fix numpy warning in `permutation` function
+
+import numpy as np
+import torch
+
+
+def DataTransform(sample, jitter_scale_ratio, max_seg, jitter_ratio):
+    """Return (weak, strong) views of `sample`: random scaling, and segment permutation followed by jitter."""
+    # The weak view is drawn first; this call order fixes how numpy's global RNG stream is consumed.
+    weak_view = torch.from_numpy(scaling(sample, jitter_scale_ratio))
+    permuted = permutation(sample, max_segments=max_seg)
+    return weak_view, jitter(permuted, jitter_ratio)
+
+
+def jitter(x, sigma=0.8):
+    """Add i.i.d. Gaussian noise N(0, sigma^2) to every element of `x` (see https://arxiv.org/pdf/1706.00527.pdf)."""
+    return x + np.random.normal(0., sigma, x.shape)
+
+
+def scaling(x, sigma=1.1):
+    """Multiply `x` by random factors ~ N(2, sigma^2), drawn per (axis-0, axis-2) position and shared across axis 1.
+
+    Reference: https://arxiv.org/pdf/1706.00527.pdf
+    """
+    factor = np.random.normal(loc=2., scale=sigma, size=(x.shape[0], x.shape[2]))
+    scaled = [np.multiply(x[:, ch, :], factor)[:, np.newaxis, :] for ch in range(x.shape[1])]
+    return np.concatenate(scaled, axis=1)
+
+
+def permutation(x, max_segments=5, seg_mode="random"):
+    """Cut axis 2 of each sample into segments and shuffle their order (same order for every channel); returns a torch tensor."""
+    orig_steps = np.arange(x.shape[2])
+    num_segs = np.random.randint(1, max_segments, size=(x.shape[0]))  # upper bound is exclusive: 1 .. max_segments - 1
+
+    ret = np.zeros_like(x)
+    for i, pat in enumerate(x):
+        if num_segs[i] > 1:
+            if seg_mode == "random":
+                split_points = np.random.choice(x.shape[2] - 2, num_segs[i] - 1, replace=False)
+                split_points.sort()
+                splits = np.split(orig_steps, split_points)
+            else:
+                splits = np.array_split(orig_steps, num_segs[i])
+            # `np.asarray(splits, dtype=object)` avoids numpy's ragged-array warning; the result is cast back to int indices
+            warp = np.concatenate(np.random.permutation(np.asarray(splits, dtype=object))).ravel().astype(int)
+            ret[i] = pat[:, warp]  # reorder the steps of every channel; `pat[0, warp]` copied channel 0 into all channels
+        else:
+            ret[i] = pat
+    return torch.from_numpy(ret)
diff --git a/etna/libs/tstcc/dataloader.py b/etna/libs/tstcc/dataloader.py
new file mode 100644
index 000000000..a8dd55f34
--- /dev/null
+++ b/etna/libs/tstcc/dataloader.py
@@ -0,0 +1,74 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+
+import torch
+from torch.utils.data import Dataset
+
+from etna.libs.tstcc.augmentations import DataTransform
+
+
+class Load_Dataset(Dataset):
+    """Torch dataset over an array of series; in "train" mode each item is a (weak, strong) augmented pair."""
+    def __init__(
+            self,
+            dataset,
+            mode,
+            jitter_scale_ratio,
+            max_seg,
+            jitter_ratio
+    ):
+        """
+        Notes
+        -----
+        In this implementation we replace NaNs with 0 values to work with time-series with different length.
+        """
+        super().__init__()
+        self.mode = mode
+        self.jitter_scale_ratio = jitter_scale_ratio
+        self.max_seg = max_seg
+        self.jitter_ratio = jitter_ratio
+
+        # NaNs are replaced by zeros before any augmentation is computed.
+        self.x_data = torch.nan_to_num(torch.from_numpy(dataset), nan=0)
+        self.len = self.x_data.shape[0]
+
+        if self.mode == "train":
+            # Both augmented views are generated once here and reused on every __getitem__ call.
+            weak, strong = DataTransform(
+                self.x_data,
+                jitter_scale_ratio=self.jitter_scale_ratio,
+                max_seg=self.max_seg,
+                jitter_ratio=self.jitter_ratio
+            )
+            self.aug1 = weak.float()
+            self.aug2 = strong.float()
+
+    def __getitem__(self, index):
+        if self.mode != "train":
+            return self.x_data[index].float()
+        return self.aug1[index], self.aug2[index]
+
+    def __len__(self):
+        return self.len
diff --git a/etna/libs/tstcc/encoder.py b/etna/libs/tstcc/encoder.py
new file mode 100644
index 000000000..6e90feade
--- /dev/null
+++ b/etna/libs/tstcc/encoder.py
@@ -0,0 +1,74 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+
+# In the original implementation, the name of this file is "model.py".
+# Added ignoring warning about even kernel lengths and odd dilation in nn.Conv1d blocks.
+import warnings
+
+from torch import nn
+
+
+class ConvEncoder(nn.Module):
+    """Convolutional encoder made of three Conv1d -> BatchNorm1d -> ReLU blocks.
+
+    Channel sizes go ``input_dims -> 32 -> 64 -> output_dims``. All convolutions
+    are bias-free with stride 1 and ``padding="same"``. Only the first block is
+    configurable: it uses ``kernel_size`` and is followed by ``Dropout(dropout)``;
+    the other two blocks use a fixed kernel size of 8 and no dropout.
+    """
+
+    def __init__(self, input_dims, kernel_size, dropout, output_dims):
+        """Build the three convolution blocks.
+
+        Parameters
+        ----------
+        input_dims:
+            Number of input channels.
+        kernel_size:
+            Kernel size of the first convolution.
+        dropout:
+            Dropout probability applied at the end of the first block.
+        output_dims:
+            Number of output channels.
+        """
+        super().__init__()
+        self.input_dims = input_dims
+        self.kernel_size = kernel_size
+        self.dropout = dropout
+        self.output_dims = output_dims
+
+        shared = dict(stride=1, bias=False, padding="same")
+        first_conv = nn.Conv1d(self.input_dims, 32, kernel_size=self.kernel_size, **shared)
+        self.conv_block1 = nn.Sequential(first_conv, nn.BatchNorm1d(32), nn.ReLU(), nn.Dropout(dropout))
+        self.conv_block2 = nn.Sequential(nn.Conv1d(32, 64, kernel_size=8, **shared), nn.BatchNorm1d(64), nn.ReLU())
+        last_conv = nn.Conv1d(64, output_dims, kernel_size=8, **shared)
+        self.conv_block3 = nn.Sequential(last_conv, nn.BatchNorm1d(output_dims), nn.ReLU())
+
+    def forward(self, x_in):
+        """Apply the three blocks in order; every warning raised meanwhile is suppressed."""
+        # Silences the Conv1d warning about even kernel lengths with "same" padding.
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            return self.conv_block3(self.conv_block2(self.conv_block1(x_in)))
diff --git a/etna/libs/tstcc/loss.py b/etna/libs/tstcc/loss.py
new file mode 100644
index 000000000..66d0a2cd8
--- /dev/null
+++ b/etna/libs/tstcc/loss.py
@@ -0,0 +1,90 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+
+import torch
+import numpy as np
+
+
+class NTXentLoss(torch.nn.Module):
+    """Normalized temperature-scaled cross-entropy loss over two batches of views.
+
+    For each of the ``2 * batch_size`` stacked representations the similarity to
+    its counterpart from the other view is the positive logit; similarities to all
+    remaining representations (self-similarity excluded) are the negative logits.
+    """
+
+    def __init__(self, device, batch_size, temperature, use_cosine_similarity):
+        """Prepare the negatives mask, the similarity function and the criterion.
+
+        ``device`` hosts the mask and labels, ``batch_size`` is the number of rows
+        expected in each ``forward`` input, ``temperature`` divides the logits.
+        """
+        super().__init__()
+        self.batch_size = batch_size
+        self.temperature = temperature
+        self.device = device
+        self.softmax = torch.nn.Softmax(dim=-1)
+        self.mask_samples_from_same_repr = self._get_correlated_mask().type(torch.bool)
+        self.similarity_function = self._get_similarity_function(use_cosine_similarity)
+        self.criterion = torch.nn.CrossEntropyLoss(reduction="sum")
+
+    def _get_similarity_function(self, use_cosine_similarity):
+        """Pick the pairwise similarity; the cosine module is created only when it is requested."""
+        if not use_cosine_similarity:
+            return self._dot_simililarity
+        self._cosine_similarity = torch.nn.CosineSimilarity(dim=-1)
+        return self._cosine_simililarity
+
+    def _get_correlated_mask(self):
+        """Boolean ``(2N, 2N)`` mask that is False on the main diagonal and on both positive-pair diagonals."""
+        size = 2 * self.batch_size
+        excluded = np.eye(size) + np.eye(size, size, k=-self.batch_size) + np.eye(size, size, k=self.batch_size)
+        return torch.from_numpy(excluded == 0).to(self.device)
+
+    @staticmethod
+    def _dot_simililarity(x, y):
+        """Pairwise dot products: ``x`` is ``(A, C)``, ``y`` is ``(B, C)``, the result is ``(A, B)``."""
+        return torch.tensordot(x.unsqueeze(1), y.T.unsqueeze(0), dims=2)
+
+    def _cosine_simililarity(self, x, y):
+        """Pairwise cosine similarities, broadcasting ``(A, 1, C)`` against ``(1, B, C)`` into ``(A, B)``."""
+        return self._cosine_similarity(x.unsqueeze(1), y.unsqueeze(0))
+
+    def forward(self, zis, zjs):
+        """Return the cross-entropy loss averaged over all ``2 * batch_size`` representations."""
+        n_views = 2 * self.batch_size
+        representations = torch.cat([zjs, zis], dim=0)
+        similarity_matrix = self.similarity_function(representations, representations)
+        # Scores of the positive pairs sit on the diagonals shifted by +/- batch_size.
+        upper = torch.diag(similarity_matrix, self.batch_size)
+        lower = torch.diag(similarity_matrix, -self.batch_size)
+        positives = torch.cat([upper, lower]).view(n_views, 1)
+        negatives = similarity_matrix[self.mask_samples_from_same_repr].view(n_views, -1)
+
+        # The positive logit is placed in column 0, hence the all-zero target labels.
+        logits = torch.cat((positives, negatives), dim=1)
+        logits /= self.temperature
+        labels = torch.zeros(n_views).to(self.device).long()
+        return self.criterion(logits, labels) / n_views
diff --git a/etna/libs/tstcc/tc.py b/etna/libs/tstcc/tc.py
new file mode 100644
index 000000000..247308749
--- /dev/null
+++ b/etna/libs/tstcc/tc.py
@@ -0,0 +1,98 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+# Added ignoring warning about empty linear layer in self.projection_head when input_dims < 4
+
+import warnings
+
+import torch
+import torch.nn as nn
+import numpy as np
+from etna.libs.tstcc.attention import Seq_Transformer
+
+
+class TC(nn.Module):
+    """Temporal contrasting module: predicts future timesteps of one view from the context of the other."""
+
+    def __init__(self, input_dims, timesteps, hidden_dim, heads, depth, device, n_seq_steps):
+        """Create the per-step predictors, the projection head and the sequence transformer.
+
+        ``timesteps`` linear layers map the ``hidden_dim``-sized context to ``input_dims`` channels;
+        ``heads`` and ``depth`` are forwarded to ``Seq_Transformer``; ``n_seq_steps`` caps the context length.
+        """
+        super().__init__()
+        self.hidden_dim = hidden_dim
+        self.num_channels = input_dims
+        self.timestep = timesteps
+        self.heads = heads
+        self.depth = depth
+        self.Wk = nn.ModuleList([nn.Linear(hidden_dim, self.num_channels) for _ in range(self.timestep)])
+        self.lsoftmax = nn.LogSoftmax(dim=1)
+        self.device = device
+        self.n_seq_steps = n_seq_steps
+        half, quarter = input_dims // 2, input_dims // 4
+        # With input_dims < 4 the last linear layer is empty and torch warns; the warning is ignored on purpose.
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            self.projection_head = nn.Sequential(
+                nn.Linear(hidden_dim, half), nn.BatchNorm1d(half), nn.ReLU(inplace=True), nn.Linear(half, quarter)
+            )
+        self.seq_transformer = Seq_Transformer(
+            patch_size=self.num_channels, dim=self.hidden_dim, depth=self.depth, heads=self.heads, mlp_dim=64
+        )
+
+    def forward(self, features_aug1, features_aug2):
+        """Return ``(loss, projection)`` for features of shape ``(batch_size, channels, seq_len)``.
+
+        A single start position is drawn at random; the context is taken from ``features_aug1`` up to
+        that position and the next ``timestep`` steps of ``features_aug2`` are the prediction targets.
+        """
+        seq_len = features_aug1.shape[2]
+        z_aug1 = features_aug1.transpose(1, 2)
+        z_aug2 = features_aug2.transpose(1, 2)
+        batch = z_aug1.shape[0]
+
+        # torch.randint needs a non-empty range, so seq_len has to exceed self.timestep
+        t_samples = torch.randint(seq_len - self.timestep, size=(1,)).long().to(self.device)
+
+        # targets: the ``timestep`` positions that follow the sampled one
+        encode_samples = torch.empty((self.timestep, batch, self.num_channels)).float().to(self.device)
+        for step in range(self.timestep):
+            encode_samples[step] = z_aug2[:, t_samples + (step + 1), :].view(batch, self.num_channels)
+
+        context_start = max(0, t_samples - self.n_seq_steps)
+        c_t = self.seq_transformer(z_aug1[:, context_start:t_samples + 1, :])
+
+        # Every prediction is written before any of them is read back, hence two separate loops.
+        pred = torch.empty((self.timestep, batch, self.num_channels)).float().to(self.device)
+        for step in range(self.timestep):
+            pred[step] = self.Wk[step](c_t)
+
+        # negative mean, over batch and timesteps, of the log-softmax scores of the matching pairs
+        score = 0
+        for step in range(self.timestep):
+            total = torch.mm(encode_samples[step], torch.transpose(pred[step], 0, 1))
+            score += torch.sum(torch.diag(self.lsoftmax(total)))
+        score /= -1. * batch * self.timestep
+        return score, self.projection_head(c_t)
diff --git a/etna/libs/tstcc/tstcc.py b/etna/libs/tstcc/tstcc.py
new file mode 100644
index 000000000..aa84ff340
--- /dev/null
+++ b/etna/libs/tstcc/tstcc.py
@@ -0,0 +1,275 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+# Moved training and encoding parameters from __init__ to fit and encode, respectively
+# Changed input and output data shapes
+
+from etna.libs.tstcc.encoder import ConvEncoder
+from etna.libs.tstcc.tc import TC
+from etna.libs.tstcc.dataloader import Load_Dataset
+from etna.libs.tstcc.loss import NTXentLoss
+from etna.loggers import tslogger
+import numpy as np
+import torch
+import torch.nn.functional as F
+from torch.utils.data import DataLoader
+
+
+class TSTCC:
+    """TS-TCC model.
+
+    Couples a convolutional encoder with a temporal contrasting module; both are trained in ``fit``,
+    while ``encode`` uses the encoder only.
+    """
+
+    def __init__(
+        self, input_dims, encoder_output_dim, kernel_size, dropout, timesteps, hidden_dim, heads, depth,
+        n_seq_steps, jitter_scale_ratio, max_seg, jitter_ratio, use_cosine_similarity, batch_size, device,
+        num_workers,
+    ):
+        """
+        Init TSTCC model
+
+        Parameters
+        ----------
+        input_dims:
+            The input dimension. For a univariate time series, this should be set to 1.
+        encoder_output_dim:
+            The output dimension after encoder.
+        kernel_size:
+            Kernel size of first convolution in encoder.
+        dropout:
+            Dropout rate in first convolution block in encoder.
+        timesteps:
+            The number of timestamps to predict in temporal contrasting model.
+        hidden_dim:
+            The output dimension after temporal_contr_model.
+        heads:
+            Number of heads in attention block in temporal contrasting model. Presumably ``hidden_dim`` (passed
+            to ``TC`` as the attention dimension) must be a multiple of the number of heads -- TODO confirm.
+        depth:
+            Depth in attention block in temporal contrasting model.
+        n_seq_steps:
+            Max context size in temporal contrasting model.
+        jitter_scale_ratio:
+            Jitter ratio in weak augmentation.
+        max_seg:
+            Number of segments in strong augmentation.
+        jitter_ratio:
+            Jitter ratio in strong augmentation.
+        use_cosine_similarity:
+            If True NTXentLoss uses cosine similarity, if False NTXentLoss uses dot product.
+        batch_size:
+            The batch size.
+        device:
+            The device used for training and inference.
+        num_workers:
+            How many subprocesses to use for data loading.
+        """
+        super().__init__()
+
+        self.input_dims = input_dims
+        self.batch_size = batch_size
+        self.device = device
+        self.num_workers = num_workers
+        self.n_seq_steps = n_seq_steps
+
+        # Encoder and temporal contrasting model share one ModuleDict, so a single optimizer,
+        # state dict and train/eval switch cover both.
+        self.model = torch.nn.ModuleDict({
+            "encoder": ConvEncoder(
+                input_dims=self.input_dims,
+                kernel_size=kernel_size,
+                dropout=dropout,
+                output_dims=encoder_output_dim,
+            ),
+            "temporal_contr_model": TC(
+                input_dims=encoder_output_dim,
+                timesteps=timesteps,
+                hidden_dim=hidden_dim,
+                heads=heads,
+                depth=depth,
+                device=self.device,
+                n_seq_steps=self.n_seq_steps,
+            ),
+        }).to(device=self.device)
+
+        self.jitter_scale_ratio = jitter_scale_ratio
+        self.max_seg = max_seg
+        self.jitter_ratio = jitter_ratio
+        self.use_cosine_similarity = use_cosine_similarity
+
+    def prepare_data(self, data, mode):
+        """Wrap ``data`` into a ``DataLoader`` over ``Load_Dataset``.
+
+        Parameters
+        ----------
+        data:
+            Numpy array; its last two axes are swapped before the dataset is built, so the expected
+            layout is ``(n_samples, n_timestamps, input_dims)``.
+        mode:
+            ``"train"`` gives shuffled batches and drops the last incomplete batch; any other value
+            keeps the original order and every sample.
+
+        Returns
+        -------
+        :
+            ``DataLoader`` with batch size ``self.batch_size`` and ``self.num_workers`` workers.
+        """
+        dataset = Load_Dataset(
+            dataset=data.transpose(0, 2, 1),
+            mode=mode,
+            jitter_scale_ratio=self.jitter_scale_ratio,
+            max_seg=self.max_seg,
+            jitter_ratio=self.jitter_ratio,
+        )
+        is_train = mode == "train"
+        return DataLoader(
+            dataset=dataset,
+            batch_size=self.batch_size,
+            shuffle=is_train,
+            drop_last=is_train,
+            num_workers=self.num_workers,
+        )
+
+    def fit(self, train_data, n_epochs, lr, temperature, lambda1, lambda2, verbose):
+        """
+        Fit model
+
+        Parameters
+        ----------
+        train_data:
+            train data
+        n_epochs:
+            The number of epochs. When this number is reached, the training stops.
+        lr:
+            The learning rate.
+        temperature:
+            Temperature in NTXentLoss.
+        lambda1:
+            The relative weight of the first item in the loss (temporal contrasting loss).
+        lambda2:
+            The relative weight of the second item in the loss (contextual contrasting loss).
+        verbose:
+            Whether to print the training loss after each epoch.
+
+        Notes
+        -----
+        Incomplete batches are dropped, so with fewer than ``batch_size`` samples no optimization
+        step is made and the per-epoch loss is NaN.
+        """
+        train_loader = self.prepare_data(data=train_data, mode="train")
+        model_optimizer = torch.optim.Adam(self.model.parameters(), lr=lr, betas=(0.9, 0.99), weight_decay=3e-4)
+
+        # The criterion depends only on fit-level settings, so it is built once rather than for every batch.
+        nt_xent_criterion = NTXentLoss(
+            device=self.device,
+            batch_size=self.batch_size,
+            temperature=temperature,
+            use_cosine_similarity=self.use_cosine_similarity,
+        )
+
+        self.model.train()
+        for epoch in range(n_epochs):
+            total_loss = []
+            for aug1, aug2 in train_loader:
+                aug1, aug2 = aug1.to(self.device), aug2.to(self.device)
+                model_optimizer.zero_grad()
+
+                # encode both views, then L2-normalize the features along the channel axis
+                features1 = self.model.encoder(aug1)
+                features2 = self.model.encoder(aug2)
+                features1 = F.normalize(features1, dim=1)
+                features2 = F.normalize(features2, dim=1)
+
+                # cross-view prediction in both directions; the second output is the projected context
+                temp_cont_loss1, zis = self.model.temporal_contr_model(features1, features2)
+                temp_cont_loss2, zjs = self.model.temporal_contr_model(features2, features1)
+
+                loss = (temp_cont_loss1 + temp_cont_loss2) * lambda1 + nt_xent_criterion(zis, zjs) * lambda2
+                total_loss.append(loss.item())
+                loss.backward()
+                model_optimizer.step()
+
+            train_loss = torch.tensor(total_loss).mean()
+            if verbose:
+                tslogger.log(f"Epoch {epoch}: loss={train_loss:.4f}")
+
+    def encode(self, data, encode_full_series):
+        """
+        Encode data
+
+        Parameters
+        ----------
+        data:
+            data to encode
+        encode_full_series:
+            if True each series is reduced to a single vector by max pooling over time,
+            otherwise one vector per timestamp is returned.
+
+        Returns
+        -------
+        :
+            Numpy array of shape ``(n_samples, output_dim)`` if ``encode_full_series`` is True,
+            else ``(n_samples, n_timestamps, output_dim)``.
+        """
+        data_loader = self.prepare_data(data=data, mode="encode")
+
+        self.model.eval()
+
+        embeddings = []
+        with torch.no_grad():
+            for batch in data_loader:
+                features = self.model.encoder(batch.to(self.device))
+                # normalize feature vectors along the channel axis
+                embeddings.append(F.normalize(features, dim=1).cpu())
+
+        embeddings = torch.cat(embeddings, dim=0)
+        if encode_full_series:
+            embeddings = F.max_pool1d(embeddings, kernel_size=embeddings.shape[2]).squeeze(2)
+        else:
+            embeddings = embeddings.movedim(1, 2)
+        return embeddings.numpy()
+
+    def save(self, fn):
+        """Save the model weights (``state_dict``) to a file.
+
+        Parameters
+        ----------
+        fn:
+            Filename.
+        """
+        torch.save(self.model.state_dict(), fn)
+
+    def load(self, fn):
+        """Load the model weights from a file, mapping tensors to ``self.device``.
+
+        Parameters
+        ----------
+        fn:
+            Filename.
+        """
+        # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
+        state_dict = torch.load(fn, map_location=self.device)
+        self.model.load_state_dict(state_dict)
diff --git a/etna/transforms/__init__.py b/etna/transforms/__init__.py
index 21f063ce8..9af8b7e78 100644
--- a/etna/transforms/__init__.py
+++ b/etna/transforms/__init__.py
@@ -18,6 +18,8 @@
 from etna.transforms.decomposition import STLTransform
 from etna.transforms.decomposition import TheilSenTrendTransform
 from etna.transforms.decomposition import TrendTransform
+from etna.transforms.embeddings import EmbeddingSegmentTransform
+from etna.transforms.embeddings import EmbeddingWindowTransform
 from etna.transforms.encoders import LabelEncoderTransform
 from etna.transforms.encoders import MeanSegmentEncoderTransform
 from etna.transforms.encoders import OneHotEncoderTransform
diff --git a/etna/transforms/embeddings/__init__.py b/etna/transforms/embeddings/__init__.py
new file mode 100644
index 000000000..e2bcb4c3c
--- /dev/null
+++ b/etna/transforms/embeddings/__init__.py
@@ -0,0 +1,2 @@
+from etna.transforms.embeddings.embedding_segment import EmbeddingSegmentTransform
+from etna.transforms.embeddings.embedding_window import EmbeddingWindowTransform
diff --git a/etna/transforms/embeddings/embedding_segment.py b/etna/transforms/embeddings/embedding_segment.py
new file mode 100644
index 000000000..cebaa0fce
--- /dev/null
+++ b/etna/transforms/embeddings/embedding_segment.py
@@ -0,0 +1,135 @@
+import pathlib
+import tempfile
+import zipfile
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+
+from etna.core import load
+from etna.transforms.base import IrreversibleTransform
+from etna.transforms.embeddings.models import BaseEmbeddingModel
+
+
+class EmbeddingSegmentTransform(IrreversibleTransform):
+    """Create the constant embedding features using embedding model."""
+
+    def __init__(
+        self,
+        in_columns: List[str],
+        embedding_model: BaseEmbeddingModel,
+        encoding_params: Optional[Dict[str, Any]] = None,
+        training_params: Optional[Dict[str, Any]] = None,
+        out_column: str = "embedding_segment",
+    ):
+        """Init EmbeddingSegmentTransform.
+
+        Parameters
+        ----------
+        in_columns:
+            Columns to use for creating embeddings
+        embedding_model:
+            Model to create the embeddings
+        encoding_params:
+            Parameters to use during encoding. Parameters for corresponding models can be found at :ref:`embedding section <embeddings>`.
+        training_params:
+            Parameters to use during training. Parameters for corresponding models can be found at :ref:`embedding section <embeddings>`.
+        out_column:
+            Prefix for output columns, the output columns format is '{out_column}_{i}'
+        """
+        super().__init__(required_features=in_columns)
+        self.in_columns = in_columns
+        self.embedding_model = embedding_model
+        self.encoding_params = {} if encoding_params is None else encoding_params
+        self.training_params = {} if training_params is None else training_params
+        self.out_column = out_column
+
+    def _get_out_columns(self) -> List[str]:
+        """Create the output columns names, one per embedding dimension."""
+        return [f"{self.out_column}_{dim}" for dim in range(self.embedding_model.output_dims)]
+
+    def _prepare_data(self, df: pd.DataFrame) -> np.ndarray:
+        """Cut trailing all-NaN rows and reshape data into (n_segments, n_timestamps, input_dims).
+
+        Raises
+        ------
+        ValueError:
+            If there is no row with at least one non-NaN value.
+        """
+        filled_rows = np.where(~df.isna().all(axis=1).to_numpy())[0]
+        if len(filled_rows) == 0:
+            raise ValueError("There is no row with non-NaN values, unable to create embeddings!")
+        # Positions come in ascending order, so the last one marks the last filled row.
+        df = df.iloc[: filled_rows[-1] + 1]
+        n_segments = df.columns.get_level_values("segment").nunique()
+        return df.values.reshape((len(df.index), n_segments, len(self.in_columns))).transpose(1, 0, 2)
+
+    def _fit(self, df: pd.DataFrame):
+        """Fit the embedding model on the reshaped data, passing ``training_params`` as keyword arguments."""
+        x = self._prepare_data(df)
+        self.embedding_model.fit(x, **self.training_params)
+
+    def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Create embedding features: one vector per segment, repeated for every row of ``df``."""
+        segments = df.columns.get_level_values("segment").unique()
+        n_timestamps = len(df.index)
+        x = self._prepare_data(df)
+        embeddings = self.embedding_model.encode_segment(x=x, **self.encoding_params)  # (n_segments, output_dim)
+        # (n_timestamps, n_segments * output_dim)
+        embeddings = np.repeat(embeddings[np.newaxis, :, :], n_timestamps, axis=0).reshape(n_timestamps, -1)
+
+        out_index = pd.MultiIndex.from_product([segments, self._get_out_columns()])
+        df_encoded = pd.DataFrame(embeddings, columns=out_index, index=df.index)
+        # df.index is reused as is, so rows trimmed in _prepare_data (trailing all-NaN rows)
+        # still receive the embedding of their segment.
+        df = pd.concat([df, df_encoded], axis=1)
+        df = df.sort_index(axis=1)
+        return df
+
+    def get_regressors_info(self) -> List[str]:
+        """Return the list with regressors created by the transform."""
+        return self._get_out_columns()
+
+    def save(self, path: pathlib.Path):
+        """Save the object.
+
+        Parameters
+        ----------
+        path:
+            Path to save object to.
+        """
+        self._save(path=path, skip_attributes=["embedding_model"])
+
+        # The embedding model is saved to a temporary file and appended to the archive as "model.zip".
+        with zipfile.ZipFile(path, "a") as archive, tempfile.TemporaryDirectory() as _temp_dir:
+            model_save_path = pathlib.Path(_temp_dir) / "model.zip"
+            self.embedding_model.save(path=model_save_path)
+            archive.write(model_save_path, "model.zip")
+
+    @classmethod
+    def load(cls, path: pathlib.Path) -> "EmbeddingSegmentTransform":
+        """Load an object.
+
+        Parameters
+        ----------
+        path:
+            Path to load object from.
+
+        Returns
+        -------
+        :
+            Loaded object.
+        """
+        # Load the transform itself; embedding_model was skipped on save
+        obj: EmbeddingSegmentTransform = super().load(path=path)
+
+        # Load embedding_model from the "model.zip" member of the archive
+        with zipfile.ZipFile(path, "r") as archive, tempfile.TemporaryDirectory() as _temp_dir:
+            temp_dir = pathlib.Path(_temp_dir)
+            archive.extractall(temp_dir)
+            obj.embedding_model = load(path=temp_dir / "model.zip")
+
+        return obj
diff --git a/etna/transforms/embeddings/embedding_window.py b/etna/transforms/embeddings/embedding_window.py
new file mode 100644
index 000000000..8a2bde49d
--- /dev/null
+++ b/etna/transforms/embeddings/embedding_window.py
@@ -0,0 +1,133 @@
+import pathlib
+import tempfile
+import zipfile
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+
+from etna.core import load
+from etna.transforms.base import IrreversibleTransform
+from etna.transforms.embeddings.models import BaseEmbeddingModel
+
+
+class EmbeddingWindowTransform(IrreversibleTransform):
+    """Create the embedding features for each timestamp using embedding model.
+
+    Each component of the per-timestamp embedding vector becomes a separate output column.
+    """
+
+    def __init__(
+        self,
+        in_columns: List[str],
+        embedding_model: BaseEmbeddingModel,
+        encoding_params: Optional[Dict[str, Any]] = None,
+        training_params: Optional[Dict[str, Any]] = None,
+        out_column: str = "embedding_window",
+    ):
+        """Init EmbeddingWindowTransform.
+
+        Parameters
+        ----------
+        in_columns:
+            Columns to use for creating embeddings
+        embedding_model:
+            Model to create the embeddings
+        encoding_params:
+            Parameters to use during encoding. Parameters for corresponding models can be found at :ref:`embedding section <embeddings>`.
+        training_params:
+            Parameters to use during training. Parameters for corresponding models can be found at :ref:`embedding section <embeddings>`.
+        out_column:
+            Prefix for output columns, the output columns format is '{out_column}_{i}'
+        """
+        super().__init__(required_features=in_columns)
+        self.in_columns = in_columns
+        self.embedding_model = embedding_model
+        self.encoding_params = {} if encoding_params is None else encoding_params
+        self.training_params = {} if training_params is None else training_params
+        self.out_column = out_column
+
+    def _prepare_data(self, df: pd.DataFrame) -> np.ndarray:
+        """Reshape data into (n_segments, n_timestamps, input_dims).
+
+        All rows of ``df`` are kept, so the encoded result can be aligned with ``df.index``.
+        """
+        n_segments = df.columns.get_level_values("segment").nunique()
+        target_shape = (len(df.index), n_segments, len(self.in_columns))
+        return df.values.reshape(target_shape).transpose(1, 0, 2)
+
+    def _get_out_columns(self) -> List[str]:
+        """Create the output columns names, one per embedding dimension."""
+        return [f"{self.out_column}_{dim}" for dim in range(self.embedding_model.output_dims)]
+
+    def _fit(self, df: pd.DataFrame):
+        """Fit transform.
+
+        The embedding model is fitted on the reshaped data with ``training_params`` as keyword arguments.
+        """
+        x = self._prepare_data(df)
+        self.embedding_model.fit(x, **self.training_params)
+
+    def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Create embedding features and append them to ``df`` as new columns."""
+        segments = df.columns.get_level_values("segment").unique()
+        n_timestamps = len(df.index)
+        # (n_segments, n_timestamps, output_dim)
+        embeddings = self.embedding_model.encode_window(x=self._prepare_data(df), **self.encoding_params)
+        # (n_timestamps, n_segments * output_dim)
+        embeddings = embeddings.transpose(1, 0, 2).reshape(n_timestamps, -1)
+
+        out_index = pd.MultiIndex.from_product([segments, self._get_out_columns()])
+        df_encoded = pd.DataFrame(embeddings, columns=out_index, index=df.index)
+        return pd.concat([df, df_encoded], axis=1).sort_index(axis=1)
+
+    def get_regressors_info(self) -> List[str]:
+        """Return the list with regressors created by the transform.
+
+        This transform always returns an empty list.
+        """
+        return []
+
+    def save(self, path: pathlib.Path):
+        """Save the object.
+
+        Parameters
+        ----------
+        path:
+            Path to save object to.
+        """
+        self._save(path=path, skip_attributes=["embedding_model"])
+
+        # The embedding model is saved to a temporary file and appended to the archive as "model.zip".
+        with zipfile.ZipFile(path, "a") as archive, tempfile.TemporaryDirectory() as _temp_dir:
+            model_save_path = pathlib.Path(_temp_dir) / "model.zip"
+            self.embedding_model.save(path=model_save_path)
+            archive.write(model_save_path, "model.zip")
+
+    @classmethod
+    def load(cls, path: pathlib.Path) -> "EmbeddingWindowTransform":
+        """Load an object.
+
+        Parameters
+        ----------
+        path:
+            Path to load object from.
+
+        Returns
+        -------
+        :
+            Loaded object.
+        """
+        # Load the transform itself; embedding_model was skipped on save
+        obj: EmbeddingWindowTransform = super().load(path=path)
+
+        # Load embedding_model from the "model.zip" member of the archive
+        with zipfile.ZipFile(path, "r") as archive, tempfile.TemporaryDirectory() as _temp_dir:
+            temp_dir = pathlib.Path(_temp_dir)
+            archive.extractall(temp_dir)
+            obj.embedding_model = load(path=temp_dir / "model.zip")
+
+        return obj
diff --git a/etna/transforms/embeddings/models/__init__.py b/etna/transforms/embeddings/models/__init__.py
new file mode 100644
index 000000000..f7e380218
--- /dev/null
+++ b/etna/transforms/embeddings/models/__init__.py
@@ -0,0 +1,3 @@
+from etna.transforms.embeddings.models.base import BaseEmbeddingModel
+from etna.transforms.embeddings.models.ts2vec import TS2VecEmbeddingModel
+from etna.transforms.embeddings.models.tstcc import TSTCCEmbeddingModel
diff --git a/etna/transforms/embeddings/models/base.py b/etna/transforms/embeddings/models/base.py
new file mode 100644
index 000000000..090c0f75b
--- /dev/null
+++ b/etna/transforms/embeddings/models/base.py
@@ -0,0 +1,36 @@
+from abc import abstractmethod
+
+import numpy as np
+
+from etna.core import BaseMixin
+from etna.core import SaveMixin
+
+
+class BaseEmbeddingModel(BaseMixin, SaveMixin):
+    """Base class for embedding models."""
+
+    def __init__(self, output_dims: int):
+        """Init BaseEmbeddingModel.
+
+        Parameters
+        ----------
+        output_dims:
+            Dimension of the output embeddings
+        """
+        super().__init__()
+        self.output_dims = output_dims
+
+    @abstractmethod
+    def fit(self, x: np.ndarray) -> "BaseEmbeddingModel":
+        """Fit the embedding model."""
+        pass
+
+    @abstractmethod
+    def encode_segment(self, x: np.ndarray) -> np.ndarray:
+        """Create embeddings of the whole series."""
+        pass
+
+    @abstractmethod
+    def encode_window(self, x: np.ndarray) -> np.ndarray:
+        """Create embeddings of each series timestamp."""
+        pass
diff --git a/etna/transforms/embeddings/models/ts2vec.py b/etna/transforms/embeddings/models/ts2vec.py
new file mode 100644
index 000000000..948ba3803
--- /dev/null
+++ b/etna/transforms/embeddings/models/ts2vec.py
@@ -0,0 +1,272 @@
+import pathlib
+import tempfile
+import zipfile
+from typing import Literal
+from typing import Optional
+
+import numpy as np
+
+from etna.libs.ts2vec import TS2Vec
+from etna.transforms.embeddings.models import BaseEmbeddingModel
+
+
+class TS2VecEmbeddingModel(BaseEmbeddingModel):
+    """TS2Vec embedding model.
+
+    If there are NaNs in series, embeddings will not contain NaNs.
+
+    Each subsequent call of the ``fit`` method continues training the same model.
+
+    For more details read the
+    `paper <https://arxiv.org/abs/2106.10466>`_.
+    """
+
+    def __init__(
+        self,
+        input_dims: int,
+        output_dims: int = 320,
+        hidden_dims: int = 64,
+        depth: int = 10,
+        device: Literal["cpu", "cuda"] = "cpu",
+        batch_size: int = 16,
+        max_train_length: Optional[int] = None,
+        temporal_unit: int = 0,
+    ):
+        """Init TS2VecEmbeddingModel.
+
+        Parameters
+        ----------
+        input_dims:
+            The input dimension. For a univariate time series, this should be set to 1.
+        output_dims:
+            The representation dimension.
+        hidden_dims:
+            The hidden dimension of the encoder.
+        depth:
+            The number of hidden residual blocks in the encoder.
+        device:
+            The device used for training and inference.
+        batch_size:
+            The batch size.
+        max_train_length:
+            The maximum allowed sequence length for training. A sequence with a length greater than ``max_train_length``
+            is cropped into several sequences, each of which has a length less than ``max_train_length``.
+        temporal_unit:
+            The minimum unit to perform temporal contrast. When training on a very long sequence,
+            this param helps to reduce the cost of time and memory.
+        Notes
+        -----
+        In case of long series to reduce memory consumption it is recommended to use max_train_length parameter or manually break the series into smaller subseries.
+        """
+        super().__init__(output_dims=output_dims)
+        self.input_dims = input_dims
+        self.output_dims = output_dims
+        self.hidden_dims = hidden_dims
+        self.depth = depth
+        self.max_train_length = max_train_length
+        self.temporal_unit = temporal_unit
+
+        self.device = device
+        self.batch_size = batch_size
+
+        self.embedding_model = TS2Vec(
+            input_dims=self.input_dims,
+            output_dims=self.output_dims,
+            hidden_dims=self.hidden_dims,
+            depth=self.depth,
+            max_train_length=self.max_train_length,
+            temporal_unit=self.temporal_unit,
+            device=self.device,
+            batch_size=self.batch_size,
+        )
+
+        self._is_freezed: bool = False
+
+    @property
+    def is_freezed(self):
+        """Return whether to skip training during ``fit``."""
+        return self._is_freezed
+
+    def freeze(self, is_freezed: bool = True):
+        """Enable or disable skipping training in ``fit``.
+
+        Parameters
+        ----------
+        is_freezed:
+            whether to skip training during ``fit``.
+        """
+        self._is_freezed = is_freezed
+
+    def fit(
+        self,
+        x: np.ndarray,
+        lr: float = 0.001,
+        n_epochs: Optional[int] = None,
+        n_iters: Optional[int] = None,
+        verbose: Optional[bool] = None,
+    ) -> "TS2VecEmbeddingModel":
+        """Fit TS2Vec embedding model.
+
+        Parameters
+        ----------
+        x:
+            data with shape (n_segments, n_timestamps, input_dims).
+        lr:
+            The learning rate.
+        n_epochs:
+            The number of epochs. When this number is reached, the training stops.
+        n_iters:
+            The number of iterations. When this number is reached, the training stops. If neither n_epochs nor n_iters is specified,
+            a default setting is used that sets n_iters to 200 for a dataset with size <= 100000, 600 otherwise.
+        verbose:
+            Whether to print the training loss after each epoch.
+        """
+        if not self._is_freezed:
+            self.embedding_model.fit(train_data=x, lr=lr, n_epochs=n_epochs, n_iters=n_iters, verbose=verbose)
+        return self
+
+    def encode_segment(
+        self,
+        x: np.ndarray,
+        mask: Literal["binomial", "continuous", "all_true", "all_false", "mask_last"] = "all_true",
+        sliding_length: Optional[int] = None,
+        sliding_padding: int = 0,
+    ) -> np.ndarray:
+        """Create embeddings of the whole series.
+
+        Parameters
+        ----------
+        x:
+            data with shape (n_segments, n_timestamps, input_dims).
+        mask:
+            the mask used by encoder on the test phase can be specified with this parameter. The possible options are:
+
+            - 'binomial' - mask timestamp with probability 0.5 (used in the paper; not the default here). It is used on the training phase.
+            - 'continuous' - mask random windows of timestamps
+            - 'all_true' - mask none of the timestamps
+            - 'all_false' - mask all timestamps
+            - 'mask_last' - mask last timestamp
+        sliding_length:
+            the length of sliding window. When this param is specified, a sliding inference would be applied on the time series.
+        sliding_padding:
+            the contextual data length used for inference on every sliding window.
+
+        Returns
+        -------
+        :
+            array with embeddings of shape (n_segments, output_dim)
+        """
+        embeddings = self.embedding_model.encode(  # (n_segments, output_dim)
+            data=x,
+            mask=mask,
+            encoding_window="full_series",
+            causal=False,
+            sliding_length=sliding_length,
+            sliding_padding=sliding_padding,
+            batch_size=self.batch_size,
+        )
+
+        return embeddings
+
+    def encode_window(
+        self,
+        x: np.ndarray,
+        mask: Literal["binomial", "continuous", "all_true", "all_false", "mask_last"] = "all_true",
+        sliding_length: Optional[int] = None,
+        sliding_padding: int = 0,
+        encoding_window: Optional[int] = None,
+    ) -> np.ndarray:
+        """Create embeddings of each series timestamp.
+
+        Parameters
+        ----------
+        x:
+            data with shape (n_segments, n_timestamps, input_dims).
+        mask:
+            the mask used by encoder on the test phase can be specified with this parameter. The possible options are:
+
+            - 'binomial' - mask timestamp with probability 0.5 (used in the paper; not the default here). It is used on the training phase.
+            - 'continuous' - mask random windows of timestamps
+            - 'all_true' - mask none of the timestamps
+            - 'all_false' - mask all timestamps
+            - 'mask_last' - mask last timestamp
+        sliding_length:
+            the length of sliding window. When this param is specified, a sliding inference would be applied on the time series.
+        sliding_padding:
+            the contextual data length used for inference on every sliding window.
+        encoding_window:
+            when this param is specified, the computed representation would be the max pooling over this window.
+            This param will be ignored when encoding full series
+
+        Returns
+        -------
+        :
+            array with embeddings of shape (n_segments, n_timestamps, output_dim)
+        """
+        embeddings = self.embedding_model.encode(  # (n_segments, n_timestamps, output_dim)
+            data=x,
+            mask=mask,
+            encoding_window=encoding_window,
+            causal=True,
+            sliding_length=sliding_length,
+            sliding_padding=sliding_padding,
+            batch_size=self.batch_size,
+        )
+        return embeddings
+
+    def save(self, path: pathlib.Path):
+        """Save the object.
+
+        Parameters
+        ----------
+        path:
+            Path to save object to.
+        """
+        self._save(path=path, skip_attributes=["embedding_model"])
+
+        # Save embedding_model
+        with zipfile.ZipFile(path, "a") as archive:
+            with tempfile.TemporaryDirectory() as _temp_dir:
+                temp_dir = pathlib.Path(_temp_dir)
+
+                # save model to a temp file; it goes into the archive as "model.zip", which ``load`` reads
+                model_save_path = temp_dir / "model.pt"
+                self.embedding_model.save(fn=str(model_save_path))
+                archive.write(model_save_path, "model.zip")
+
+    @classmethod
+    def load(cls, path: pathlib.Path) -> "TS2VecEmbeddingModel":
+        """Load an object.
+
+        Parameters
+        ----------
+        path:
+            Path to load object from.
+
+        Returns
+        -------
+        :
+            Loaded object.
+        """
+        obj: TS2VecEmbeddingModel = super().load(path=path)
+        obj.embedding_model = TS2Vec(
+            input_dims=obj.input_dims,
+            output_dims=obj.output_dims,
+            hidden_dims=obj.hidden_dims,
+            depth=obj.depth,
+            max_train_length=obj.max_train_length,
+            temporal_unit=obj.temporal_unit,
+            device=obj.device,
+            batch_size=obj.batch_size,
+        )
+
+        with zipfile.ZipFile(path, "r") as archive:
+            with tempfile.TemporaryDirectory() as _temp_dir:
+                temp_dir = pathlib.Path(_temp_dir)
+
+                archive.extractall(temp_dir)
+
+                model_path = temp_dir / "model.zip"
+                obj.embedding_model.load(fn=str(model_path))
+
+        return obj
diff --git a/etna/transforms/embeddings/models/tstcc.py b/etna/transforms/embeddings/models/tstcc.py
new file mode 100644
index 000000000..863418893
--- /dev/null
+++ b/etna/transforms/embeddings/models/tstcc.py
@@ -0,0 +1,281 @@
+import pathlib
+import tempfile
+import zipfile
+from typing import Literal
+
+import numpy as np
+
+from etna.libs.tstcc import TSTCC
+from etna.transforms.embeddings.models import BaseEmbeddingModel
+
+
+class TSTCCEmbeddingModel(BaseEmbeddingModel):
+    """TSTCC embedding model.
+
+    If there are NaNs in series, embeddings will not contain NaNs.
+
+    Each subsequent call of the ``fit`` method continues training the same model.
+
+    When using a custom `output_dims`, set it to a value > 3 so that the loss is calculated correctly.
+
+    For more details read the
+    `paper <https://arxiv.org/abs/2106.14112>`_.
+
+    Notes
+    -----
+    This model cannot be fitted with `batch_size=1`. So, it cannot be fitted on a dataset with 1 segment.
+    """
+
+    def __init__(
+        self,
+        input_dims: int,
+        output_dims: int = 32,
+        tc_hidden_dim: int = 32,
+        kernel_size: int = 7,
+        dropout: float = 0.35,
+        timesteps: int = 7,
+        heads: int = 1,
+        depth: int = 4,
+        jitter_scale_ratio: float = 1.1,
+        max_seg: int = 4,
+        jitter_ratio: float = 0.8,
+        use_cosine_similarity: bool = True,
+        n_seq_steps: int = 0,
+        device: Literal["cpu", "cuda"] = "cpu",
+        batch_size: int = 16,
+        num_workers: int = 0,
+    ):
+        """Init TSTCCEmbeddingModel.
+
+        Parameters
+        ----------
+        input_dims:
+            The input dimension. For a univariate time series, this should be set to 1.
+        output_dims:
+            The representation dimension.
+        tc_hidden_dim:
+            The output dimension after temporal_contr_model.
+        kernel_size:
+            Kernel size of first convolution in encoder.
+        dropout:
+            Dropout rate in first convolution block in encoder.
+        timesteps:
+            The number of timestamps to predict in temporal contrasting model.
+        heads:
+            Number of heads in attention block in temporal contrasting model. Parameter output_dims must be a multiple
+            of the number of heads.
+        depth:
+            Depth in attention block in temporal contrasting model.
+        n_seq_steps:
+            Max context size in temporal contrasting model.
+        jitter_scale_ratio:
+            Jitter ratio in weak augmentation.
+        max_seg:
+            Number of segments in strong augmentation.
+        jitter_ratio:
+            Jitter ratio in strong augmentation.
+        use_cosine_similarity:
+            If True NTXentLoss uses cosine similarity, if False NTXentLoss uses dot product.
+        device:
+            The device used for training and inference.
+        batch_size:
+            The batch size (number of segments in a batch).
+        num_workers:
+            How many subprocesses to use for data loading.
+        """
+        super().__init__(output_dims=output_dims)
+        self.input_dims = input_dims
+        self.output_dims = output_dims
+        self.tc_hidden_dim = tc_hidden_dim
+        self.kernel_size = kernel_size
+        self.dropout = dropout
+        self.timesteps = timesteps
+        self.heads = heads
+        self.depth = depth
+        self.n_seq_steps = n_seq_steps
+
+        self.jitter_scale_ratio = jitter_scale_ratio
+        self.max_seg = max_seg
+        self.jitter_ratio = jitter_ratio
+
+        self.use_cosine_similarity = use_cosine_similarity
+
+        self.batch_size = batch_size
+
+        self.device = device
+        self.num_workers = num_workers
+
+        self.embedding_model = TSTCC(
+            input_dims=self.input_dims,
+            encoder_output_dim=self.output_dims,
+            kernel_size=self.kernel_size,
+            dropout=self.dropout,
+            timesteps=self.timesteps,
+            hidden_dim=self.tc_hidden_dim,
+            heads=self.heads,
+            depth=self.depth,
+            n_seq_steps=self.n_seq_steps,
+            device=self.device,
+            num_workers=self.num_workers,
+            batch_size=self.batch_size,
+            jitter_scale_ratio=self.jitter_scale_ratio,
+            max_seg=self.max_seg,
+            jitter_ratio=self.jitter_ratio,
+            use_cosine_similarity=self.use_cosine_similarity,
+        )
+
+        self._is_freezed: bool = False
+
+    @property
+    def is_freezed(self):
+        """Return whether to skip training during ``fit``."""
+        return self._is_freezed
+
+    def freeze(self, is_freezed: bool = True):
+        """Enable or disable skipping training in ``fit``.
+
+        Parameters
+        ----------
+        is_freezed:
+            whether to skip training during ``fit``.
+        """
+        self._is_freezed = is_freezed
+
+    def fit(
+        self,
+        x: np.ndarray,
+        n_epochs: int = 40,
+        lr: float = 0.001,
+        temperature: float = 0.2,
+        lambda1: float = 1,
+        lambda2: float = 0.7,
+        verbose: bool = False,
+    ) -> "TSTCCEmbeddingModel":
+        """Fit TSTCC embedding model.
+
+        Parameters
+        ----------
+        x:
+            data with shape (n_segments, n_timestamps, input_dims).
+        n_epochs:
+            The number of epochs. When this number is reached, the training stops.
+        lr:
+            The learning rate.
+        temperature:
+            Temperature in NTXentLoss.
+        lambda1:
+            The relative weight of the first item in the loss (temporal contrasting loss).
+        lambda2:
+            The relative weight of the second item in the loss (contextual contrasting loss).
+        verbose:
+            Whether to print the training loss after each epoch.
+        """
+        if not self._is_freezed:
+            self.embedding_model.fit(
+                train_data=x,
+                n_epochs=n_epochs,
+                lr=lr,
+                temperature=temperature,
+                lambda1=lambda1,
+                lambda2=lambda2,
+                verbose=verbose,
+            )
+        return self
+
+    def encode_segment(self, x: np.ndarray) -> np.ndarray:
+        """Create embeddings of the whole series.
+
+        Parameters
+        ----------
+        x:
+            data with shape (n_segments, n_timestamps, input_dims).
+        Returns
+        -------
+        :
+            array with embeddings of shape (n_segments, output_dim)
+        """
+        embeddings = self.embedding_model.encode(data=x, encode_full_series=True)  # (n_segments, output_dim)
+
+        return embeddings
+
+    def encode_window(self, x: np.ndarray) -> np.ndarray:
+        """Create embeddings of each series timestamp.
+
+        Parameters
+        ----------
+        x:
+            data with shape (n_segments, n_timestamps, input_dims).
+
+        Returns
+        -------
+        :
+            array with embeddings of shape (n_segments, n_timestamps, output_dim)
+        """
+        embeddings = self.embedding_model.encode(
+            data=x, encode_full_series=False
+        )  # (n_segments, n_timestamps, output_dim)
+        return embeddings
+
+    def save(self, path: pathlib.Path):
+        """Save the object.
+
+        Parameters
+        ----------
+        path:
+            Path to save object to.
+        """
+        self._save(path=path, skip_attributes=["embedding_model"])
+
+        # Save embedding_model
+        with zipfile.ZipFile(path, "a") as archive:
+            with tempfile.TemporaryDirectory() as _temp_dir:
+                temp_dir = pathlib.Path(_temp_dir)
+
+                # save model to a temp file; it goes into the archive as "model.zip", which ``load`` reads
+                model_save_path = temp_dir / "model.pt"
+                self.embedding_model.save(fn=str(model_save_path))
+                archive.write(model_save_path, "model.zip")
+
+    @classmethod
+    def load(cls, path: pathlib.Path) -> "TSTCCEmbeddingModel":
+        """Load an object.
+
+        Parameters
+        ----------
+        path:
+            Path to load object from.
+
+        Returns
+        -------
+        :
+            Loaded object.
+        """
+        obj: TSTCCEmbeddingModel = super().load(path=path)
+        obj.embedding_model = TSTCC(
+            input_dims=obj.input_dims,
+            encoder_output_dim=obj.output_dims,
+            kernel_size=obj.kernel_size,
+            dropout=obj.dropout,
+            timesteps=obj.timesteps,
+            heads=obj.heads,
+            depth=obj.depth,
+            hidden_dim=obj.tc_hidden_dim,
+            n_seq_steps=obj.n_seq_steps,
+            device=obj.device,
+            num_workers=obj.num_workers,
+            batch_size=obj.batch_size,
+            jitter_scale_ratio=obj.jitter_scale_ratio,
+            max_seg=obj.max_seg,
+            jitter_ratio=obj.jitter_ratio,
+            use_cosine_similarity=obj.use_cosine_similarity,
+        )
+
+        with zipfile.ZipFile(path, "r") as archive:
+            with tempfile.TemporaryDirectory() as _temp_dir:
+                temp_dir = pathlib.Path(_temp_dir)
+
+                archive.extractall(temp_dir)
+                model_path = temp_dir / "model.zip"
+                obj.embedding_model.load(fn=str(model_path))
+
+        return obj
diff --git a/examples/210-embedding_models.ipynb b/examples/210-embedding_models.ipynb
new file mode 100644
index 000000000..cd732b270
--- /dev/null
+++ b/examples/210-embedding_models.ipynb
@@ -0,0 +1,1242 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "2c8984e0-0792-4cf8-b3c6-446b45b717f2",
+   "metadata": {},
+   "source": [
+    "# Embedding models\n",
+    "\n",
+    "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/etna-team/etna/master?filepath=examples/210-embedding_models.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "94e7669f-de54-4df8-86ba-aa72c6d5fb55",
+   "metadata": {},
+   "source": [
+    "This notebook contains examples with embedding models.\n",
+    "\n",
+    "**Table of contents**\n",
+    "\n",
+    "* [Using embedding models directly](#chapter1)  \n",
+    "* [Using embedding models with transforms](#chapter2)\n",
+    "    * [Baseline](#section_2_1)\n",
+    "    * [EmbeddingSegmentTransform](#section_2_2)\n",
+    "    * [EmbeddingWindowTransform](#section_2_3)\n",
+    "* [Saving and loading models](#chapter3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "bf32c6a9-f920-4888-ac9d-f4a1c454cd91",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import warnings\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d732e5b1-2c10-4de3-93ce-c6395ddbd4f1",
+   "metadata": {},
+   "source": [
+    "## 1. Using embedding models directly <a class=\"anchor\" id=\"chapter1\"></a>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4c63da5a-eed8-472b-9786-9884a5bb78d1",
+   "metadata": {},
+   "source": [
+    "We have two models to generate embeddings for time series: `TS2VecEmbeddingModel` and `TSTCCEmbeddingModel`.\n",
+    "\n",
+    "Each model has the following methods:\n",
+    "- `fit` to train the model.\n",
+    "- `encode_segment` to generate embeddings for the whole series. These features are regressors.\n",
+    "- `encode_window` to generate embeddings for each timestamp. These features aren't regressors and lag transformation should be applied to them before using in forecasting.\n",
+    "- `freeze` to enable or disable skipping training in `fit` method. It is useful, for example, when you have a pretrained model and you want only to generate embeddings without new training during `backtest`.\n",
+    "- `save` and `load` to save and load pretrained models, respectively."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "d5ec9757-dd5a-423c-9be1-e4835b4b2a03",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Disabling SSL verification.  Connections to this server are not verified and may be insecure!\n",
+      "Global seed set to 42\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "42"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from pytorch_lightning import seed_everything\n",
+    "\n",
+    "seed_everything(42, workers=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "f99c90c5-8a8b-481a-848f-ebcb00b22bb0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th>segment</th>\n",
+       "      <th>segment_0</th>\n",
+       "      <th>segment_1</th>\n",
+       "      <th>segment_2</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>feature</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>timestamp</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2001-01-01</th>\n",
+       "      <td>1.624345</td>\n",
+       "      <td>1.462108</td>\n",
+       "      <td>-1.100619</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2001-01-02</th>\n",
+       "      <td>1.012589</td>\n",
+       "      <td>-0.598033</td>\n",
+       "      <td>0.044105</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2001-01-03</th>\n",
+       "      <td>0.484417</td>\n",
+       "      <td>-0.920450</td>\n",
+       "      <td>0.945695</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2001-01-04</th>\n",
+       "      <td>-0.588551</td>\n",
+       "      <td>-1.304504</td>\n",
+       "      <td>1.448190</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2001-01-05</th>\n",
+       "      <td>0.276856</td>\n",
+       "      <td>-0.170735</td>\n",
+       "      <td>2.349046</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "segment    segment_0 segment_1 segment_2\n",
+       "feature       target    target    target\n",
+       "timestamp                               \n",
+       "2001-01-01  1.624345  1.462108 -1.100619\n",
+       "2001-01-02  1.012589 -0.598033  0.044105\n",
+       "2001-01-03  0.484417 -0.920450  0.945695\n",
+       "2001-01-04 -0.588551 -1.304504  1.448190\n",
+       "2001-01-05  0.276856 -0.170735  2.349046"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from etna.datasets import TSDataset\n",
+    "from etna.datasets import generate_ar_df\n",
+    "\n",
+    "df = generate_ar_df(periods=10, start_time=\"2001-01-01\", n_segments=3)\n",
+    "ts = TSDataset(df, freq=\"D\")\n",
+    "ts.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9712e58c-73fe-475e-807b-ae082752fcf8",
+   "metadata": {},
+   "source": [
+    "Now let's work with models directly.\n",
+    "\n",
+    "They expect an array of shape\n",
+    "(n_segments, n_timestamps, num_features). The example below uses `TS2VecEmbeddingModel`; `TSTCCEmbeddingModel` works the same way."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "05a191ee-17dd-4cb1-a993-73aee7706272",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(3, 10, 1)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "x = ts.df.values.reshape(ts.size()).transpose(1, 0, 2)\n",
+    "x.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "0263277f-b642-4c1b-8f19-a42520d6d09e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(3, 2)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from etna.transforms.embeddings.models import TS2VecEmbeddingModel\n",
+    "from etna.transforms.embeddings.models import TSTCCEmbeddingModel\n",
+    "\n",
+    "model_ts2vec = TS2VecEmbeddingModel(input_dims=1, output_dims=2)\n",
+    "model_ts2vec.fit(x, n_epochs=1)\n",
+    "segment_embeddings = model_ts2vec.encode_segment(x)\n",
+    "segment_embeddings.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "26329bf0-e955-46ad-9962-4ea1295ef671",
+   "metadata": {},
+   "source": [
+    "As we are using `encode_segment` we get `output_dims` features consisting of one value for each segment.\n",
+    "\n",
+    "And what about `encode_window`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "9a307886-cdf2-4e98-9a8e-3917741f287c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(3, 10, 2)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "window_embeddings = model_ts2vec.encode_window(x)\n",
+    "window_embeddings.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aded40fc-382c-4f7a-901b-8498f9258c3b",
+   "metadata": {},
+   "source": [
+    "We get `output_dims` features consisting of `n_timestamps` values for each segment."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ffbb2210-d77f-426e-91b2-3729544ce872",
+   "metadata": {},
+   "source": [
+    "## 2. Using embedding models with transforms <a class=\"anchor\" id=\"chapter2\"></a>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "459e90a9-97fb-4922-bc6a-52500b3a132e",
+   "metadata": {},
+   "source": [
+    "In this section we will test our models on an example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "b7827e25-4597-451a-88f8-5e0475556041",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "HORIZON = 6"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "17955757-7585-4db0-889b-dbd978339822",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### 2.1 Baseline <a class=\"anchor\" id=\"section_2_1\"></a>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8c92c86f-fce7-442b-a7f4-0c024344bec9",
+   "metadata": {},
+   "source": [
+    "Before working with embedding features, let's make forecasts using the usual features."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "21ac6694-1c3c-4fdc-a96a-3d0544ee90df",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th>segment</th>\n",
+       "      <th>M1000_MACRO</th>\n",
+       "      <th>M1001_MACRO</th>\n",
+       "      <th>M1002_MACRO</th>\n",
+       "      <th>M1003_MACRO</th>\n",
+       "      <th>M1004_MACRO</th>\n",
+       "      <th>M1005_MACRO</th>\n",
+       "      <th>M1006_MACRO</th>\n",
+       "      <th>M1007_MACRO</th>\n",
+       "      <th>M1008_MACRO</th>\n",
+       "      <th>M1009_MACRO</th>\n",
+       "      <th>...</th>\n",
+       "      <th>M992_MACRO</th>\n",
+       "      <th>M993_MACRO</th>\n",
+       "      <th>M994_MACRO</th>\n",
+       "      <th>M995_MACRO</th>\n",
+       "      <th>M996_MACRO</th>\n",
+       "      <th>M997_MACRO</th>\n",
+       "      <th>M998_MACRO</th>\n",
+       "      <th>M999_MACRO</th>\n",
+       "      <th>M99_MICRO</th>\n",
+       "      <th>M9_MICRO</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>feature</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>...</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "      <th>target</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>timestamp</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 1428 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "segment   M1000_MACRO M1001_MACRO M1002_MACRO M1003_MACRO M1004_MACRO  \\\n",
+       "feature        target      target      target      target      target   \n",
+       "timestamp                                                               \n",
+       "0                 NaN         NaN         NaN         NaN         NaN   \n",
+       "1                 NaN         NaN         NaN         NaN         NaN   \n",
+       "2                 NaN         NaN         NaN         NaN         NaN   \n",
+       "3                 NaN         NaN         NaN         NaN         NaN   \n",
+       "4                 NaN         NaN         NaN         NaN         NaN   \n",
+       "\n",
+       "segment   M1005_MACRO M1006_MACRO M1007_MACRO M1008_MACRO M1009_MACRO  ...  \\\n",
+       "feature        target      target      target      target      target  ...   \n",
+       "timestamp                                                              ...   \n",
+       "0                 NaN         NaN         NaN         NaN         NaN  ...   \n",
+       "1                 NaN         NaN         NaN         NaN         NaN  ...   \n",
+       "2                 NaN         NaN         NaN         NaN         NaN  ...   \n",
+       "3                 NaN         NaN         NaN         NaN         NaN  ...   \n",
+       "4                 NaN         NaN         NaN         NaN         NaN  ...   \n",
+       "\n",
+       "segment   M992_MACRO M993_MACRO M994_MACRO M995_MACRO M996_MACRO M997_MACRO  \\\n",
+       "feature       target     target     target     target     target     target   \n",
+       "timestamp                                                                     \n",
+       "0                NaN        NaN        NaN        NaN        NaN        NaN   \n",
+       "1                NaN        NaN        NaN        NaN        NaN        NaN   \n",
+       "2                NaN        NaN        NaN        NaN        NaN        NaN   \n",
+       "3                NaN        NaN        NaN        NaN        NaN        NaN   \n",
+       "4                NaN        NaN        NaN        NaN        NaN        NaN   \n",
+       "\n",
+       "segment   M998_MACRO M999_MACRO M99_MICRO M9_MICRO  \n",
+       "feature       target     target    target   target  \n",
+       "timestamp                                           \n",
+       "0                NaN        NaN       NaN      NaN  \n",
+       "1                NaN        NaN       NaN      NaN  \n",
+       "2                NaN        NaN       NaN      NaN  \n",
+       "3                NaN        NaN       NaN      NaN  \n",
+       "4                NaN        NaN       NaN      NaN  \n",
+       "\n",
+       "[5 rows x 1428 columns]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from etna.datasets import load_dataset\n",
+    "\n",
+    "ts = load_dataset(\"m3_monthly\")\n",
+    "ts.drop_features(features=[\"origin_timestamp\"])\n",
+    "ts.df_exog = None\n",
+    "ts.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "fe224f12-6b86-4513-8d61-3fa0cb895eb1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    6.0s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:   12.3s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:   18.8s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:   18.8s\n",
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    0.7s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:    1.3s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    2.2s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    2.2s\n",
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    0.1s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:    0.1s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    0.2s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    0.2s\n"
+     ]
+    }
+   ],
+   "source": [
+    "from etna.metrics import SMAPE\n",
+    "from etna.models import CatBoostMultiSegmentModel\n",
+    "from etna.pipeline import Pipeline\n",
+    "from etna.transforms import LagTransform\n",
+    "\n",
+    "model = CatBoostMultiSegmentModel()\n",
+    "\n",
+    "lag_transform = LagTransform(in_column=\"target\", lags=list(range(HORIZON, HORIZON + 6)), out_column=\"lag\")\n",
+    "\n",
+    "pipeline = Pipeline(model=model, transforms=[lag_transform], horizon=HORIZON)\n",
+    "metrics_df, _, _ = pipeline.backtest(ts, metrics=[SMAPE()], n_folds=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "bfbab09d-eb27-4529-8954-3dc0e471668a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SMAPE:  14.719683971886594\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"SMAPE: \", metrics_df[\"SMAPE\"].mean())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "efa358d6-c1a0-460a-b1a2-7a123b5b4eec",
+   "metadata": {},
+   "source": [
+    "### 2.2 EmbeddingSegmentTransform <a class=\"anchor\" id=\"section_2_2\"></a>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8f7ca5fd-4186-4bf4-ac32-0bace7802ca9",
+   "metadata": {},
+   "source": [
+    "`EmbeddingSegmentTransform` calls the model's `encode_segment` method internally."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "f05f8f02-4d24-4438-ac15-9ed45e2e4f78",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from etna.transforms import EmbeddingSegmentTransform\n",
+    "from etna.transforms.embeddings.models import BaseEmbeddingModel\n",
+    "\n",
+    "\n",
+    "def forecast_with_segment_embeddings(emb_model: BaseEmbeddingModel, training_params: dict) -> float:\n",
+    "    model = CatBoostMultiSegmentModel()\n",
+    "\n",
+    "    emb_transform = EmbeddingSegmentTransform(\n",
+    "        in_columns=[\"target\"], embedding_model=emb_model, training_params=training_params, out_column=\"emb\"\n",
+    "    )\n",
+    "    pipeline = Pipeline(model=model, transforms=[lag_transform, emb_transform], horizon=HORIZON)\n",
+    "    metrics_df, _, _ = pipeline.backtest(ts, metrics=[SMAPE()], n_folds=3)\n",
+    "    smape_score = metrics_df[\"SMAPE\"].mean()\n",
+    "    return smape_score"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6bc237e1-d2e3-48ee-99b5-ac35b957717b",
+   "metadata": {},
+   "source": [
+    "To find out which training parameters can be passed to the transform, you can inspect the signature of the model's `fit` method.\n",
+    "\n",
+    "Let's begin with `TSTCCEmbeddingModel`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "3e6cc297-48bd-4614-bcbe-44e5732bf3a8",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\u001b[0;31mSignature:\u001b[0m\n",
+       "\u001b[0mTSTCCEmbeddingModel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
+       "\u001b[0;34m\u001b[0m    \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+       "\u001b[0;34m\u001b[0m    \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+       "\u001b[0;34m\u001b[0m    \u001b[0mn_epochs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m40\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+       "\u001b[0;34m\u001b[0m    \u001b[0mlr\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfloat\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.001\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+       "\u001b[0;34m\u001b[0m    \u001b[0mtemperature\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfloat\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+       "\u001b[0;34m\u001b[0m    \u001b[0mlambda1\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfloat\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+       "\u001b[0;34m\u001b[0m    \u001b[0mlambda2\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfloat\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.7\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+       "\u001b[0;34m\u001b[0m    \u001b[0mverbose\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+       "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m'TSTCCEmbeddingModel'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+       "\u001b[0;31mDocstring:\u001b[0m\n",
+       "Fit TSTCC embedding model.\n",
+       "\n",
+       "Parameters\n",
+       "----------\n",
+       "x:\n",
+       "    data with shapes (n_segments, n_timestamps, input_dims).\n",
+       "n_epochs:\n",
+       "    The number of epochs. When this reaches, the training stops.\n",
+       "lr:\n",
+       "    The learning rate.\n",
+       "temperature:\n",
+       "    Temperature in NTXentLoss.\n",
+       "lambda1:\n",
+       "    The relative weight of the first item in the loss (temporal contrasting loss).\n",
+       "lambda2:\n",
+       "    The relative weight of the second item in the loss (contextual contrasting loss).\n",
+       "verbose:\n",
+       "    Whether to print the training loss after each epoch.\n",
+       "\u001b[0;31mFile:\u001b[0m      /workdir/src/etna/etna/transforms/embeddings/models/tstcc.py\n",
+       "\u001b[0;31mType:\u001b[0m      function\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "?TSTCCEmbeddingModel.fit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "516b209e-7bd2-45c6-8db0-b1708ffda0fc",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:   33.3s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:  1.1min\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:  1.7min\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:  1.7min\n",
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    1.2s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:    2.4s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    3.9s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    3.9s\n",
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    0.1s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:    0.1s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    0.2s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    0.2s\n"
+     ]
+    }
+   ],
+   "source": [
+    "emb_model = TSTCCEmbeddingModel(input_dims=1, tc_hidden_dim=16, depth=3, output_dims=6, device=\"cuda\")\n",
+    "training_params = {\"n_epochs\": 10}\n",
+    "smape_score = forecast_with_segment_embeddings(emb_model, training_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "e673ff95-fdc2-4751-9025-98940f73211d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SMAPE:  14.18648029957201\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"SMAPE: \", smape_score)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e5e35346-6f16-4d42-8a33-b98bf5679046",
+   "metadata": {},
+   "source": [
+    "Better than without embeddings. Let's try `TS2VecEmbeddingModel`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "6b7e43f6-9ce3-4a3f-b6e9-70ed46b272e5",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:   25.7s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:   52.0s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:  1.3min\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:  1.3min\n",
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    1.5s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:    2.8s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    4.0s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    4.0s\n",
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    0.1s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:    0.1s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    0.2s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    0.2s\n"
+     ]
+    }
+   ],
+   "source": [
+    "emb_model = TS2VecEmbeddingModel(input_dims=1, hidden_dims=16, depth=3, output_dims=6, device=\"cuda\")\n",
+    "training_params = {\"n_epochs\": 10}\n",
+    "smape_score = forecast_with_segment_embeddings(emb_model, training_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "5688ea80-5d6c-414a-89a7-7ec144d09b4f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SMAPE:  13.620591044127748\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"SMAPE: \", smape_score)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "da58c577-3519-41a7-b63c-1da896121954",
+   "metadata": {},
+   "source": [
+    "Much better. Now let's try another transform."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "949a07ba-548f-4b09-bcba-a07f76c9d501",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### 2.3 EmbeddingWindowTransform <a class=\"anchor\" id=\"section_2_3\"></a>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dc3ef834-fcd7-4ee1-85e9-a8ace8dde8a4",
+   "metadata": {},
+   "source": [
+    "`EmbeddingWindowTransform` calls the model's `encode_window` method internally. As we have discussed, these features are not regressors, so they should be used as lagged features when forecasting the future."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "b39d1abe-42f9-44ff-af5e-f93d08c0ac02",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from etna.transforms import EmbeddingWindowTransform\n",
+    "from etna.transforms import FilterFeaturesTransform\n",
+    "\n",
+    "\n",
+    "def forecast_with_window_embeddings(emb_model: BaseEmbeddingModel, training_params: dict) -> float:\n",
+    "    model = CatBoostMultiSegmentModel()\n",
+    "\n",
+    "    output_dims = emb_model.output_dims\n",
+    "\n",
+    "    emb_transform = EmbeddingWindowTransform(\n",
+    "        in_columns=[\"target\"], embedding_model=emb_model, training_params=training_params, out_column=\"embedding_window\"\n",
+    "    )\n",
+    "    lag_emb_transforms = [\n",
+    "        LagTransform(in_column=f\"embedding_window_{i}\", lags=[HORIZON], out_column=f\"lag_emb_{i}\")\n",
+    "        for i in range(output_dims)\n",
+    "    ]\n",
+    "    filter_transforms = FilterFeaturesTransform(exclude=[f\"embedding_window_{i}\" for i in range(output_dims)])\n",
+    "\n",
+    "    transforms = [lag_transform] + [emb_transform] + lag_emb_transforms + [filter_transforms]\n",
+    "\n",
+    "    pipeline = Pipeline(model=model, transforms=transforms, horizon=HORIZON)\n",
+    "    metrics_df, _, _ = pipeline.backtest(ts, metrics=[SMAPE()], n_folds=3)\n",
+    "    smape_score = metrics_df[\"SMAPE\"].mean()\n",
+    "    return smape_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "5e663aa0-778d-4393-80e6-7ef888210ec5",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:   46.8s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:  1.6min\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:  2.4min\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:  2.4min\n",
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:   14.5s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:   29.1s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:   43.3s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:   43.3s\n",
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    0.1s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:    0.1s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    0.2s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    0.2s\n"
+     ]
+    }
+   ],
+   "source": [
+    "emb_model = TSTCCEmbeddingModel(input_dims=1, tc_hidden_dim=16, depth=3, output_dims=6, device=\"cuda\")\n",
+    "training_params = {\"n_epochs\": 10}\n",
+    "smape_score = forecast_with_window_embeddings(emb_model, training_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "d07041ad-698c-4b07-b339-16aed9856129",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SMAPE:  123.02843183461513\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"SMAPE: \", smape_score)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "41d78b2c-7574-4b0b-806c-adb5db324998",
+   "metadata": {},
+   "source": [
+    "Oops... What about `TS2VecEmbeddingModel`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "fab4711f-7b9d-4263-abbd-05a3cedcaaef",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:   38.5s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:  1.3min\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:  1.9min\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:  1.9min\n",
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:   14.3s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:   28.5s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:   42.5s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:   42.5s\n",
+      "[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:    0.1s\n",
+      "[Parallel(n_jobs=1)]: Done   2 tasks      | elapsed:    0.1s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    0.2s\n",
+      "[Parallel(n_jobs=1)]: Done   3 tasks      | elapsed:    0.2s\n"
+     ]
+    }
+   ],
+   "source": [
+    "emb_model = TS2VecEmbeddingModel(input_dims=1, hidden_dims=16, depth=3, output_dims=6, device=\"cuda\")\n",
+    "training_params = {\"n_epochs\": 10}\n",
+    "smape_score = forecast_with_window_embeddings(emb_model, training_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "fd890c55-ea57-4f51-a2e1-320cc4111b46",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SMAPE:  27.93511865651414\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"SMAPE: \", smape_score)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3a73e55e-1e1b-4ca2-a42e-62a68142c517",
+   "metadata": {},
+   "source": [
+    "Window embeddings don't help with this dataset. This means that you should try both models and both transforms to get the best results."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "84fcd1b8-e61a-40d4-a80a-9c558637a8d4",
+   "metadata": {},
+   "source": [
+    "## 3. Saving and loading models <a class=\"anchor\" id=\"chapter3\"></a>\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c5f0bc45-6388-4e92-bdc1-d8090af66b26",
+   "metadata": {},
+   "source": [
+    "If you have a pretrained embedding model and don't want it to be retrained when `fit` is called, you should \"freeze\" its training loop. This is helpful when using the model inside transforms, which call the model's `fit` method on each `fit` of the pipeline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "1d5fb109-b1c7-431c-a5bc-b2eeddc311f3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "MODEL_PATH = \"model.zip\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "24229c75-5e9a-4ff8-a7f4-1c723c62fc9e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "emb_model.freeze()\n",
+    "emb_model.save(MODEL_PATH)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e7a0275d-47ca-4aa4-a312-d02fcd06a7ae",
+   "metadata": {},
+   "source": [
+    "Now you are ready to load the pretrained model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "5d3f522a-dc2d-46d4-a28b-8a1524793874",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "model_loaded = TS2VecEmbeddingModel.load(MODEL_PATH)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "98e5325a-abe1-4c88-9f2b-fb61ef5d110e",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "If you need to fine-tune a pretrained model, you should \"unfreeze\" its training loop. After that, the model will be trained again when the `fit` method is called."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "f9961758-6f5b-42f6-92f1-0aa68bb0a677",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "model_loaded.freeze(is_freezed=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9ff472a4",
+   "metadata": {},
+   "source": [
+    "To check whether the model is \"frozen\" or not, use the `is_freezed` property."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "eba6d010",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "False"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model_loaded.is_freezed"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/README.md b/examples/README.md
index 93736a1f5..732d1ba54 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -126,6 +126,14 @@ We have prepared a set of tutorials for an easy introduction:
   - ML models
 - Summary
 
+#### [Embedding models](https://github.com/etna-team/etna/tree/master/examples/210-embedding_models.ipynb)
+- Using embedding models directly
+- Using embedding models with transforms
+  - Baseline
+  - EmbeddingSegmentTransform
+  - EmbeddingWindowTransform
+- Saving and loading models
+
 ### Advanced
 
 #### [Custom model and transform](https://github.com/etna-team/etna/tree/master/examples/301-custom_transform_and_model.ipynb)
diff --git a/poetry.lock b/poetry.lock
index dd8298bc2..818f72444 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1352,6 +1352,18 @@ files = [
     {file = "docutils-0.18.1.tar.gz", hash = "sha256:679987caf361a7539d76e584cbeddc311e3aee937877c87346f31debc63e9d06"},
 ]
 
+[[package]]
+name = "einops"
+version = "0.7.0"
+description = "A new flavour of deep learning operations"
+category = "main"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "einops-0.7.0-py3-none-any.whl", hash = "sha256:0f3096f26b914f465f6ff3c66f5478f9a5e380bb367ffc6493a68143fbbf1fd1"},
+    {file = "einops-0.7.0.tar.gz", hash = "sha256:b2b04ad6081a3b227080c9bf5e3ace7160357ff03043cd66cc5b2319eb7031d1"},
+]
+
 [[package]]
 name = "ephem"
 version = "4.1.4"
@@ -1382,6 +1394,17 @@ files = [
     {file = "ephem-4.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8f9b27117e7a82f7f70db9cb23b5cc36d37b166a2f73c55e14d7225d0ab95afa"},
     {file = "ephem-4.1.4-cp311-cp311-win32.whl", hash = "sha256:9bb21c0b117c9122c0141b0a71ee6fbbb087ed2aab4a7ab60f009e95e9f4a521"},
     {file = "ephem-4.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:55d7fb5c34b2e453e01fa4ca7ee375b19b438c9401ae8c4099ae4a3a37656972"},
+    {file = "ephem-4.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f9e24aeea560dfcece3c2e313eb94e6be3e84888091455e541fa88f3a44da584"},
+    {file = "ephem-4.1.4-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:653d99386932e5f78bb9cfc4495030ad9f3345eb4c2b32dca55547da8f1f0332"},
+    {file = "ephem-4.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53786461a6d5799d5fffe76622ad51444b264d1c7263b92a6dfcac640c3da93a"},
+    {file = "ephem-4.1.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:268f57f8768ccb0abbdf4cefb4781c7db812950019868f687b407b428513ee53"},
+    {file = "ephem-4.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d630aa287255ea9fba6962f351e4e0729bb620570684d52fbfcc31b11527f09e"},
+    {file = "ephem-4.1.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b5f229bbf62ecb4cd6bb3374b15d0f8ff7b3d970c2936fccd89bdf9d693907a2"},
+    {file = "ephem-4.1.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:d60d56f182de54bd84fadd6ea2dd8e8ef6fdef6a698c7cafd404ecb6eeefa598"},
+    {file = "ephem-4.1.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:404500c8d0030d75ec15bb6b98eee78ad163fd5252102c962ae6fb39c9488198"},
+    {file = "ephem-4.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fb020d6cc5ab1ad1cd9d3da4a6e2506beebb41d1b337d79cc20cc0a17f550f1"},
+    {file = "ephem-4.1.4-cp312-cp312-win32.whl", hash = "sha256:29e71636ee4719419d03184abc85085f76989c79a61844f5e60acbf2513d2b42"},
+    {file = "ephem-4.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:549654f63d88e0ab6248ae25ac2939131474ab9f3a91bee6b68ca6f214747c2a"},
     {file = "ephem-4.1.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:40067fc050c946c8d4c2d779805b61f063471a091e6124cbabcf61ac538011b2"},
     {file = "ephem-4.1.4-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e2abe97aa2b091090012768b4d94793213cc01f0bf040dcc311a380ab08df69"},
     {file = "ephem-4.1.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b2677d3a5b42aedc578de10b0eecdba6a50731f159cb28f7ad38c5f62143494"},
@@ -1778,6 +1801,7 @@ files = [
     {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"},
     {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"},
     {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"},
+    {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"},
     {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"},
     {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"},
     {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"},
@@ -1786,6 +1810,7 @@ files = [
     {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"},
     {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"},
     {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"},
+    {file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"},
     {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"},
     {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"},
     {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"},
@@ -1815,6 +1840,7 @@ files = [
     {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"},
     {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"},
     {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"},
+    {file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"},
     {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"},
     {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"},
     {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"},
@@ -1823,6 +1849,7 @@ files = [
     {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"},
     {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"},
     {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"},
+    {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"},
     {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"},
     {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"},
     {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"},
@@ -5380,53 +5407,6 @@ build = ["cython (>=0.29.26)"]
 develop = ["cython (>=0.29.26)"]
 docs = ["ipykernel", "jupyter-client", "matplotlib", "nbconvert", "nbformat", "numpydoc", "pandas-datareader", "sphinx"]
 
-[[package]]
-name = "statsmodels"
-version = "0.13.3"
-description = "Statistical computations and models for Python"
-category = "main"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "statsmodels-0.13.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b71bb64c6d4087dd6192eadfad390fbeb4074f676ef34c7e56579cead8c478e7"},
-    {file = "statsmodels-0.13.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:658b634c273c2f287a0086e56a5d6b95ec3ddac991cbb020b34f731e932de0bd"},
-    {file = "statsmodels-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab9f63f432889b179967ab645aea7480e28731823a3b99850d7f7a561b624f93"},
-    {file = "statsmodels-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f432fb7f54ce5edccc83aa36566653cd04ee35bbbefdf0a2b7bd9c97c5da443"},
-    {file = "statsmodels-0.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:4cd64076c3ad366b10fd4e6f8ca6aeb1e398ec5480bddb65fba8889dd9eb550d"},
-    {file = "statsmodels-0.13.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:33f9caff2dbdfef22505678407d2f549b32a4a2729eb8675b60eb2932fc0e883"},
-    {file = "statsmodels-0.13.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:393f6a7ec85f65be9ac1a13be152dd14c65084436c48bcdf94cb21ef0b6cb79c"},
-    {file = "statsmodels-0.13.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12b56d13d9a2af7a1aadc3fe9f3d3c18a5727a651323d94e7c2047177adfb9ce"},
-    {file = "statsmodels-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a61e0652f62b01981d8e857aa77550b42cf316c9d8e569b559869c248e3de834"},
-    {file = "statsmodels-0.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:5368bccd471bb8cef0a8957ba5f2a3e5b5ecc433b0783d9f602039df45c780d3"},
-    {file = "statsmodels-0.13.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1ecfb191958de187ba44b93316f4953b8b6588b5f68dcab218f76498a862dd7c"},
-    {file = "statsmodels-0.13.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ea2b481b15e9e501904a1c36efc5f9a202f87529e600a99c364fd7e4598ae88"},
-    {file = "statsmodels-0.13.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d270a11aac6720a8024e1136ab44036d0878f62995617bb5b9fc5c77ea3d3b8"},
-    {file = "statsmodels-0.13.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2185ed356823cd1c258c09b790f0c21d2fd49321e82c79f8f6dc546f1c671d7a"},
-    {file = "statsmodels-0.13.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9da39a36d114abcdcf8ebd351ed69229e23cb12b8a607996cb6511fa88e78b4d"},
-    {file = "statsmodels-0.13.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3b3a9942d0b462af4c68c3895095d304869cbec9d97f3c268f19a6ba7ba294dc"},
-    {file = "statsmodels-0.13.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fff0316420bc4f6fbd80dd77eb74f3834fcd0e4ca98ba9611b8a6d41ebbb979"},
-    {file = "statsmodels-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:352041bc04eaf90232e54a86861a460365ef45f34f58529578487e6f640dadf3"},
-    {file = "statsmodels-0.13.3-cp38-cp38-win_amd64.whl", hash = "sha256:61a0f39848ebacf5560e1539ca0037b8fc25cc9d1d7444bbef5bdc0a3c56087b"},
-    {file = "statsmodels-0.13.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:78cd12b0ee543fa955d2bace18518fc7d2b57f13c65929b54445bf3e54955b08"},
-    {file = "statsmodels-0.13.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:afccb80e3ddc969bfb5285f846ac2622861ffe192423087214d60e4c6e40e384"},
-    {file = "statsmodels-0.13.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3609824e1ced44722bd905564d8ce94df29d24e32a6dd67cc9255932aedcd7b"},
-    {file = "statsmodels-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81f8e71963a7bd169338fbb1472e34ec85ae4447414ac37bdae5cf6d1ac223bb"},
-    {file = "statsmodels-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:000c7a1ce6780834f5fbb63f9ae07a00863a00f602c7c470c942153692f5bbc3"},
-    {file = "statsmodels-0.13.3.tar.gz", hash = "sha256:ed71df887334b1d332e71d33215122bdd54494dcb2248606b30bcfa6112e860a"},
-]
-
-[package.dependencies]
-numpy = {version = ">=1.17", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}
-packaging = ">=21.3"
-pandas = ">=0.25"
-patsy = ">=0.5.2"
-scipy = {version = ">=1.3", markers = "python_version > \"3.7\" and python_version < \"3.12\" or platform_system != \"Windows\" and python_version < \"3.12\" or platform_machine != \"x86\" and python_version < \"3.12\""}
-
-[package.extras]
-build = ["cython (>=0.29.32)"]
-develop = ["Jinja2", "colorama", "cython (>=0.29.32)", "cython (>=0.29.32,<3.0.0)", "flake8", "isort", "joblib", "matplotlib (>=3)", "oldest-supported-numpy (>=2022.4.18)", "pytest (>=7.0.1,<7.1.0)", "pytest-randomly", "pytest-xdist", "pywinpty", "setuptools-scm[toml] (>=7.0.0,<7.1.0)"]
-docs = ["ipykernel", "jupyter-client", "matplotlib", "nbconvert", "nbformat", "numpydoc", "pandas-datareader", "sphinx"]
-
 [[package]]
 name = "statsmodels"
 version = "0.13.5"
@@ -6250,8 +6230,8 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker
 testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
 
 [extras]
-all = ["optuna", "prophet", "pytorch-forecasting", "pytorch-lightning", "pyts", "sqlalchemy", "statsforecast", "torch", "tsfresh", "wandb"]
-all-dev = ["GitPython", "Sphinx", "black", "click", "click", "codespell", "flake8", "flake8-bugbear", "flake8-comprehensions", "flake8-docstrings", "ipywidgets", "isort", "jupyter", "mypy", "myst-parser", "nbconvert", "nbqa", "nbsphinx", "optuna", "pep8-naming", "prophet", "pydata-sphinx-theme", "pytest", "pytest-cov", "pytest-shard", "pytorch-forecasting", "pytorch-lightning", "pyts", "semver", "semver", "sphinx-design", "sphinx-mathjax-offline", "sqlalchemy", "statsforecast", "torch", "tsfresh", "types-PyYAML", "types-setuptools", "wandb"]
+all = ["einops", "optuna", "prophet", "pytorch-forecasting", "pytorch-lightning", "pyts", "sqlalchemy", "statsforecast", "torch", "tsfresh", "wandb"]
+all-dev = ["GitPython", "Sphinx", "black", "click", "click", "codespell", "einops", "flake8", "flake8-bugbear", "flake8-comprehensions", "flake8-docstrings", "ipywidgets", "isort", "jupyter", "mypy", "myst-parser", "nbconvert", "nbqa", "nbsphinx", "optuna", "pep8-naming", "prophet", "pydata-sphinx-theme", "pytest", "pytest-cov", "pytest-shard", "pytorch-forecasting", "pytorch-lightning", "pyts", "semver", "semver", "sphinx-design", "sphinx-mathjax-offline", "sqlalchemy", "statsforecast", "torch", "tsfresh", "types-PyYAML", "types-setuptools", "wandb"]
 auto = ["optuna", "sqlalchemy"]
 classification = ["pyts", "tsfresh"]
 docs = ["GitPython", "Sphinx", "jupyter", "myst-parser", "nbsphinx", "pydata-sphinx-theme", "sphinx-design", "sphinx-mathjax-offline"]
@@ -6261,10 +6241,10 @@ release = ["click", "semver"]
 statsforecast = ["statsforecast"]
 style = ["black", "codespell", "flake8", "flake8-bugbear", "flake8-comprehensions", "flake8-docstrings", "isort", "mypy", "nbqa", "pep8-naming", "types-PyYAML", "types-setuptools"]
 tests = ["pytest", "pytest-cov", "pytest-shard"]
-torch = ["pytorch-forecasting", "pytorch-lightning", "torch"]
+torch = ["einops", "pytorch-forecasting", "pytorch-lightning", "torch"]
 wandb = ["wandb"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.0, <3.11.0"
-content-hash = "a337c82112d96af5cf8d4b54ec95cf6c3c2d4e57e933487e1fe585d0cf81ee42"
+content-hash = "0ed8d1e7e9f62271fe6b5244056bd860af671a6914ec8607b400c5c96e94f6b7"
diff --git a/pyproject.toml b/pyproject.toml
index a250fbf83..448605377 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -76,6 +76,7 @@ prophet = {version = "^1.0", optional = true}
 torch = {version = ">=1.8.0,<3", optional = true}
 pytorch-forecasting = {version = "^0.9.0", optional = true}
 pytorch-lightning = {version = "*", optional = true}
+einops = {version = "*", optional = true}
 
 wandb = {version = "^0.12.2", optional = true}
 
@@ -121,7 +122,7 @@ types-setuptools = {version = "^65.7.0", optional = true}
 [tool.poetry.extras]
 # optional deps
 prophet = ["prophet"]
-torch = ["torch", "pytorch-forecasting", "pytorch-lightning"]
+torch = ["torch", "pytorch-forecasting", "pytorch-lightning", "einops"]
 wandb = ["wandb"]
 auto = ["optuna", "sqlalchemy"]
 classification = ["pyts", "tsfresh"]
@@ -135,7 +136,7 @@ style = ["black", "isort", "flake8", "pep8-naming", "flake8-docstrings", "mypy",
 
 all = [
     "prophet",
-    "torch", "pytorch-forecasting", "pytorch-lightning",
+    "torch", "pytorch-forecasting", "pytorch-lightning", "einops",
     "wandb",
     "optuna", "sqlalchemy",
     "pyts", "tsfresh",
@@ -144,7 +145,7 @@ all = [
 
 all-dev = [
     "prophet",
-    "torch", "pytorch-forecasting", "pytorch-lightning",
+    "torch", "pytorch-forecasting", "pytorch-lightning", "einops",
     "wandb",
     "optuna", "sqlalchemy",
     "click", "semver",
@@ -247,7 +248,6 @@ filterwarnings = [
     "ignore: Call to deprecated class DeepARModel.",
     "ignore: dropout option adds dropout after all but last recurrent layer",
     "ignore: Call to deprecated class TFTModel."
-
 ]
 markers = [
     "smoke"
diff --git a/tests/test_transforms/test_embeddings/conftest.py b/tests/test_transforms/test_embeddings/conftest.py
new file mode 100644
index 000000000..d57bf8b63
--- /dev/null
+++ b/tests/test_transforms/test_embeddings/conftest.py
@@ -0,0 +1,100 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+from etna.datasets import TSDataset
+
+
+@pytest.fixture
+def ts_with_exog_nan_begin() -> TSDataset:  # 5 daily segments x 10 timestamps; NaNs only at the start of segment_0
+    n_segments = 5
+    periods = 10
+    timerange = pd.date_range(start="2020-01-01", periods=periods).to_list()
+    df = pd.DataFrame({"timestamp": timerange * n_segments})  # long format: timestamps repeated per segment
+    segments_list = []
+    for i in range(n_segments):
+        segments_list += [f"segment_{i}"] * periods  # same segment-by-segment order as the target values below
+    df["segment"] = segments_list
+    df["target"] = (
+        [None, None, 3, 4, 5, 6, 7, 8, 9, 10]  # segment_0: two missing values at the beginning
+        + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+        + [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        + [1, 1, 2, 2, 3, 3, 4, 4, 5, 5]
+        + [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1]
+    )
+
+    df_exog = pd.DataFrame({"timestamp": timerange * n_segments})
+    df_exog["segment"] = segments_list
+    df_exog["exog_1"] = df["target"] * 10  # derived from target, so missing where target is missing
+    df_exog["exog_2"] = (df["target"] * 3 + 5).astype("category")  # categorical exogenous feature
+
+    df = TSDataset.to_dataset(df)  # long -> ETNA wide dataset format
+    df_exog = TSDataset.to_dataset(df_exog)
+
+    ts = TSDataset(df=df, freq="D", df_exog=df_exog)
+    return ts
+
+
+@pytest.fixture
+def ts_with_exog_nan_middle() -> TSDataset:  # 2 daily segments x 10 timestamps; NaNs in the middle of segment_0
+    n_segments = 2
+    periods = 10
+    timerange = pd.date_range(start="2020-01-01", periods=periods).to_list()
+    df = pd.DataFrame({"timestamp": timerange * n_segments})  # long format: timestamps repeated per segment
+    df["segment"] = ["segment_0"] * periods + ["segment_1"] * periods
+    df["target"] = [1, 2, 3, 4, None, None, 7, 8, 9, 10] + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]  # gap at positions 4-5
+
+    df_exog = pd.DataFrame({"timestamp": timerange * n_segments})
+    df_exog["segment"] = ["segment_0"] * periods + ["segment_1"] * periods
+    df_exog["exog_1"] = df["target"] * 10  # derived from target, so missing where target is missing
+
+    df = TSDataset.to_dataset(df)  # long -> ETNA wide dataset format
+    df_exog = TSDataset.to_dataset(df_exog)
+
+    ts = TSDataset(df=df, freq="D", df_exog=df_exog)
+    return ts
+
+
+@pytest.fixture
+def ts_with_exog_nan_end() -> TSDataset:  # NOTE(review): despite the name, no df_exog is passed - target only
+    n_segments = 2
+    periods = 10
+    timerange = pd.date_range(start="2020-01-01", periods=periods).to_list()
+    df = pd.DataFrame({"timestamp": timerange * n_segments})  # long format: timestamps repeated per segment
+    df["segment"] = ["segment_0"] * periods + ["segment_1"] * periods
+    df["target"] = [1, 2, 3, 4, 5, 7, 8, 9, 10, None] + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]  # segment_0 ends with a NaN
+
+    df = TSDataset.to_dataset(df)  # long -> ETNA wide dataset format
+
+    ts = TSDataset(df=df, freq="D")
+    return ts
+
+
+@pytest.fixture
+def ts_with_exog_nan_begin_numpy(ts_with_exog_nan_begin) -> np.ndarray:  # array view of ts_with_exog_nan_begin
+    n_features = 3  # target, exog_1, exog_2
+    df = ts_with_exog_nan_begin.to_pandas()  # reshape below assumes columns are ordered by (segment, feature)
+    n_timestamps = len(df.index)
+    n_segments = df.columns.get_level_values("segment").nunique()
+    x = df.values.reshape((n_timestamps, n_segments, n_features)).transpose(1, 0, 2)
+    return x  # shape: (n_segments, n_timestamps, n_features)
+
+
+@pytest.fixture
+def ts_with_exog_nan_middle_numpy(ts_with_exog_nan_middle) -> np.ndarray:  # array view of ts_with_exog_nan_middle
+    n_features = 2  # target, exog_1
+    df = ts_with_exog_nan_middle.to_pandas()  # reshape below assumes columns are ordered by (segment, feature)
+    n_timestamps = len(df.index)
+    n_segments = df.columns.get_level_values("segment").nunique()
+    x = df.values.reshape((n_timestamps, n_segments, n_features)).transpose(1, 0, 2)
+    return x  # shape: (n_segments, n_timestamps, n_features)
+
+
+@pytest.fixture
+def ts_with_exog_nan_end_numpy(ts_with_exog_nan_end) -> np.ndarray:  # array view of ts_with_exog_nan_end
+    n_features = 1  # target only: the source fixture is built without exogenous data
+    df = ts_with_exog_nan_end.to_pandas()  # reshape below assumes columns are ordered by segment
+    n_timestamps = len(df.index)
+    n_segments = df.columns.get_level_values("segment").nunique()
+    x = df.values.reshape((n_timestamps, n_segments, n_features)).transpose(1, 0, 2)
+    return x  # shape: (n_segments, n_timestamps, n_features)
diff --git a/tests/test_transforms/test_embeddings/test_embedding_segment_transform.py b/tests/test_transforms/test_embeddings/test_embedding_segment_transform.py
new file mode 100644
index 000000000..fd417eb74
--- /dev/null
+++ b/tests/test_transforms/test_embeddings/test_embedding_segment_transform.py
@@ -0,0 +1,195 @@
+import pathlib
+from copy import deepcopy
+from unittest.mock import Mock
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from etna.metrics import SMAPE
+from etna.models import LinearMultiSegmentModel
+from etna.pipeline import Pipeline
+from etna.transforms import EmbeddingSegmentTransform
+from etna.transforms.embeddings.models import TS2VecEmbeddingModel
+from etna.transforms.embeddings.models import TSTCCEmbeddingModel
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # input_dims matches the three in_columns used below
+    [TS2VecEmbeddingModel(input_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2)],
+)
+@pytest.mark.smoke
+def test_fit(ts_with_exog_nan_begin, embedding_model):  # smoke: fit on data with leading NaNs must not raise
+    transform = EmbeddingSegmentTransform(
+        in_columns=["target", "exog_1", "exog_2"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},  # a single epoch keeps the test fast
+        out_column="emb",
+    )
+    transform.fit(ts=ts_with_exog_nan_begin)
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # input_dims matches the three in_columns used below
+    [TS2VecEmbeddingModel(input_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2)],
+)
+@pytest.mark.smoke
+def test_fit_transform(ts_with_exog_nan_begin, embedding_model):  # smoke: fit_transform must not raise
+    transform = EmbeddingSegmentTransform(
+        in_columns=["target", "exog_1", "exog_2"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},  # a single epoch keeps the test fast
+        out_column="emb",
+    )
+    transform.fit_transform(ts=ts_with_exog_nan_begin)
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # only the target column is embedded, hence input_dims=1
+    [TS2VecEmbeddingModel(input_dims=1), TSTCCEmbeddingModel(input_dims=1, batch_size=2)],
+)
+@pytest.mark.smoke
+def test_fit_forecast(example_tsds, embedding_model):  # smoke: the transform works inside a Pipeline fit/forecast
+    emb_transform = EmbeddingSegmentTransform(
+        in_columns=["target"], embedding_model=embedding_model, training_params={"n_epochs": 1}, out_column="emb"
+    )
+    transforms = [emb_transform]
+
+    pipeline = Pipeline(model=LinearMultiSegmentModel(), transforms=transforms, horizon=7)
+    pipeline.fit(example_tsds).forecast()  # no assertions: passing means no exception was raised
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # only the target column is embedded, hence input_dims=1
+    [TS2VecEmbeddingModel(input_dims=1, output_dims=6), TSTCCEmbeddingModel(input_dims=1, batch_size=2, output_dims=6)],
+)
+@pytest.mark.smoke
+def test_backtest_full_series(example_tsds, embedding_model):  # smoke: 2-fold backtest with 2 loky workers runs
+    emb_transform = EmbeddingSegmentTransform(
+        in_columns=["target"], embedding_model=embedding_model, training_params={"n_epochs": 1}, out_column="emb"
+    )
+    transforms = [emb_transform]
+
+    pipeline = Pipeline(model=LinearMultiSegmentModel(), transforms=transforms, horizon=7)
+    pipeline.backtest(ts=example_tsds, metrics=[SMAPE()], n_folds=2, n_jobs=2, joblib_params=dict(backend="loky"))
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # only the target column is embedded, hence input_dims=1
+    [TS2VecEmbeddingModel(input_dims=1, output_dims=6), TSTCCEmbeddingModel(input_dims=1, batch_size=2, output_dims=6)],
+)
+@pytest.mark.smoke
+def test_make_future(example_tsds, embedding_model):  # embeddings on future rows must equal those on history rows
+    emb_transform = EmbeddingSegmentTransform(
+        in_columns=["target"], embedding_model=embedding_model, training_params={"n_epochs": 1}, out_column="emb"
+    )
+    emb_transform.fit(example_tsds)
+
+    make_future_df = example_tsds.make_future(5, transforms=[emb_transform]).df  # 5 future steps, transform applied
+    values_make_future = make_future_df.loc[:, pd.IndexSlice[:, emb_transform._get_out_columns()]].values[0]
+
+    example_tsds.transform([emb_transform])  # the same fitted transform, now applied to the history
+    ts_df = example_tsds.df
+    values_ts = ts_df.loc[:, pd.IndexSlice[:, emb_transform._get_out_columns()]].values[0]
+
+    assert np.array_equal(values_make_future, values_ts)  # compares the first row of each frame
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # same model configs as test_fit on this 5-segment, 3-column fixture
+    [TS2VecEmbeddingModel(input_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2)],
+)
+@pytest.mark.smoke
+def test_save(ts_with_exog_nan_begin, tmp_path, embedding_model):  # smoke: saving a fitted transform must not raise
+    transform = EmbeddingSegmentTransform(
+        in_columns=["target", "exog_1", "exog_2"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},  # a single epoch keeps the test fast
+        out_column="emb",
+    )
+    transform.fit(ts=ts_with_exog_nan_begin)
+
+    path = pathlib.Path(tmp_path) / "tmp.zip"
+    transform.save(path=path)
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # input_dims must equal len(in_columns) == 3
+    [TS2VecEmbeddingModel(input_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2)],
+)
+@pytest.mark.smoke
+def test_load(ts_with_exog_nan_begin, tmp_path, embedding_model):  # smoke: a fitted transform survives save + load
+    transform = EmbeddingSegmentTransform(
+        in_columns=["target", "exog_1", "exog_2"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},  # a single epoch keeps the test fast
+        out_column="emb",
+    )
+    transform.fit(ts=ts_with_exog_nan_begin)
+
+    path = pathlib.Path(tmp_path) / "tmp.zip"
+    transform.save(path=path)
+    EmbeddingSegmentTransform.load(path=path)  # no assertions: loading the archive must simply not raise
+
+
+@pytest.mark.parametrize(
+    "output_dims, out_column, expected_out_columns",  # expected names are "<out_column>_<i>", i < output_dims
+    [(2, "emb", ["emb_0", "emb_1"]), (3, "lag", ["lag_0", "lag_1", "lag_2"])],
+)
+def test_get_out_columns(output_dims, out_column, expected_out_columns):  # naming check; no model is trained
+    transform = EmbeddingSegmentTransform(
+        in_columns=Mock(), embedding_model=Mock(output_dims=output_dims), out_column=out_column
+    )
+    assert sorted(expected_out_columns) == sorted(transform._get_out_columns())  # order-insensitive comparison
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # output_dims=3 yields the three embedding columns expected below
+    [TS2VecEmbeddingModel(input_dims=3, output_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2, output_dims=3)],
+)
+def test_transform_format(
+    ts_with_exog_nan_begin,
+    embedding_model,
+    expected_columns=(  # has a default value, so pytest does not resolve it as a fixture
+        "target",
+        "exog_1",
+        "exog_2",
+        "embedding_segment_0",
+        "embedding_segment_1",
+        "embedding_segment_2",
+    ),
+):
+    transform = EmbeddingSegmentTransform(
+        in_columns=["target", "exog_1", "exog_2"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},
+        out_column="embedding_segment",
+    )
+    transform.fit_transform(ts=ts_with_exog_nan_begin)
+    obtained_columns = set(ts_with_exog_nan_begin.columns.get_level_values("feature"))
+    embedding_columns = transform.get_regressors_info()  # presumably the generated embedding column names
+    embeddings = ts_with_exog_nan_begin.df.loc[:, pd.IndexSlice[:, embedding_columns]].values
+    assert sorted(obtained_columns) == sorted(expected_columns)  # inputs are kept, embeddings are added
+    assert np.all(embeddings == embeddings[0, :], axis=0).all()  # each row equals row 0: constant over time
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # input_dims matches the three in_columns used below
+    [TS2VecEmbeddingModel(input_dims=3, output_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2, output_dims=3)],
+)
+def test_transform_load_pre_fitted(ts_with_exog_nan_begin, tmp_path, embedding_model):  # save/load keeps outputs
+    transform = EmbeddingSegmentTransform(
+        in_columns=["target", "exog_1", "exog_2"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},
+        out_column="emb",
+    )
+    before_load_ts = transform.fit_transform(ts=deepcopy(ts_with_exog_nan_begin))  # copy keeps the fixture pristine
+
+    path = pathlib.Path(tmp_path) / "tmp.zip"
+    transform.save(path=path)
+
+    loaded_transform = EmbeddingSegmentTransform.load(path=path)
+    after_load_ts = loaded_transform.transform(ts=deepcopy(ts_with_exog_nan_begin))  # transform only, no refit
+
+    pd.testing.assert_frame_equal(before_load_ts.to_pandas(), after_load_ts.to_pandas())
diff --git a/tests/test_transforms/test_embeddings/test_embedding_window_transform.py b/tests/test_transforms/test_embeddings/test_embedding_window_transform.py
new file mode 100644
index 000000000..47b0e0ca9
--- /dev/null
+++ b/tests/test_transforms/test_embeddings/test_embedding_window_transform.py
@@ -0,0 +1,174 @@
+import pathlib
+from copy import deepcopy
+from unittest.mock import Mock
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from etna.metrics import SMAPE
+from etna.models import LinearMultiSegmentModel
+from etna.pipeline import Pipeline
+from etna.transforms import EmbeddingWindowTransform
+from etna.transforms import FilterFeaturesTransform
+from etna.transforms import LagTransform
+from etna.transforms.embeddings.models import TS2VecEmbeddingModel
+from etna.transforms.embeddings.models import TSTCCEmbeddingModel
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # input_dims matches the three in_columns used below
+    [TS2VecEmbeddingModel(input_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2)],
+)
+@pytest.mark.smoke
+def test_fit(ts_with_exog_nan_begin, embedding_model):  # smoke: fit on data with leading NaNs must not raise
+    transform = EmbeddingWindowTransform(
+        in_columns=["target", "exog_1", "exog_2"], embedding_model=embedding_model, training_params={"n_epochs": 1}
+    )
+    transform.fit(ts=ts_with_exog_nan_begin)
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # input_dims matches the three in_columns used below
+    [TS2VecEmbeddingModel(input_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2)],
+)
+@pytest.mark.smoke
+def test_fit_transform(ts_with_exog_nan_begin, embedding_model):  # smoke: fit_transform must not raise
+    transform = EmbeddingWindowTransform(
+        in_columns=["target", "exog_1", "exog_2"], embedding_model=embedding_model, training_params={"n_epochs": 1}
+    )
+    transform.fit_transform(ts=ts_with_exog_nan_begin)
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # only the target column is embedded, hence input_dims=1
+    [TS2VecEmbeddingModel(input_dims=1, output_dims=2), TSTCCEmbeddingModel(input_dims=1, batch_size=2, output_dims=2)],
+)
+@pytest.mark.smoke
+def test_fit_forecast(example_tsds, embedding_model):  # smoke: lagged window embeddings work in a Pipeline
+    emb_transform = EmbeddingWindowTransform(
+        in_columns=["target"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},
+        out_column="embedding_window",
+    )
+    output_dims = embedding_model.output_dims
+    lag_transforms = [  # one lag-7 feature per embedding column; the lag equals the pipeline horizon
+        LagTransform(in_column=f"embedding_window_{i}", lags=[7], out_column=f"lag_{i}") for i in range(output_dims)
+    ]
+    filter_transforms = FilterFeaturesTransform(exclude=[f"embedding_window_{i}" for i in range(output_dims)])
+    transforms = [emb_transform] + lag_transforms + [filter_transforms]  # raw embeddings are dropped after lagging
+
+    pipeline = Pipeline(model=LinearMultiSegmentModel(), transforms=transforms, horizon=7)
+    pipeline.fit(example_tsds).forecast()  # no assertions: passing means no exception was raised
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # only the target column is embedded, hence input_dims=1
+    [TS2VecEmbeddingModel(input_dims=1, output_dims=2), TSTCCEmbeddingModel(input_dims=1, batch_size=2, output_dims=2)],
+)
+@pytest.mark.smoke
+def test_backtest(example_tsds, embedding_model):  # smoke: 2-fold backtest with 2 loky workers runs
+    emb_transform = EmbeddingWindowTransform(
+        in_columns=["target"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},
+        out_column="embedding_window",
+    )
+    output_dims = embedding_model.output_dims
+    lag_transforms = [  # one lag-7 feature per embedding column; the lag equals the pipeline horizon
+        LagTransform(in_column=f"embedding_window_{i}", lags=[7], out_column=f"lag_{i}") for i in range(output_dims)
+    ]
+    filter_transforms = FilterFeaturesTransform(exclude=[f"embedding_window_{i}" for i in range(output_dims)])
+    transforms = [emb_transform] + lag_transforms + [filter_transforms]  # raw embeddings are dropped after lagging
+
+    pipeline = Pipeline(model=LinearMultiSegmentModel(), transforms=transforms, horizon=7)
+    pipeline.backtest(ts=example_tsds, metrics=[SMAPE()], n_folds=2, n_jobs=2, joblib_params=dict(backend="loky"))
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # same model configs as test_fit on this 5-segment, 3-column fixture
+    [TS2VecEmbeddingModel(input_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2)],
+)
+@pytest.mark.smoke
+def test_save(ts_with_exog_nan_begin, tmp_path, embedding_model):  # smoke: saving a fitted transform must not raise
+    transform = EmbeddingWindowTransform(
+        in_columns=["target", "exog_1", "exog_2"], embedding_model=embedding_model, training_params={"n_epochs": 1}
+    )
+    transform.fit(ts=ts_with_exog_nan_begin)
+
+    path = pathlib.Path(tmp_path) / "tmp.zip"
+    transform.save(path=path)
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # input_dims must equal len(in_columns) == 3
+    [TS2VecEmbeddingModel(input_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2)],
+)
+@pytest.mark.smoke
+def test_load(ts_with_exog_nan_begin, tmp_path, embedding_model):  # smoke: a fitted transform survives save + load
+    transform = EmbeddingWindowTransform(
+        in_columns=["target", "exog_1", "exog_2"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},  # a single epoch keeps the test fast
+    )
+    transform.fit(ts=ts_with_exog_nan_begin)
+
+    path = pathlib.Path(tmp_path) / "tmp.zip"
+    transform.save(path=path)
+    EmbeddingWindowTransform.load(path=path)  # no assertions: loading the archive must simply not raise
+
+
+@pytest.mark.parametrize(
+    "output_dims, out_column, expected_out_columns",  # expected names are "<out_column>_<i>", i < output_dims
+    [(2, "emb", ["emb_0", "emb_1"]), (3, "lag", ["lag_0", "lag_1", "lag_2"])],
+)
+def test_get_out_columns(output_dims, out_column, expected_out_columns):  # naming check; no model is trained
+    transform = EmbeddingWindowTransform(
+        in_columns=Mock(), embedding_model=Mock(output_dims=output_dims), out_column=out_column
+    )
+    assert sorted(expected_out_columns) == sorted(transform._get_out_columns())  # order-insensitive comparison
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # output_dims=3 yields the three embedding columns expected below
+    [TS2VecEmbeddingModel(input_dims=3, output_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2, output_dims=3)],
+)
+def test_transform_format(
+    ts_with_exog_nan_begin,
+    embedding_model,  # expected_columns below has a default value, so pytest does not resolve it as a fixture
+    expected_columns=("target", "exog_1", "exog_2", "embedding_window_0", "embedding_window_1", "embedding_window_2"),
+):
+    transform = EmbeddingWindowTransform(
+        in_columns=["target", "exog_1", "exog_2"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},
+        out_column="embedding_window",
+    )
+    transform.fit_transform(ts=ts_with_exog_nan_begin)
+    obtained_columns = set(ts_with_exog_nan_begin.columns.get_level_values("feature"))
+    embedding_columns = ["embedding_window_0", "embedding_window_1", "embedding_window_2"]
+    embeddings = ts_with_exog_nan_begin.df.loc[:, pd.IndexSlice[:, embedding_columns]].values
+    assert sorted(obtained_columns) == sorted(expected_columns)  # inputs are kept, embeddings are added
+    assert not np.all(embeddings == embeddings[0, :], axis=0).all()  # some row differs from row 0: varies in time
+
+
+@pytest.mark.parametrize(
+    "embedding_model",  # input_dims matches the three in_columns used below
+    [TS2VecEmbeddingModel(input_dims=3, output_dims=3), TSTCCEmbeddingModel(input_dims=3, batch_size=2, output_dims=3)],
+)
+def test_transform_load_pre_fitted(ts_with_exog_nan_begin, tmp_path, embedding_model):  # save/load keeps outputs
+    transform = EmbeddingWindowTransform(
+        in_columns=["target", "exog_1", "exog_2"],
+        embedding_model=embedding_model,
+        training_params={"n_epochs": 1},
+    )
+    before_load_ts = transform.fit_transform(ts=deepcopy(ts_with_exog_nan_begin))  # copy keeps the fixture pristine
+
+    path = pathlib.Path(tmp_path) / "tmp.zip"
+    transform.save(path=path)
+
+    loaded_transform = EmbeddingWindowTransform.load(path=path)
+    after_load_ts = loaded_transform.transform(ts=deepcopy(ts_with_exog_nan_begin))  # transform only, no refit
+
+    pd.testing.assert_frame_equal(before_load_ts.to_pandas(), after_load_ts.to_pandas())
diff --git a/tests/test_transforms/test_embeddings/test_models/test_ts2vec.py b/tests/test_transforms/test_embeddings/test_models/test_ts2vec.py
new file mode 100644
index 000000000..f04f81090
--- /dev/null
+++ b/tests/test_transforms/test_embeddings/test_models/test_ts2vec.py
@@ -0,0 +1,140 @@
+import pathlib
+from tempfile import NamedTemporaryFile
+
+import numpy as np
+import pytest
+from loguru import logger as _logger
+
+from etna.loggers import ConsoleLogger
+from etna.loggers import tslogger
+from etna.transforms.embeddings.models import TS2VecEmbeddingModel
+from tests.test_transforms.test_embeddings.test_models.utils import check_logged_loss
+
+
+@pytest.mark.smoke
+def test_fit(ts_with_exog_nan_begin_numpy):
+    """Smoke test: fitting for a single epoch runs without errors."""
+    TS2VecEmbeddingModel(input_dims=3).fit(ts_with_exog_nan_begin_numpy, n_epochs=1)
+
+
+@pytest.mark.smoke
+def test_encode_segment(ts_with_exog_nan_begin_numpy):
+    """Smoke test: segment-level encoding with a freshly created model runs without errors."""
+    TS2VecEmbeddingModel(input_dims=3).encode_segment(ts_with_exog_nan_begin_numpy)
+
+
+@pytest.mark.smoke
+def test_encode_window(ts_with_exog_nan_begin_numpy):
+    """Smoke test: window-level encoding with a freshly created model runs without errors."""
+    TS2VecEmbeddingModel(input_dims=3).encode_window(ts_with_exog_nan_begin_numpy)
+
+
+@pytest.mark.smoke
+def test_save(tmp_path):
+    """Smoke test: a freshly created model can be saved to the given path."""
+    archive_path = pathlib.Path(tmp_path) / "tmp.zip"
+
+    TS2VecEmbeddingModel(input_dims=3).save(path=archive_path)
+
+
+@pytest.mark.smoke
+def test_load(tmp_path):
+    """Smoke test: a model saved to disk can be loaded back."""
+    archive_path = pathlib.Path(tmp_path) / "tmp.zip"
+    TS2VecEmbeddingModel(input_dims=3).save(path=archive_path)
+
+    TS2VecEmbeddingModel.load(path=archive_path)
+
+
+@pytest.mark.parametrize(
+    "output_dims, segment_shape_expected, window_shape_expected", [(2, (5, 2), (5, 10, 2)), (3, (5, 3), (5, 10, 3))]
+)
+def test_encode_format(ts_with_exog_nan_begin_numpy, output_dims, segment_shape_expected, window_shape_expected):
+    """Check the shapes of segment-level and window-level embeddings for the given ``output_dims``."""
+    encoder = TS2VecEmbeddingModel(input_dims=3, output_dims=output_dims)
+    segment_shape = encoder.encode_segment(ts_with_exog_nan_begin_numpy).shape
+    window_shape = encoder.encode_window(ts_with_exog_nan_begin_numpy).shape
+    assert (segment_shape, window_shape) == (segment_shape_expected, window_shape_expected)
+
+
+def test_encode_pre_fitted(ts_with_exog_nan_begin_numpy, tmp_path):
+    """Check that a fitted model produces the same embeddings after a save/load round trip."""
+    data = ts_with_exog_nan_begin_numpy
+    archive_path = pathlib.Path(tmp_path) / "tmp.zip"
+
+    fitted = TS2VecEmbeddingModel(input_dims=3)
+    fitted.fit(data, n_epochs=1)
+    fitted.save(path=archive_path)
+
+    restored = TS2VecEmbeddingModel.load(path=archive_path)
+
+    # window-level embeddings are compared first, then segment-level ones
+    np.testing.assert_array_equal(fitted.encode_window(data), restored.encode_window(data))
+    np.testing.assert_array_equal(fitted.encode_segment(data), restored.encode_segment(data))
+
+
+def test_not_freeze_fit(ts_with_exog_nan_begin_numpy, tmp_path):
+    """Check that refitting a loaded non-frozen model changes both window and segment embeddings."""
+    data = ts_with_exog_nan_begin_numpy
+    model = TS2VecEmbeddingModel(input_dims=3)
+    model.fit(data, n_epochs=1)
+    model.freeze(is_freezed=False)
+    path = pathlib.Path(tmp_path) / "tmp.zip"
+    model.save(path=path)
+
+    model_loaded = TS2VecEmbeddingModel.load(path=path)
+    model_loaded.fit(data, n_epochs=1)
+
+    assert model_loaded.is_freezed is False
+    # Each comparison needs its own context: ``pytest.raises`` exits on the first raised
+    # AssertionError, so a second check placed in the same block would never be executed.
+    with pytest.raises(AssertionError):
+        np.testing.assert_array_equal(model.encode_window(data), model_loaded.encode_window(data))
+    with pytest.raises(AssertionError):
+        np.testing.assert_array_equal(model.encode_segment(data), model_loaded.encode_segment(data))
+
+
+def test_freeze_fit(ts_with_exog_nan_begin_numpy, tmp_path):
+    """Check that calling ``fit`` on a loaded frozen model leaves its embeddings unchanged."""
+    data = ts_with_exog_nan_begin_numpy
+    archive_path = pathlib.Path(tmp_path) / "tmp.zip"
+
+    frozen = TS2VecEmbeddingModel(input_dims=3)
+    frozen.fit(data, n_epochs=1)
+    frozen.freeze(is_freezed=True)
+    frozen.save(path=archive_path)
+
+    restored = TS2VecEmbeddingModel.load(path=archive_path)
+    restored.fit(data, n_epochs=1)
+
+    assert restored.is_freezed is True
+    # the extra ``fit`` call is expected to be a no-op, so both levels of embeddings have to match exactly
+    np.testing.assert_array_equal(frozen.encode_window(data), restored.encode_window(data))
+    np.testing.assert_array_equal(frozen.encode_segment(data), restored.encode_segment(data))
+
+
+@pytest.mark.parametrize(
+    "data, input_dim",
+    [("ts_with_exog_nan_begin_numpy", 3), ("ts_with_exog_nan_middle_numpy", 2), ("ts_with_exog_nan_end_numpy", 1)],
+)
+def test_encode_not_contains_nan(data, input_dim, request):
+    """Check that neither segment nor window embeddings contain NaN values for each parametrized dataset."""
+    array = request.getfixturevalue(data)
+    encoder = TS2VecEmbeddingModel(input_dims=input_dim)
+    encoder.fit(array, n_epochs=1)
+    segment_nan_mask = np.isnan(encoder.encode_segment(array))
+    window_nan_mask = np.isnan(encoder.encode_window(array))
+    assert not segment_nan_mask.any()
+    assert not window_nan_mask.any()
+
+
+@pytest.mark.parametrize("verbose, n_epochs, n_lines_expected", [(True, 1, 1), (False, 1, 0)])
+def test_logged_loss(ts_with_exog_nan_begin_numpy, verbose, n_epochs, n_lines_expected):
+    """Check that the training loss is logged once per epoch only when ``verbose`` is enabled."""
+    with NamedTemporaryFile() as file:
+        # keep both handler ids so that nothing stays registered for the following tests
+        sink_id, idx = _logger.add(file.name), tslogger.add(ConsoleLogger())
+        TS2VecEmbeddingModel(input_dims=3).fit(ts_with_exog_nan_begin_numpy, n_epochs=n_epochs, verbose=verbose)
+        check_logged_loss(log_file=file.name, n_lines_expected=n_lines_expected)
+        tslogger.remove(idx)
+        _logger.remove(sink_id)
diff --git a/tests/test_transforms/test_embeddings/test_models/test_tstcc.py b/tests/test_transforms/test_embeddings/test_models/test_tstcc.py
new file mode 100644
index 000000000..cb41439db
--- /dev/null
+++ b/tests/test_transforms/test_embeddings/test_models/test_tstcc.py
@@ -0,0 +1,146 @@
+import pathlib
+from tempfile import NamedTemporaryFile
+
+import numpy as np
+import pytest
+from loguru import logger as _logger
+
+from etna.loggers import ConsoleLogger
+from etna.loggers import tslogger
+from etna.transforms.embeddings.models import TSTCCEmbeddingModel
+from tests.test_transforms.test_embeddings.test_models.utils import check_logged_loss
+
+
+@pytest.mark.smoke
+def test_fit(ts_with_exog_nan_begin_numpy):
+    """Smoke test: fitting for a single epoch runs without errors."""
+    TSTCCEmbeddingModel(input_dims=3, batch_size=3).fit(ts_with_exog_nan_begin_numpy, n_epochs=1)
+
+
+@pytest.mark.smoke
+def test_encode_segment(ts_with_exog_nan_begin_numpy):
+    """Smoke test: segment-level encoding with a freshly created model runs without errors."""
+    TSTCCEmbeddingModel(input_dims=3, batch_size=3).encode_segment(ts_with_exog_nan_begin_numpy)
+
+
+@pytest.mark.smoke
+def test_encode_window(ts_with_exog_nan_begin_numpy):
+    """Smoke test: window-level encoding with a freshly created model runs without errors."""
+    TSTCCEmbeddingModel(input_dims=3, batch_size=3).encode_window(ts_with_exog_nan_begin_numpy)
+
+
+@pytest.mark.smoke
+def test_save(tmp_path):
+    """Smoke test: a freshly created model can be saved to the given path."""
+    archive_path = pathlib.Path(tmp_path) / "tmp.zip"
+
+    TSTCCEmbeddingModel(input_dims=3).save(path=archive_path)
+
+
+@pytest.mark.smoke
+def test_load(tmp_path):
+    """Smoke test: a model saved to disk can be loaded back."""
+    archive_path = pathlib.Path(tmp_path) / "tmp.zip"
+    TSTCCEmbeddingModel(input_dims=3).save(path=archive_path)
+
+    TSTCCEmbeddingModel.load(path=archive_path)
+
+
+@pytest.mark.parametrize(
+    "output_dims, segment_shape_expected, window_shape_expected", [(2, (5, 2), (5, 10, 2)), (3, (5, 3), (5, 10, 3))]
+)
+def test_encode_format(ts_with_exog_nan_begin_numpy, output_dims, segment_shape_expected, window_shape_expected):
+    """Check the shapes of segment-level and window-level embeddings for the given ``output_dims``."""
+    encoder = TSTCCEmbeddingModel(input_dims=3, output_dims=output_dims, batch_size=3)
+    segment_shape = encoder.encode_segment(ts_with_exog_nan_begin_numpy).shape
+    window_shape = encoder.encode_window(ts_with_exog_nan_begin_numpy).shape
+    assert (segment_shape, window_shape) == (segment_shape_expected, window_shape_expected)
+
+
+def test_encode_pre_fitted(ts_with_exog_nan_begin_numpy, tmp_path):
+    """Check that a fitted model produces the same embeddings after a save/load round trip."""
+    data = ts_with_exog_nan_begin_numpy
+    archive_path = pathlib.Path(tmp_path) / "tmp.zip"
+
+    fitted = TSTCCEmbeddingModel(input_dims=3, batch_size=3)
+    fitted.fit(data, n_epochs=1)
+    fitted.save(path=archive_path)
+
+    restored = TSTCCEmbeddingModel.load(path=archive_path)
+
+    # window-level embeddings are compared first, then segment-level ones
+    np.testing.assert_array_equal(fitted.encode_window(data), restored.encode_window(data))
+    np.testing.assert_array_equal(fitted.encode_segment(data), restored.encode_segment(data))
+
+
+def test_not_freeze_fit(ts_with_exog_nan_begin_numpy, tmp_path):
+    """Check that refitting a loaded non-frozen model changes both window and segment embeddings."""
+    data = ts_with_exog_nan_begin_numpy
+    model = TSTCCEmbeddingModel(input_dims=3, batch_size=3)
+    model.fit(data, n_epochs=1)
+    model.freeze(is_freezed=False)
+    path = pathlib.Path(tmp_path) / "tmp.zip"
+    model.save(path=path)
+
+    model_loaded = TSTCCEmbeddingModel.load(path=path)
+    model_loaded.fit(data, n_epochs=1)
+
+    assert model_loaded.is_freezed is False
+    # Each comparison needs its own context: ``pytest.raises`` exits on the first raised
+    # AssertionError, so a second check placed in the same block would never be executed.
+    with pytest.raises(AssertionError):
+        np.testing.assert_array_equal(model.encode_window(data), model_loaded.encode_window(data))
+    with pytest.raises(AssertionError):
+        np.testing.assert_array_equal(model.encode_segment(data), model_loaded.encode_segment(data))
+
+
+def test_freeze_fit(ts_with_exog_nan_begin_numpy, tmp_path):
+    """Check that calling ``fit`` on a loaded frozen model leaves its embeddings unchanged."""
+    data = ts_with_exog_nan_begin_numpy
+    archive_path = pathlib.Path(tmp_path) / "tmp.zip"
+
+    frozen = TSTCCEmbeddingModel(input_dims=3, batch_size=3)
+    frozen.fit(data, n_epochs=1)
+    frozen.freeze(is_freezed=True)
+    frozen.save(path=archive_path)
+
+    restored = TSTCCEmbeddingModel.load(path=archive_path)
+    restored.fit(data, n_epochs=1)
+
+    assert restored.is_freezed is True
+    # the extra ``fit`` call is expected to be a no-op, so both levels of embeddings have to match exactly
+    np.testing.assert_array_equal(frozen.encode_window(data), restored.encode_window(data))
+    np.testing.assert_array_equal(frozen.encode_segment(data), restored.encode_segment(data))
+
+
+@pytest.mark.parametrize(
+    "data, input_dim",
+    [("ts_with_exog_nan_begin_numpy", 3), ("ts_with_exog_nan_middle_numpy", 2), ("ts_with_exog_nan_end_numpy", 1)],
+)
+def test_encode_not_contains_nan(data, input_dim, request):
+    """Check that neither segment nor window embeddings contain NaN values for each parametrized dataset."""
+    array = request.getfixturevalue(data)
+    encoder = TSTCCEmbeddingModel(input_dims=input_dim, batch_size=2)
+    encoder.fit(array, n_epochs=1)
+    segment_nan_mask = np.isnan(encoder.encode_segment(array))
+    window_nan_mask = np.isnan(encoder.encode_window(array))
+    assert not segment_nan_mask.any()
+    assert not window_nan_mask.any()
+
+
+def test_failed_batch_size_1(ts_with_exog_nan_begin_numpy):
+    model = TSTCCEmbeddingModel(input_dims=3, batch_size=1)  # built outside ``raises``: only ``fit`` may fail
+    with pytest.raises(ValueError):  # fitting with ``batch_size=1`` is expected to be rejected
+        model.fit(ts_with_exog_nan_begin_numpy, n_epochs=1)
+
+
+@pytest.mark.parametrize("verbose, n_epochs, n_lines_expected", [(True, 1, 1), (False, 1, 0)])
+def test_logged_loss(ts_with_exog_nan_begin_numpy, verbose, n_epochs, n_lines_expected):
+    """Check that the training loss is logged once per epoch only when ``verbose`` is enabled."""
+    with NamedTemporaryFile() as file:
+        # keep both handler ids so that nothing stays registered for the following tests
+        sink_id, idx = _logger.add(file.name), tslogger.add(ConsoleLogger())
+        TSTCCEmbeddingModel(input_dims=3).fit(ts_with_exog_nan_begin_numpy, n_epochs=n_epochs, verbose=verbose)
+        check_logged_loss(log_file=file.name, n_lines_expected=n_lines_expected)
+        tslogger.remove(idx)
+        _logger.remove(sink_id)
diff --git a/tests/test_transforms/test_embeddings/test_models/utils.py b/tests/test_transforms/test_embeddings/test_models/utils.py
new file mode 100644
index 000000000..8b7c91c6a
--- /dev/null
+++ b/tests/test_transforms/test_embeddings/test_models/utils.py
@@ -0,0 +1,8 @@
+def check_logged_loss(log_file: str, n_lines_expected: int):
+    """Check that ``log_file`` has ``n_lines_expected`` lines and the i-th one contains ``Epoch {i}: loss=``."""
+    with open(log_file, "r") as in_file:
+        lines = in_file.readlines()
+    # the file is already closed here; the checks only need the lines read above
+    assert len(lines) == n_lines_expected
+    for i, line in enumerate(lines):
+        assert f"Epoch {i}: loss=" in line
diff --git a/tests/test_transforms/test_inference/test_inverse_transform.py b/tests/test_transforms/test_inference/test_inverse_transform.py
index 10f23bb51..df2dc0f01 100644
--- a/tests/test_transforms/test_inference/test_inverse_transform.py
+++ b/tests/test_transforms/test_inference/test_inverse_transform.py
@@ -18,6 +18,8 @@
 from etna.transforms import DensityOutliersTransform
 from etna.transforms import DeseasonalityTransform
 from etna.transforms import DifferencingTransform
+from etna.transforms import EmbeddingSegmentTransform
+from etna.transforms import EmbeddingWindowTransform
 from etna.transforms import EventTransform
 from etna.transforms import ExogShiftTransform
 from etna.transforms import FilterFeaturesTransform
@@ -59,6 +61,8 @@
 from etna.transforms import TrendTransform
 from etna.transforms import YeoJohnsonTransform
 from etna.transforms.decomposition import RupturesChangePointsModel
+from etna.transforms.embeddings.models import TS2VecEmbeddingModel
+from etna.transforms.embeddings.models import TSTCCEmbeddingModel
 from tests.test_transforms.utils import assert_column_changes
 from tests.test_transforms.utils import find_columns_diff
 from tests.utils import convert_ts_to_int_timestamp
@@ -96,9 +100,7 @@ def _test_inverse_transform_train(self, ts, transform, expected_changes):
         created_columns, removed_columns, changed_columns = find_columns_diff(
             flat_transformed_test_df, flat_inverse_transformed_test_df
         )
-        pd.testing.assert_frame_equal(
-            flat_test_df[list(changed_columns)], flat_inverse_transformed_test_df[list(changed_columns)]
-        )
+        assert_frame_equal(flat_test_df[list(changed_columns)], flat_inverse_transformed_test_df[list(changed_columns)])
 
     @pytest.mark.parametrize(
         "transform, dataset_name, expected_changes",
@@ -136,6 +138,43 @@ def _test_inverse_transform_train(self, ts, transform, expected_changes):
                 "regular_ts",
                 {},
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {}),
             (
@@ -557,6 +596,43 @@ def test_inverse_transform_train_fail_resample(self, transform, dataset_name, ex
                 "regular_ts",
                 {},
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {}),
             (
@@ -970,7 +1046,9 @@ def _test_inverse_transform_train_subset_segments(self, ts, transform, segments)
         inverse_transformed_subset_df = transform.inverse_transform(transformed_subset_ts).to_pandas()
 
         # check
-        assert_frame_equal(inverse_transformed_subset_df, inverse_transformed_df.loc[:, pd.IndexSlice[segments, :]])
+        assert_frame_equal(
+            inverse_transformed_subset_df, inverse_transformed_df.loc[:, pd.IndexSlice[segments, :]], atol=1e-5
+        )
 
     @pytest.mark.parametrize(
         "transform, dataset_name",
@@ -1006,6 +1084,39 @@ def _test_inverse_transform_train_subset_segments(self, ts, transform, segments)
                 ),
                 "regular_ts",
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday"), "ts_with_exog"),
             (OneHotEncoderTransform(in_column="weekday"), "ts_with_exog"),
@@ -1200,7 +1311,9 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments
 
         # check
         assert_frame_equal(
-            inverse_transformed_subset_future_df, inverse_transformed_future_df.loc[:, pd.IndexSlice[segments, :]]
+            inverse_transformed_subset_future_df,
+            inverse_transformed_future_df.loc[:, pd.IndexSlice[segments, :]],
+            atol=1e-5,
         )
 
     @pytest.mark.parametrize(
@@ -1253,6 +1366,39 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments
                 ),
                 "regular_ts",
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday"), "ts_with_exog"),
             (OneHotEncoderTransform(in_column="weekday"), "ts_with_exog"),
@@ -1479,13 +1625,50 @@ def _test_inverse_transform_train_new_segments(self, ts, transform, train_segmen
         created_columns, removed_columns, changed_columns = find_columns_diff(
             flat_transformed_test_df, flat_inverse_transformed_test_df
         )
-        pd.testing.assert_frame_equal(
-            flat_test_df[list(changed_columns)], flat_inverse_transformed_test_df[list(changed_columns)]
+        assert_frame_equal(
+            flat_test_df[list(changed_columns)], flat_inverse_transformed_test_df[list(changed_columns)], atol=1e-5
         )
 
     @pytest.mark.parametrize(
         "transform, dataset_name, expected_changes",
         [
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {}),
             (
@@ -1859,13 +2042,50 @@ def _test_inverse_transform_future_new_segments(self, ts, transform, train_segme
         created_columns, removed_columns, changed_columns = find_columns_diff(
             flat_transformed_test_df, flat_inverse_transformed_test_df
         )
-        pd.testing.assert_frame_equal(
-            flat_test_df[list(changed_columns)], flat_inverse_transformed_test_df[list(changed_columns)]
+        assert_frame_equal(
+            flat_test_df[list(changed_columns)], flat_inverse_transformed_test_df[list(changed_columns)], atol=1e-5
         )
 
     @pytest.mark.parametrize(
         "transform, dataset_name, expected_changes",
         [
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {}),
             (
@@ -2307,7 +2527,7 @@ def _test_inverse_transform_future_with_target(
         created_columns, removed_columns, changed_columns = find_columns_diff(
             flat_transformed_test_df, flat_inverse_transformed_test_df
         )
-        pd.testing.assert_frame_equal(
+        assert_frame_equal(
             flat_test_df[list(changed_columns)],
             flat_inverse_transformed_test_df[list(changed_columns)],
         )
@@ -2348,6 +2568,43 @@ def _test_inverse_transform_future_with_target(
                 "regular_ts",
                 {},
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {}),
             (
@@ -2761,9 +3018,8 @@ def _test_inverse_transform_future_without_target(
         created_columns, removed_columns, changed_columns = find_columns_diff(
             flat_transformed_test_df, flat_inverse_transformed_test_df
         )
-        pd.testing.assert_frame_equal(
-            flat_test_df[list(changed_columns)],
-            flat_inverse_transformed_test_df[list(changed_columns)],
+        assert_frame_equal(
+            flat_test_df[list(changed_columns)], flat_inverse_transformed_test_df[list(changed_columns)], atol=1e-5
         )
 
     @pytest.mark.parametrize(
@@ -2816,6 +3072,43 @@ def _test_inverse_transform_future_without_target(
                 "regular_ts",
                 {},
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+                {},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {}),
             (
diff --git a/tests/test_transforms/test_inference/test_transform.py b/tests/test_transforms/test_inference/test_transform.py
index 322bfc674..981daa7a7 100644
--- a/tests/test_transforms/test_inference/test_transform.py
+++ b/tests/test_transforms/test_inference/test_transform.py
@@ -18,6 +18,8 @@
 from etna.transforms import DensityOutliersTransform
 from etna.transforms import DeseasonalityTransform
 from etna.transforms import DifferencingTransform
+from etna.transforms import EmbeddingSegmentTransform
+from etna.transforms import EmbeddingWindowTransform
 from etna.transforms import EventTransform
 from etna.transforms import ExogShiftTransform
 from etna.transforms import FilterFeaturesTransform
@@ -59,6 +61,8 @@
 from etna.transforms import TrendTransform
 from etna.transforms import YeoJohnsonTransform
 from etna.transforms.decomposition import RupturesChangePointsModel
+from etna.transforms.embeddings.models import TS2VecEmbeddingModel
+from etna.transforms.embeddings.models import TSTCCEmbeddingModel
 from tests.test_transforms.utils import assert_column_changes
 from tests.utils import convert_ts_to_int_timestamp
 from tests.utils import select_segments_subset
@@ -120,6 +124,47 @@ def _test_transform_train(self, ts, transform, expected_changes):
                 "regular_ts",
                 {"create": {"res"}},
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {"create": {"res"}}),
             (
@@ -493,6 +538,47 @@ def test_transform_train_datetime_timestamp(self, transform, dataset_name, expec
                 "regular_ts",
                 {"create": {"res"}},
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {"create": {"res"}}),
             (
@@ -923,7 +1009,7 @@ def _test_transform_train_subset_segments(self, ts, transform, segments):
         transformed_subset_df = transform.transform(subset_ts).to_pandas()
 
         # check
-        assert_frame_equal(transformed_subset_df, transformed_df.loc[:, pd.IndexSlice[segments, :]])
+        assert_frame_equal(transformed_subset_df, transformed_df.loc[:, pd.IndexSlice[segments, :]], atol=5e-4)
 
     @pytest.mark.parametrize(
         "transform, dataset_name",
@@ -954,6 +1040,39 @@ def _test_transform_train_subset_segments(self, ts, transform, segments):
                 ),
                 "regular_ts",
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday"), "ts_with_exog"),
             (OneHotEncoderTransform(in_column="weekday"), "ts_with_exog"),
@@ -1139,7 +1258,9 @@ def _test_transform_future_subset_segments(self, ts, transform, segments, horizo
         # check
         transformed_future_df = transformed_future_ts.to_pandas()
         transformed_subset_future_df = transformed_subset_future_ts.to_pandas()
-        assert_frame_equal(transformed_subset_future_df, transformed_future_df.loc[:, pd.IndexSlice[segments, :]])
+        assert_frame_equal(
+            transformed_subset_future_df, transformed_future_df.loc[:, pd.IndexSlice[segments, :]], atol=5e-4
+        )
 
     @pytest.mark.parametrize(
         "transform, dataset_name",
@@ -1183,6 +1304,39 @@ def _test_transform_future_subset_segments(self, ts, transform, segments, horizo
                 ),
                 "regular_ts",
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2, batch_size=2),
+                    training_params={"n_epochs": 1},
+                ),
+                "regular_ts",
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday"), "ts_with_exog"),
             (OneHotEncoderTransform(in_column="weekday"), "ts_with_exog"),
@@ -1402,6 +1556,47 @@ def _test_transform_train_new_segments(self, ts, transform, train_segments, expe
     @pytest.mark.parametrize(
         "transform, dataset_name, expected_changes",
         [
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {"create": {"res"}}),
             (
@@ -1740,6 +1935,47 @@ def _test_transform_future_new_segments(self, ts, transform, train_segments, exp
     @pytest.mark.parametrize(
         "transform, dataset_name, expected_changes",
         [
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {"create": {"res"}}),
             (
@@ -2163,6 +2399,47 @@ def _test_transform_future_with_target(self, ts, transform, expected_changes, ga
                 "regular_ts",
                 {"create": {"res"}},
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {"create": {"res"}}),
             (
@@ -2558,6 +2835,47 @@ def _test_transform_future_without_target(self, ts, transform, expected_changes,
                 "regular_ts",
                 {"create": {"res"}},
             ),
+            # embeddings
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingSegmentTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TS2VecEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
+            (
+                EmbeddingWindowTransform(
+                    in_columns=["target"],
+                    embedding_model=TSTCCEmbeddingModel(input_dims=1, output_dims=2),
+                    training_params={"n_epochs": 1},
+                    out_column="emb",
+                ),
+                "regular_ts",
+                {"create": {"emb_0", "emb_1"}},
+            ),
             # encoders
             (LabelEncoderTransform(in_column="weekday", out_column="res"), "ts_with_exog", {"create": {"res"}}),
             (