etna-team · egoriyaa · Apr 10, 2024 · Apr 2, 2024 · Apr 3, 2024 · Apr 3, 2024
diff --git a/docs/source/api_reference/transforms.rst b/docs/source/api_reference/transforms.rst
@@ -65,7 +65,8 @@ Embedding transforms and their utilities:
 
    EmbeddingSegmentTransform
    EmbeddingWindowTransform
-   embeddings.TS2VecEmbeddingModel
+   embeddings.models.TS2VecEmbeddingModel
+   embeddings.models.TSTCCEmbeddingModel
 
 Feature selection transforms:
 

diff --git a/etna/libs/ts2vec/ts2vec.py b/etna/libs/ts2vec/ts2vec.py
@@ -29,6 +29,7 @@
 from torch.utils.data import TensorDataset, DataLoader
 import numpy as np
 from etna.libs.ts2vec.encoder import TSEncoder
+from etna.loggers import tslogger, ConsoleLogger
 from etna.libs.ts2vec.losses import  hierarchical_contrastive_loss
 from etna.libs.ts2vec.utils import take_per_row, split_with_nan, centerize_vary_length_series, torch_pad_nan, AveragedModel
 import math
@@ -97,6 +98,7 @@ def fit(self, train_data, n_epochs=None, n_iters=None, verbose=False):
         Returns:
             loss_log: a list containing the training losses on each epoch.
         '''
+
         assert train_data.ndim == 3
 
         if n_iters is None and n_epochs is None:
@@ -182,7 +184,7 @@ def fit(self, train_data, n_epochs=None, n_iters=None, verbose=False):
             cum_loss /= n_epoch_iters
             loss_log.append(cum_loss)
             if verbose:
-                print(f"Epoch #{cur_epoch}: loss={cum_loss}")
+                tslogger.log(f"Epoch {cur_epoch}: loss={cum_loss:.4f}")
             cur_epoch += 1
 
             if self.after_epoch_callback is not None:

diff --git a/etna/libs/tstcc/__init__.py b/etna/libs/tstcc/__init__.py
@@ -0,0 +1 @@
+from etna.libs.tstcc.tstcc import TSTCC
diff --git a/etna/libs/tstcc/attention.py b/etna/libs/tstcc/attention.py
@@ -0,0 +1,133 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange, repeat
+
+
+class Residual(nn.Module):
+    """Skip-connection wrapper that returns ``fn(x, **kwargs) + x``."""
+
+    def __init__(self, fn):
+        super().__init__()
+        # The wrapped callable; extra keyword arguments of ``forward`` go to it.
+        self.fn = fn
+
+    def forward(self, x, **kwargs):
+        transformed = self.fn(x, **kwargs)
+        return transformed + x
+
+
+class PreNorm(nn.Module):
+    """Apply ``nn.LayerNorm(dim)`` to the input and pass the result to ``fn``."""
+
+    def __init__(self, dim, fn):
+        super().__init__()
+        self.norm = nn.LayerNorm(dim)
+        self.fn = fn
+
+    def forward(self, x, **kwargs):
+        # Only the input is normalised; keyword arguments are forwarded untouched.
+        normalized = self.norm(x)
+        return self.fn(normalized, **kwargs)
+
+
+class FeedForward(nn.Module):
+    """Two-layer perceptron ``dim -> hidden_dim -> dim`` with ReLU and dropout."""
+
+    def __init__(self, dim, hidden_dim, dropout=0.):
+        super().__init__()
+        # Layer order (and therefore the ``net.<index>`` parameter names) is:
+        # Linear, ReLU, Dropout, Linear, Dropout.
+        stages = [
+            nn.Linear(dim, hidden_dim),
+            nn.ReLU(),
+            nn.Dropout(dropout),
+            nn.Linear(hidden_dim, dim),
+            nn.Dropout(dropout),
+        ]
+        self.net = nn.Sequential(*stages)
+
+    def forward(self, x):
+        out = self.net(x)
+        return out
+
+
+class Attention(nn.Module):
+    """Multi-head self-attention over an input of shape ``(batch, tokens, dim)``.
+
+    ``dim`` is split evenly across ``heads`` (the ``'b n (h d)'`` rearrange
+    pattern requires ``dim`` to be divisible by ``heads``).
+
+    Parameters
+    ----------
+    dim:
+        Size of the last axis of the input and of the output.
+    heads:
+        Number of attention heads.
+    dropout:
+        Dropout probability applied after the output projection.
+    """
+
+    def __init__(self, dim, heads=8, dropout=0.):
+        super().__init__()
+        self.heads = heads
+        # NOTE: the scores are scaled by the full width ``dim``, not by the per-head width.
+        self.scale = dim ** -0.5
+
+        self.to_qkv = nn.Linear(dim, dim * 3, bias=False)
+        self.to_out = nn.Sequential(
+            nn.Linear(dim, dim),
+            nn.Dropout(dropout)
+        )
+
+    def forward(self, x, mask=None):
+        """Compute self-attention for ``x``.
+
+        Parameters
+        ----------
+        x:
+            Tensor of shape ``(batch, tokens, dim)``.
+        mask:
+            Optional boolean tensor; it is flattened to ``(batch, -1)`` and one
+            always-valid position is prepended, so after padding its length must
+            equal the number of tokens. Pairs in which either position is masked
+            receive a score of ``-inf`` before the softmax.
+
+        Returns
+        -------
+        Tensor of shape ``(batch, tokens, dim)``.
+        """
+        h = self.heads
+        qkv = self.to_qkv(x).chunk(3, dim=-1)
+        q, k, v = (rearrange(t, 'b n (h d) -> b h n d', h=h) for t in qkv)
+
+        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
+
+        if mask is not None:
+            mask = F.pad(mask.flatten(1), (1, 0), value=True)
+            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
+            # Build the pairwise mask as (batch, 1, tokens, tokens) so that it
+            # broadcasts over the head axis of ``dots`` (batch, heads, tokens, tokens).
+            # A (batch, tokens, tokens) mask would be right-aligned against the
+            # head axis instead of the batch axis.
+            mask = mask[:, None, :, None] * mask[:, None, None, :]
+            dots.masked_fill_(~mask, float('-inf'))
+            del mask
+
+        attn = dots.softmax(dim=-1)
+
+        out = torch.einsum('bhij,bhjd->bhid', attn, v)
+        out = rearrange(out, 'b h n d -> b n (h d)')
+        out = self.to_out(out)
+        return out
+
+
+class Transformer(nn.Module):
+    """Stack of ``depth`` layers, each a residual pre-norm attention followed by
+    a residual pre-norm feed-forward block."""
+
+    def __init__(self, dim, depth, heads, mlp_dim, dropout):
+        super().__init__()
+        stacked = []
+        for _ in range(depth):
+            attention = Residual(PreNorm(dim, Attention(dim, heads=heads, dropout=dropout)))
+            feed_forward = Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)))
+            stacked.append(nn.ModuleList([attention, feed_forward]))
+        self.layers = nn.ModuleList(stacked)
+
+    def forward(self, x, mask=None):
+        # ``mask`` is only consumed by the attention sub-layer of every pair.
+        for layer in self.layers:
+            attn, ff = layer
+            x = ff(attn(x, mask=mask))
+        return x
+
+
+class Seq_Transformer(nn.Module):
+    """Transformer that summarises a sequence into the output of a learned token.
+
+    Every element of the input sequence is linearly projected from
+    ``channels * patch_size`` features to ``dim``; a learned token is prepended,
+    the sequence goes through the transformer, and the output at the position of
+    that token is returned.
+    """
+
+    def __init__(self, *, patch_size, dim, depth, heads, mlp_dim, channels=1, dropout=0.1):
+        super().__init__()
+        self.patch_to_embedding = nn.Linear(channels * patch_size, dim)
+        self.c_token = nn.Parameter(torch.randn(1, 1, dim))
+        self.transformer = Transformer(dim, depth, heads, mlp_dim, dropout)
+        self.to_c_token = nn.Identity()
+
+    def forward(self, forward_seq):
+        embedded = self.patch_to_embedding(forward_seq)
+        batch_size, _, _ = embedded.shape
+        # One copy of the learned token per sample, placed at sequence position 0.
+        c_tokens = repeat(self.c_token, '() n d -> b n d', b=batch_size)
+        encoded = self.transformer(torch.cat((c_tokens, embedded), dim=1))
+        return self.to_c_token(encoded[:, 0])
diff --git a/etna/libs/tstcc/augmentations.py b/etna/libs/tstcc/augmentations.py
@@ -0,0 +1,73 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+# Fix numpy warning in `permutation` function
+
+import numpy as np
+import torch
+
+
+def DataTransform(sample, jitter_scale_ratio, max_seg, jitter_ratio):
+    """Build a ``(weak_aug, strong_aug)`` pair of augmented views of ``sample``.
+
+    The weak view is ``scaling(sample, jitter_scale_ratio)`` converted to a tensor;
+    the strong view is ``permutation(sample, max_segments=max_seg)`` followed by
+    ``jitter(..., jitter_ratio)``.
+    """
+    scaled = scaling(sample, jitter_scale_ratio)
+    weak_aug = torch.from_numpy(scaled)
+
+    permuted = permutation(sample, max_segments=max_seg)
+    strong_aug = jitter(permuted, jitter_ratio)
+
+    return weak_aug, strong_aug
+
+
+def jitter(x, sigma=0.8):
+    """Add zero-mean Gaussian noise with standard deviation ``sigma`` to ``x``."""
+    # https://arxiv.org/pdf/1706.00527.pdf
+    noise = np.random.normal(loc=0., scale=sigma, size=x.shape)
+    return x + noise
+
+
+def scaling(x, sigma=1.1):
+    """Multiply ``x`` by random Gaussian factors drawn with mean 2 and std ``sigma``.
+
+    One factor is drawn per ``(x.shape[0], x.shape[2])`` position and reused for
+    every index along axis 1.
+    """
+    # https://arxiv.org/pdf/1706.00527.pdf
+    factor = np.random.normal(loc=2., scale=sigma, size=(x.shape[0], x.shape[2]))
+    scaled_slices = [
+        np.multiply(x[:, idx, :], factor[:, :])[:, np.newaxis, :]
+        for idx in range(x.shape[1])
+    ]
+    return np.concatenate(scaled_slices, axis=1)
+
+
+def permutation(x, max_segments=5, seg_mode="random"):
+    """Shuffle contiguous segments along the last axis of every sample in ``x``.
+
+    Parameters
+    ----------
+    x:
+        3-D array-like indexed as ``(sample, channel, step)``; the last axis is
+        the one that is cut into segments.
+    max_segments:
+        Exclusive upper bound for the number of segments: each sample gets a
+        count drawn uniformly from ``1 .. max_segments - 1``.
+    seg_mode:
+        ``"random"`` cuts at randomly chosen points; any other value cuts into
+        (nearly) equal parts with ``np.array_split``.
+
+    Returns
+    -------
+    ``torch.Tensor`` with the same shape as ``x``. Samples that drew a single
+    segment are copied unchanged.
+    """
+    orig_steps = np.arange(x.shape[2])
+
+    num_segs = np.random.randint(1, max_segments, size=(x.shape[0]))
+
+    ret = np.zeros_like(x)
+    for i, pat in enumerate(x):
+        if num_segs[i] > 1:
+            if seg_mode == "random":
+                split_points = np.random.choice(x.shape[2] - 2, num_segs[i] - 1, replace=False)
+                split_points.sort()
+                splits = np.split(orig_steps, split_points)
+            else:
+                splits = np.array_split(orig_steps, num_segs[i])
+            # Permute the segment *order* by index. Wrapping ``splits`` in an object
+            # array yields a 2-D array whenever all segments have equal length, and
+            # the concatenated result then has object dtype and cannot be used as an index.
+            order = np.random.permutation(len(splits))
+            warp = np.concatenate([splits[j] for j in order])
+            # Reorder every channel with the same step permutation; indexing only
+            # channel 0 would broadcast that channel over all the others.
+            ret[i] = pat[:, warp]
+        else:
+            ret[i] = pat
+    return torch.from_numpy(ret)
diff --git a/etna/libs/tstcc/dataloader.py b/etna/libs/tstcc/dataloader.py
@@ -0,0 +1,74 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+
+import torch
+from torch.utils.data import Dataset
+
+from etna.libs.tstcc.augmentations import DataTransform
+
+
+class Load_Dataset(Dataset):
+    """Dataset over a numpy array whose first axis enumerates the samples.
+
+    In ``"train"`` mode two augmented views of the whole array are computed once
+    at construction and items are ``(aug1[index], aug2[index])`` pairs; in any
+    other mode items are the stored samples cast to float.
+    """
+
+    def __init__(
+            self,
+            dataset,
+            mode,
+            jitter_scale_ratio,
+            max_seg,
+            jitter_ratio
+    ):
+        """
+        Notes
+        -----
+        In this implementation we replace NaNs with 0 values to work with time-series with different length.
+        """
+        super().__init__()
+        self.mode = mode
+        self.jitter_scale_ratio = jitter_scale_ratio
+        self.max_seg = max_seg
+        self.jitter_ratio = jitter_ratio
+
+        self.x_data = torch.nan_to_num(torch.from_numpy(dataset), nan=0)
+        self.len = self.x_data.shape[0]
+
+        # Augmented views are only needed (and only created) for training.
+        if self.mode != "train":
+            return
+        weak, strong = DataTransform(
+            self.x_data,
+            jitter_scale_ratio=self.jitter_scale_ratio,
+            max_seg=self.max_seg,
+            jitter_ratio=self.jitter_ratio
+        )
+        self.aug1, self.aug2 = weak.float(), strong.float()
+
+    def __getitem__(self, index):
+        if self.mode != "train":
+            return self.x_data[index].float()
+        return self.aug1[index], self.aug2[index]
+
+    def __len__(self):
+        return self.len
diff --git a/etna/libs/tstcc/encoder.py b/etna/libs/tstcc/encoder.py
@@ -0,0 +1,69 @@
+"""
+MIT License
+
+Copyright (c) 2022 Emadeldeen Eldele
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from ts-tcc repository (https://github.com/emadeldeen24/TS-TCC/tree/main)
+
+# In the original implementation, the name of this file is "model.py".
+from torch import nn
+
+
+class ConvEncoder(nn.Module):
+    """Three stacked ``Conv1d -> BatchNorm1d -> ReLU`` blocks with ``"same"`` padding.
+
+    Channel widths are ``input_dims -> 32 -> 64 -> output_dims``. ``kernel_size``
+    and ``dropout`` apply to the first block only; the second and third blocks
+    use a fixed kernel size of 8 and no dropout.
+    """
+
+    def __init__(
+            self,
+            input_dims,
+            kernel_size,
+            dropout,
+            output_dims
+    ):
+        super().__init__()
+
+        self.input_dims = input_dims
+        self.kernel_size = kernel_size
+        self.dropout = dropout
+        self.output_dims = output_dims
+
+        # Options shared by every convolution in the encoder.
+        conv_opts = dict(stride=1, bias=False, padding="same")
+
+        self.conv_block1 = nn.Sequential(
+            nn.Conv1d(self.input_dims, 32, kernel_size=self.kernel_size, **conv_opts),
+            nn.BatchNorm1d(32),
+            nn.ReLU(),
+            nn.Dropout(dropout),
+        )
+
+        self.conv_block2 = nn.Sequential(
+            nn.Conv1d(32, 64, kernel_size=8, **conv_opts),
+            nn.BatchNorm1d(64),
+            nn.ReLU(),
+        )
+
+        self.conv_block3 = nn.Sequential(
+            nn.Conv1d(64, output_dims, kernel_size=8, **conv_opts),
+            nn.BatchNorm1d(output_dims),
+            nn.ReLU(),
+        )
+
+    def forward(self, x_in):
+        hidden = x_in
+        for block in (self.conv_block1, self.conv_block2, self.conv_block3):
+            hidden = block(hidden)
+        return hidden