In [None]:
# Install transformers with its TensorFlow extra via pip, because of an incompatibility between conda and
# tensorflow-text: https://github.com/tensorflow/text/issues/644
!pip install transformers[tf] -q --upgrade
!pip install sentence-transformers -q # needed for validating results


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m29.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.8/83.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m451.2/451.2 kB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m71.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.7/527.7 kB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import tensorflow as tf
from transformers import TFAutoModel, AutoTokenizer

In [None]:
from sentence_transformers import SentenceTransformer, models
from torch import nn

# Assemble a SentenceTransformer from three modules:
# transformer encoder -> mean pooling -> dense projection with Tanh.
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
max_seq_length = 512
output_dimension = 256

# Token-level encoder loaded from `model_name`.
word_embedding_model = models.Transformer(model_name,
                                          max_seq_length=max_seq_length)

# Only mean-token pooling is enabled; CLS-token and max pooling are switched off.
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                               pooling_mode_cls_token=False,
                               pooling_mode_mean_tokens=True,
                               pooling_mode_max_tokens=False)

# Dense layer with Tanh activation mapping the pooled vector to `output_dimension` features.
# NOTE(review): this layer is constructed here rather than loaded from the checkpoint,
# so its weights are presumably untrained — confirm before relying on its output.
dense_model = models.Dense(in_features=pooling_model.get_sentence_embedding_dimension(),
                           out_features=output_dimension,
                           activation_function=nn.Tanh())

model = SentenceTransformer(modules=[word_embedding_model, pooling_model, dense_model])

# Display (word embedding dim, pooled sentence embedding dim, projected output dim).
(word_embedding_model.get_word_embedding_dimension(),
 pooling_model.get_sentence_embedding_dimension(),
 output_dimension)

Downloading (…)lve/main/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

(384, 384, 256)

In [None]:
# Display the module list of the SentenceTransformer assembled above.
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Dense({'in_features': 384, 'out_features': 256, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
)

In [None]:
# Checkpoint id used for the Hugging Face tokenizer and the TensorFlow model in the cells below.
model_id = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
# Load the TensorFlow version of the checkpoint (a TFBertModel, per the loading log below).
old_model = TFAutoModel.from_pretrained(model_id)

Downloading tf_model.h5:   0%|          | 0.00/91.0M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFBertModel.

All the layers of TFBertModel were initialized from the model checkpoint at sentence-transformers/all-MiniLM-L6-v2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Print the Keras layer/parameter summary of the loaded TF model.
old_model.summary()

Model: "tf_bert_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  22713216  
                                                                 
Total params: 22,713,216
Trainable params: 22,713,216
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Two example sentences used to inspect the tokenizer and TF model outputs.
payload = ["This is a sentence embedding",
           "This is another sentence embedding"]
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Tokenize both sentences as one batch and return TensorFlow tensors;
# padding and truncation are enabled so the rows form a rectangular batch.
encoded_input = tokenizer(payload, padding=True, truncation=True, return_tensors='tf')

In [None]:
# Inspect the tokenizer output: input_ids, token_type_ids and attention_mask tensors.
encoded_input

{'input_ids': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
array([[ 101, 2023, 2003, 1037, 6251, 7861, 8270, 4667,  102],
       [ 101, 2023, 2003, 2178, 6251, 7861, 8270, 4667,  102]],
      dtype=int32)>, 'token_type_ids': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1]], dtype=int32)>}

In [None]:
# Run the TF model by passing each tokenizer field explicitly as a keyword argument.
old_model(
        input_ids=encoded_input.input_ids,
        attention_mask=encoded_input.attention_mask,
        token_type_ids=encoded_input.token_type_ids,
    )

In [None]:
# NOTE(review): tuple-unpacking the model output binds the output's field names
# ('last_hidden_state', 'pooler_output', as this cell's result shows), not the tensors.
# Access the tensors through attributes such as `.last_hidden_state` instead.
old_embd, encoded = old_model(encoded_input)
old_embd, encoded

('last_hidden_state', 'pooler_output')

In [None]:
# Keep the whole output object and read the tensors through its named attributes.
old_output = old_model(encoded_input)

# Display the shapes of the per-token hidden states and of the pooler output.
old_output.last_hidden_state.shape, old_output.pooler_output.shape

(TensorShape([2, 9, 384]), TensorShape([2, 384]))

In [None]:
# Manual attention-mask-aware mean pooling of the token embeddings, followed by L2 normalization.
token_embeddings = old_output.last_hidden_state # shape = (2, 9, 384), dtype=float32
attention_mask = encoded_input.attention_mask # shape=(2, 9), dtype=int32
attention_mask = tf.expand_dims(attention_mask, axis=-1) # shape=(2, 9, 1), dtype=int32
attention_mask = tf.broadcast_to(attention_mask, tf.shape(token_embeddings)) # shape=(2, 9, 384), dtype=int32
attention_mask = tf.cast(attention_mask, dtype = tf.float32) # shape=(2, 9, 384), dtype=float32
token_embeddings = token_embeddings * attention_mask # zero out masked positions; shape = (2, 9, 384)
# Mean over the token axis: sum of the unmasked embeddings divided by the number of
# unmasked tokens (a plain reduce_sum would only give the sum, not the mean).
summed_embeddings = tf.math.reduce_sum(token_embeddings, axis=1) # shape = (2, 384), dtype=float32
token_counts = tf.math.reduce_sum(attention_mask, axis=1) # shape = (2, 384), dtype=float32
# Clamp the divisor so a fully masked row cannot divide by zero.
mean_embeddings = summed_embeddings / tf.math.maximum(token_counts, 1e-9) # shape = (2, 384), dtype=float32
# tf.linalg.normalize returns a (normalized tensor, norm) pair; both are displayed.
tf.linalg.normalize(mean_embeddings, 2, axis=1)

In [None]:
# Display the repr of the loaded TF model object.
old_model

<transformers.models.bert.modeling_tf_bert.TFBertModel at 0x7f040aab27d0>

In [None]:
import tensorflow as tf
from transformers import TFAutoModel

class TFSTLayer(tf.keras.layers.Layer):
    """Keras layer that turns tokenized text into one sentence embedding per row.

    Wraps a Hugging Face TensorFlow model loaded with ``TFAutoModel.from_pretrained``
    and applies attention-mask-aware mean pooling over its ``last_hidden_state``,
    optionally followed by L2 normalization.
    """

    def __init__(self, model_name: str) -> None:
        """Load the pretrained TensorFlow model identified by ``model_name``."""
        super(TFSTLayer, self).__init__()
        self.tf_model = TFAutoModel.from_pretrained(model_name)

    def call(self, input_ids, attention_mask, token_type_ids, normalize=True):
        """Compute sentence embeddings for a tokenized batch.

        Args:
            input_ids: token ids, expected shape (B, seq_len).
            attention_mask: 1 for real tokens and 0 for padding, shape (B, seq_len).
            token_type_ids: segment ids, shape (B, seq_len).
            normalize: when True, L2-normalize each embedding along the feature axis.

        Returns:
            Tensor of shape (B, n_embd) with one embedding per input row.
        """
        # Pass the mask and segment ids by keyword so they cannot be bound to the
        # wrong parameter of the wrapped model.
        output = self.tf_model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)

        # Per-token embeddings
        token_embeddings = output.last_hidden_state  # shape=(B, seq_len, n_embd), dtype=float32

        # Mean pooling over the unmasked tokens
        embedding = self.mean_pooling(token_embeddings, attention_mask)  # shape=(B, n_embd), dtype=float32

        if normalize:
            # tf.linalg.normalize returns (normalized tensor, norm); only the tensor is kept.
            embedding, _ = tf.linalg.normalize(embedding, 2, axis=1)  # shape=(B, n_embd), dtype=float32

        return embedding

    def mean_pooling(self, token_embeddings, attention_mask):
        """Average the token embeddings of each row over its unmasked positions.

        Args:
            token_embeddings: tensor of shape (B, seq_len, n_embd).
            attention_mask: tensor of shape (B, seq_len); non-zero marks tokens to include.

        Returns:
            Tensor of shape (B, n_embd): the masked sum divided by the number of unmasked tokens.
        """
        mask = tf.expand_dims(attention_mask, axis=-1)  # shape=(B, seq_len, 1)
        mask = tf.cast(mask, dtype=tf.float32)  # shape=(B, seq_len, 1), dtype=float32

        # Broadcasting over the feature axis zeroes out the padded positions.
        summed_embeddings = tf.reduce_sum(token_embeddings * mask, axis=1)  # shape=(B, n_embd)
        token_counts = tf.reduce_sum(mask, axis=1)  # shape=(B, 1)

        # Divide by the token count to obtain a true mean (a bare sum is only correct
        # after normalization); the clamp avoids 0/0 for a fully masked row.
        # Note: a plain tf.keras.layers.GlobalAveragePooling1D is not equivalent unless it
        # is given the mask, because it would also average the padded positions.
        return summed_embeddings / tf.maximum(token_counts, 1e-9)






def tf_sentence_transformer(model_path:str, max_seq_length) -> tf.keras.Model:
    """Build a functional Keras model that maps tokenized text to sentence embeddings.

    Args:
        model_path: Hugging Face model id or local path handed to ``TFSTLayer``.
        max_seq_length: number of tokens per row; ``None`` allows variable-length batches.

    Returns:
        A ``tf.keras.Model`` whose inputs are ``[input_ids, attention_mask, token_type_ids]``
        (each of shape (B, max_seq_length), int32) and whose output is the ``TFSTLayer``
        embedding of shape (B, n_embd).
    """
    # `shape` excludes the batch axis, so (max_seq_length,) yields (B, max_seq_length) inputs.
    input_ids = tf.keras.Input(shape=(max_seq_length,), dtype=tf.int32, name="input_ids")
    attention_mask = tf.keras.Input(shape=(max_seq_length,), dtype=tf.int32, name="attention_mask")
    token_type_ids = tf.keras.Input(shape=(max_seq_length,), dtype=tf.int32, name="token_type_ids")
    tfst_layer = TFSTLayer(model_path)
    output = tfst_layer(input_ids, attention_mask, token_type_ids)
    # Return the assembled model, as the annotated return type promises.
    return tf.keras.Model(inputs=[input_ids, attention_mask, token_type_ids], outputs=output)


In [None]:
# Smoke test: run the custom layer on the tokenized example batch and display the output shape.
# `model_name` is the notebook-level checkpoint id defined in an earlier cell.
tfst_layer = TFSTLayer(model_name)
output = tfst_layer(encoded_input['input_ids'], encoded_input['attention_mask'], encoded_input['token_type_ids'])
output.shape

All model checkpoint layers were used when initializing TFBertModel.

All the layers of TFBertModel were initialized from the model checkpoint at sentence-transformers/all-MiniLM-L6-v2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


TensorShape([2, 384])

In [None]:
def mean_pooling(model_output, attention_mask):
    """Attention-mask-aware mean pooling of token embeddings (PyTorch reference version).

    Args:
        model_output: sequence whose first element holds the token embeddings,
            a tensor of shape (B, seq_len, n_embd).
        attention_mask: tensor of shape (B, seq_len); non-zero marks tokens to include.

    Returns:
        Tensor of shape (B, n_embd): per-row sum of the unmasked token embeddings
        divided by the number of unmasked tokens.
    """
    # Imported locally because the notebook only does `from torch import nn`,
    # which leaves the name `torch` itself undefined.
    import torch

    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    # Clamp the divisor so a fully masked row yields zeros instead of dividing by zero.
    token_counts = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    return summed_embeddings / token_counts


In [None]:
from sentence_transformers import SentenceTransformer
# Reference embeddings from the sentence-transformers implementation of the same checkpoint.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Sentences to encode
sentences = ['This framework generates embeddings for each input sentence',
    'Sentences are passed as a list of string.',
    'The quick brown fox jumps over the lazy dog.']

# Sentences are encoded by calling model.encode()
embeddings = model.encode(sentences)

# Print each sentence with the shape of its embedding.
# The loop variable `sentence` stays bound to the last sentence after the loop ends.
for sentence, embedding in zip(sentences, embeddings):
    print("Sentence:", sentence)
    print("Embedding:", embedding.shape)
    print("")

Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Sentence: This framework generates embeddings for each input sentence
Embedding: (384,)

Sentence: Sentences are passed as a list of string.
Embedding: (384,)

Sentence: The quick brown fox jumps over the lazy dog.
Embedding: (384,)



In [None]:
# `sentence` is the loop variable left over from the printing loop, i.e. the last sentence.
sentence

'The quick brown fox jumps over the lazy dog.'

In [None]:
# Display the modules of the pretrained SentenceTransformer loaded above.
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import Mean
from transformers import TFAutoModel, BertTokenizer

class SBERTCosineSimilarityModel(tf.keras.Model):
    """Keras model wrapping ``TFSTLayer`` to produce sentence embeddings.

    Holds a ``BertTokenizer`` and a ``TFSTLayer`` built from the same ``model_name``;
    ``embedding_dim`` is stored on the instance but not used by ``call``.
    """

    def __init__(self, model_name: str, embedding_dim: int):
        """Create the tokenizer and the sentence-embedding layer for ``model_name``."""
        super().__init__()
        self.embedding_dim = embedding_dim
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.st_layer = TFSTLayer(model_name)

    def call(self, inputs):
        """Embed an already tokenized batch.

        Args:
            inputs: a 3-item sequence ``(input_ids, attention_mask, token_type_ids)``.

        Returns:
            The tensor returned by the sentence-embedding layer.
        """
        ids, mask, segment_ids = inputs

        # Delegate to the sentence-embedding layer.
        embeddings = self.st_layer(ids, mask, segment_ids)
        # Debug output of the embedding shape, expected to be (B, n_embd).
        print(f"Embeddings shape: {embeddings.shape}")

        return embeddings

In [None]:
import numpy as np
# Three example sentences to run through the BERT tokenizer.
sentences = ['This framework generates embeddings for each input sentence',
    'Sentences are passed as a list of string.',
    'The quick brown fox jumps over the lazy dog.']
tokenizer = BertTokenizer.from_pretrained(model_name)
st_layer = TFSTLayer(model_name)
# Without padding or return_tensors the tokenizer returns plain Python lists,
# one per sentence, whose lengths differ.
tokenizer(sentences)

All model checkpoint layers were used when initializing TFBertModel.

All the layers of TFBertModel were initialized from the model checkpoint at sentence-transformers/all-MiniLM-L6-v2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


{'input_ids': [[101, 2023, 7705, 19421, 7861, 8270, 4667, 2015, 2005, 2169, 7953, 6251, 102], [101, 11746, 2024, 2979, 2004, 1037, 2862, 1997, 5164, 1012, 102], [101, 1996, 4248, 2829, 4419, 14523, 2058, 1996, 13971, 3899, 1012, 102]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}

In [None]:
# Four sentence pairs. Each inner two-element list is encoded by the tokenizer as ONE
# joined sequence (first sentence, separator, second sentence), not as two separate rows.
sentence_pairs = [
    ['This framework generates embeddings for each input sentence','Sentences are passed as a list of string.'],
    ['This framework generates embeddings for each input sentence','Sentences are passed as a list of string.'],
    ['The quick brown fox jumps over the lazy dog.', 'This framework generates embeddings for each input sentence'],
    ['Sentences are passed as a list of string.',   'The quick brown fox jumps over the lazy dog.']]
print(np.shape(sentence_pairs))

# No padding is requested, so the encoded rows have different lengths and np.shape
# can only report the outer dimension, (4,).
tokenized_sentence_pairs = tokenizer(sentence_pairs)
np.shape(tokenized_sentence_pairs.input_ids)

(4, 2)


  result = asarray(a).shape


(4,)

In [None]:
# Print the token count of each encoded pair; the rows are unpadded, so the lengths differ.
for l in tokenized_sentence_pairs.input_ids:
    print(np.shape(l))

(23,)
(23,)
(24,)
(22,)


In [None]:
# Token ids of the first pair: both sentences appear in a single sequence.
tokenized_sentence_pairs.input_ids[0]

[101,
 2023,
 7705,
 19421,
 7861,
 8270,
 4667,
 2015,
 2005,
 2169,
 7953,
 6251,
 102,
 11746,
 2024,
 2979,
 2004,
 1037,
 2862,
 1997,
 5164,
 1012,
 102]

In [None]:
import tensorflow as tf
from transformers import TFAutoModel, BertTokenizer

class SBERTCosineSimilarityModel(tf.keras.Model):
    """Keras model that embeds tokenized text with a ``TFSTLayer``.

    A ``BertTokenizer`` for the same ``model_name`` is kept on the instance
    for convenience; ``call`` itself expects inputs that are already tokenized.
    """

    def __init__(self, model_name: str):
        """Create the tokenizer and the sentence-embedding layer for ``model_name``."""
        super().__init__()
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.st_layer = TFSTLayer(model_name)

    def call(self, inputs):
        """Return sentence embeddings for ``(input_ids, attention_mask, token_type_ids)``."""
        token_ids, mask, segment_ids = inputs

        # Sentence embedding via the wrapped layer, expected shape (B, n_embd).
        embeddings = self.st_layer(token_ids, mask, segment_ids)
        print(f"Embeddings shape: {embeddings.shape}")

        return embeddings

# Example usage:
model_name = 'bert-base-uncased'
sentence_pairs = [
    ['This framework generates embeddings for each input sentence','Sentences are passed as a list of string.'],
    ['This framework generates embeddings for each input sentence','Sentences are passed as a list of string.'],
    ['The quick brown fox jumps over the lazy dog.', 'This framework generates embeddings for each input sentence'],
    ['Sentences are passed as a list of string.',   'The quick brown fox jumps over the lazy dog.']
]

tokenizer = BertTokenizer.from_pretrained(model_name)
tokenized_sentence_pairs = tokenizer(sentence_pairs, padding=True, truncation=True, return_tensors="tf")

print(f"Input IDs shape: {tokenized_sentence_pairs.input_ids.shape}")  # (4, 2)
for l in tokenized_sentence_pairs.input_ids:
    print(f"Shape of input IDs: {l.shape}")  # (2,)


Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Input IDs shape: (4, 24)
Shape of input IDs: (24,)
Shape of input IDs: (24,)
Shape of input IDs: (24,)
Shape of input IDs: (24,)


In [None]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
input_data = [
    ['sentence A1', 'sentence B1'],
    ['sentence A2', 'sentence B2'],
    ['sentence A3', 'sentence B3']
]

# Tokenize each sentence of every pair separately, pad (and truncate) to a fixed length of
# 512 tokens, and return TensorFlow tensors of shape (1, 512) per field.
tokenized_data = [(tokenizer(s1, padding='max_length', max_length=512, truncation=True, return_tensors='tf'),
                   tokenizer(s2, padding='max_length', max_length=512, truncation=True, return_tensors='tf'))
                  for s1, s2 in input_data]
batch_size = 5
target_data = [0.8, 0.3, 0.9]  # Example targets

# tf.data converts a Python list into a single tensor, which is not possible for a list of
# tokenizer outputs. Stack the per-example tensors of each side into one dict of
# (num_pairs, 512) tensors so the dataset can be sliced per example.
features_a, features_b = (
    {key: tf.concat([pair[side][key] for pair in tokenized_data], axis=0)
     for key in tokenized_data[0][side].keys()}
    for side in (0, 1)
)
# Each dataset element is ((features of sentence A, features of sentence B), target).
dataset = tf.data.Dataset.from_tensor_slices(((features_a, features_b), target_data)).batch(batch_size)


In [None]:
# Tokenizer output for the first sentence of the first pair.
tokenized_data[0][0]

In [None]:
from transformers import BertTokenizer, TFBertModel
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import Model
import numpy as np

class SBERTCosineSimilarityModel(Model):
    """Keras model scoring sentence pairs by the cosine similarity of BERT pooler outputs.

    NOTE(review): ``call`` runs the Hugging Face tokenizer on its inputs, so it presumably
    needs raw Python strings; confirm it works with the tensors Keras passes during
    ``fit`` before relying on it for training.
    """

    def __init__(self, model_name='bert-base-uncased', **kwargs):
        """Load the tokenizer and the TF BERT model for ``model_name``."""
        super().__init__(**kwargs)
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.bert_model = TFBertModel.from_pretrained(model_name)

    def call(self, inputs):
        """Return one cosine similarity per pair.

        Args:
            inputs: a 2-item sequence ``(texts_a, texts_b)`` of aligned text batches.

        Returns:
            Tensor with one cosine-similarity value per pair.
        """
        first_texts, second_texts = inputs

        # Identical tokenizer settings for both sides of the pair.
        tokenize_options = dict(padding=True, truncation=True, return_tensors="tf", max_length=128)
        encoded_first = self.tokenizer(first_texts, **tokenize_options)
        encoded_second = self.tokenizer(second_texts, **tokenize_options)

        pooled_first = self.bert_model(encoded_first)['pooler_output']
        pooled_second = self.bert_model(encoded_second)['pooler_output']

        # The dot product of unit-length vectors is their cosine similarity.
        unit_first = tf.nn.l2_normalize(pooled_first, axis=1)
        unit_second = tf.nn.l2_normalize(pooled_second, axis=1)
        return tf.reduce_sum(unit_first * unit_second, axis=1)



class SBERTCosineSimilarityTrainer:
    """Minimal custom training loop for ``SBERTCosineSimilarityModel``.

    Assumes the wrapped model's ``call`` takes ``[inputs_a, inputs_b]`` and returns one
    similarity score per pair, as the ``SBERTCosineSimilarityModel`` defined in this cell
    does. The scores are regressed onto the targets with a mean-squared-error loss.
    """

    def __init__(self, model_name: str, learning_rate: float = 1e-4):
        """Create the model, a running-mean loss metric and an Adam optimizer."""
        self.model = SBERTCosineSimilarityModel(model_name)
        self.loss_metric = tf.keras.metrics.Mean()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate)

    def train_step(self, inputs, targets):
        """Run one optimization step on a batch.

        Args:
            inputs: either a 2-item tuple/list ``(inputs_a, inputs_b)`` (the structure a
                ``tf.data`` pipeline of pairs yields) or an array-like of shape (B, 2)
                whose columns are the two sides of each pair.
            targets: target similarity per pair, shape (B,).
        """
        if isinstance(inputs, (tuple, list)) and len(inputs) == 2:
            inputs_a, inputs_b = inputs
        else:
            inputs_a, inputs_b = inputs[:, 0], inputs[:, 1]

        with tf.GradientTape() as tape:
            # The model already normalizes its embeddings and returns the cosine
            # similarity, so its output is used directly rather than unpacked into
            # two embedding tensors.
            similarity_scores = self.model([inputs_a, inputs_b], training=True)

            loss = tf.keras.losses.MSE(targets, similarity_scores)

        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))

        self.loss_metric.update_state(loss)

    def train(self, train_dataset, epochs: int):
        """Iterate ``epochs`` times over ``train_dataset`` and print the mean loss per epoch.

        Args:
            train_dataset: iterable yielding ``(inputs, targets)`` batches.
            epochs: number of passes over the dataset.
        """
        for epoch in range(epochs):
            # Start every epoch with a fresh running mean.
            self.loss_metric.reset_states()

            for inputs, targets in train_dataset:
                self.train_step(inputs, targets)

            print(f"Epoch {epoch+1}/{epochs}, Loss: {self.loss_metric.result().numpy()}")


class TFSTLayer(tf.keras.layers.Layer):
    """Keras layer producing sentence embeddings from tokenized text.

    Runs a Hugging Face TensorFlow model loaded with ``TFAutoModel.from_pretrained``,
    mean-pools its ``last_hidden_state`` over the unmasked tokens and optionally
    L2-normalizes the result.
    """

    def __init__(self, model_name: str) -> None:
        """Load the pretrained TensorFlow model identified by ``model_name``."""
        super(TFSTLayer, self).__init__()
        self.tf_model = TFAutoModel.from_pretrained(model_name)

    def call(self, input_ids, attention_mask, token_type_ids, normalize=True):
        """Compute one embedding per input row.

        Args:
            input_ids: token ids, expected shape (B, seq_len).
            attention_mask: 1 for real tokens and 0 for padding, shape (B, seq_len).
            token_type_ids: segment ids, shape (B, seq_len).
            normalize: when True, L2-normalize each embedding along the feature axis.

        Returns:
            Tensor of shape (B, n_embd).
        """
        output = self.tf_model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)

        token_embeddings = output.last_hidden_state

        embedding = self.mean_pooling(token_embeddings, attention_mask)

        if normalize:
            embedding = tf.nn.l2_normalize(embedding, axis=1)

        return embedding

    def mean_pooling(self, token_embeddings, attention_mask):
        """Average ``token_embeddings`` over the positions where ``attention_mask`` is non-zero.

        Args:
            token_embeddings: tensor of shape (B, seq_len, n_embd).
            attention_mask: tensor of shape (B, seq_len).

        Returns:
            Tensor of shape (B, n_embd).
        """
        attention_mask = tf.cast(attention_mask, dtype=tf.float32)
        sum_embeddings = tf.reduce_sum(token_embeddings * attention_mask[:, :, tf.newaxis], axis=1)
        sum_mask = tf.reduce_sum(attention_mask, axis=1)
        # Clamp the token count so a fully masked row yields zeros instead of NaN (0/0),
        # matching the clamp used by the PyTorch reference implementation.
        sum_mask = tf.maximum(sum_mask, 1e-9)
        return sum_embeddings / sum_mask[:, tf.newaxis]


# Example data
data = [
    ["Hello, world!", "Hi, world!", 1.0],
    ["Hello, world!", "Goodbye, world!", 0.0],
    ["Hello, world!", "Hello again, world!", 0.8]
]

sentences_a, sentences_b, scores = zip(*data)

# Split data into train and validation sets
train_sentences_a, validation_sentences_a, train_sentences_b, validation_sentences_b, train_scores, validation_scores = train_test_split(sentences_a, sentences_b, scores, test_size=0.2)

# Convert data into tf.data.Dataset
train_dataset = tf.data.Dataset.from_tensor_slices(((train_sentences_a, train_sentences_b), train_scores)).batch(1)
validation_dataset = tf.data.Dataset.from_tensor_slices(((validation_sentences_a, validation_sentences_b), validation_scores)).batch(1)

# Initialize model
model = SBERTCosineSimilarityModel()

# Compile model
model.compile(optimizer='adam', loss='mse')

# Train model
model.fit(train_dataset, validation_data=validation_dataset, epochs=5)
