In [1]:
import os

import numpy as np

os.environ['KERAS_BACKEND'] = "torch"

In [2]:
import torch

torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x79695830aed0>

## Regularization
>We hope to learn vector representations of the most representative aspects for a review dataset.
However, the aspect embedding matrix T may suffer from redundancy problems during training. [...] 
> The regularization term encourages orthogonality among the rows of the aspect embedding matrix T and penalizes redundancy between different aspect vectors
> ~ Ruidan

We use an Orthogonal Regulizer definition of the method can be found here: https://paperswithcode.com/method/orthogonal-regularization. <br/>
For the code we use the default implementation provided by Keras (https://keras.io/api/layers/regularizers/)

In [3]:
from keras import ops as K
from keras import backend as B


def ortho_reg(W):
    ### Orthogonal regularization for aspect embedding matrix by Ruidan     ###
    w_n = W / K.cast(B.epsilon() + K.sqrt(K.sum(K.square(W), axis=-1, keepdims=True)), B.floatx())
    # sum(w_n * w_n_t - I) * factor
    return K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0])))

  return torch._C._cuda_getDeviceCount() > 0


In [4]:
# TODO: Sarebbe da vedere come l'implementazione di Ruidan sia diversa da quella di Keras. Se effettivamente questa importa. A guardare i numeri sono effettivamente diversi!

In [5]:
corpus_file = "./../data/corpus.preprocessed.csv"  # It's this
# TODO GET MAXLEN FROM EMBEDDINGS DATASET (Which is input shape)
input_shape = (64, 1017)

## Model Setup

In [6]:
import model.embeddings as embeddings

embeddings_model = embeddings.WordEmbedding(
    embeddings.LoadCorpusUtility(), max_vocab_size=10000, embedding_size=128,
    target_model_file="./../data/word-embeddings.model", corpus_file=corpus_file
)
# todo check this
# Chiamo questi embeddings MA guarda caso sono float i miei numeri (giustamente)
# Vedi come farlo correttamente. Guarda impl RUIDAN todo
aspect_embeddings_model = embeddings.AspectEmbedding(
    aspect_size=4, embedding_size=128, base_embeddings=embeddings_model,
    target_model_file="./../data/aspects-embedding.model"
)

In [7]:
embeddings_model.load_model()
aspect_embeddings_model.load_model()

#### Load the data

In [8]:
import dataset
from torch.utils.data import DataLoader

vocabulary = embeddings_model.model.wv.key_to_index
train = dataset.PositiveNegativeCommentGeneratorDataset(
    vocabulary=vocabulary, csv_dataset_path=corpus_file, negative_size=10
)
# I have no idea why I have to collate.
train_dataloader = DataLoader(train, batch_size=64, shuffle=True)

Pandas Apply:   0%|          | 0/1000 [00:00<?, ?it/s]

In [9]:
from model.model import ABAEGenerator

generator = ABAEGenerator(train.max_seq_length, 10, embeddings_model, aspect_embeddings_model)
model = generator.make_model()

  super(WeightedAspectEmb, self).__init__(**kwargs)


## Train

In [10]:
from keras import ops as K


# Y_true is always zero as we are in unsupervised paradigm
def max_margin_loss(y_true, y_pred):
    return K.mean(y_pred)

In [11]:
model.compile(optimizer='SGD', loss=max_margin_loss, metrics=[max_margin_loss])
model.summary()

In [12]:
# Debug: (inputs == 0).all(dim=-1) # There are some sentences with 0 words! Can it be?
# Still, if all is 0 my model should not be breaking.
history = model.fit(x=train_dataloader, batch_size=64, epochs=5)

Epoch 1/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 238ms/step - loss: 8.9624 - max_margin_loss: 8.9604
Epoch 2/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 235ms/step - loss: 7.7345 - max_margin_loss: 7.7311
Epoch 3/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 237ms/step - loss: 6.9787 - max_margin_loss: 6.9786
Epoch 4/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 238ms/step - loss: 6.5958 - max_margin_loss: 6.5968
Epoch 5/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 243ms/step - loss: 6.3246 - max_margin_loss: 6.3241


In [13]:
p = next(iter(train_dataloader))

In [14]:
len(p)

2

In [15]:
p[0][0].shape

torch.Size([64, 1017])

In [16]:
len(p[0][0])

64

In [17]:
emb = embeddings_model.build_embedding_layer("las")

In [18]:
e = emb(p[0][1])

In [19]:
e.shape

torch.Size([64, 10, 1017, 128])

In [20]:
e == 0

tensor([[[[False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          ...,
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False]],

         [[False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          ...,
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False]],

         [[False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          ...,
          [False, False, False,  ..., False, False,

In [21]:
import numpy as np

np.argwhere(e == 0.0)

tensor([], size=(4, 0), dtype=torch.int64)

In [48]:
model.predict(x=train_dataloader)

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 202ms/step


array([[ 6.2499537],
       [ 4.4965024],
       [ 7.123279 ],
       [ 5.5771885],
       [ 4.312462 ],
       [ 4.317806 ],
       [ 8.889741 ],
       [ 5.2828326],
       [ 3.4322658],
       [ 8.260528 ],
       [ 5.481821 ],
       [ 6.9792957],
       [ 3.0646257],
       [ 7.8050036],
       [ 7.7022314],
       [ 4.362407 ],
       [ 7.6938844],
       [ 3.977336 ],
       [ 2.0455678],
       [ 4.3520784],
       [ 8.443966 ],
       [ 6.4367685],
       [ 3.606248 ],
       [ 6.902653 ],
       [ 4.6262646],
       [11.0855055],
       [ 5.195367 ],
       [ 6.2596617],
       [ 9.825886 ],
       [ 5.293392 ],
       [ 7.263816 ],
       [ 5.042217 ],
       [ 8.395377 ],
       [11.229925 ],
       [ 9.052852 ],
       [ 3.0984516],
       [ 6.7261677],
       [ 6.352546 ],
       [ 5.9526377],
       [ 5.445347 ],
       [ 5.3963037],
       [ 6.074003 ],
       [ 3.2712808],
       [ 6.6931157],
       [ 5.8156695],
       [ 5.7684755],
       [ 6.5570726],
       [ 5.73

In [40]:
model(train[2][0])

ValueError: Input 1 of layer "functional" is incompatible with the layer: expected shape=(None, 10, 1017), found shape=(10, 1017)

In [45]:
train[2][0][0].shape

(1017,)