In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import clip
import re
from matplotlib import pyplot as plt
import pandas as pd
from sentence_transformers import SentenceTransformer
from networks import SCLIPNN, SCLIP_LSTM, SCLIP_GRU

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Folder that holds the sentence files.
dirname = 'europarl/'

# Pretrained encoders: CLIP (ViT-B/32) and a Sentence-BERT model.
clip_model, preprocess = clip.load("ViT-B/32", device=device)
sbert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

In [None]:
# Sentence files we want to encode (train / test / validation splits).
train_path = 'train_sentences.txt'
test_path = 'test_sentences.txt'
valid_path = 'valid_sentences.txt'

# `dirname` may already end with '/', so drop trailing separators before joining;
# otherwise the paths come out as e.g. 'europarl//train_sentences.txt'.
_data_dir = dirname.rstrip('/')
train_filename = _data_dir + '/' + train_path
test_filename = _data_dir + '/' + test_path
valid_filename = _data_dir + '/' + valid_path

In [None]:
# Read the training corpus: one list entry per line of the file
# (line endings are kept as read).
train_sentences = []
with open(train_filename, mode='rt', encoding='utf-8') as file_object:
    train_sentences.extend(file_object)

N = len(train_sentences)
print("Number of sentences : {}".format(N))

In [None]:
# Cleaning patterns, applied in order; every match is replaced by one space.
regex = [
    r"[^A-Za-z0-9]+|[a-zA-Z][0-9]",  # non-alphanumeric runs, or a letter followed by a digit
    r"(?<!\d)[0]\d*(?!\d)",          # standalone digit runs that start with 0
    r"\s+",                          # whitespace runs
    r"[0-9]+",                       # any remaining digit runs
]
for pattern in regex:
    train_sentences = [re.sub(pattern, " ", sentence) for sentence in train_sentences]

In [None]:
# Tokenize the cleaned sentences for CLIP and move the token ids to `device`.
# clip.tokenize raises a RuntimeError when a sentence exceeds the model's context
# length unless truncate=True, so long sentences are truncated instead of
# aborting the whole notebook.
# NOTE(review): truncated sentences are embedded from their leading tokens only.
text = clip.tokenize(train_sentences, truncate=True).to(device)

In [None]:
# Encode the tokenized training sentences with CLIP's text encoder.
# Gradients are disabled: these embeddings are only used as fixed targets.
with torch.set_grad_enabled(False):
    clip_embeddings = clip_model.encode_text(text)

In [None]:
# Encode the same sentences with SBERT, then wrap the result as a torch tensor.
with torch.no_grad():
    _train_sbert_array = sbert_model.encode(train_sentences)
    sbert_embeddings = torch.from_numpy(_train_sbert_array)

In [None]:
# Sanity check: show the first sentence together with the size of its two embeddings.
preview = zip(train_sentences[:1], clip_embeddings[:1], sbert_embeddings[:1])
for first_sentence, first_clip, first_sbert in preview:
    print("Sentence:", first_sentence)
    print("Clip Embedding: ", first_clip.size())
    print("Sbert Embedding: ", first_sbert.size())
    print("-"*10)

In [None]:
# Instantiate the three candidate networks (in this order) and move each to `device`.
model_NN, model_lstm, model_gru = (
    architecture().to(device)
    for architecture in (SCLIPNN, SCLIP_LSTM, SCLIP_GRU)
)
#model_attn = SCLIP_Attn().to(device)

In [None]:
# Registry of the networks to train and evaluate, keyed by a display name.
models = {
    'NN': model_NN,
    'LSTM': model_lstm,
    'GRU': model_gru,
}  # , 'attn': model_attn}

# Loss shared by all training runs: mean squared error.
criterion = nn.MSELoss()

In [None]:
# Display the shape of the first SBERT embedding as the cell output.
sbert_embeddings[0].shape

In [None]:
def train(model, sbert_emb, clip_emb, epochs=150, lr=0.001, momentum=0.9, loss_fn=None):
    """Fit ``model`` to map SBERT embeddings onto CLIP embeddings.

    Uses SGD with one (input, target) pair per optimisation step.

    Args:
        model: ``nn.Module`` to optimise in place; left in training mode.
        sbert_emb: iterable of input tensors, one per sentence.
        clip_emb: iterable of target tensors, aligned with ``sbert_emb``.
        epochs: number of passes over all pairs.
        lr: SGD learning rate.
        momentum: SGD momentum.
        loss_fn: loss callable ``(outputs, targets) -> scalar tensor``.
            Defaults to the notebook-level ``criterion``.

    Returns:
        list[float]: for each epoch, the sum of the per-sample losses.
        Empty when ``epochs`` is 0 or negative.
    """
    if loss_fn is None:
        loss_fn = criterion
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # Send samples to wherever the model's parameters live, so the function does
    # not depend on a notebook global matching the model's device.
    model_device = next(model.parameters()).device

    losses = []
    model.train()
    for epoch in range(epochs):  # loop over the dataset multiple times
        epoch_loss = 0.0
        for inputs, labels in zip(sbert_emb, clip_emb):
            inputs = inputs.to(model_device)
            labels = labels.to(model_device)

            # zero the parameter gradients
            optimizer.zero_grad()

            outputs = model(inputs)
            # Cast both sides to float64 so outputs and targets share one dtype
            # (they may differ, e.g. if the targets are half precision).
            loss = loss_fn(outputs.to(float), labels.to(float))
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        losses.append(epoch_loss)

        if epoch % 10 == 0:
            print("Epoch {}. Loss: {}".format(epoch, epoch_loss))

    # With epochs <= 0 there is no final loss to report (losses[-1] would raise).
    if losses:
        print("Final Loss: {}".format(losses[-1]))

    return losses

In [None]:
# Train every registered model on the same embedding pairs and keep each
# per-epoch loss history, in the iteration order of `models`.
losses = []
for name, model in models.items():
    print('Training model {}'.format(name))
    history = train(model, sbert_embeddings, clip_embeddings)
    losses.append(history)
    print('Finished Training from model {}'.format(name))
    print("-"*50)

In [None]:
# One training curve per model. `losses` was filled in the iteration order of
# `models`, so pairing the two sequences matches each curve with its name.
for name, history in zip(models, losses):
    plt.plot(history, label=name)
plt.xlabel("Epoch")
plt.ylabel("Training loss (summed over samples)")
plt.title("Training loss per model")
plt.legend()
plt.show()

In [None]:
# Read the test corpus: one list entry per line of the file
# (line endings are kept as read).
test_sentences = []
with open(test_filename, mode='rt', encoding='utf-8') as file_object:
    test_sentences.extend(file_object)

N = len(test_sentences)
print("Number of sentences : {}".format(N))

In [None]:
# Apply the cleaning patterns in `regex`, in order, to the test sentences;
# every match is replaced by a single space.
for pattern in regex:
    test_sentences = [re.sub(pattern, " ", sentence) for sentence in test_sentences]

In [None]:
# Tokenize the cleaned test sentences for CLIP and encode them without gradients.
# clip.tokenize raises a RuntimeError when a sentence exceeds the model's context
# length unless truncate=True, so long sentences are truncated instead of
# aborting the evaluation.
# NOTE(review): truncated sentences are embedded from their leading tokens only.
text = clip.tokenize(test_sentences, truncate=True).to(device)
with torch.no_grad():
    test_clip_embeddings = clip_model.encode_text(text)

In [None]:
# Encode the test sentences with SBERT, then wrap the result as a torch tensor.
with torch.no_grad():
    _test_sbert_array = sbert_model.encode(test_sentences)
    test_sbert_embeddings = torch.from_numpy(_test_sbert_array)

In [None]:
def cosin_calculator(targets, predictions):
    """Cosine similarity between each target vector and its prediction.

    Args:
        targets: a batch of vectors (2-D tensor or iterable of 1-D tensors),
            or a single 1-D vector.
        predictions: same layout as ``targets``, aligned element by element.

    Returns:
        np.ndarray: one cosine similarity per (target, prediction) pair.
        A single pair of 1-D vectors yields an array of length 1.
    """
    # A 1-D tensor is one vector, not a batch. Without this, zip() below would
    # walk over its individual scalars and compare numbers instead of vectors.
    if torch.is_tensor(targets) and targets.dim() == 1:
        targets = targets.unsqueeze(0)
    if torch.is_tensor(predictions) and predictions.dim() == 1:
        predictions = predictions.unsqueeze(0)

    cos = nn.CosineSimilarity(dim=0, eps=1e-6)
    cosines = []
    for tar, pred in zip(targets, predictions):
        # Cast both sides to float64 so inputs of different dtypes can be compared.
        cosine = cos(tar.to(float), pred.to(float))
        cosines.append(cosine.item())
    return np.array(cosines)

In [None]:
# Evaluate every trained model on the test sentences: average cosine similarity
# and average Euclidean distance between each CLIP embedding and the model's
# prediction for the same sentence.
cosines = []
euclideans = []
# The targets are the same for every model; cast once to float64 so they share
# a dtype with the (cast) predictions.
targets = test_clip_embeddings.to(device).to(float)
for name, model in models.items():
    if len(targets) == 0:
        break
    # Evaluation mode and no autograd: predictions are only scored, so no
    # computation graph should be kept alive for each of them.
    model.eval()
    with torch.no_grad():
        predictions = [model(tsbert.to(device)) for tsbert in test_sbert_embeddings]
    # One row per sentence. The reshape assumes each prediction holds exactly one
    # embedding, possibly with extra singleton dims -- TODO confirm against `networks`.
    stacked_predictions = torch.stack(predictions).reshape(len(predictions), -1).to(float)
    # Passing 2-D batches makes cosin_calculator compare whole vectors row by row.
    cosines.append(float(np.mean(cosin_calculator(targets, stacked_predictions))))
    # Distance between matched pairs only; torch.cdist would average the distance
    # from every target to every prediction (an N x N matrix).
    paired_distances = torch.norm(targets - stacked_predictions, dim=1)
    euclideans.append(paired_distances.mean().item())

In [None]:
# Display the `cosines` list (one entry per evaluated model).
cosines

In [None]:
# Display the `euclideans` list (one entry per evaluated model).
euclideans

In [None]:
# Collect both metrics in one table: one row per model, one column per metric.
data = dict(Cosin=cosines, Euclidean=euclideans)
results = pd.DataFrame(data=data, index=models.keys())

In [None]:
# Render the results table as the cell output.
results