In [None]:
!pip install -U sentence-transformers

In [None]:
!pip install tf-keras

In [17]:
from sentence_transformers import SentenceTransformer
from scipy.spatial import distance
import math
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import tensorflow as t
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras import Input
from tensorflow.keras.models import Model
sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')



In [18]:
# Sentences we want to encode. Example:
factual_sentence = ['who is the director of Joker?',"who directed the Godfather?"]
recommendation_sentence = [ "recommend me a movie similar to Interstellar.", "i would like to watch a movie like Doctor Strange and Spider-man."]
multimedia_sentence = ["show me a picture of Jennifer Anniston.","what does Matthew Moy look like?"]


# Sentences are encoded by calling model.encode()
factual_embedding = sentence_model.encode(factual_sentence)
recommendation_embedding = sentence_model.encode(recommendation_sentence)
multimedia_embedding = sentence_model.encode(multimedia_sentence)

In [45]:
FACTUAL_QUESTIONS = [
    "What is the genre of Good Neighbors?",
    'Who directed Apocalypse Now?',
    "Who is the director of Star Wars Episode VI - Return of the Jedi?",
    "Who is the screenwriter of The Masked Gang: Cyprus?",
    'When was The Godfather released?',
    "When was Pulp Fiction released?",
    "Who played the lead role in The Matrix?",
    "Who directed Blade Runner 2049?",
]


RECOMMENDATION_EXAMPLES = [ "Recommend me a movie similar to Interstellar.", 
                           "I would like to watch a movie like Doctor Strange and Spider-man.",
                           "Given that I like The Lion King, Pocahontas, and The Beauty and the Beast, can you recommend some movies?",
                           "Recommend movies like Nightmare on Elm Street, Friday the 13th, and Halloween.",
                           "Recommend movies similar to Hamlet and Othello.",
                           "I want to see some drama and comedy movies."]

factual_embedding = sentence_model.encode(FACTUAL_QUESTIONS)
recommendation_embedding = sentence_model.encode(RECOMMENDATION_EXAMPLES)

In [60]:
math.dist(recommendation_embedding[0], recommendation_embedding[1])

6.7748182806917

In [36]:
from sklearn.decomposition import PCA 

def dimension_reduction_PCA(embeddings):
    reduced_embeddings = np.zeros((embeddings.shape[0],3))
    for i,embedding in enumerate(embeddings):
        pca = PCA(n_components = 1)
        pca.fit(embedding.reshape((3,128)))
        embedding_pca_reduced = pca.transform(embedding.reshape((3,128))).reshape((3))
        reduced_embeddings[i]=embedding_pca_reduced
    return reduced_embeddings

def visualize_reduced_embeddings(*args):
    colors = ["b","g","r","c","m","y"]
    for i,arg in enumerate(args):
        for reduced_embedding in arg:
            plt.scatter(reduced_embedding[0],reduced_embedding[1],c=colors[i])
    plt.show()

def visualize_3d(*args):
    fig = go.Figure()
    colors = ["blue","green","red","cyan","magenta","yellow"]
    for i,arg in enumerate(args):
        fig.add_trace(go.Scatter3d(
                                    x=arg[:,0],
                                    y=arg[:,1],
                                    z=arg[:,2],
                                    mode='markers',
                                    marker=dict(
                                        size=12,
                                        color=colors[i]               # set color to an array/list of desired values
                                    )
                                )) 
    fig.show() 

In [39]:
visualize_3d(dimension_reduction_PCA(factual_embedding),dimension_reduction_PCA(recommendation_embedding))

In [46]:
def build_model():
    model = Sequential()
    model.add(Input([384]))
    model.add(Dense(128, activation="sigmoid"))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer='adam',
      loss='mse',
      metrics=['accuracy'])
    return model

model = build_model()
model.summary()

In [47]:
# Factual questions: 0
# Recommendation: 1

X=np.concatenate([factual_embedding,recommendation_embedding],axis=0)
y=np.concatenate([np.zeros(factual_embedding.shape[0]),np.ones(recommendation_embedding.shape[0])],axis=0)

In [48]:
model.fit(X, y, epochs=10, 
          batch_size=1, 
          validation_split=0.1)

Epoch 1/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4187 - loss: 0.3011 - val_accuracy: 0.0000e+00 - val_loss: 0.2615
Epoch 2/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8739 - loss: 0.1253 - val_accuracy: 1.0000 - val_loss: 0.1890
Epoch 3/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 1.0000 - loss: 0.0494 - val_accuracy: 1.0000 - val_loss: 0.1235
Epoch 4/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 0.0470 - val_accuracy: 1.0000 - val_loss: 0.0692
Epoch 5/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 1.0000 - loss: 0.0245 - val_accuracy: 1.0000 - val_loss: 0.0526
Epoch 6/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 1.0000 - loss: 0.0210 - val_accuracy: 1.0000 - val_loss: 0.0486
Epoch 7/10
[1m12/12[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x3167cd400>

In [49]:
def predict(input):
    embed=sentence_model.encode(input)
    output = model.predict(embed.reshape((1,384)))[0,0]
    return int(output>=0.5)

In [50]:
predict("Can you recommend me films like Doctor Strange and Ironman.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


1

In [51]:
predict("Can you recommend me drama and comedy movies?")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step


1

In [54]:
predict("Recommend me drama and comedy movies.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


1

In [52]:
predict("Who is the director of Doctor Strange?")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


0

In [53]:
predict("Who is the director of Interstellar?")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


0