In [None]:
import pandas as pd
import neo4j
from neo4j import GraphDatabase
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import numpy as np
from utils import query_db
from utils import URI, AUTH

## Fetch movie data from DB

In [None]:
# Cypher that returns the id, info text and title of every Movie node.
movies_query = """
  MATCH (m:Movie) 
  RETURN m.id as id, m.info as info, m.title as title
  """

# `movie_data` is indexed here and iterated by the ingestion cell further down.
movie_data = query_db(movies_query)

# Peek at the first record to sanity-check the returned fields.
first_movie = movie_data[0]
print(first_movie)

## Get transformer for creating embeddings

In [None]:
# Pretrained sentence-transformers model used to turn movie info text into vectors.
embedding_model_name = "msmarco-distilbert-base-v4"
embedder = SentenceTransformer(embedding_model_name)


In [None]:
# Value substituted into `vector.dimensions` when the vector index is created below.
# NOTE(review): assumed to equal the length of the vectors produced by `embedder`
# (768 for the model loaded above) — confirm if the model is ever changed.
embedding_dimensions: int = 768

### Ingest embedding data into the database

In [None]:
# Encode each movie's info text and store the vector on the matching Movie node.
#
# The values are sent as query parameters instead of being pasted into the Cypher
# text. That way:
#   * ids or text containing quotes cannot break (or inject into) the query,
#   * the id keeps its original type, so it matches whether m.id is stored as a
#     string or as a number,
#   * the embedding is sent as a native list of floats, so APOC is not needed to
#     parse a stringified list.
# The :Movie label restricts the match to the nodes the data was read from.
set_embedding_query = """
MATCH (m:Movie {id: $id})
SET m.embedding = $embedding
"""

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    with driver.session(database="neo4j") as session:
        for movie in movie_data:
            movie_id = movie["id"]
            # Movies without info text are embedded from the empty string.
            info = movie["info"] if "info" in movie else ""
            info = "" if info is None else info
            embedding = embedder.encode(info).astype(np.float32).tolist()
            session.run(
                set_embedding_query,
                {"id": movie_id, "embedding": embedding},
            )

### TODO
Check that the embeddings are now ingested in the database.

## Create the vector index

In [None]:
# Create the vector index over Movie.embedding.
# IF NOT EXISTS makes the cell idempotent: re-running the notebook no longer fails
# because `embedding_index` was already created by an earlier run.
# The dimension is substituted into the query text; int() guarantees that only a
# plain integer can end up in the statement.
create_vector_index_query = """
CREATE VECTOR INDEX embedding_index IF NOT EXISTS
FOR (m:Movie)
ON (m.embedding)
OPTIONS {indexConfig: {`vector.dimensions`: $embedding_dimensions, `vector.similarity_function`: 'cosine'}}
""".replace("$embedding_dimensions", str(int(embedding_dimensions)))

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    with driver.session(database="neo4j") as session:
        session.run(create_vector_index_query)

### Show all vector indices

In [None]:
# List only the vector indexes so the index created above can be inspected.
show_vector_indexes_query = 'SHOW INDEXES WHERE type = "VECTOR"'
query_db(show_vector_indexes_query)

Vector index seems ok.

### Create some queries to fetch movies via vector search

In [None]:
def get_similar_movies_query(movie_name: str):
    """Build a Cypher query returning the 5 nearest neighbours of a movie.

    The movie is looked up by case-insensitive title, and its embedding is used
    to query the `embedding_index` vector index. The source movie itself is not
    filtered out of the results.

    Args:
        movie_name: Title of the movie to search from (any casing).

    Returns:
        The Cypher query text as a string.
    """
    # The title is embedded in a single-quoted Cypher string literal, so
    # backslashes and single quotes must be escaped (backslashes first);
    # otherwise a title such as "Schindler's List" yields invalid Cypher.
    title_literal = movie_name.lower().replace("\\", "\\\\").replace("'", "\\'")
    return f"""
  MATCH (m:Movie)
  WHERE toLower(m.title) = '{title_literal}'

  CALL db.index.vector.queryNodes('embedding_index', 5, m.embedding)
  YIELD node AS similarMovie, score
  RETURN similarMovie.title, similarMovie.genre, score
  """

def get_similar_movies_with_same_actor_query(movie_name: str):
    """Build a Cypher query for similar movies that share an actor with a movie.

    The movie is looked up by case-insensitive title. For the people who acted
    in it, the 100 nearest neighbours from the `embedding_index` vector index
    are filtered down to other movies in which the same person acted.

    Args:
        movie_name: Title of the movie to search from (any casing).

    Returns:
        The Cypher query text as a string.
    """
    # The title is embedded in a single-quoted Cypher string literal, so
    # backslashes and single quotes must be escaped (backslashes first);
    # otherwise a title such as "Schindler's List" yields invalid Cypher.
    title_literal = movie_name.lower().replace("\\", "\\\\").replace("'", "\\'")
    return f"""
    MATCH (m:Movie)
    WHERE toLower(m.title) = '{title_literal}'

    MATCH (p: Person)-[:ACTED_IN]->(m: Movie)

    CALL db.index.vector.queryNodes('embedding_index', 100, m.embedding)
    YIELD node AS similarMovie, score
    WHERE similarMovie <> m
    AND (similarMovie)<-[:ACTED_IN]-(p)
    RETURN similarMovie.title, score
    """

In [None]:
# Vector search for the movies closest to "American Sniper".
similar_movies_query = get_similar_movies_query("American Sniper")
query_db(similar_movies_query)

In [None]:
# Same search, restricted to movies that share an actor with "American Sniper".
same_actor_query = get_similar_movies_with_same_actor_query("American Sniper")
query_db(same_actor_query)

### TODO
Check that American Sniper and Burnt in fact have some actors in common.

### TODO

Think about how you could use the viewer and rating data to produce recommendations for viewers. Implement some queries for these recommendations.