In [None]:
import os
import pandas as pd

from urllib.parse import quote_plus
from dotenv import load_dotenv

from neomodel import (
    StructuredNode,
    StringProperty,
    RelationshipTo,
    IntegerProperty,
    config
)

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Neo4jVector

In [None]:
# Read the raw catalog and renumber its rows from zero.
catalog = pd.read_json("../data/catalog.json")
catalog = catalog.reset_index(drop=True)

# Boolean mask: True for rows whose genre list carries the sci-fi label
# (the label text is matched exactly as it appears in the data).
is_scifi = catalog.genreList.apply(lambda genres: "ficção científica" in genres)

scifi_catalog = catalog[is_scifi]
scifi_catalog.head(10)

In [None]:
class Movie(StructuredNode):
    """Graph node describing one title from the catalog."""

    # Movie title, declared with a unique index.
    # NOTE(review): titles are not guaranteed unique (e.g. remakes) -- confirm
    # this key is sufficient for the catalog being loaded.
    title = StringProperty(unique_index=True)

    # Free-text description of the movie.
    description = StringProperty()

    # Year stored as an integer (presumably the release year -- confirm).
    year = IntegerProperty()


class Actor(StructuredNode):
    """Graph node for a performer, linked to the movies they appear in."""

    # Performer name, declared with a unique index.
    name = StringProperty(unique_index=True)

    # Relationship from this actor to Movie nodes, typed ACTED_IN.
    movies = RelationshipTo("Movie", "ACTED_IN")

In [None]:
# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()

# Fail early with an actionable message: without this check a missing
# variable reaches quote_plus(None) and surfaces as an unrelated TypeError.
neo4j_password = os.getenv("NEO4J_PASSWORD")
if neo4j_password is None:
    raise RuntimeError(
        "NEO4J_PASSWORD is not set; export it or add it to the .env file."
    )

# `password` is the URL-encoded form, safe to embed in the bolt URL even when
# the raw secret contains characters such as '@', ':' or '/'.
password = quote_plus(neo4j_password)
config.DATABASE_URL = f"bolt://neo4j:{password}@localhost:7687"

In [None]:
# Load every sci-fi title and its cast into Neo4j.
#
# `get_or_create` matches nodes on the model's unique property (`title` for
# Movie, `name` for Actor) and only creates a node when no match exists. That
# keeps this cell safe to re-run, and safe when the catalog repeats a title,
# instead of inserting duplicates or failing on the uniqueness constraint.
actors = {}  # actor name -> Actor node; avoids a database round trip per repeat
for _, content in scifi_catalog.iterrows():
    # Bracket access is used on the row because attribute access can be
    # shadowed by pandas Series attributes of the same name.
    movie_node = Movie.get_or_create(
        {
            "title": content["title"],
            "description": content["fullDescription"],
            "year": content["year"],
        }
    )[0]  # get_or_create returns a list, one node per input dict

    for actor in content["actors"]:
        actor_node = actors.get(actor)
        if actor_node is None:
            actor_node = Actor.get_or_create({"name": actor})[0]
            actors[actor] = actor_node
        actor_node.movies.connect(movie_node)

In [None]:
# Create the vector store over the Movie nodes that already exist in the graph.
# Each node's `title` and `description` are embedded and the vector is stored
# on the node under `movie_embedding`, searchable through `movie_index`.
# NOTE(review): the name `paper_graph` is kept in case later cells use it,
# although this store indexes movies.
paper_graph = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(model="text-embedding-3-large"),
    url="bolt://localhost:7687",
    username="neo4j",
    # Read the secret from the environment (the same NEO4J_PASSWORD variable
    # used for the neomodel connection) instead of committing it to the
    # notebook. Raises KeyError if the variable is missing.
    password=os.environ["NEO4J_PASSWORD"],
    index_name="movie_index",
    node_label="Movie",
    text_node_properties=["title", "description"],
    embedding_node_property="movie_embedding",
)