In [1]:
import base64
import os
from dotenv import load_dotenv
import pandas as pd
import langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.llms import Ollama
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
import ollama
from langchain.chains import GraphCypherQAChain

In [2]:
import requests
import json

def generate_response(prompt, model='llama3'):
    url = "http://localhost:11434/api/generate"
    data = {
        "model": model,
        "prompt": prompt,
        "stream": False
    }
    response = requests.post(url, json=data)
    return json.loads(response.text)['response']

In [3]:
generate_response("what is ML?")

"ML stands for Machine Learning. It's a subfield of Artificial Intelligence (AI) that involves training algorithms to learn from data, make predictions, and improve their performance over time without being explicitly programmed.\n\nMachine learning focuses on developing systems that can:\n\n1. **Learn**: Discover patterns, relationships, and insights from data.\n2. **Predict**: Make informed decisions based on the learned knowledge.\n3. **Improve**: Refine their performance through continuous learning from new data.\n\nThe primary goals of machine learning are:\n\n1. **Classification**: Predicting a category or class label (e.g., spam vs. non-spam emails).\n2. **Regression**: Estimating continuous values or predictions (e.g., stock prices).\n3. **Clustering**: Grouping similar data points into clusters.\n4. **Dimensionality reduction**: Reducing the number of features in a dataset.\n\nMachine learning is applied in various fields, including:\n\n1. Computer Vision: Image recognition, o

### 1. Load Data and select a Sample of records

In [4]:
# Load Dataset
from datasets import load_dataset
import pandas as pd

# https://huggingface.co/datasets/AIatMongoDB/embedded_movies
dataset = load_dataset("AIatMongoDB/embedded_movies")

# Convert the dataset to a pandas dataframe
dataset_df = pd.DataFrame(dataset["train"])
dataset_df['awards_wins'] = dataset_df['awards'].apply(lambda x: x['wins'])
dataset_df['imdb_rating'] = dataset_df['imdb'].apply(lambda x: x['rating'])
dataset_df.head(2)


Unnamed: 0,plot,genres,runtime,cast,num_mflix_comments,poster,title,fullplot,languages,directors,writers,awards,imdb,countries,type,plot_embedding,rated,metacritic,awards_wins,imdb_rating
0,Young Pauline is left a lot of money when her ...,[Action],199.0,"[Pearl White, Crane Wilbur, Paul Panzer, Edwar...",0,https://m.media-amazon.com/images/M/MV5BMzgxOD...,The Perils of Pauline,Young Pauline is left a lot of money when her ...,[English],"[Louis J. Gasnier, Donald MacKenzie]","[Charles W. Goddard (screenplay), Basil Dickey...","{'nominations': 0, 'text': '1 win.', 'wins': 1}","{'id': 4465, 'rating': 7.6, 'votes': 744}",[USA],movie,"[0.0007293965299999999, -0.026834568000000003,...",,,1,7.6
1,A penniless young man tries to save an heiress...,"[Comedy, Short, Action]",22.0,"[Harold Lloyd, Mildred Davis, 'Snub' Pollard, ...",0,https://m.media-amazon.com/images/M/MV5BNzE1OW...,From Hand to Mouth,As a penniless man worries about how he will m...,[English],"[Alfred J. Goulding, Hal Roach]",[H.M. Walker (titles)],"{'nominations': 1, 'text': '1 nomination.', 'w...","{'id': 10146, 'rating': 7.0, 'votes': 639}",[USA],movie,"[-0.022837115, -0.022941574000000003, 0.014937...",TV-G,,0,7.0


In [5]:
import re

def generate_movie_tagline(row):
    tagline = f"The movie '{row.title}' is a {row.genres} film directed by {row.directors}. Starring {row.cast}, with an IMDb rating of {row.imdb_rating}, the film has won {row.awards_wins} awards. It is available in {row.languages}. The movie's plot is: {row.fullplot}"
    return tagline

def clean_string(text):
    # Replace any non-alphanumeric character (excluding spaces) with an empty string
    return re.sub(r'[^A-Za-z0-9\s]', '', text)

# dataset_df.columns
drop_feats = ['plot', 'runtime', 'num_mflix_comments', 'poster', 'awards', 'imdb', 'type', 'metacritic', 'plot_embedding', 'rated', 'countries', 'writers']

cleaned_df = dataset_df.drop(drop_feats, axis=1).dropna()

# Apply the function to create a new column
cleaned_df['title'] = cleaned_df['title'].apply(lambda x: clean_string(x))

# Apply the function to create a new column
cleaned_df['tagline'] = cleaned_df.apply(generate_movie_tagline, axis=1)

for i in cleaned_df.columns:
    cleaned_df[i] = cleaned_df[i].apply(lambda x: ' | '.join(x) if isinstance(x, list) else x)

cleaned_df['movie_id'] = range(1, len(cleaned_df) + 1)

organize_columns = ['movie_id', 'title', 'languages', 'awards_wins', 'imdb_rating', 'directors', 'cast', 'genres', 'tagline']

cleaned_df = cleaned_df[organize_columns]

cleaned_df.columns = ['movie_id', 'title', 'languages', 'awards', 'imdb_rating', 'directors', 'cast', 'genres', 'tagline']

cleaned_df['tagline'] = cleaned_df['tagline'].str.replace('[', '(', regex=False).str.replace(']', ')', regex=False).str.replace('"', "'", regex=False)


cleaned_df.head()

Unnamed: 0,movie_id,title,languages,awards,imdb_rating,directors,cast,genres,tagline
0,1,The Perils of Pauline,English,1,7.6,Louis J. Gasnier | Donald MacKenzie,Pearl White | Crane Wilbur | Paul Panzer | Edw...,Action,The movie 'The Perils of Pauline' is a ('Actio...
1,2,From Hand to Mouth,English,0,7.0,Alfred J. Goulding | Hal Roach,Harold Lloyd | Mildred Davis | 'Snub' Pollard ...,Comedy | Short | Action,"The movie 'From Hand to Mouth' is a ('Comedy',..."
2,3,Beau Geste,English,1,6.9,Herbert Brenon,Ronald Colman | Neil Hamilton | Ralph Forbes |...,Action | Adventure | Drama,"The movie 'Beau Geste' is a ('Action', 'Advent..."
4,4,For Heavens Sake,English,0,7.6,Sam Taylor,Harold Lloyd | Jobyna Ralston | Noah Young | J...,Action | Comedy | Romance,"The movie 'For Heavens Sake' is a ('Action', '..."
5,5,Men Without Women,English,1,5.8,John Ford,Kenneth MacKenna | Frank Albertson | J. Farrel...,Action | Drama,"The movie 'Men Without Women' is a ('Action', ..."


In [6]:
print(cleaned_df.tagline[0])

The movie 'The Perils of Pauline' is a ('Action') film directed by ('Louis J. Gasnier', 'Donald MacKenzie'). Starring ('Pearl White', 'Crane Wilbur', 'Paul Panzer', 'Edward Josè'), with an IMDb rating of 7.6, the film has won 1 awards. It is available in ('English'). The movie's plot is: Young Pauline is left a lot of money when her wealthy uncle dies. However, her uncle's secretary has been named as her guardian until she marries, at which time she will officially take possession of her inheritance. Meanwhile, her 'guardian' and his confederates constantly come up with schemes to get rid of Pauline so that he can get his hands on the money himself.


`Features - Properties`:
* Movie - ID, Title, language,  imdb rating, Award, Plot / Description

* Person - Director, Actor

* Genre 

* Location

### 2. Selecting a Sample of Records

In [7]:
n_records = 30
df = cleaned_df.sample(n_records)
df.head()

Unnamed: 0,movie_id,title,languages,awards,imdb_rating,directors,cast,genres,tagline
221,211,Raiders of the Lost Ark,English | German | Hebrew | Spanish | Arabic |...,32,8.6,Steven Spielberg,Harrison Ford | Karen Allen | Paul Freeman | R...,Action | Adventure,The movie 'Raiders of the Lost Ark' is a ('Act...
25,25,Road House,English,0,7.3,Jean Negulesco,Ida Lupino | Cornel Wilde | Celeste Holm | Ric...,Action | Drama | Film-Noir,"The movie 'Road House' is a ('Action', 'Drama'..."
1076,1036,The Recruit,English | Persian | Russian,0,6.6,Roger Donaldson,Al Pacino | Colin Farrell | Bridget Moynahan |...,Action | Crime | Drama,"The movie 'The Recruit' is a ('Action', 'Crime..."
625,605,The Blade,Cantonese,1,7.2,Hark Tsui,Wenzhuo Zhao | Xin Xin Xiong | Sonny Song | Va...,Drama | Action,"The movie 'The Blade' is a ('Drama', 'Action')..."
1310,1258,72 Meters,Russian | Ukrainian,0,6.7,Vladimir Khotinenko,Sergey Makovetskiy | Marat Basharov | Andrey K...,Drama | Action | Thriller,"The movie '72 Meters' is a ('Drama', 'Action',..."


In [8]:
# df.to_csv(f"data/sampled_cleaned_movies_data_with_embeddings_{n_records}.csv", index=False)

### 3. Initiate Neo4J and Load the data

In [9]:
from langchain.graphs import Neo4jGraph
from dotenv import load_dotenv

load_dotenv()

## Load the Neo4J graph db
def load_neo4j_graph_db():
    graph = Neo4jGraph(
        url=os.getenv("NEO4J_URI") ,
        username=os.getenv("NEO4J_USERNAME") ,
        password=os.getenv("NEO4J_PASSWORD") 
    )

    graph.refresh_schema()
    print(f"Here is the Graph DB Schema:\n\n{graph.schema}")

    return graph

graph_db = load_neo4j_graph_db()

Here is the Graph DB Schema:

Node properties:
Movie {title: STRING, awards: INTEGER, id: STRING, tagline: STRING, imdbRating: FLOAT}
Person {name: STRING}
Genre {name: STRING}
Language {name: STRING}
Relationship properties:

The relationships:
(:Movie)-[:IN_GENRE]->(:Genre)
(:Movie)-[:WAS_RELEASED_IN]->(:Language)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:ACTED_IN]->(:Movie)


In [10]:
## Load Data to Graph DB 
def load_data_to_graph_db(graph_db, df):
    # Loop through each row in the DataFrame and add them to the Neo4j db
    print(f"\nTotal Number of Records to push in Graph DB: {df.shape[0]}")

    for index, row in df.iterrows():

        graph_db.query(f'''
                MERGE (m:Movie {{title: '{row.title}'}})                             
                SET m.awards = {row.awards},                        
                    m.id = "{row.movie_id}",                                    
                    m.tagline = "{row.tagline}",                                
                    m.imdbRating = toFloat({(row.imdb_rating)}) 
                
                FOREACH (director IN split("{row.directors}", '|') |             
                    MERGE (p:Person {{name: trim(director)}})                  
                    MERGE (p)-[:DIRECTED]->(m))                             
                            
                FOREACH (actor IN split("{row.cast}", '|') |                  
                    MERGE (p:Person {{name: trim(actor)}})                     
                    MERGE (p)-[:ACTED_IN]->(m))                             
                            
                FOREACH (genre IN split("{row.genres}", '|') |                  
                    MERGE (g:Genre {{name: trim(genre)}})                      
                    MERGE (m)-[:IN_GENRE]->(g))                             
                            
                MERGE (l:Language {{name: trim("{row.languages}")}})
                MERGE (m)-[:WAS_RELEASED_IN]->(l)
                        
            ''')
    
    print("\nPushed all the records to the graph db!")

def update_graph_db_with_new_data(graph_db, df):
    graph_db.refresh_schema()
    if graph_db.schema == 'Node properties:\n\nRelationship properties:\n\nThe relationships:\n':
        print("\nGraph DB is Empty!\n")
        load_data_to_graph_db(graph_db, df)
    else:
        print("\nGraph DB is not empty, so deleting the records from DB!")

        ## Query to Delete all the nodes and relationship from the graph db
        graph_db.query('''
            MATCH (n)
            DETACH DELETE n
        ''')

        print("\nPushing new data to Graph DB")
        load_data_to_graph_db(graph_db, df)


## Function to update the graph DB
update_graph_db_with_new_data(graph_db, df)


Graph DB is not empty, so deleting the records from DB!

Pushing new data to Graph DB

Total Number of Records to push in Graph DB: 30

Pushed all the records to the graph db!


In [11]:
graph_db.refresh_schema()
print(graph_db.schema)

Node properties:
Movie {title: STRING, awards: INTEGER, id: STRING, tagline: STRING, imdbRating: FLOAT}
Person {name: STRING}
Genre {name: STRING}
Language {name: STRING}
Relationship properties:

The relationships:
(:Movie)-[:IN_GENRE]->(:Genre)
(:Movie)-[:WAS_RELEASED_IN]->(:Language)
(:Person)-[:ACTED_IN]->(:Movie)
(:Person)-[:DIRECTED]->(:Movie)


### Generate Embeddings - Tagline (Plot of the Movie)

In [12]:
import ollama

def embed_text(text:str):
    response = ollama.embeddings(model="all-minilm", prompt=text)
    return response["embedding"]   

In [14]:
df['tagline_embedding'] = df["tagline"].apply(lambda x: embed_text(x))
df.head(2)

Unnamed: 0,movie_id,title,languages,awards,imdb_rating,directors,cast,genres,tagline,tagline_embedding
221,211,Raiders of the Lost Ark,English | German | Hebrew | Spanish | Arabic |...,32,8.6,Steven Spielberg,Harrison Ford | Karen Allen | Paul Freeman | R...,Action | Adventure,The movie 'Raiders of the Lost Ark' is a ('Act...,"[-0.04223256930708885, 0.12403502315282822, -0..."
25,25,Road House,English,0,7.3,Jean Negulesco,Ida Lupino | Cornel Wilde | Celeste Holm | Ric...,Action | Drama | Film-Noir,"The movie 'Road House' is a ('Action', 'Drama'...","[-0.04625450447201729, -0.10949589312076569, 0..."


In [15]:
print("Number of vectors:", len(df['tagline_embedding']))
print("Embedding dimension:", len(df['tagline_embedding'].values[0]))
df['tagline_embedding'].values[0][:5]

Number of vectors: 30
Embedding dimension: 384


[-0.04223256930708885,
 0.12403502315282822,
 -0.11337953060865402,
 0.013988159596920013,
 0.07863015681505203]

* Create the Index on Embedding

In [16]:
def create_vector_index_in_graph_db(graph_db, df):
    graph_db.query("""
        CREATE VECTOR INDEX movie_tagline_embeddings IF NOT EXISTS      // Create a vector index named 'movie_tagline_embeddings' if it doesn't already exist  
        FOR (m:Movie) ON (m.tagline_embedding)                           // Index the 'taglineEmbedding' property of Movie nodes 
        OPTIONS { indexConfig: {                                        // Set options for the index
            `vector.dimensions`: 384,                                    // Specify the dimensionality of the vector space (1536 dimensions)
            `vector.similarity_function`: 'cosine'                        // Specify the similarity function to be cosine similarity
        }}"""
    )

    res = graph_db.query("""
        SHOW VECTOR INDEXES     // Retrieves information about all vector indexes in the database
        """
        )
    
    print(f"Sample Vector Index Response:\n{res}")

    print(f"Updating the graph db with tagline embeddings") 
    for index, row in df.iterrows():
        title = row['title']
        embedding = row['tagline_embedding']
        graph_db.query(f"MATCH (m:Movie {{title: '{title}'}}) SET m.tagline_embedding = {embedding}")

    print(f"Updated the graph db with tagline embeddings and here is the schema of the db:\n{graph_db.schema}") 

In [17]:
create_vector_index_in_graph_db(graph_db, df)

Sample Vector Index Response:
[{'id': 2, 'name': 'movie_tagline_embeddings', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'VECTOR', 'entityType': 'NODE', 'labelsOrTypes': ['Movie'], 'properties': ['tagline_embedding'], 'indexProvider': 'vector-2.0', 'owningConstraint': None, 'lastRead': None, 'readCount': 0}]
Updating the graph db with tagline embeddings
Updated the graph db with tagline embeddings and here is the schema of the db:
Node properties:
Movie {title: STRING, awards: INTEGER, id: STRING, tagline: STRING, imdbRating: FLOAT}
Person {name: STRING}
Genre {name: STRING}
Language {name: STRING}
Relationship properties:

The relationships:
(:Movie)-[:IN_GENRE]->(:Genre)
(:Movie)-[:WAS_RELEASED_IN]->(:Language)
(:Person)-[:ACTED_IN]->(:Movie)
(:Person)-[:DIRECTED]->(:Movie)


In [20]:
## Validation Query

result = graph_db.query("""
    MATCH (m:Movie) 
    WHERE m.tagline IS NOT NULL
    RETURN m.tagline, m.tagline_embedding
    LIMIT 1
    """
)

print(result[0]['m.tagline'])

The movie 'Kill Me Again' is a ('Action', 'Crime', 'Drama') film directed by ('John Dahl'). Starring ('Pat Mulligan', 'Nick Dimitri', 'Michael Madsen', 'Joanne Whalley'), with an IMDb rating of 6.3, the film has won 1 awards. It is available in ('English'). The movie's plot is: Fay Forrester, an attractive young lady wants to escape from her violent and jealous boyfriend Vince. So she hires Jack Andrews, a second class private investigator to arrange her death. She wants to restart her life with a new identity and the money she robbed together with Vince. Because of Jack's financial problems he joins Fay after her fake death. Unfortunately Vince finds out that Fay's still alive. The hunt for Jack, Fay and the money begins...


* Graph DB is updated!

### 2. Initiate LLM Models - Text to Cypher Model 

In [21]:
## LLM Model names
text_to_cypher_llm_model_name = "tomasonjo/llama3-text2cypher-demo"
embedding_model_name = "all-minilm"

## Initiate Cypher llm 
text_to_cypher_llm = ChatOllama(model=text_to_cypher_llm_model_name)

In [51]:
# llm = Ollama(model="llama3")
# llm.invoke("what is ML?")

## 3. Function & Templates

In [22]:
def simple_graph_db_search_by_generating_cypher_queries(graph_db, question):
    # Template to convert Question to Cypher Query
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "Given an input question, convert it to a Cypher query. No pre-amble.",
            ),
            (
                "human",
                (
                    "Based on the Neo4j graph schema below, write a Cypher query that would answer the user's question: "
                    "\n{schema} \nQuestion: {question} \nCypher query:"
                ),
            ),
        ]
    )

    # Chain
    cypher_query_chain = prompt | text_to_cypher_llm

    # Generate Cypher Query using Chain
    cypher_query_response = cypher_query_chain.invoke({"question": question, "schema": graph_db.schema}).content

    # Trigger the Graph db to fetch results
    result = graph_db.query(cypher_query_response)
    print(f"\nGraph DB response:\n{result}")  

    format_response = text_to_cypher_llm.invoke(f"Based on the following content, format it in a more readable and understandable output to make clear to the audience, {result}")
    print(f"\nImproved responses:\n{format_response.content}")  
    return format_response.content
    

In [23]:
df.head(2)

Unnamed: 0,movie_id,title,languages,awards,imdb_rating,directors,cast,genres,tagline,tagline_embedding
221,211,Raiders of the Lost Ark,English | German | Hebrew | Spanish | Arabic |...,32,8.6,Steven Spielberg,Harrison Ford | Karen Allen | Paul Freeman | R...,Action | Adventure,The movie 'Raiders of the Lost Ark' is a ('Act...,"[-0.04223256930708885, 0.12403502315282822, -0..."
25,25,Road House,English,0,7.3,Jean Negulesco,Ida Lupino | Cornel Wilde | Celeste Holm | Ric...,Action | Drama | Film-Noir,"The movie 'Road House' is a ('Action', 'Drama'...","[-0.04625450447201729, -0.10949589312076569, 0..."


* Search Graph DB - NLU to Cypher Queries

In [24]:
q_toask = ["who was the cast of the movie Road House?", "What are the most common genres for movies who got awards"]

for i in q_toask:
    print(f"User Question: {i}\n")
    simple_graph_db_search_by_generating_cypher_queries(graph_db, i)

User Question: who was the cast of the movie Road House?


Graph DB response:
[{'actor': 'Ida Lupino', 'movie': 'Road House'}, {'actor': 'Cornel Wilde', 'movie': 'Road House'}, {'actor': 'Celeste Holm', 'movie': 'Road House'}, {'actor': 'Richard Widmark', 'movie': 'Road House'}]

Improved responses:
**Cast of "Road House"**

* **Ida Lupino**: Starred in the movie "Road House"
* **Cornel Wilde**: Starred in the movie "Road House"
* **Celeste Holm**: Starred in the movie "Road House"
* **Richard Widmark**: Starred in the movie "Road House"

This format is easier to read and understand, making it clear that these actors have appeared in the same movie, "Road House".
User Question: What are the most common genres for movies who got awards


Graph DB response:
[{'genre': 'Action', 'movie_count': 18}, {'genre': 'Adventure', 'movie_count': 10}, {'genre': 'Crime', 'movie_count': 6}, {'genre': 'Drama', 'movie_count': 6}, {'genre': 'Comedy', 'movie_count': 4}, {'genre': 'Sci-Fi', 'movie_count': 

### Method 2: GraphCypherQAChain

In [25]:
def cypher_chain_graph_db_search_by_generating_cypher_queries(graph_db, question):
    graphCypher_chain = GraphCypherQAChain.from_llm(graph=graph_db, llm=text_to_cypher_llm, validate_cypher=True)

    response = graphCypher_chain.invoke({"query": question})
    print(f"\nGraph Cypher Chain response:\n{response["result"]}")


In [26]:
for i in q_toask:
    print(f"\nUser Question: {i}")
    cypher_chain_graph_db_search_by_generating_cypher_queries(graph_db, i)


User Question: who was the cast of the movie Road House?

Graph Cypher Chain response:
Ida Lupino, Cornel Wilde, Celeste Holm, and Richard Widmark were part of the cast of the movie Road House.

User Question: What are the most common genres for movies who got awards

Graph Cypher Chain response:
Action and Adventure are the most common genres for movies that have received awards.


In [27]:
q_three = "Can you recommend some movies similar to The Road House"

cypher_chain_graph_db_search_by_generating_cypher_queries(graph_db, q_three)


Graph Cypher Chain response:
Based on the action-packed and drama-filled plot of The Road House, I'd recommend the following movies:

* First Blood (1982) - an iconic action movie with a strong focus on character development.
* Point Break (1991) - an adrenaline-fueled thriller with a similar blend of high-stakes action and dramatic twists.
* Speed Racer (2008) - a fast-paced, visually stunning film that explores the world of competitive racing.
* The Driver (1978) - a gritty, intense movie that delves into the world of professional drivers.
* Bullitt (1968) - an all-time classic with Steve McQueen's iconic car chases and intense drama.

These movies share similar themes, action sequences, and dramatic plot twists that made The Road House an enjoyable watch.


In [28]:
q_four = "Can you recommend some Top Genres Movies which has more Awards?"

cypher_chain_graph_db_search_by_generating_cypher_queries(graph_db, q_four)


Graph Cypher Chain response:
Based on the provided information, I would recommend the following top genres movies with a high number of awards:

* Adventure: 'Apocalypto' (AverageAwards: 5.454545454545454)
* Drama: 'Proof of Life' (AverageAwards: 3.583333333333334)
* Action: 'Eragon' (AverageAwards: 3.266666666666666)

These movies have a significant number of awards, indicating their critical and commercial success.


In [29]:
q_five = "How many of the movies have the Action genre?"
cypher_chain_graph_db_search_by_generating_cypher_queries(graph_db, q_five)


Graph Cypher Chain response:
30.


In [30]:
q_five = "How many awards each genre have?"
cypher_chain_graph_db_search_by_generating_cypher_queries(graph_db, q_five)


Graph Cypher Chain response:
Horror - 1, Family - 1, Action - 98, Adventure - 60, Drama - 43, Film-Noir - 0, Crime - 31, Thriller - 4, Musical - 3.


In [31]:
q_five = "Recommend 3 movies similar to Road House and it should have atleast 1 award?"
cypher_chain_graph_db_search_by_generating_cypher_queries(graph_db, q_five)


Graph Cypher Chain response:
Based on the action-packed and thrilling nature of Road House, here are three movie recommendations with at least one award:

1. Point Break (1991) - This film has a similar blend of action, suspense, and drama, starring Patrick Swayze as an FBI agent chasing down a group of bank robbers. It received an MTV Movie Award nomination.
2. Speed Racer (2008) - An adrenaline-fueled racing movie with impressive stunts and special effects, based on the classic anime series. It won several awards, including the Critics' Choice Movie Awards for Best Visual Effects and Best Young Performer.
3. Bulletproof (1996) - A crime thriller starring Damon Wayans as a detective investigating the murder of his partner, leading to an intense cat-and-mouse chase with the killer. The movie received a Golden Globe nomination for Best Actor in a Motion Picture - Drama.

These movies share similar themes and elements that made Road House exciting, and all have received critical acclaim

In [103]:
q_five = "Recommend 3 movies in Action genre which are in english language and it should have atleast 1 award?"
cypher_chain_graph_db_search_by_generating_cypher_queries(graph_db, q_five)


Graph Cypher Chain response:
Based on the provided list of movie data points, I recommend three action movies that meet your criteria:

1. **The Matrix** (1999) - This iconic sci-fi action film won four Academy Awards, including Best Visual Effects, and has an English language soundtrack.
2. **Mad Max: Fury Road** (2015) - This adrenaline-fueled action movie won six Academy Awards, including Best Picture and Best Costume Design, and features an all-English cast.
3. **John Wick: Chapter 3 – Parabellum** (2019) - Although it didn't win any major awards, this action film has a strong following and is part of the highly acclaimed John Wick franchise.

All three movies are English-language films with a significant focus on action sequences, making them excellent recommendations for fans of the genre.


### 4. Improve the Results 
##### 4.1 Embed the Question + Similarity Search on Tagline embedding + Generate Cypher Queries + Fetch

Question -- Embed -- Similarity Search on Index -- Results accordingly

In [32]:

def embed_question_to_improve_search_results(graph, question):
    question_emb = embed_text(question)

    result = graph.query("""
            with $question_embedding as question_embedding      // Use the provided question embedding as 'question_embedding'
            CALL db.index.vector.queryNodes(                    // Call the vector index query function
                'movie_tagline_embeddings',                     // Name of the vector index to query against
                $top_k,                                         // Number of top results to retrieve
                question_embedding                              // The question embedding to compare against
                ) YIELD node AS movie, score                    // Yield each matched node and its similarity score
            RETURN movie.title, movie.tagline, score            // Return the title, tagline, and similarity score of each movie
            """,
            params={
                "question_embedding": question_emb,       # Pass the question embedding as a parameter
                "top_k": 3                                      # Specify the number of top results to retrieve
    })

    base_res = f"""Here is the graph db schema which has entities with properties and the relationship between entities
            {graph.schema}
            Here is the user question: {question}
            Here is the model response: {result}

            Based on the above information, summarize and format it in a better way to make it more understandable and readable to the user.
            """

    improved_result = text_to_cypher_llm.invoke(base_res).content

    print(improved_result)
    return improved_result


In [33]:
q_1 = "Can you recommend some Top Genres Movies which has more Awards?"

res = embed_question_to_improve_search_results(graph_db, q_1)

print(res)


**Top Genres with More Awarded Movies**

Based on the movie database schema, here are the top genres with movies that have received more awards:

**Note:** The ranking is based on the number of awards given to movies in each genre.

1. **Drama**: 5 movies with a total of 17 awards
	* Examples: "The Shawshank Redemption" (7 awards), "12 Years a Slave" (6 awards)
2. **Action**: 4 movies with a total of 13 awards
	* Examples: "Mad Max: Fury Road" (5 awards), "The Matrix" (3 awards)
3. **Romance**: 3 movies with a total of 9 awards
	* Examples: "Titanic" (4 awards), "The Proposal" (2 awards)
4. **Thriller**: 3 movies with a total of 7 awards
	* Examples: "Seven" (2 awards), "Minority Report" (1 award)
5. **Science Fiction**: 2 movies with a total of 6 awards
	* Examples: "Inception" (3 awards), "The Matrix Reloaded" (1 award)

**Limitations:** This recommendation only considers movies that have received awards and does not account for other factors like movie ratings or popularity.

I hope

##### 4.2 Extracting Entities + Generate Cypher Queries + Trigger DB


In [91]:
def extract_entites_using_cypher_model(graph_db, question):
    
    ENTITY_EXTRACTION_AND_CYPHER_GENERATION_TEMPLATE = f"""
                    Task: Breakdown the user input and Extract relevant entities from the user input and generate a Cypher query to retrieve the unknown entities using the known entities.
                    Instructions:
                    1. Identify and extract the known entities (e.g., movie title) from the user input.
                    2. Determine the unknown entities (e.g., cast, awards, genres, directors) that need to be retrieved from the graph database.
                    3. Use the known entities to generate a Cypher query that retrieves the values of the unknown entities and validate the generated Cypher query entities and relationships with the reference schema to ensure the query is valid.
                    Schema:
                    {graph_db.schema}
                    Note: Do not include any explanations or apologies in your responses.
                    Do not respond to any questions that might ask for anything other than extracting entities and generating a Cypher query.
                    Do not include any text except the extracted entities and the generated Cypher query.
                    Examples:

                    # Example user input: "Which genres does the movie Inception belong to and who directed it?"
                    Output:
                    MATCH (m:Movie {{title: "Inception"}})
                    OPTIONAL MATCH (m)-[:IN_GENRE]->(g:Genre)
                    OPTIONAL MATCH (p:Person)-[:DIRECTED]->(m)
                    RETURN collect(DISTINCT g.name) AS genres, collect(DISTINCT p.name) AS directors

                    # Example user input: "What is the IMDb rating of The Dark Knight, and in which language was it released?"
                    Output:
                    MATCH (m:Movie {{title: "The Dark Knight"}})
                    OPTIONAL MATCH (m)-[:WAS_RELEASED_IN]->(l:Language)
                    RETURN m.imdbRating AS imdbRating, collect(DISTINCT l.name) AS languages

                    # Example user input: "Who acted in the movie Titanic, and what is its tagline?"
                    Output:
                    MATCH (m:Movie {{title: "Titanic"}})
                    OPTIONAL MATCH (p:Person)-[:ACTED_IN]->(m)
                    RETURN collect(DISTINCT p.name) AS cast, m.tagline AS tagline

                    # Example user input: "Did the movie Avatar win any awards, and what is its IMDb rating?"
                    Output:
                    MATCH (m:Movie {{title: "Avatar"}})
                    RETURN m.awards AS awards, m.imdbRating AS imdbRating


                    # Example user input: "Which languages was the movie Parasite released in and who directed it?"
                    Output:
                    MATCH (m:Movie {{title: "Parasite"}})
                    OPTIONAL MATCH (m)-[:WAS_RELEASED_IN]->(l:Language)
                    OPTIONAL MATCH (p:Person)-[:DIRECTED]->(m)
                    RETURN collect(DISTINCT l.name) AS languages, collect(DISTINCT p.name) AS directors

                    # Example user input: "Recommend 3 movies with  atleast a award and should be in english language"
                    Output:
                    MATCH (m:Movie)-[:WAS_RELEASED_IN]->(l:Language)
                    WHERE l.name = 'English' AND m.awards > 1 
                    RETURN m.title
                    LIMIT 3

                    # Example user input: "Recommend 3 movies with imdb rating atleast 7 and atleast a award and should be in english language"
                    MATCH (m:Movie)-[:WAS_RELEASED_IN]->(l:Language)
                    WHERE l.name = 'English' AND m.awards > 1 and m.imdbRating >= 7.0
                    RETURN m.title
                    LIMIT 3

                    The question is:
                    {question}
        """


    entities_extracted_query = text_to_cypher_llm.invoke(ENTITY_EXTRACTION_AND_CYPHER_GENERATION_TEMPLATE).content
    print(f"\nModel Generated Cypher Query:\n\n{entities_extracted_query}")
    fetched_results = graph_db.query(f"""{entities_extracted_query}""")
    print(f"\n\nFetched Results:\n{fetched_results}")

    format_response = text_to_cypher_llm.invoke(f"""
                    Given the user question:\n{question} and the output response:\n{fetched_results}, 
                    format the response in a more readable and understandable way for clarity, without adding any additional information.
                    Note:
                    Do not include any text except the provided information.
                    """)

    # format_response = text_to_cypher_llm.invoke(f"Based on the following user question: \n{question} and output response: \n{fetched_results}, format it in a more readable and understandable output to make clear to the audience")
    print(f"\nImproved responses:\n{format_response.content}")  
    return entities_extracted_query


In [56]:
example_queries = [
    "who was the cast of the movie Road House and does the movie got any awards?",
    "Can you recommend some Top Genres Movies which has more Awards"
]

for q in example_queries:
    print(f"\nQuestion: '{q}'")
    extract_entites_using_cypher_model(graph_db, q)


Question: 'who was the cast of the movie Road House and does the movie got any awards?'


Failed to write data to connection ResolvedIPv4Address(('34.66.78.163', 7687)) (ResolvedIPv4Address(('34.66.78.163', 7687)))



Model Generated Cypher Query:

MATCH (m:Movie {title: "Road House"})
OPTIONAL MATCH (p:Person)-[:ACTED_IN]->(m)
RETURN collect(DISTINCT p.name) AS cast, m.awards AS awards


Failed to write data to connection IPv4Address(('ad5eb799.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.66.78.163', 7687)))




Fetched Results:
[{'cast': ['Ida Lupino', 'Cornel Wilde', 'Celeste Holm', 'Richard Widmark'], 'awards': 0}]

Improved responses:
The cast of the movie Road House includes:

• Ida Lupino
• Cornel Wilde
• Celeste Holm
• Richard Widmark

As for awards, the movie received none.

Question: 'Can you recommend some Top Genres Movies which has more Awards'

Model Generated Cypher Query:

MATCH (m:Movie)
WHERE m.awards > 0
WITH m
ORDER BY m.imdbRating DESC, m.awards DESC
LIMIT 10
RETURN m.title AS title, m.genres AS genres, m.awards AS awards, m.imdbRating AS imdbRating






Fetched Results:
[{'title': 'Raiders of the Lost Ark', 'genres': None, 'awards': 32, 'imdbRating': 8.6}, {'title': 'The French Connection', 'genres': None, 'awards': 24, 'imdbRating': 7.8}, {'title': 'Apocalypto', 'genres': None, 'awards': 12, 'imdbRating': 7.8}, {'title': 'The Blade', 'genres': None, 'awards': 1, 'imdbRating': 7.2}, {'title': 'Thunderball', 'genres': None, 'awards': 4, 'imdbRating': 7.0}, {'title': 'Pardes', 'genres': None, 'awards': 3, 'imdbRating': 7.0}, {'title': 'Alive', 'genres': None, 'awards': 2, 'imdbRating': 7.0}, {'title': 'Kopps', 'genres': None, 'awards': 5, 'imdbRating': 6.8}, {'title': 'All the Way Boys', 'genres': None, 'awards': 2, 'imdbRating': 6.7}, {'title': 'Kill Me Again', 'genres': None, 'awards': 1, 'imdbRating': 6.3}]

Improved responses:
Here are the top genres movies with more awards:
1. Raiders of the Lost Ark (32 Awards)
2. The French Connection (24 Awards)
3. Apocalypto (12 Awards)
4. Thunderball (4 Awards)
5. Pardes (3 Awards)
6. Alive 

In [59]:
df.head()

Unnamed: 0,movie_id,title,languages,awards,imdb_rating,directors,cast,genres,tagline,tagline_embedding
221,211,Raiders of the Lost Ark,English | German | Hebrew | Spanish | Arabic |...,32,8.6,Steven Spielberg,Harrison Ford | Karen Allen | Paul Freeman | R...,Action | Adventure,The movie 'Raiders of the Lost Ark' is a ('Act...,"[-0.04223256930708885, 0.12403502315282822, -0..."
25,25,Road House,English,0,7.3,Jean Negulesco,Ida Lupino | Cornel Wilde | Celeste Holm | Ric...,Action | Drama | Film-Noir,"The movie 'Road House' is a ('Action', 'Drama'...","[-0.04625450447201729, -0.10949589312076569, 0..."
1076,1036,The Recruit,English | Persian | Russian,0,6.6,Roger Donaldson,Al Pacino | Colin Farrell | Bridget Moynahan |...,Action | Crime | Drama,"The movie 'The Recruit' is a ('Action', 'Crime...","[-0.13712893426418304, -0.13582655787467957, -..."
625,605,The Blade,Cantonese,1,7.2,Hark Tsui,Wenzhuo Zhao | Xin Xin Xiong | Sonny Song | Va...,Drama | Action,"The movie 'The Blade' is a ('Drama', 'Action')...","[-0.10137008130550385, -0.14402838051319122, -..."
1310,1258,72 Meters,Russian | Ukrainian,0,6.7,Vladimir Khotinenko,Sergey Makovetskiy | Marat Basharov | Andrey K...,Drama | Action | Thriller,"The movie '72 Meters' is a ('Drama', 'Action',...","[0.03624854236841202, 0.06997304409742355, -0...."


In [92]:
print(extract_entites_using_cypher_model(graph_db, "recommend 3 movies with  atleast a awards and should be in english language"))


Model Generated Cypher Query:

MATCH (m:Movie)-[:WAS_RELEASED_IN]->(l:Language)
WHERE l.name = 'English' AND m.awards > 0 
RETURN m.title
LIMIT 3


Fetched Results:
[{'m.title': 'Alive'}, {'m.title': 'Kill Me Again'}, {'m.title': 'Predator 2'}]

Improved responses:
Here are three movies that meet your criteria:

1. **Alive** (1991) - Won: 1 award
2. **Kill Me Again** (1989) - Nominated for: 1 award
3. **Predator 2** (1990) - Won: 4 awards
MATCH (m:Movie)-[:WAS_RELEASED_IN]->(l:Language)
WHERE l.name = 'English' AND m.awards > 0 
RETURN m.title
LIMIT 3


In [64]:
print(graph_db.schema)

Node properties:
Movie {title: STRING, awards: INTEGER, id: STRING, tagline: STRING, imdbRating: FLOAT}
Person {name: STRING}
Genre {name: STRING}
Language {name: STRING}
Relationship properties:

The relationships:
(:Movie)-[:IN_GENRE]->(:Genre)
(:Movie)-[:WAS_RELEASED_IN]->(:Language)
(:Person)-[:ACTED_IN]->(:Movie)
(:Person)-[:DIRECTED]->(:Movie)


In [90]:
graph_db.query(
"""

MATCH (m:Movie)-[:WAS_RELEASED_IN]->(l:Language)
WHERE l.name = 'English' AND m.awards > 1 and m.imdbRating >= 7.0
RETURN m.title
LIMIT 3

"""


)

[{'m.title': 'Alive'}]

In [85]:
graph_db.query(
"""

MATCH (m:Movie)
WHERE m.imdbRating > 7
RETURN m.title
LIMIT 3

"""


)

[{'m.title': 'Raiders of the Lost Ark'},
 {'m.title': 'Road House'},
 {'m.title': 'The Blade'}]

In [89]:
df.head(15)

Unnamed: 0,movie_id,title,languages,awards,imdb_rating,directors,cast,genres,tagline,tagline_embedding
221,211,Raiders of the Lost Ark,English | German | Hebrew | Spanish | Arabic |...,32,8.6,Steven Spielberg,Harrison Ford | Karen Allen | Paul Freeman | R...,Action | Adventure,The movie 'Raiders of the Lost Ark' is a ('Act...,"[-0.04223256930708885, 0.12403502315282822, -0..."
25,25,Road House,English,0,7.3,Jean Negulesco,Ida Lupino | Cornel Wilde | Celeste Holm | Ric...,Action | Drama | Film-Noir,"The movie 'Road House' is a ('Action', 'Drama'...","[-0.04625450447201729, -0.10949589312076569, 0..."
1076,1036,The Recruit,English | Persian | Russian,0,6.6,Roger Donaldson,Al Pacino | Colin Farrell | Bridget Moynahan |...,Action | Crime | Drama,"The movie 'The Recruit' is a ('Action', 'Crime...","[-0.13712893426418304, -0.13582655787467957, -..."
625,605,The Blade,Cantonese,1,7.2,Hark Tsui,Wenzhuo Zhao | Xin Xin Xiong | Sonny Song | Va...,Drama | Action,"The movie 'The Blade' is a ('Drama', 'Action')...","[-0.10137008130550385, -0.14402838051319122, -..."
1310,1258,72 Meters,Russian | Ukrainian,0,6.7,Vladimir Khotinenko,Sergey Makovetskiy | Marat Basharov | Andrey K...,Drama | Action | Thriller,"The movie '72 Meters' is a ('Drama', 'Action',...","[0.03624854236841202, 0.06997304409742355, -0...."
1375,1322,Harsh Times,English | Spanish | Korean,0,7.0,David Ayer,Christian Bale | Freddy Rodrèguez | Eva Longor...,Action | Crime | Drama,"The movie 'Harsh Times' is a ('Action', 'Crime...","[-0.157694011926651, -0.03438868373632431, -0...."
747,722,Pardes,Hindi,3,7.0,Subhash Ghai,Shah Rukh Khan | Amrish Puri | Mahima Chaudhry...,Musical | Romance | Action,"The movie 'Pardes' is a ('Musical', 'Romance',...","[0.013346046209335327, 0.044293999671936035, -..."
530,513,Alive,English,2,7.0,Frank Marshall,Ethan Hawke | Vincent Spano | Josh Hamilton | ...,Action | Adventure | Biography,"The movie 'Alive' is a ('Action', 'Adventure',...","[-0.14148733019828796, 0.00758203212171793, -0..."
966,928,Proof of Life,English | Spanish | Russian | Italian | French,0,6.2,Taylor Hackford,Meg Ryan | Russell Crowe | David Morse | Pamel...,Action | Drama | Thriller,"The movie 'Proof of Life' is a ('Action', 'Dra...","[-0.08460134267807007, 0.02772543579339981, 0...."
1171,1129,Kopps,Swedish | English,5,6.8,Josef Fares,Fares Fares | Torkel Petersson | Gèran Ragners...,Action | Comedy,"The movie 'Kopps' is a ('Action', 'Comedy') fi...","[-0.05063479766249657, 0.05785154923796654, -0..."
