In [9]:
import dotenv

# Load environment variables before anything else reads them; the
# ZHIPUAI_API_KEY fetched with os.getenv() later in this notebook is
# presumably provided this way (e.g. from a local .env file).
dotenv.load_dotenv()

import os
import sys
import logging

# Send log records of level INFO and above to stdout so they show up in the
# cell output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

from langchain_community.graphs import FalkorDBGraph
import pandas as pd

import random

In [2]:
# The IMDB CSV is looked up in the current working directory.
filename = os.path.join(os.getcwd(), "imdb_top_1000.csv")
data = pd.read_csv(filename)

# Handle for the FalkorDB graph named "imdb"; only the database name is
# specified here.
graph = FalkorDBGraph(database="imdb")

In [3]:
# Define a function to clean up each column
def clean_column(column_data):
    """Prepare a column for embedding in single-quoted Cypher string literals.

    Args:
        column_data: pandas Series holding the raw column values.

    Returns:
        A pandas Series with the same index in which missing values are
        replaced by the placeholder string "NA" and every other value is
        converted to ``str`` with backslashes and single quotes
        backslash-escaped.
    """

    def _escape(value):
        if pd.notnull(value):
            # Backslashes must be escaped before quotes: otherwise the
            # backslash inserted in front of a quote would be doubled again,
            # and a value ending in a backslash would swallow the closing
            # quote of the Cypher literal it is placed in.
            return str(value).replace("\\", "\\\\").replace("'", "\\'")
        return "NA"

    return column_data.apply(_escape)


# 'data' is the DataFrame loaded earlier with pd.read_csv(); its columns are
# cleaned in place below.

# The star columns are listed separately because the query-building loop
# iterates over them again.
star_columns = ["Star1", "Star2", "Star3", "Star4"]

# Every free-text column goes through the same cleanup function.
text_columns = ["Series_Title", "Director", "Certificate", "Runtime", "Genre"]
for column in text_columns + star_columns:
    data[column] = clean_column(data[column])

# Missing ratings become the placeholder string "NA".
data["IMDB_Rating"] = data["IMDB_Rating"].fillna("NA")

# Years that cannot be parsed as numbers are coerced to NaN and then marked
# with the sentinel -1.
numeric_years = pd.to_numeric(data["Released_Year"], errors="coerce")
data["Released_Year"] = numeric_years.fillna(-1)

# Now, iterate through the DataFrame and construct the graph.
# Each row becomes one Cypher statement that MERGEs the Movie node, its
# Director, up to four Star nodes, and the DIRECTED / STARRED_IN edges.
# NOTE: values are interpolated into the query text, so this relies on the
# text columns having been escaped by clean_column() beforehand.
for index, row in data.iterrows():
    # Skip the row if the year is invalid (unparseable years were replaced by
    # the sentinel -1 during cleanup).
    if row["Released_Year"] == -1:
        print(f"Skipping row {index} due to invalid year.")
        continue

    # Directly use the cleaned data
    movie_title = row["Series_Title"]
    director_name = row["Director"]
    certificate = row["Certificate"]
    runtime = row["Runtime"]
    genre = row["Genre"]
    imdb_rating = row["IMDB_Rating"]
    released_year = int(row["Released_Year"])

    # Create or merge the Movie node
    movie_properties = [
        f"title: '{movie_title}'",
        f"year: {released_year}",
        f"certificate: '{certificate}'",
        f"runtime: '{runtime}'",
        f"genre: '{genre}'",
    ]
    # A missing rating was filled with the string "NA" during cleanup. The
    # rating is written unquoted (numeric), so a bare NA would not be a valid
    # Cypher value; leave the property out instead.
    if imdb_rating != "NA":
        movie_properties.append(f"imdb_rating: {imdb_rating}")
    movie_node = "MERGE (m:Movie {" + ", ".join(movie_properties) + "})"

    # Create or merge the Director node
    director_node = f"MERGE (d:Director {{name: '{director_name}'}})"
    directed_by_relation = "MERGE (d)-[:DIRECTED]->(m)"

    # Create or merge the Star nodes and their relationships; stars that were
    # missing in the CSV (placeholder "NA") are skipped.
    star_merges = []
    star_relations = []
    for s, star in enumerate(star_columns, start=1):
        star_name = row[star]
        if star_name != "NA":
            star_merges.append(f"MERGE (s{s}:Star {{name: '{star_name}'}})")
            star_relations.append(f"MERGE (s{s})-[:STARRED_IN]->(m)")
    star_nodes = "\n".join(star_merges)
    starred_in_relations = "\n".join(star_relations)

    cypher_query = (
        f"{movie_node} {director_node} {directed_by_relation}\n"
        f"{star_nodes}\n{starred_in_relations}"
    )
    # Print the Cypher query
    print(cypher_query)
    graph.query(cypher_query)

# Refresh the schema once after all operations rather than per row
graph.refresh_schema()
print("Added graph documents to FalkorDB")

MERGE (m:Movie {title: 'The Shawshank Redemption', year: 1994, certificate: 'A', runtime: '142 min', genre: 'Drama', imdb_rating: 9.3}) MERGE (d:Director {name: 'Frank Darabont'}) MERGE (d)-[:DIRECTED]->(m)
MERGE (s1:Star {name: 'Tim Robbins'})
MERGE (s2:Star {name: 'Morgan Freeman'})
MERGE (s3:Star {name: 'Bob Gunton'})
MERGE (s4:Star {name: 'William Sadler'})
MERGE (s1)-[:STARRED_IN]->(m)
MERGE (s2)-[:STARRED_IN]->(m)
MERGE (s3)-[:STARRED_IN]->(m)
MERGE (s4)-[:STARRED_IN]->(m)
MERGE (m:Movie {title: 'The Godfather', year: 1972, certificate: 'A', runtime: '175 min', genre: 'Crime, Drama', imdb_rating: 9.2}) MERGE (d:Director {name: 'Francis Ford Coppola'}) MERGE (d)-[:DIRECTED]->(m)
MERGE (s1:Star {name: 'Marlon Brando'})
MERGE (s2:Star {name: 'Al Pacino'})
MERGE (s3:Star {name: 'James Caan'})
MERGE (s4:Star {name: 'Diane Keaton'})
MERGE (s1)-[:STARRED_IN]->(m)
MERGE (s2)-[:STARRED_IN]->(m)
MERGE (s3)-[:STARRED_IN]->(m)
MERGE (s4)-[:STARRED_IN]->(m)
MERGE (m:Movie {title: 'The Dark Kn

In [4]:
# 'data' was loaded and cleaned in the earlier cells; the titles are already
# escaped for use inside single-quoted Cypher strings, so they are used as-is.
movie_names = data["Series_Title"].tolist()

# Seed the generator so the synthetic people and opinions are the same on
# every run of the notebook.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Demographic data ranges and options
birth_years_range = (1950, 2005)
genders = ["Male", "Female", "Other"]
locations = ["USA", "UK", "Canada", "Australia", "France", "Germany", "India", "Japan"]

num_persons = 100
num_data_points = 1000

opinions = ["liked", "loved", "ignored", "disliked", "hated"]

# First creating Person Nodes

for i in range(1, num_persons + 1):
    person_name = f"Person {i}"
    birth_year = random.randint(*birth_years_range)
    gender = random.choice(genders)
    location = random.choice(locations)

    # MERGE on the name (instead of CREATE) so re-running this cell does not
    # add a second copy of every person; the demographics are only written
    # when the node is first created.
    person_node = (
        f"MERGE (p:Person {{name: '{person_name}'}}) "
        f"ON CREATE SET p.birthYear = {birth_year}, "
        f"p.gender = '{gender}', p.location = '{location}'"
    )
    graph.query(person_node)

# Link randomly chosen people to randomly chosen movies with an opinion edge
# (LIKED, LOVED, IGNORED, DISLIKED or HATED).
for _ in range(num_data_points):
    # The order of the random draws is: person, opinion, movie.
    reviewer = f"Person {random.randint(1, num_persons)}"
    relation = random.choice(opinions).upper()
    title = random.choice(movie_names)

    opinion_query = (
        f"MATCH (m:Movie {{title: '{title}'}}), (p:Person {{name: '{reviewer}'}})"
        f" MERGE (p)-[:{relation}]->(m)"
    )

    print(opinion_query)
    graph.query(opinion_query)


# Pick up the new Person nodes and opinion edges in the cached schema.
graph.refresh_schema()
print("Added graph documents to FalkorDB")

MATCH (m:Movie {title: 'Sweet Smell of Success'}), (p:Person {name: 'Person 42'}) MERGE (p)-[:LIKED]->(m)
MATCH (m:Movie {title: 'Eternal Sunshine of the Spotless Mind'}), (p:Person {name: 'Person 19'}) MERGE (p)-[:LIKED]->(m)
MATCH (m:Movie {title: 'The Blues Brothers'}), (p:Person {name: 'Person 8'}) MERGE (p)-[:IGNORED]->(m)
MATCH (m:Movie {title: 'Mad Max: Fury Road'}), (p:Person {name: 'Person 35'}) MERGE (p)-[:LIKED]->(m)
MATCH (m:Movie {title: 'Knives Out'}), (p:Person {name: 'Person 16'}) MERGE (p)-[:HATED]->(m)
MATCH (m:Movie {title: 'Bringing Up Baby'}), (p:Person {name: 'Person 85'}) MERGE (p)-[:LOVED]->(m)
MATCH (m:Movie {title: 'Spider-Man: Into the Spider-Verse'}), (p:Person {name: 'Person 26'}) MERGE (p)-[:LIKED]->(m)
MATCH (m:Movie {title: 'Moonrise Kingdom'}), (p:Person {name: 'Person 52'}) MERGE (p)-[:IGNORED]->(m)
MATCH (m:Movie {title: 'In the Heat of the Night'}), (p:Person {name: 'Person 1'}) MERGE (p)-[:LIKED]->(m)
MATCH (m:Movie {title: 'Coco'}), (p:Person {name

In [16]:
from langchain_community.chat_models import ChatZhipuAI
from langchain.chains import FalkorDBQAChain

# Chat model configuration: GLM-4-Flash at temperature 0, with the API key
# taken from the environment.
zhipu_api_key = os.getenv("ZHIPUAI_API_KEY")
llm = ChatZhipuAI(
    model_name="GLM-4-Flash",
    zhipuai_api_key=zhipu_api_key,
    temperature=0,
)

# Question-answering chain over the movie graph; verbose=True prints the
# generated Cypher and the retrieved context.
chain = FalkorDBQAChain.from_llm(llm, graph=graph, verbose=True)

director_question = "Which movies did Christopher Nolan direct?"
out1 = chain.run(director_question)
print(out1)



[1m> Entering new FalkorDBQAChain chain...[0m
INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/chat/completions "HTTP/1.1 200 OK"
Generated Cypher:
[32;1m[1;3mMATCH (d:Director {name: 'Christopher Nolan'})-[:DIRECTED]->(m:Movie)
RETURN m.title[0m
Full Context:
[32;1m[1;3m[['The Dark Knight'], ['Inception'], ['Interstellar'], ['The Prestige'], ['The Dark Knight Rises'], ['Memento'], ['Batman Begins'], ['Dunkirk']][0m
INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/chat/completions "HTTP/1.1 200 OK"

[1m> Finished chain.[0m
Christopher Nolan directed "The Dark Knight," "Inception," "Interstellar," "The Prestige," "The Dark Knight Rises," "Memento," "Batman Begins," and "Dunkirk."


In [17]:
# Query that has to traverse the STARRED_IN relationship.
cast_question = "Who starred in Lifeboat?"
out = chain.run(cast_question)
print(out)



[1m> Entering new FalkorDBQAChain chain...[0m
INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/chat/completions "HTTP/1.1 200 OK"
Generated Cypher:
[32;1m[1;3mMATCH (m:Movie {title: 'Lifeboat'})<-[:STARRED_IN]-(s:Star)
RETURN s.name[0m
Full Context:
[32;1m[1;3m[['Tallulah Bankhead'], ['John Hodiak'], ['Walter Slezak'], ['William Bendix']][0m
INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/chat/completions "HTTP/1.1 200 OK"

[1m> Finished chain.[0m
Tallulah Bankhead, John Hodiak, Walter Slezak, and William Bendix starred in the film Lifeboat.


In [18]:
# Questions about the synthetic Person nodes and their opinion edges.
liked_question = "Which movies did person born in year 1975 like?"
recommend_question = "Which movies should person born in year 1967 watch?"

out1 = chain.run(liked_question)
print(out1)
out2 = chain.run(recommend_question)
print(out2)



[1m> Entering new FalkorDBQAChain chain...[0m
INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/chat/completions "HTTP/1.1 200 OK"
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:LIKED]->(m:Movie) WHERE p.birthYear = 1975 RETURN m.title[0m
Full Context:
[32;1m[1;3m[['Psycho'], ['Bajrangi Bhaijaan'], ['Hauru no ugoku shiro'], ['Hannah and Her Sisters'], ['Notorious'], ['In America'], ['A Bronx Tale'], ["God's Own Country"], ['The Butterfly Effect'], ['Little Women']][0m
INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/chat/completions "HTTP/1.1 200 OK"

[1m> Finished chain.[0m
The movies that a person born in 1975 might have liked include "Psycho," "Bajrangi Bhaijaan," "Hauru no ugoku shiro," "Hannah and Her Sisters," "Notorious," "In America," "A Bronx Tale," "God's Own Country," "The Butterfly Effect," and "Little Women." These films span various genres and styles, so the preferences of an individual born in 1975 would depend on their per

In [19]:
import gradio as gr

# Relies on `chain` (the FalkorDBQAChain) having been created in an earlier cell


def ask_question(question):
    """Answer a natural-language question with the FalkorDB QA chain.

    Args:
        question: Question text as entered by the user.

    Returns:
        The value returned by ``chain.run(question)``, or a short message
        asking for input when ``question`` is ``None``, empty, or only
        whitespace.
    """
    # Blank submissions would otherwise cost an LLM and database round trip
    # for nothing.
    if question is None or not str(question).strip():
        return "Please enter a question."
    return chain.run(question)


# Widgets for the question/answer form
question_box = gr.Textbox(lines=2, placeholder="Enter your question here...")
answer_box = gr.Textbox(lines=10, label="Output")  # taller box for long answers

# Wire the form to ask_question()
iface = gr.Interface(
    fn=ask_question,
    inputs=question_box,
    outputs=answer_box,
    title="FalkorDB QA System",
    description="Ask any question related to the movie database.",
)

# Launch the Gradio app
# NOTE(review): share=True asks Gradio for a public URL in addition to the
# local one, so anyone with the link can submit questions - confirm this is
# intended.
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
INFO:httpx:HTTP Request: GET http://127.0.0.1:7860/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
INFO:httpx:HTTP Request: GET https://api.gradio.app/gradio-messaging/en "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
Running on public URL: https://56b23a9c510d9ee36b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
INFO:httpx:HTTP Request: HEAD https://56b23a9c510d9ee36b.gradio.live "HTTP/1.1 200 OK"






[1m> Entering new FalkorDBQAChain chain...[0m
INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/chat/completions "HTTP/1.1 200 OK"
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:LIKED]->(m:Movie) WHERE p.birthYear = 1975 RETURN m.title[0m
Full Context:
[32;1m[1;3m[['Psycho'], ['Bajrangi Bhaijaan'], ['Hauru no ugoku shiro'], ['Hannah and Her Sisters'], ['Notorious'], ['In America'], ['A Bronx Tale'], ["God's Own Country"], ['The Butterfly Effect'], ['Little Women']][0m
INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/chat/completions "HTTP/1.1 200 OK"

[1m> Finished chain.[0m


[1m> Entering new FalkorDBQAChain chain...[0m
INFO:httpx:HTTP Request: POST https://open.bigmodel.cn/api/paas/v4/chat/completions "HTTP/1.1 200 OK"
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:LIKED]->(m:Movie) RETURN p.name, m.title[0m
Full Context:
[32;1m[1;3m[['Person 1', 'Nuovo Cinema Paradiso'], ['Person 1', 'Drishyam'], ['Person 1', 'Drishyam'], ['Pe