In [1]:
import os

import pandas as pd
import tiktoken

from graphrag.query.indexer_adapters import (
    read_indexer_communities,
    read_indexer_entities,
    read_indexer_reports,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
api_key = "sk-proj-xtjiZv7zhvKo_vCNLAqsuAy7LGEUf0MkadEnLkQqblBSWtXlLhlOdR1BIrcm5G6Kzk9Y1MWfmHT3BlbkFJAW0ctApfiRXazPzI43QxxXxR-0sYgXDy8MgKosSYC-scDqCk3cJwD39K3hq7KXoXYvD9bw-SwA"
llm_model = "gpt-3.5-turbo"

llm = ChatOpenAI(
    api_key=api_key,
    model=llm_model,
    api_type=OpenaiApiType.OpenAI,  # OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI
    max_retries=20,
)

token_encoder = tiktoken.encoding_for_model(llm_model)

In [10]:
# parquet files generated from indexing pipeline
INPUT_DIR = "/home/Nema/UniCRS_GraphRAG/GraphRAG/output/successful_20250129-110435/artifacts/"
COMMUNITY_TABLE = "create_final_communities"
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_entities"
NODES_TABLE = "create_final_nodes"

# community level in the Leiden community hierarchy from which we will load the community reports
# higher value means we use reports from more fine-grained communities (at the cost of higher computation cost)
COMMUNITY_LEVEL = 2

In [14]:
community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
nodes_df = pd.read_parquet(f"{INPUT_DIR}/{NODES_TABLE}.parquet")

communities = read_indexer_communities(community_df, nodes_df, report_df)
reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)
entities = read_indexer_entities(nodes_df, community_df, COMMUNITY_LEVEL)

print(f"Total report count: {len(report_df)}")
print(
    f"Report count after filtering by community level {COMMUNITY_LEVEL}: {len(reports)}"
)

report_df.head()

Total report count: 2228
Report count after filtering by community level 2: 1341


Unnamed: 0,id,human_readable_id,community,parent,level,title,summary,full_content,rank,rank_explanation,findings,full_content_json,period,size
0,a6d11470f41f4ce0857ee2c9ec2f9a70,2200,2200,1618,4,Star Wars (1977) and George Lucas' Impact,This community revolves around the iconic movi...,# Star Wars (1977) and George Lucas' Impact\n\...,9.0,The impact severity rating is high due to the ...,[{'explanation': 'Star Wars (1977) stands out ...,"{\n ""title"": ""Star Wars (1977) and George L...",2025-01-29,4
1,aa3e76b5eb8a4cbaa7ed616a86ed9700,2201,2201,1618,4,Exploring Robots in Cinema: Bicentennial Man (...,This community delves into the portrayal of ro...,# Exploring Robots in Cinema: Bicentennial Man...,7.5,The impact severity rating reflects the cultur...,[{'explanation': 'Bicentennial Man (1999) stan...,"{\n ""title"": ""Exploring Robots in Cinema: B...",2025-01-29,3
2,bbdd83641cd24effa306de3ca7bf2c87,2202,2202,1618,4,Star Wars Saga and George Lucas' Impact,This community revolves around the iconic Star...,# Star Wars Saga and George Lucas' Impact\n\nT...,9.0,The impact severity rating is high due to the ...,[{'explanation': 'The Empire Strikes Back (198...,"{\n ""title"": ""Star Wars Saga and George Luc...",2025-01-29,4
3,deb2fc15b3134a3b81994c8b7034a630,2203,2203,1622,4,Star Wars Franchise and Its Impact on Sci-Fi C...,This community revolves around the iconic Star...,# Star Wars Franchise and Its Impact on Sci-Fi...,9.2,The impact severity rating is high due to the ...,[{'explanation': 'The Empire Strikes Back is a...,"{\n ""title"": ""Star Wars Franchise and Its I...",2025-01-29,8
4,e1ad5a88df374a7fb0d4b37cc717189b,2204,2204,1622,4,Let It Go and Its Movie Community Connections,This community revolves around the song Let It...,# Let It Go and Its Movie Community Connection...,7.5,The impact severity rating reflects the signif...,[{'explanation': 'Let It Go serves as the foca...,"{\n ""title"": ""Let It Go and Its Movie Commu...",2025-01-29,2


In [15]:
context_builder = GlobalCommunityContext(
    community_reports=reports,
    communities=communities,
    entities=entities,  # default to None if you don't want to use community weights for ranking
    token_encoder=token_encoder,
)

In [16]:
context_builder_params = {
    "use_community_summary": False,  # False means using full community reports. True means using community short summaries.
    "shuffle_data": True,
    "include_community_rank": True,
    "min_community_rank": 0,
    "community_rank_name": "rank",
    "include_community_weight": True,
    "community_weight_name": "occurrence weight",
    "normalize_community_weight": True,
    "max_tokens": 12_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)
    "context_name": "Reports",
}

map_llm_params = {
    "max_tokens": 1000,
    "temperature": 0.0,
    "response_format": {"type": "json_object"},
}

reduce_llm_params = {
    "max_tokens": 2000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500)
    "temperature": 0.0,
}

In [17]:
search_engine = GlobalSearch(
    llm=llm,
    context_builder=context_builder,
    token_encoder=token_encoder,
    max_data_tokens=12_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)
    map_llm_params=map_llm_params,
    reduce_llm_params=reduce_llm_params,
    allow_general_knowledge=False,  # set this to True will add instruction to encourage the LLM to incorporate general knowledge in the response, which may increase hallucinations, but could be useful in some use cases.
    json_mode=True,  # set this to False if your LLM model does not support JSON mode.
    context_builder_params=context_builder_params,
    concurrent_coroutines=32,
    response_type="multiple paragraphs",  # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report
)

In [18]:
query = r"Can you get me recommendations for this dialogue? DIALOGUE -> System: helloUser: Hello! I want to go see a movie with some junior high kids this weekend. Rated PG is good, maybe a PG13. No rated R. There is a mix of boys and girls. I think most already saw A Wrinkle in Time and liked it.System: how about The Strangers: Prey at Night (2018) it is a good horror movieUser: What rating is it?System: Red Sparrow (2018) is also the sameUser: I want something for 10 to 13 year olds. Not a rated R Again, Red Sparrow (2018) is a rated R.System: ohhh sorry! how about Gigantic (2018)? i hope you would watch thisUser: What is that about?System: It is Animation movieUser: It doesn't seem to be playing in my area, I can't seem to find anything about it. . The Prestige (2006) is a fun one too.System: Okay well I will definitely look it into that one, are there any other movies? I love The Prestige (2006) with Hugh Jackman! I love The Illusionist (2010) because it reminds me of it as well but betterUser: Oh, one of my favorite movies that not many people have heard of is Lucky Number Slevin (2006)System: I have heard of it but have never seen that movie, what is that one about?User: It has Bruce Willis, Morgan Freeman, and Josh Hartnett. It's about a young guy who gets caught in the middle of a fight between two high level crime families. There's lots of things going on, so it's a little hard to describe.System: I love Morgan Freeman, but it sounds very good, I think I will check that one out it sounds pretty good! Thank you so much! System: Hi there, what type of movies can I recommend to you?User: I like horror movies like The Babadook (2014)System: so you like horror movies with a bit of humor? Have you ever seen Scream (1996)? or Scary Movie (2000)User: i have not seen Scream (1996) is it more horror or more funny?System: It is more horror, but personally, I find the gor a bit on the silly side. Scary Movie (2000) is funny/scary If you are looking for something more scary, have you seen Oculus (2013)? System: Hi how are you tonightUser: Hello. I am good and how are you?System: I'm okay. I'm looking for a movie for when me and my friends go out this weekend.User: Bad Moms (2016) is a good one. Mean Girls (2004) was a great chick flick.System: Oh I bet it is a good one for me and my friends to see. We did see How to Lose a Guy in 10 Days (2003) last week and laughed all the way through it.User: I haven't seen that one. I will check it out.System: We will try to see Bad Moms but is there anything newer?'= System: hiUser: Hey.System: What are you looking for?User: Can you recommend a good animated movie? I liked Wreck-It Ralph (2012) and Inside Out (2015).System: I haven't seen Finding Nemo (2003) but I heard it is good.User: I've seen that one. It's really good.System: I haven't seen Wreck-It Ralph (2012) or Inside Out (2015), but they sound good.User: They're both good movies.System: I loved Aladdin (1992). Robin Williams was great. I enjoy the funny animated ones. Toy Story (1995) was good too and very funny. System: Hi what kind of movie were you thinking about watching today System: Hi there.User: HI! I need a good laugh. Any good comedies to watch?System: Okay, have you seen Groundhog Day (1993)? It's an older one that's really good.User: Believe it or not I have not seen that. lol the last one I saw in a theater was A Bad Moms Christmas (2017)System: Or how about Ghostbusters (1984), which is also a classic.User: I did see that one and it was good. I have not seen the newer Ghostbusters (2016). System: Hello! System: Good evening. What kind of movie are you interested in this evening?User: hello I am interested in action movies such as Need for Speed (2014), 2 Fast 2 Furious (2003)" 

In [20]:
result = await search_engine.asearch(query)

print(result.response)

### Movie Recommendations for Junior High Kids

Based on the dialogue provided, the system initially recommended movies like *The Strangers: Prey at Night (2018)* and *Red Sparrow (2018)*, which are both rated R, not suitable for junior high kids seeking PG or PG-13 movies [Data: Reports (51, 1029, 60, 636, 60, 636, 60, 636, 60, 636)]. The user clarified the need for movies suitable for 10 to 13-year-olds, leading to the suggestion of *Gigantic (2018)*, an animated movie, as a more appropriate option [Data: Reports (51, 1029, 60, 636, 60, 636, 60, 636, 60, 636]. However, the user couldn't find information about *Gigantic (2018)* and mentioned enjoying *The Prestige (2006)* as an alternative [Data: Reports (51, 1029, 60, 636, 60, 636, 60, 636, 60, 636]. The user also recommended *Lucky Number Slevin (2006)*, a lesser-known favorite involving high-level crime families, which piqued the system's interest [Data: Reports (51, 1029, 60, 636, 60, 636, 60, 636, 60, 636].

### Horror Movie Reco

In [21]:
# inspect number of LLM calls and tokens
print(
    f"LLM calls: {result.llm_calls}. Prompt tokens: {result.prompt_tokens}. Output tokens: {result.output_tokens}."
)

LLM calls: 55. Prompt tokens: 691717. Output tokens: 23902.


In [22]:
print(result)

GlobalSearchResult(response="### Movie Recommendations for Junior High Kids\n\nBased on the dialogue provided, the system initially recommended movies like *The Strangers: Prey at Night (2018)* and *Red Sparrow (2018)*, which are both rated R, not suitable for junior high kids seeking PG or PG-13 movies [Data: Reports (51, 1029, 60, 636, 60, 636, 60, 636, 60, 636)]. The user clarified the need for movies suitable for 10 to 13-year-olds, leading to the suggestion of *Gigantic (2018)*, an animated movie, as a more appropriate option [Data: Reports (51, 1029, 60, 636, 60, 636, 60, 636, 60, 636]. However, the user couldn't find information about *Gigantic (2018)* and mentioned enjoying *The Prestige (2006)* as an alternative [Data: Reports (51, 1029, 60, 636, 60, 636, 60, 636, 60, 636]. The user also recommended *Lucky Number Slevin (2006)*, a lesser-known favorite involving high-level crime families, which piqued the system's interest [Data: Reports (51, 1029, 60, 636, 60, 636, 60, 636, 6