In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from pyprojroot import here

from poorman_graphrag.docstore import build_network
from poorman_graphrag.index import GraphRAGIndex

# Load the de-duplicated GraphRAG index from <project root>/data.
index = GraphRAGIndex.load(
    here().joinpath("data", "deduplicated_index.json")
)

# Build the graph from the index with both `include_documents` and
# `include_chunks` disabled.
G = build_network(
    index,
    include_chunks=False,
    include_documents=False,
)

In [None]:
# Display every node together with its attribute dict (full dump; this can be
# long for a large graph).
G.nodes(data=True)

In [None]:
import networkx as nx
import pandas as pd

# Degree centrality of every node in G, shown as a Series ordered from the
# highest to the lowest score.
pd.Series(nx.degree_centrality(G)).sort_values(ascending=False)

In [None]:
# hvnx.draw(G, layout='kamada_kawai')


In [None]:
# Degree centrality is computed over the whole graph (so scores are normalised
# against all nodes), then only nodes whose `type` attribute is "entity" are
# kept for display.
degree_centrality = nx.degree_centrality(G)
entity_degree_centrality = {
    node: degree_centrality[node]
    for node, node_type in G.nodes(data="type")
    if node_type == "entity"
}
# An explicit dtype keeps the Series numeric even when no entity nodes exist.
pd.Series(entity_degree_centrality, dtype="float64").sort_values(ascending=False)

In [None]:
# Spot check: list the neighbours of one hard-coded node id.
# NOTE(review): the id is tied to the current data file; this cell fails if the
# node is not present in G.
list(G.neighbors("1d8dab3f31bf096124e482d8517b7087968f7592fab489cc3dd060b16869cb4a"))

In [None]:
# Spot check: show the attribute dict of one hard-coded node id.
# NOTE(review): the id is tied to the current data file; this cell fails if the
# node is not present in G.
G.nodes["c975f75bafedae8871942db0027b6ff2330ce80fb77fd06e5292224bbfddeb5c"]

In [None]:
# Keep only the connected components (of the undirected view of G) that hold
# at least three nodes.
large_components = list(
    filter(
        lambda members: len(members) >= 3,
        nx.connected_components(G.to_undirected()),
    )
)

# Flatten the retained components into a single set of node ids.
nodes_in_large_components = {
    node for members in large_components for node in members
}

# Count the connected components of the induced subgraph.
sum(
    1
    for _ in nx.connected_components(
        G.subgraph(nodes_in_large_components).to_undirected()
    )
)

In [None]:
# Use Louvain algorithm to detect communities in the subgraph of large components
# Import community detection algorithm from networkx.community
from networkx.algorithms.community import louvain_communities

# Restrict the graph to the nodes in large components and convert it to an
# undirected graph before running community detection.
subgraph = G.subgraph(nodes_in_large_components).to_undirected()

# Louvain considers nodes in a randomly shuffled order, so an unseeded run can
# return a different partition each time. Fixing the seed keeps the detected
# communities (and everything derived from them: `communities[0]`, the LLM
# summaries, the saved index) stable across Restart & Run All.
LOUVAIN_SEED = 42
communities = louvain_communities(subgraph, seed=LOUVAIN_SEED)

# Largest community first.
communities = sorted(communities, key=len, reverse=True)

In [None]:
import llamabot as lmb
from pydantic import BaseModel, Field


# Pydantic schema passed to the StructuredBot below as `pydantic_model`; it has
# a single text field, `summary`.
# (Documented with comments rather than a class docstring so the model's
# generated schema stays exactly as it was.)
class CommunitySummary(BaseModel):
    summary: str = Field(description="A summary of the community of nodes.")


# Bot that is called once per community further down; it is configured with
# the CommunitySummary schema, so its response exposes a `.summary` attribute.
community_summarizer = lmb.StructuredBot(
    model_name="gpt-4o",
    pydantic_model=CommunitySummary,
    system_prompt="You are a helpful assistant that summarizes communities of nodes in a graph.",  # noqa: E501
)

# Peek at the edges (with attributes) of the first community in `communities`.
# Raises IndexError when `communities` is empty.
print(G.subgraph(communities[0]).edges(data=True))

In [None]:
# Prompt template for one community.
#
# NOTE: the docstring below is the prompt text itself (template syntax with
# `{% for %}` loops and `{{ ... }}` placeholders), not documentation, so every
# character in it ends up in the message sent to the model.
#
# Arguments:
#   nodes: iterable of (node_id, attribute-dict) pairs.
#   edges: iterable of (source_id, target_id, attribute-dict) triples.
#
# Relations are rendered with raw node ids, so each node line is prefixed with
# its id in square brackets; without it the reader of the prompt has no way to
# connect the ids used in the relations to the named nodes.
@lmb.prompt("user")
def community_content(nodes: list, edges: list) -> str:
    """
    Here are the relations in the community:
    (Each relation refers to nodes by the ID shown in square brackets in the node list below.)

    {% for edge in edges %}
    {{ edge[0] }} --{{ edge[2].get('relation_type', '') }}--> {{ edge[1] }}
    {% endfor %}

    Here are the nodes in the community:

    {% for node_id, node_data in nodes %}
    [{{ node_id }}] ({{ node_data.get('entity_type', '') }}) {{ node_data.get('name', '') }}: {{ node_data.get('summary', '') }}
    {% endfor %}
    """  # noqa: E501

In [None]:
from poorman_graphrag.communities import Communities, Community

# One Community record (member node ids + generated summary) per detected
# community, in the same order as `communities`.
communities_to_add = []

for community in communities:
    # Build the induced subgraph view once and reuse it for nodes and edges.
    community_graph = G.subgraph(community)

    prompt = community_content(
        community_graph.nodes(data=True),
        community_graph.edges(data=True),
    )
    response = community_summarizer(prompt)

    record = Community(
        nodes=list(community_graph.nodes()),
        summary=response.summary,
    )
    communities_to_add.append(record)

In [None]:
# Register a copy of the summarized communities on the index.
index.add_communities(
    Communities(communities=list(communities_to_add))
)

In [None]:
# Inspect the index's community data after the communities were added.
index.community_index

In [None]:
# Persist the index, now including communities, next to the input file.
index.save(
    here().joinpath("data", "deduplicated_index_with_communities.json")
)

In [None]:
from pyprojroot import here

from poorman_graphrag.index import GraphRAGIndex

# Reload the index from the file written above; this rebinds `index`.
index = GraphRAGIndex.load(
    here().joinpath("data", "deduplicated_index_with_communities.json")
)

In [None]:
# Inspect the community data of the index that was just reloaded from disk.
index.community_index

In [None]:
# print(G.subgraph(nodes_in_large_components).nodes(data=True))

In [None]:
# Get all entity nodes
# Ids of all nodes whose `type` attribute equals "entity" (nodes without a
# `type` attribute yield None and are skipped).
entity_nodes = [
    node_id
    for node_id, node_type in G.nodes(data="type")
    if node_type == "entity"
]
print(f"Found {len(entity_nodes)} entity nodes")
print("\nFirst few entity nodes and their attributes:")

# Print at most the first 100 entity nodes with their attribute dicts.
for node in entity_nodes[:100]:
    node_attributes = G.nodes[node]
    print(f"\nNode: {node}")
    print(f"Attributes: {node_attributes}")

In [None]:
import json

from IPython.display import HTML

# Convert the graph to the {"nodes": [...], "links": [...]} structure consumed
# by the d3.js force layout below.
nodes = [
    {
        "id": node,
        "type": attrs.get("type", ""),
        "name": attrs.get("name", ""),
        "entity_type": attrs.get("entity_type", ""),
    }
    for node, attrs in G.nodes(data=True)
]

# NOTE(review): the prompt template earlier in this notebook reads the edge
# attribute `relation_type`, while this cell reads `type` -- confirm which key
# the edges actually carry. The link "type" is not used by the script below.
links = [
    {"source": source, "target": target, "type": data.get("type", "")}
    for source, target, data in G.edges(data=True)
]

graph_data = {"nodes": nodes, "links": links}

# The JSON is spliced into a <script> element. Node names come from the indexed
# data, so escape every "<" as \u003c: the value is unchanged for JavaScript,
# but a name containing "</script>" or "<!--" can no longer terminate or
# corrupt the script block.
graph_json = json.dumps(graph_data).replace("<", "\\u003c")

# HTML with an embedded d3.js visualization. The data is inserted by replacing
# a named placeholder instead of %-formatting, so a literal "%" in the markup
# (e.g. a CSS percentage) cannot break the substitution.
html_template = """
<div id="graph"></div>
<script src="https://d3js.org/d3.v7.min.js"></script>
<script>
const data = __GRAPH_DATA__;

const width = 800;
const height = 600;

const svg = d3.select("#graph")
    .append("svg")
    .attr("width", width)
    .attr("height", height);

// Create force simulation
const simulation = d3.forceSimulation(data.nodes)
    .force("link", d3.forceLink(data.links).id(d => d.id))
    .force("charge", d3.forceManyBody().strength(-100))
    .force("center", d3.forceCenter(width / 2, height / 2));

// Add links
const link = svg.append("g")
    .selectAll("line")
    .data(data.links)
    .join("line")
    .attr("stroke", "#999")
    .attr("stroke-opacity", 0.6);

// Add nodes
const node = svg.append("g")
    .selectAll("circle")
    .data(data.nodes)
    .join("circle")
    .attr("r", 5)
    .attr("fill", d => {
        switch(d.type) {
            case "document": return "#1f77b4";
            case "chunk": return "#ff7f0e";
            case "entity": return "#2ca02c";
            default: return "#d62728";
        }
    });

// Add node labels
const label = svg.append("g")
    .selectAll("text")
    .data(data.nodes)
    .join("text")
    .text(d => d.name || d.id.slice(0,8))
    .attr("font-size", "8px")
    .attr("dx", 8)
    .attr("dy", ".35em");

// Add tooltips
node.append("title")
    .text(d => `${d.type}\\n${d.name || d.id}`);

// Update positions on each tick
simulation.on("tick", () => {
    link
        .attr("x1", d => d.source.x)
        .attr("y1", d => d.source.y)
        .attr("x2", d => d.target.x)
        .attr("y2", d => d.target.y);

    node
        .attr("cx", d => d.x)
        .attr("cy", d => d.y);

    label
        .attr("x", d => d.x)
        .attr("y", d => d.y);
});

// Add drag behavior
node.call(d3.drag()
    .on("start", dragstarted)
    .on("drag", dragged)
    .on("end", dragended));

function dragstarted(event) {
    if (!event.active) simulation.alphaTarget(0.3).restart();
    event.subject.fx = event.subject.x;
    event.subject.fy = event.subject.y;
}

function dragged(event) {
    event.subject.fx = event.x;
    event.subject.fy = event.y;
}

function dragended(event) {
    if (!event.active) simulation.alphaTarget(0);
    event.subject.fx = null;
    event.subject.fy = null;
}
</script>
"""

# Replace only the single placeholder occurrence in the template; the inserted
# JSON itself is never rescanned.
html = html_template.replace("__GRAPH_DATA__", graph_json, 1)

HTML(html)