In [None]:
%pip install neo4j graphistry[bolt]

In [None]:
import os
import warnings
import neo4j
from neo4j import GraphDatabase, Driver
import graphistry
import pandas as pd


# Silences every Python warning for the rest of the kernel session.
warnings.filterwarnings("ignore")

# Neo4j connection settings. Each value can be overridden through an environment
# variable of the same name; the defaults are the values this notebook was written with.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://neo4j.neo4j.svc.cluster.local:7687")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE", "neo4j")
# Keyword arguments for GraphDatabase.driver(**NEO4J_CREDS).
NEO4J_CREDS = {'uri': NEO4J_URI, 'auth': (NEO4J_USER, NEO4J_PASSWORD)}

# os.getenv returns None for an unset variable; say so here instead of letting the
# first query fail with a less obvious authentication error.
if not NEO4J_PASSWORD:
    print("WARNING: NEO4J_PASSWORD is not set; connecting to Neo4j will probably fail")

# Record the library versions alongside the results.
print('neo4j', neo4j.__version__)
print('graphistry', graphistry.__version__)

In [None]:
def neo4j_results_to_df(results):
    """Build a pandas DataFrame with one row per record in ``results``.

    Each record is passed through ``dict()``, so its keys become the column
    names and its values the cells of that row.
    """
    rows = []
    for record in results:
        rows.append(dict(record))
    return pd.DataFrame(rows)

In [None]:
driver = GraphDatabase.driver(**NEO4J_CREDS)

# Fetch every relationship together with both of its endpoint nodes.
cypher_query = """
MATCH (n)-[r]->(m) 
RETURN n, r, m
"""
# Target the configured database explicitly rather than the server default.
with driver.session(database=NEO4J_DATABASE) as session:
    results = session.run(cypher_query)
    # Convert to a DataFrame while the session is still open.
    df = neo4j_results_to_df(results)

nodes_list = []    # one record per distinct node: {'id', 'properties'}
edges_list = []    # one record per result row: {'source', 'target', 'properties'}
node_mapping = {}  # str(node) -> sequential integer id


def _node_id(node):
    """Return the sequential id assigned to ``node``, registering it on first sight."""
    # Nodes are deduplicated by their string form, computed once per lookup.
    key = str(node)
    if key not in node_mapping:
        node_mapping[key] = len(node_mapping)
        nodes_list.append({'id': node_mapping[key], 'properties': node})
    return node_mapping[key]


# itertuples yields nothing for an empty result, so an empty graph is handled too.
for row in df.itertuples(index=False):
    # The source node is registered before the target, keeping ids in encounter order.
    edges_list.append({
        'source': _node_id(row.n),
        'target': _node_id(row.m),
        'properties': row.r,
    })

# Explicit columns keep the frame schema stable even when the query returns no rows.
nodes_df = pd.DataFrame(nodes_list, columns=['id', 'properties'])
edges_df = pd.DataFrame(edges_list, columns=['source', 'target', 'properties'])

print(nodes_df.head())
print(edges_df.head())


In [78]:
print("Nodes count:", len(nodes_df))
print("Edges count:", len(edges_df))

# Use string ids on both frames so edge endpoints compare equal to node ids.
edges_df['source'] = edges_df['source'].astype(str)
edges_df['target'] = edges_df['target'].astype(str)
nodes_df['id'] = nodes_df['id'].astype(str)

# Flatten each entity's properties into real columns: the bindings below refer to
# 'name', 'type' and 'weight', which only exist once the 'properties' objects are expanded.
# dict() relies on Neo4j nodes/relationships exposing their properties as a mapping.
# New frames are built instead of overwriting nodes_df/edges_df, so the cell can be re-run.
node_props = pd.DataFrame([dict(p) for p in nodes_df['properties']], index=nodes_df.index)
edge_props = pd.DataFrame([dict(p) for p in edges_df['properties']], index=edges_df.index)
# Structural columns are assigned last, so they win over a property with the same name.
nodes_plot_df = node_props.assign(id=nodes_df['id'])
edges_plot_df = edge_props.assign(source=edges_df['source'], target=edges_df['target'])

# Bind your nodes and edges to the Graphistry plotter
plotter = graphistry.bind(source="source", destination="target", node='id')

# Optional display bindings are applied only when the data provides the column.
node_display = {'point_title': 'name'}
edge_display = {'edge_title': 'type', 'edge_weight': 'weight'}
plotter = plotter.bind(**{
    binding: column for binding, column in node_display.items()
    if column in nodes_plot_df.columns
})
plotter = plotter.bind(**{
    binding: column for binding, column in edge_display.items()
    if column in edges_plot_df.columns
})

plotter = plotter.nodes(nodes_plot_df).edges(edges_plot_df)

# Visualize!
plotter.plot()

Failed memoization speedup attempt due to Pandas internal hash function failing. Continuing without memoization speedups.This is fine, but for speedups around skipping re-uploads of previously seen tables, try identifying which columns have types that Pandas cannot hash, and convert them to hashable types like strings.

   id      name                                          embedding platform  \
0   0      leku                                                NaN      NaN   
1   1       NaN  [0.00042079141712747514, 0.015910571441054344,...      IRC   
2   2  #𝓉𝓌𝑒𝓇𝓀𝒾𝓃                                                NaN      NaN   
3   3  chasebot                                                NaN      NaN   
4   4       NaN  [0.00042079141712747514, 0.015910571441054344,...      IRC   

                            content                         timestamp  
0                               NaN                               NaN  
1  chasebot: whats going onasdfadsf  2024-03-31T15:47:43.883680+00:00  
2                               NaN                               NaN  
3                               NaN                               NaN  
4                        bugs fixed  2024-03-31T15:49:20.825651+00:00  
   source  target  weight       type
0       0       1     NaN        NaN
1       1       2  

In [None]:
# Register Graphistry credentials and hand it the Neo4j driver through `bolt`.
# NOTE(review): the key id literal is kept only as a fallback so existing setups keep
# working; prefer setting GRAPHISTRY_PERSONAL_KEY_ID and removing it from the notebook.
graphistry_key_id = os.getenv('GRAPHISTRY_PERSONAL_KEY_ID', 'MFX7K832K8')
graphistry_key_secret = os.getenv('GRAPHISTRY_API_KEY')

# os.getenv returns None for an unset variable; report it here rather than at plot time.
if not graphistry_key_secret:
    print("WARNING: GRAPHISTRY_API_KEY is not set; Graphistry uploads will probably fail")

graphistry.register(
    bolt=driver,
    personal_key_id=graphistry_key_id,
    personal_key_secret=graphistry_key_secret)

In [None]:
# Run the schema-visualization procedure and plot what it returns.
schema_graph = graphistry.cypher("CALL db.schema.visualization()")
schema_graph.plot()

In [None]:
# Plot a 25-relationship sample of the graph straight from a Cypher query.
try:
    # Return whole nodes and relationships rather than scalar projections
    # (n.name AS nodeName, ...): the columns used below, such as 'embedding', come
    # from node properties.
    # NOTE(review): presumably graphistry.cypher builds its graph only from returned
    # entities, so a scalar-only query would leave nothing to plot - confirm.
    result = graphistry.cypher("""
        MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 25
    """)

    print(result)  # Or inspect the structure/type of result

    # The plotter keeps its node table on `_nodes`; the plotter itself is not subscriptable.
    nodes = result._nodes
    if nodes is not None and 'embedding' in nodes.columns:
        # Stringify the 'embedding' values (as the original cell intended) on a new
        # frame instead of mutating the plotter's table in place.
        result = result.nodes(nodes.assign(embedding=nodes['embedding'].map(str)))

    # NOTE(review): plot()'s return value is discarded inside this try block; if nothing
    # renders, make the plot call the last expression of the cell - confirm.
    result.plot()
except Exception as e:
    # Best-effort cell: report the failure and let the notebook continue.
    print(f"Error: {e}")