In [13]:
# install/upgrade the LangChain packages imported in the next cell
# NOTE(review): versions are unpinned and --upgrade is used, so a re-run may pull different releases — consider pinning
%pip install --upgrade --quiet langchain langchain-community langchain-experimental


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [14]:
import os
import re
import requests
import html
import networkx as nx
import plotly.graph_objects as go
import numpy as np
# langchains
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain.docstore.document import Document

In [15]:
def get_json_request(url, timeout=30):
  """Fetch `url` with an HTTP GET and return the decoded JSON body.

  Args:
    url: address to request.
    timeout: seconds to wait for the server before giving up; without a
      timeout `requests.get` can block indefinitely.

  Returns:
    The parsed JSON payload, as produced by `Response.json()`.

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status.
    requests.RequestException: on connection problems or when the timeout expires.
  """
  response = requests.get(url, timeout=timeout)
  # fail on HTTP errors instead of trying to parse an error page as JSON
  response.raise_for_status()
  return response.json()

# one HTML tag: "<", the shortest run of characters, ">".
# DOTALL lets "." match newlines, so a tag broken across lines is still removed.
_HTML_TAG_PATTERN = re.compile(r"<.*?>", re.DOTALL)

def strip_html(text):
  """ remove HTML tags from a string

  Args:
    text: the markup to clean; anything that is not a `str` (e.g. None)
      is treated as "no text".

  Returns:
    `text` with every `<...>` span deleted, or "" when `text` is not a string.
    Entities such as `&amp;` are left untouched.
  """
  if not isinstance(text, str):
    return ""
  return _HTML_TAG_PATTERN.sub("", text)

def _to_float(value, default=0.0):
  """ convert `value` to float, returning `default` when it is None or not numeric """
  if value is None:
    return default
  try:
    return float(value)
  except (TypeError, ValueError):
    # e.g. an empty string or other non-numeric placeholder
    return default

def preprocess_events(events):
  """ construct dictionary from event data

  Args:
    events: iterable of raw event records (mappings). Every key read below
      must be present; a missing key raises KeyError.

  Returns:
    A list with one dict per event holding the selected fields. The
    description has its HTML tags removed, and latitude/longitude are
    floats (0.0 when the raw value is None or cannot be parsed).
  """
  return [
    {
      "title": event["title"],
      "group_title": event["group_title"],
      "url": event["url"],
      "description": strip_html(event["description"]),
      "date": event["date"],
      "date_time": event["date_time"],
      "location": event["location"],
      "location_title": event["location_title"],
      "location_latitude": _to_float(event["location_latitude"]),
      "location_longitude": _to_float(event["location_longitude"]),
      "cost": event["cost"],
      "thumbnail": event["thumbnail"],
      "event_types": event["event_types"],
      "event_types_audience": event["event_types_audience"],
    }
    for event in events
  ]

def transform_event_to_sentence(event):
  """ describe one event record as a short English paragraph

  Args:
    event: mapping with any of the keys "title", "group_title", "date",
      "date_time", "location", "location_title", "cost", "description",
      "event_types", "event_types_audience" and "url". Missing, None or
      empty values are skipped (a missing cost is reported as FREE).

  Returns:
    The assembled text with HTML entities (e.g. `&amp;`) unescaped.
  """
  # extract fields from the event record
  title = event.get("title")
  group_title = event.get("group_title")
  date = event.get("date")
  date_time = event.get("date_time")
  location = event.get("location")
  # `or ""` also covers a key that is present but holds None
  description = (event.get("description") or "").strip()
  location_title = event.get("location_title")
  cost = event.get("cost")
  event_types = event.get("event_types")
  event_types_audience = event.get("event_types_audience")
  url = event.get("url")

  # opening sentence: subject plus whichever of organizer/date are known,
  # so a missing organizer or date never leaves a dangling "and" or no period
  subject = f"The event titled '{title}'" if title else "The event with no title"
  clauses = []
  if group_title:
    clauses.append(f"is organized by {group_title}")
  if date:
    clauses.append(f"is scheduled to take place on {date}")
  opening = f"{subject} {' and '.join(clauses)}." if clauses else f"{subject}."

  parts = [opening]
  if date_time:
    parts.append(f"At {date_time}.")
  if location:
    parts.append(f"The event will be held at {location}.")
  if location_title:
    parts.append(f"({location_title}).")
  parts.append(f"The cost for attending is {cost}." if cost else "The cost for attending is FREE.")
  if description:
    parts.append(f"Description: {description}.")
  if event_types:
    # only the first listed type is mentioned
    parts.append(f"This event is categorized under {event_types[0]}.")
  if event_types_audience:
    parts.append(f"The intended audience for this event is for {','.join(event_types_audience)}.")
  if url:
    parts.append(f"For more details, you can visit the event page at {url}.")

  return html.unescape(" ".join(parts))

In [17]:
# read and write tamu events data
# NOTE(review): `index_root` is not defined in any cell visible here — confirm it is
# set before this cell runs on a fresh kernel. `file_path` is not used in this cell.
file_path = os.path.join(index_root, 'input', 'tamu_events.txt')
tamu_events_url = "https://calendar.tamu.edu/live/json/events/group"
# a timeout keeps the notebook from hanging if the calendar service stalls
tamu_events = requests.get(tamu_events_url, timeout=30)
# stop on an HTTP error instead of parsing an error page as JSON
tamu_events.raise_for_status()
data = tamu_events.json()

# transform it to sentences and store in documents
preprocessed_data = preprocess_events(data)
documents = []
for i, event in enumerate(preprocessed_data):
  sentence = transform_event_to_sentence(event)
  # events are numbered from 1 in the document text
  doc = Document(page_content=f"Event {i + 1}: {sentence}", metadata={"source": "local"})
  documents.append(doc)

In [18]:
# display the first generated document to check the sentence built for it
documents[0]

Document(metadata={'source': 'local'}, page_content="Event 1: The event titled 'Army Futures Command HMI Summit Set-up' is organized by Research Integration Center (RIC) and is scheduled to take place on November 1. The event will be held at RIC, Various. The cost for attending is FREE. For more details, you can visit the event page at https://calendar.tamu.edu/ric/event/339092-army-futures-command-hmi-summit-set-up.")

In [27]:
# First, let's try with a small sample: the first 10 documents
# chat model handed to the graph transformer (Ollama "mistral", temperature 0)
llm = ChatOllama(model="mistral", temperature=0)
llm_transformer = LLMGraphTransformer(llm=llm)
# one graph document (nodes + relationships) is produced per input document — TODO confirm against the LLMGraphTransformer docs
graph_documents = llm_transformer.convert_to_graph_documents(documents[:10])

In [30]:
# list every entity (id and type) found in the first graph document
for entity in graph_documents[0].nodes:
  print(entity.id, " , Type: ", entity.type)

Research Integration Center (RIC)  , Type:  Organization
Army Futures Command HMI Summit Set-up  , Type:  Event
FREE  , Type:  Cost
https://calendar.tamu.edu/ric/event/339092-army-futures-command-hmi-summit-set-up  , Type:  URL
RIC, Various  , Type:  Location
November 1  , Type:  Date


In [31]:
# show each relationship of the first graph document: its two endpoints, then its type
for rel in graph_documents[0].relationships:
  source_id, target_id = rel.source.id, rel.target.id
  print(source_id, " <--> ", target_id)
  print(rel.type)

Army Futures Command HMI Summit Set-up  <-->  Research Integration Center (RIC)
ORGANIZED_BY
Army Futures Command HMI Summit Set-up  <-->  November 1
SCHEDULED_ON
Army Futures Command HMI Summit Set-up  <-->  RIC, Various
LOCATION
Army Futures Command HMI Summit Set-up  <-->  FREE
HAS_CHARACTERISTIC
Army Futures Command HMI Summit Set-up  <-->  https://calendar.tamu.edu/ric/event/339092-army-futures-command-hmi-summit-set-up
HAS_URL


In [32]:
# merge the entities and relationships of all graph documents into one undirected graph
G = nx.Graph()
graph_data = graph_documents

for graph in graph_data:
  # entities first: node key is the entity id, its type is stored as the `label` attribute
  G.add_nodes_from((node.id, {"label": node.type}) for node in graph.nodes)
  # then the relationships: one edge per (source, target), relation name kept in `type`
  G.add_edges_from(
    (edge.source.id, edge.target.id, {"type": edge.type})
    for edge in graph.relationships
  )

In [34]:
# create a 3D spring layout with more separation (fixed seed => same layout on every run)
pos = nx.spring_layout(G, dim=3, seed=42, k=0.5)
# extract node positions
x_nodes = [pos[node][0] for node in G.nodes()]
y_nodes = [pos[node][1] for node in G.nodes()]
z_nodes = [pos[node][2] for node in G.nodes()]
# extract edge positions; the trailing None breaks the line between consecutive edges
x_edges = []
y_edges = []
z_edges = []

for source, target in G.edges():
    x_edges.extend([pos[source][0], pos[target][0], None])
    y_edges.extend([pos[source][1], pos[target][1], None])
    z_edges.extend([pos[source][2], pos[target][2], None])
# colour nodes by degree, normalized to [0, 1]
node_degrees = np.array([G.degree(node) for node in G.nodes()], dtype=float)
# guard the normalization: an empty graph has no min/max, and a graph whose
# nodes all share one degree would otherwise divide by zero and yield NaN colours
degree_span = node_degrees.max() - node_degrees.min() if node_degrees.size else 0.0
if degree_span > 0:
    node_colors = (node_degrees - node_degrees.min()) / degree_span
else:
    node_colors = node_degrees * 0.0
# create the trace for edges
edge_trace = go.Scatter3d(
    x=x_edges, y=y_edges, z=z_edges,
    mode='lines',
    line=dict(color='lightgray', width=0.5),
    hoverinfo='none'
)
# create the trace for nodes
node_trace = go.Scatter3d(
    x=x_nodes, y=y_nodes, z=z_nodes,
    mode='markers+text',
    marker=dict(
        size=7,
        color=node_colors,
        colorscale='Viridis',  # Use a color scale for the nodes
        colorbar=dict(
            title='Node Degree',
            thickness=10,
            x=1.1,
            # ticks sit at the ends of the normalized [0, 1] range
            tickvals=[0, 1],
            ticktext=['Low', 'High']
        ),
        line=dict(width=1)
    ),
    # each node is labelled with its own key
    text=list(G.nodes()),
    textposition="top center",
    textfont=dict(size=10, color='black'),
    hoverinfo='text'
)

In [35]:
# build the figure from the edge and node traces
fig = go.Figure(data=[edge_trace, node_trace])
# layout: title, no legend, no background panes on the three axes,
# tight margins, and a caption anchored at the paper's (0, 0) corner
fig.update_layout(
    title='3D Graph Visualization',
    showlegend=False,
    scene={axis: {"showbackground": False} for axis in ("xaxis", "yaxis", "zaxis")},
    margin={"l": 0, "r": 0, "b": 0, "t": 40},
    annotations=[
        {
            "showarrow": False,
            "text": "Interactive 3D visualization of GraphML data",
            "xref": "paper",
            "yref": "paper",
            "x": 0,
            "y": 0,
        }
    ],
)
# render the plot
fig.show()