In [17]:
import json
import networkx as nx
from collections import defaultdict
import plotly.express as px
import plotly.io as pio
import pandas as pd

# Use the white Plotly template as the default for every figure created below.
pio.templates.default = "plotly_white"


# Read the node-link JSON export. The encoding is passed explicitly so the
# result does not depend on the platform default (e.g. cp1252 on Windows),
# which can fail on or mis-decode non-ASCII characters in a UTF-8 JSON file.
with open("MC3_graph.json", encoding="utf-8") as f:
    json_data = json.load(f)

# Build the graph from the parsed data; edges="edges" tells networkx to read
# the edge list from the "edges" key of the JSON object.
G = nx.json_graph.node_link_graph(json_data, edges="edges")

In [36]:
# Tally how many nodes carry each 'type' value; nodes without the attribute
# are left out of the count.
type_counts = defaultdict(int)
for node_id, node_attrs in G.nodes(data=True):
    if 'type' in node_attrs:
        type_counts[node_attrs['type']] += 1

# Category names and their totals, in the order the types were first seen.
type_names = list(type_counts)
type_totals = list(type_counts.values())

# One bar per node type, coloured by type and labelled with its count.
fig1 = px.bar(
    x=type_names,
    y=type_totals,
    color=type_names,
    text=type_totals,
    labels={'x': 'Node Type', 'y': 'Count'},
    title='<b>Node Type Distribution</b>',
    width=800,
    height=600
)

# The legend would only repeat the x-axis categories, so it is hidden.
fig1.update_layout(
    xaxis_title="Node Type",
    yaxis_title="Count",
    font=dict(size=12),
    hovermode="x",
    showlegend=False
)

# Put the count above each bar and outline the bars in grey.
# As the last expression of the cell, the returned figure is what gets displayed.
fig1.update_traces(
    textposition='outside',
    texttemplate='%{text}',
    marker_line_width=1,
    marker_line_color='rgb(150,150,150)'
)

In [35]:
# Collect one record per node; 'type' / 'sub_type' are None when the node
# does not define the attribute.
nodes_data = [
    {
        'id': node_id,
        'type': attrs.get('type'),
        'sub_type': attrs.get('sub_type')
    }
    for node_id, attrs in G.nodes(data=True)
]

# Explicit columns keep the frame well-formed (and dropna below valid) even
# when the graph has no nodes.
df = pd.DataFrame(nodes_data, columns=['id', 'type', 'sub_type'])

# Keep only nodes that have both a type and a sub_type.
df = df.dropna(subset=['type', 'sub_type'])

# Number of nodes for every (type, sub_type) pair.
subtype_counts = df.groupby(['type', 'sub_type']).size().reset_index(name='count')

# One facet column per main type, one bar per sub_type.
fig = px.bar(
    subtype_counts,
    x='sub_type',
    y='count',
    facet_col='type',
    color='sub_type',
    text='count',
    title='<b>Node Subtype Distribution by Main Type</b>',
    labels={'sub_type': 'Subtype', 'count': ''},
    width=1000,
    height=600
)

fig.update_layout(
    showlegend=False,
    font=dict(size=12),
    hovermode="x"
)

# matches=None unlinks the x-axes so each facet lists only its own subtypes.
fig.update_xaxes(
    title_text="",
    tickangle=45,
    matches=None,
    showticklabels=True
)

# Strip title, ticks, tick labels and grid from the y-axis of EVERY facet.
# update_yaxes applies to all y-axes in the figure, so this stays correct for
# any number of node types instead of only the first three.
fig.update_yaxes(
    title_text="",
    showticklabels=False,
    ticks="",
    showgrid=False,
    matches=None
)

# Show the count above each bar and outline the bars in grey.
fig.update_traces(
    texttemplate='%{text}',
    textposition='outside',
    marker_line_color='rgb(150,150,150)',
    marker_line_width=1
)

# Facet titles are annotations of the form "type=<value>"; keep only the value.
# Splitting once preserves values that themselves contain "=".
fig.update_annotations(font_size=14)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=", 1)[-1]))

# Leave room above the plot for the title; as the last expression of the
# cell, the returned figure is what gets displayed.
fig.update_layout(margin=dict(t=100))

In [None]:
# Count edges that join an Event node and an Entity node (in either
# direction), keyed as connection_counts[event sub_type][entity sub_type].
connection_counts = defaultdict(lambda: defaultdict(int))

for u, v in G.edges():
    u_attrs = G.nodes[u]
    v_attrs = G.nodes[v]
    u_type = u_attrs.get('type')
    v_type = v_attrs.get('type')

    if u_type == 'Event' and v_type == 'Entity':
        # Event -> Entity
        event_subtype = u_attrs.get('sub_type')
        entity_subtype = v_attrs.get('sub_type')
    elif u_type == 'Entity' and v_type == 'Event':
        # Entity -> Event
        entity_subtype = u_attrs.get('sub_type')
        event_subtype = v_attrs.get('sub_type')
    else:
        continue

    # Edges whose endpoints lack a sub_type are not counted.
    if event_subtype and entity_subtype:
        connection_counts[event_subtype][entity_subtype] += 1

# Flatten the nested counts into long-form records.
rows = [
    {
        'Event Subtype': event_subtype,
        'Entity Subtype': entity_subtype,
        'Connection Count': count
    }
    for event_subtype, entities in connection_counts.items()
    for entity_subtype, count in entities.items()
]

# Fail with a clear message instead of an opaque KeyError from pivot().
if not rows:
    raise ValueError("No Event-Entity edges with sub_types were found in G")

df = pd.DataFrame(rows)

# Pivot for heatmap: rows are event subtypes, columns are entity subtypes;
# pairs that never occur become 0.
heatmap_df = df.pivot(index='Event Subtype', columns='Entity Subtype',
                      values='Connection Count').fillna(0)

fig = px.imshow(
    heatmap_df,
    labels=dict(x="Entity Subtype", y="Event Subtype", color="Connections"),
    color_continuous_scale='Viridis',
    aspect="auto",
    title='<b>Event-Entity Subtype Connections</b>',
    width=800,
    height=600
)

# Largest cell value, computed once rather than for every annotation.
max_count = heatmap_df.to_numpy().max()

# Add annotations (counts in cells); empty cells stay unlabelled.
# Viridis runs from dark purple (low) to bright yellow (high), so the upper
# half of the range needs dark text and the lower half light text.
annotations = []
for y, event_sub in enumerate(heatmap_df.index):
    for x, entity_sub in enumerate(heatmap_df.columns):
        count = heatmap_df.loc[event_sub, entity_sub]
        if count > 0:
            annotations.append(
                dict(
                    x=x, y=y,
                    text=str(int(count)),
                    xref='x', yref='y',
                    showarrow=False,
                    font=dict(color='black' if count > 0.5 * max_count else 'white')
                )
            )

# Customize layout
fig.update_layout(
    annotations=annotations,
    xaxis_title="Entity Subtype",
    yaxis_title="Event Subtype",
    font=dict(size=12),
    margin=dict(l=100, r=50, b=100, t=100)
)

# Improve readability
fig.update_xaxes(tickangle=45)
fig.update_coloraxes(colorbar_title='Connection Count')

fig.show()

In [None]:
# Count edges that join a Relationship node and an Entity node (in either
# direction), keyed as connection_counts[relationship sub_type][entity sub_type].
connection_counts = defaultdict(lambda: defaultdict(int))

for u, v in G.edges():
    u_attrs = G.nodes[u]
    v_attrs = G.nodes[v]
    u_type = u_attrs.get('type')
    v_type = v_attrs.get('type')

    if u_type == 'Relationship' and v_type == 'Entity':
        # Relationship -> Entity
        rel_subtype = u_attrs.get('sub_type')
        entity_subtype = v_attrs.get('sub_type')
    elif u_type == 'Entity' and v_type == 'Relationship':
        # Entity -> Relationship
        entity_subtype = u_attrs.get('sub_type')
        rel_subtype = v_attrs.get('sub_type')
    else:
        continue

    # Edges whose endpoints lack a sub_type are not counted.
    if rel_subtype and entity_subtype:
        connection_counts[rel_subtype][entity_subtype] += 1

# Flatten the nested counts into long-form records.
rows = [
    {
        'Relationship Subtype': rel_subtype,
        'Entity Subtype': entity_subtype,
        'Connection Count': count
    }
    for rel_subtype, entities in connection_counts.items()
    for entity_subtype, count in entities.items()
]

# Fail with a clear message instead of an opaque KeyError from pivot().
if not rows:
    raise ValueError("No Relationship-Entity edges with sub_types were found in G")

df = pd.DataFrame(rows)

# Pivot for heatmap: rows are relationship subtypes, columns are entity
# subtypes; pairs that never occur become 0.
heatmap_df = df.pivot(index='Relationship Subtype',
                      columns='Entity Subtype',
                      values='Connection Count').fillna(0)

fig = px.imshow(
    heatmap_df,
    labels=dict(x="Entity Subtype", y="Relationship Subtype", color="Connections"),
    color_continuous_scale='Viridis',
    aspect="auto",
    title='<b>Relationship-Entity Subtype Connections</b>',
    width=800,
    height=600
)

# Largest cell value, computed once rather than for every annotation.
max_count = heatmap_df.to_numpy().max()

# Add annotations (counts in cells); empty cells stay unlabelled.
# Viridis runs from dark purple (low) to bright yellow (high), so the upper
# half of the range needs dark text and the lower half light text.
annotations = []
for y, rel_sub in enumerate(heatmap_df.index):
    for x, entity_sub in enumerate(heatmap_df.columns):
        count = heatmap_df.loc[rel_sub, entity_sub]
        if count > 0:
            annotations.append(
                dict(
                    x=x, y=y,
                    text=str(int(count)),
                    xref='x', yref='y',
                    showarrow=False,
                    font=dict(color='black' if count > 0.5 * max_count else 'white')
                )
            )

# Customize layout
fig.update_layout(
    annotations=annotations,
    xaxis_title="Entity Subtype",
    yaxis_title="Relationship Subtype",
    font=dict(size=12),
    margin=dict(l=120, r=50, b=100, t=100)
)

# Improve readability
fig.update_xaxes(tickangle=45)
fig.update_coloraxes(colorbar_title='Connection Count')

fig.show()

print("\nRelationship Subtype to Entity Subtype Connection Counts:")
print(heatmap_df)

# Long-form counts ordered from the most to the least frequent pair; this
# is the input for the stacked bar chart.
df_sorted = df.sort_values(by='Connection Count', ascending=False)

# One stacked bar per relationship subtype, split by entity subtype.
fig2 = px.bar(
    df_sorted,
    x='Relationship Subtype',
    y='Connection Count',
    color='Entity Subtype',
    barmode='stack',
    title='<b>Relationship-Entity Connections by Subtype</b>',
    labels={'Connection Count': 'Connections'},
    width=800,
    height=600
)

fig2.update_layout(
    xaxis_title="Relationship Subtype",
    yaxis_title="Total Connections",
    legend_title="Entity Subtype",
    font=dict(size=12),
    hovermode="x unified"
)

fig2.update_xaxes(tickangle=45)
fig2.show()


Relationship Subtype to Entity Subtype Connection Counts:
Entity Subtype        Group  Location  Organization  Person  Vessel
Relationship Subtype                                               
AccessPermission        5.0      64.0          10.0    12.0    46.0
Colleagues              0.0       0.0           2.0    50.0     8.0
Coordinates             1.0       4.0          25.0    16.0   103.0
Friends                 0.0       0.0           0.0     4.0     0.0
Jurisdiction            0.0      17.0           8.0     0.0     1.0
Operates                1.0       3.0          22.0    21.0    33.0
Reports                 0.0       1.0          21.0    14.0    14.0
Suspicious              0.0       1.0          17.0    24.0    14.0
Unfriendly              0.0       0.0           7.0     1.0     2.0


In [41]:
# Build one record per Entity node: id, display name (falling back to
# "Entity_<id>"), sub_type (falling back to 'Unknown') and graph degree.
entity_connections = [
    {
        'Entity ID': node_id,
        'Entity Name': attrs.get('name', f'Entity_{node_id}'),
        'Entity Subtype': attrs.get('sub_type', 'Unknown'),
        'Connection Count': G.degree(node_id)
    }
    for node_id, attrs in G.nodes(data=True)
    if attrs.get('type') == 'Entity'
]

# Entities ranked from the highest to the lowest degree.
df = pd.DataFrame(entity_connections).sort_values('Connection Count', ascending=False)

# --- Table: the 20 best-connected entities ---------------------------------
table_columns = ['Entity Name', 'Entity Subtype', 'Connection Count']
print("Top 20 Most Connected Entities:")
print(df.head(20)[table_columns].to_string(index=False))

# --- Chart: the 50 best-connected entities ---------------------------------
top_entities = df.head(50)

fig = px.bar(
    top_entities,
    x='Entity Name',
    y='Connection Count',
    color='Entity Subtype',
    hover_data=['Entity Subtype', 'Connection Count'],
    labels={'Connection Count': 'Number of Connections'},
    title='<b>Top 50 Most Connected Entities</b>',
    width=1000,
    height=600
)

# Order the bars by their total height, tallest first.
fig.update_layout(
    xaxis_title="Entity Name",
    xaxis_categoryorder='total descending',
    yaxis_title="Connection Count",
    legend_title="Entity Subtype",
    hovermode="x unified",
    font=dict(size=12)
)

# Slanted, smaller tick labels so the entity names fit; grey bar outlines.
fig.update_xaxes(tickfont=dict(size=10), tickangle=45)
fig.update_traces(marker_line_width=1, marker_line_color='rgb(150,150,150)')

fig.show()

# --- Per-subtype statistics -------------------------------------------------
# Summed degree, number of entities and mean degree for each entity subtype,
# ordered by summed degree (largest first).
subtype_stats = (
    df.groupby('Entity Subtype')
    .agg(
        Total_Connections=('Connection Count', 'sum'),
        Entity_Count=('Entity ID', 'count'),
        Avg_Connections=('Connection Count', 'mean')
    )
    .reset_index()
    .sort_values('Total_Connections', ascending=False)
)

print("\nEntity Subtype Connection Statistics:")
print(subtype_stats.to_string(index=False))

# One bar per subtype showing its summed degree.
fig2 = px.bar(
    subtype_stats,
    x='Entity Subtype',
    y='Total_Connections',
    color='Entity Subtype',
    text='Total_Connections',
    labels={'Total_Connections': 'Total Connections'},
    title='<b>Total Connections by Entity Subtype</b>',
    width=800,
    height=600
)

# The legend would only repeat the x-axis categories, so it is hidden.
fig2.update_layout(
    xaxis_title="Entity Subtype",
    yaxis_title="Total Connections",
    font=dict(size=12),
    showlegend=False
)

# Print the total above each bar and outline the bars in grey.
fig2.update_traces(
    textposition='outside',
    texttemplate='%{text}',
    marker_line_width=1,
    marker_line_color='rgb(150,150,150)'
)

fig2.show()

Top 20 Most Connected Entities:
         Entity Name Entity Subtype  Connection Count
                Mako         Vessel               167
Oceanus City Council   Organization               130
     Green Guardians   Organization               127
              Remora         Vessel               122
       Reef Guardian         Vessel               110
             Neptune         Vessel               102
           Nemo Reef       Location               102
            Sentinel         Vessel                83
            EcoVigil         Vessel                72
       Himark Harbor       Location                69
  V. Miesel Shipping   Organization                67
             Horizon         Vessel                64
               Davis         Person                64
          Mrs. Money         Person                57
          The Intern         Person                55
         The Lookout         Person                53
    Paackland Harbor       Location               


Entity Subtype Connection Statistics:
Entity Subtype  Total_Connections  Entity_Count  Avg_Connections
        Vessel                868            15        57.866667
        Person                600            18        33.333333
  Organization                355             5        71.000000
      Location                352            29        12.137931
         Group                 15             5         3.000000
