In [2]:
import pandas as pd
import networkx as nx
import pyvis.network as net

# Step 1: Load the CSV file
file_path = "smu_final.csv"  # Replace with your actual file path

try:
    # The file is read without a header row; the four columns are named here.
    data = pd.read_csv(file_path, delimiter=',', header=None)
    data.columns = ['id', 'entity1', 'relationship', 'entity2']
    print("File loaded successfully!")
except Exception as e:
    print(f"Error loading the file: {e}")
    # Re-raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down, while a raised exception stops this cell and keeps the traceback.
    raise

# Step 2: Preprocessing
# An edge needs both endpoints, so rows missing either entity are dropped
# (they would otherwise become NaN nodes). A missing relationship becomes ''.
# All text is lower-cased for consistency.
data = data.dropna(subset=['entity1', 'entity2']).assign(
    entity1=lambda frame: frame['entity1'].astype(str).str.lower(),
    entity2=lambda frame: frame['entity2'].astype(str).str.lower(),
    relationship=lambda frame: frame['relationship'].fillna('').astype(str).str.lower(),
)

# Step 3: Network Analysis
# Build a directed graph straight from the edge list. The relationship text is
# stored as the 'relationship' edge attribute. A DiGraph holds one edge per
# ordered (entity1, entity2) pair, so a repeated pair keeps the relationship of
# its last row.
graph = nx.from_pandas_edgelist(
    data,
    source='entity1',
    target='entity2',
    edge_attr='relationship',
    create_using=nx.DiGraph(),
)

# Step 4: Reduce Complexity - Filter Graph
# Keep only the most connected entities, ranked by degree (in + out edges).
TOP_N_ENTITIES = 100
entity_degree = dict(graph.degree())
top_entities = sorted(entity_degree, key=entity_degree.get, reverse=True)[:TOP_N_ENTITIES]

# Create a subgraph with only the top entities
subgraph = graph.subgraph(top_entities)

# Ensure all node IDs are strings (relabel_nodes also returns an independent copy)
subgraph = nx.relabel_nodes(subgraph, {node: str(node) for node in subgraph.nodes})

# Step 5: Interactive Graph Visualization
# Use Pyvis for interactive graph visualization
interactive_graph = net.Network(notebook=True)

# From NetworkX to Pyvis (this already creates every node and edge)
interactive_graph.from_nx(subgraph)

# Attach the relationship text to the edges from_nx created. Calling add_edge
# a second time does not work here: on an undirected pyvis Network add_edge
# skips any pair that already exists, so the labels were never applied.
for edge_options in interactive_graph.get_edges():
    relationship = subgraph[edge_options['from']][edge_options['to']].get('relationship', '')
    if not isinstance(relationship, str):
        relationship = ''  # e.g. NaN from an empty CSV field
    edge_options['label'] = relationship  # drawn along the edge
    edge_options['title'] = relationship  # shown as a hover tooltip

# Save the graph as an HTML file for interactive viewing
interactive_graph.show("interactive_graph_with_relationships.html")


File loaded successfully!
interactive_graph_with_relationships.html


In [None]:
import pandas as pd
import networkx as nx
import pyvis.network as net

# Step 1: Load the CSV file
file_path = "smu_final.csv"  # Replace with your actual file path

try:
    # No header row in the file: columns are assigned names after reading.
    data = pd.read_csv(file_path, delimiter=',', header=None)
    data.columns = ['id', 'entity1', 'relationship', 'entity2']
    print("File loaded successfully!")
except Exception as e:
    print(f"Error loading the file: {e}")
    # exit() would shut down the notebook kernel; re-raising stops only this
    # cell and preserves the traceback for debugging.
    raise

# Step 2: Preprocessing
# Drop rows that lack an endpoint (they cannot form an edge), replace a missing
# relationship with '' and lower-case all text for consistency.
data = data.dropna(subset=['entity1', 'entity2']).assign(
    entity1=lambda frame: frame['entity1'].astype(str).str.lower(),
    entity2=lambda frame: frame['entity2'].astype(str).str.lower(),
    relationship=lambda frame: frame['relationship'].fillna('').astype(str).str.lower(),
)

# Step 3: Network Analysis
# One directed edge per (entity1, entity2) pair, carrying the relationship text
# as the 'relationship' attribute. When a pair occurs in several rows the last
# row's relationship is the one kept.
graph = nx.from_pandas_edgelist(
    data,
    source='entity1',
    target='entity2',
    edge_attr='relationship',
    create_using=nx.DiGraph(),
)

# Step 4: Reduce Complexity - Filter Graph
# Focus on the most connected entities, ranked by degree (in + out edges).
TOP_N_ENTITIES = 100
entity_degree = dict(graph.degree())
top_entities = sorted(entity_degree, key=entity_degree.get, reverse=True)[:TOP_N_ENTITIES]

# Create a subgraph with only the top entities
subgraph = graph.subgraph(top_entities)

# Ensure all node IDs are strings (the relabelled graph is an independent copy)
subgraph = nx.relabel_nodes(subgraph, {node: str(node) for node in subgraph.nodes})

# Step 5: Interactive Graph Visualization
# Use Pyvis for interactive graph visualization
interactive_graph = net.Network(notebook=True, directed=True)  # Enable directed graph

# From NetworkX to Pyvis (this already creates every node and edge)
interactive_graph.from_nx(subgraph)

# Label the edges that from_nx created rather than adding them again: on a
# directed pyvis Network add_edge always appends, so a second pass drew every
# edge twice. The loop variable is also no longer called `data`, which used to
# overwrite the DataFrame of the same name.
for edge_options in interactive_graph.get_edges():
    relationship = subgraph[edge_options['from']][edge_options['to']].get('relationship', '')
    if not isinstance(relationship, str):
        relationship = ''  # e.g. NaN from an empty CSV field
    edge_options['label'] = relationship  # drawn along the edge
    edge_options['title'] = relationship  # shown as a hover tooltip

# Save the graph as an HTML file for interactive viewing
interactive_graph.show("interactive_graph_with_relationships.html")


In [35]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import re
import networkx as nx

# Step 1: Load the CSV file
file_path = "smu_final.csv"  # Replace with your file path
# The file is read without a header row, so the four columns are named here.
data = pd.read_csv(file_path, header=None).set_axis(
    ["Source ID", "Entity1", "Relationship", "Entity2"], axis=1
)

# Step 2: List of countries (including SEA countries and more variations)
countries = [
    "India", "Singapore", "China", "South Korea", "Japan", 
    "United States", "USA", "UK", "United Kingdom", "Australia", 
    "Germany", "France", "Malaysia", "Thailand", "Philippines", 
    "Vietnam", "Indonesia", "Myanmar", "Brunei", "Laos", 
    "Cambodia", "Bangladesh", "Nepal", "Pakistan"
]

# Spelling variations that denote the same country. Matches are reported under
# the canonical name so one country is not counted under two labels.
COUNTRY_ALIASES = {"USA": "United States", "UK": "United Kingdom"}

# One pre-compiled, case-insensitive, whole-word pattern per entry, in the same
# order as `countries` (the first entry that matches wins).
_COUNTRY_PATTERNS = [
    (
        re.compile(r"\b" + re.escape(country) + r"\b", re.IGNORECASE),
        COUNTRY_ALIASES.get(country, country),
    )
    for country in countries
]

# Step 3: Function to match country names using regex
def match_country(name):
    """Return the country mentioned in ``name``, or None.

    Parameters
    ----------
    name : object
        Entity text to inspect. Anything that is not a ``str`` (for example
        NaN from an empty CSV field) yields None.

    Returns
    -------
    str or None
        The canonical name of the first entry of ``countries`` (in list order)
        that occurs in ``name`` as a whole word, ignoring case. The aliases
        "USA" and "UK" are reported as "United States" and "United Kingdom".
        None when nothing matches.
    """
    if not isinstance(name, str):
        return None
    for pattern, canonical_name in _COUNTRY_PATTERNS:
        if pattern.search(name):
            return canonical_name
    return None

# Step 4: Tag every row with the country found in each entity (None if none)
data['Entity1_matched'] = data['Entity1'].map(match_country)
data['Entity2_matched'] = data['Entity2'].map(match_country)

# Keep only rows where BOTH entities matched a country: dropna removes a row
# as soon as either matched column is missing.
country_data = data.dropna(subset=['Entity1_matched', 'Entity2_matched'])

# Step 5: Count how many rows exist for each (Entity1, Entity2) country pair
relationship_data = (
    country_data
    .groupby(["Entity1_matched", "Entity2_matched"])
    .size()
    .reset_index(name="Count")
)

# Show the 20 most frequent pairs as a sanity check
print("Aggregated Relationship Data:")
print(relationship_data.sort_values(by="Count", ascending=False).head(20))

# Step 6: Data for the map - pairs whose two sides are both listed countries
map_data = relationship_data.loc[
    lambda pairs: pairs["Entity1_matched"].isin(countries)
    & pairs["Entity2_matched"].isin(countries)
]

# Show the 20 most frequent pairs that will reach the map
print("Map Data (Filtered for countries):")
print(map_data.sort_values(by="Count", ascending=False).head(20))

# Step 7: Create the choropleth map using Plotly
# A choropleth shows one value per location, but map_data holds one row per
# country PAIR, so the same Entity1 country appears on many rows. Collapse to a
# single row per country: the total relationship count plus its partner list.
country_totals = (
    map_data
    .groupby("Entity1_matched")
    .agg(
        Count=("Count", "sum"),
        Partners=("Entity2_matched", lambda partners: ", ".join(sorted(set(partners)))),
    )
    .reset_index()
)

fig = px.choropleth(
    country_totals,
    locations="Entity1_matched",  # Primary country
    locationmode="country names",  # Use country names for mapping
    color="Count",  # Color intensity based on total relationship count
    hover_name="Entity1_matched",  # Country under the cursor
    hover_data=["Partners"],  # Secondary countries it is related to
    title="Interactive Geographic Heatmap of Relationships",
    color_continuous_scale="Viridis"
)

# Step 8: Customize the map layout
fig.update_geos(
    projection_type="natural earth",  # Use a natural Earth map projection
    showcoastlines=True,  # Show coastlines
    coastlinecolor="Gray",
    showland=True,  # Highlight land
    landcolor="LightGreen",
    showocean=True,  # Highlight ocean
    oceancolor="LightBlue"
)

# Step 9: Adjust the layout
fig.update_layout(
    margin={"r": 0, "t": 40, "l": 0, "b": 0},  # Adjust map margins
    coloraxis_colorbar=dict(title="Relationship Count")  # Colorbar title
)

# Step 10: Save the interactive map as an HTML file
fig.write_html('interactive_map.html')
print("Choropleth map saved as interactive_map.html")

# Step 11: Create a network graph using NetworkX and Plotly
G = nx.Graph()

# Add edges to the network graph. The graph is undirected, so the rows (A, B)
# and (B, A) describe the same edge: their counts are summed rather than
# letting the later row overwrite the earlier one.
for source, target, count in relationship_data[
    ['Entity1_matched', 'Entity2_matched', 'Count']
].itertuples(index=False, name=None):
    if G.has_edge(source, target):
        G[source][target]['weight'] += count
    else:
        G.add_edge(source, target, weight=count)

# Step 12: Extract data for visualization
pos = nx.spring_layout(G, seed=42)  # Seeded so the layout is reproducible
# Per-edge weights; not applied to the trace below, which uses one fixed width.
edge_weights = [G[u][v]['weight'] for u, v in G.edges()]

# Edge coordinates: each edge contributes (start, end, None), the None breaking
# the line so that separate edges are not joined together.
edge_x, edge_y = [], []
for u, v in G.edges():
    x0, y0 = pos[u]
    x1, y1 = pos[v]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines'
)

# Node trace for countries. The colour scale needs data to be meaningful, so
# each node is coloured by its number of distinct neighbours.
node_names = list(G.nodes())
node_connections = [len(G[node]) for node in node_names]
node_trace = go.Scatter(
    x=[pos[node][0] for node in node_names],
    y=[pos[node][1] for node in node_names],
    text=node_names,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='Viridis',
        color=node_connections,
        size=10,
        colorbar=dict(thickness=15, title=dict(text='Node Connections'))
    )
)

# Step 13: Create the figure
fig_network = go.Figure(data=[edge_trace, node_trace],
                       layout=go.Layout(
                           showlegend=False,
                           hovermode='closest',
                           title="Interactive Network Graph of Relationships",
                           xaxis=dict(showgrid=False, zeroline=False),
                           yaxis=dict(showgrid=False, zeroline=False)
                       ))

# Step 14: Save the network graph as an HTML file
fig_network.write_html('network_graph.html')
print("Network graph saved as network_graph.html")

# Optionally display the network graph inline (for Jupyter or local environment)
# fig_network.show()


Aggregated Relationship Data:
   Entity1_matched Entity2_matched  Count
41       Singapore       Singapore     14
9          Germany           China      7
38       Singapore        Malaysia      7
33       Singapore           China      7
19           Japan     South Korea      6
55   United States           China      6
48     South Korea           Japan      4
11           India           India      4
42       Singapore     South Korea      3
40       Singapore     Philippines      3
36       Singapore           Japan      3
46       Singapore         Vietnam      3
32       Singapore          Brunei      3
3            China           China      3
17           Japan       Indonesia      3
34       Singapore           India      2
35       Singapore       Indonesia      2
1        Australia         Myanmar      2
31     Philippines           China      2
37       Singapore            Laos      2
Map Data (Filtered for countries):
   Entity1_matched Entity2_matched  Count
41       Si

In [7]:
import pandas as pd
import plotly.graph_objects as go
import networkx as nx
import re

# Step 1: Load the CSV file
file_path = "smu_final.csv"  # Replace with your file path
# No header row is read from the file; the four columns are named explicitly.
data = pd.read_csv(file_path, header=None).set_axis(
    ["Source ID", "Entity1", "Relationship", "Entity2"], axis=1
)

# Step 2: List of countries (including SEA countries and more variations)
countries = [
    "India", "Singapore", "China", "South Korea", "Japan", 
    "United States", "USA", "UK", "United Kingdom", "Australia", 
    "Germany", "France", "Malaysia", "Thailand", "Philippines", 
    "Vietnam", "Indonesia", "Myanmar", "Brunei", "Laos", 
    "Cambodia", "Bangladesh", "Nepal", "Pakistan"
]

# Alternative spellings of one country, mapped to the name it is reported
# under, so that the same country does not become two separate graph nodes.
COUNTRY_ALIASES = {"USA": "United States", "UK": "United Kingdom"}

# Whole-word, case-insensitive patterns compiled once, in `countries` order
# (earlier entries take priority when several countries occur in one text).
_COUNTRY_PATTERNS = [
    (
        re.compile(r"\b" + re.escape(country) + r"\b", re.IGNORECASE),
        COUNTRY_ALIASES.get(country, country),
    )
    for country in countries
]

# Step 3: Function to match country names using regex
def match_country(name):
    """Find the country referred to by an entity name.

    Parameters
    ----------
    name : object
        Entity text. Non-string values (such as NaN for an empty CSV field)
        never match.

    Returns
    -------
    str or None
        Canonical name of the first ``countries`` entry, in list order, that
        appears in ``name`` as a whole word regardless of case; "USA" and "UK"
        are returned as "United States" and "United Kingdom". None if there is
        no match.
    """
    if not isinstance(name, str):
        return None
    for pattern, canonical_name in _COUNTRY_PATTERNS:
        if pattern.search(name):
            return canonical_name
    return None

# Step 4: Record which country (if any) each entity refers to
data['Entity1_matched'] = data['Entity1'].map(match_country)
data['Entity2_matched'] = data['Entity2'].map(match_country)

# Only rows where BOTH entities matched survive: dropna discards a row when
# either matched column is missing.
country_data = data.dropna(subset=['Entity1_matched', 'Entity2_matched'])

# Step 5: Number of rows per (Entity1, Entity2) country pair
relationship_data = (
    country_data
    .groupby(["Entity1_matched", "Entity2_matched"])
    .size()
    .reset_index(name="Count")
)

# Step 6: Pairs whose two sides are both in the country list
map_data = relationship_data.loc[
    lambda pairs: pairs["Entity1_matched"].isin(countries)
    & pairs["Entity2_matched"].isin(countries)
]

# Step 7: Create a network graph using NetworkX and Plotly
G = nx.Graph()

# Add edges to the network graph. Because the graph is undirected, (A, B) and
# (B, A) rows are the same edge; their counts are added together instead of
# the later row replacing the earlier one.
for source, target, count in relationship_data[
    ['Entity1_matched', 'Entity2_matched', 'Count']
].itertuples(index=False, name=None):
    if G.has_edge(source, target):
        G[source][target]['weight'] += count
    else:
        G.add_edge(source, target, weight=count)

# Step 8: Extract data for visualization
pos = nx.spring_layout(G, seed=42, k=0.2, iterations=20)  # Fine-tuned layout

# Per-edge weights; not applied to the trace below, which uses one fixed width.
edge_weights = [G[u][v]['weight'] for u, v in G.edges()]

# Edge coordinates: (start, end, None) per edge; the None breaks the line so
# consecutive edges are not connected to each other.
edge_x, edge_y = [], []
for u, v in G.edges():
    x0, y0 = pos[u]
    x1, y1 = pos[v]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines'
)

# Node data is collected first and handed to the trace in one go.
node_names = list(G.nodes())
# Number of distinct neighbours of each node (a self-loop counts once)
node_connections = [len(G[node]) for node in node_names]
# Marker size grows with the neighbour count but never drops below 10
node_sizes = [max(10, connections * 5) for connections in node_connections]

# Node trace for countries (with labels); colour and size both follow the
# neighbour count so the colour bar reflects real data.
node_trace = go.Scatter(
    x=[pos[node][0] for node in node_names],
    y=[pos[node][1] for node in node_names],
    text=node_names,
    mode='markers+text',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='Viridis',
        color=node_connections,
        size=node_sizes,
        colorbar=dict(
            thickness=15,
            title=dict(
                text='Node Connections',
                side='right'
            ),
            xanchor='left'
        )
    )
)

# Step 9: Create the figure
fig_network = go.Figure(data=[edge_trace, node_trace],
                       layout=go.Layout(
                           showlegend=False,
                           hovermode='closest',
                           title="Interactive Network Graph of Relationships",
                           xaxis=dict(showgrid=False, zeroline=False),
                           yaxis=dict(showgrid=False, zeroline=False),
                           margin={"r": 0, "t": 40, "l": 0, "b": 0},  # Adjust margins
                           title_x=0.5,  # Center the title
                           font=dict(size=12),
                       ))

# Step 10: Save the network graph as an HTML file
fig_network.write_html('network_graph_clean.html')
print("Network graph with small nodes saved as network_graph_clean.html")

# Optionally display the network graph inline (for Jupyter or local environment)
# fig_network.show()


Network graph with small nodes saved as network_graph_clean.html


In [12]:
import pandas as pd
import networkx as nx
import pyvis.network as net

# Step 1: Load the CSV file
file_path = "smu_final.csv"  # Replace with your actual file path

try:
    # The file is read without a header row; column names are set afterwards.
    data = pd.read_csv(file_path, delimiter=',', header=None)
    data.columns = ['id', 'entity1', 'relationship', 'entity2']
    print("File loaded successfully!")
except Exception as e:
    print(f"Error loading the file: {e}")
    # Propagate the error: exit() would shut down the notebook kernel, while a
    # raised exception only stops this cell and shows the traceback.
    raise

# Step 2: Preprocessing
# Rows without both entities cannot form an edge and are dropped; a missing
# relationship becomes ''; all text is lower-cased for consistency.
data = data.dropna(subset=['entity1', 'entity2']).assign(
    entity1=lambda frame: frame['entity1'].astype(str).str.lower(),
    entity2=lambda frame: frame['entity2'].astype(str).str.lower(),
    relationship=lambda frame: frame['relationship'].fillna('').astype(str).str.lower(),
)

# Step 3: Network Analysis
# Directed graph built from the edge list; each edge stores its relationship
# text under the 'relationship' attribute. A pair that appears in several rows
# ends up with the relationship of its last row.
graph = nx.from_pandas_edgelist(
    data,
    source='entity1',
    target='entity2',
    edge_attr='relationship',
    create_using=nx.DiGraph(),
)

# Step 4: Reduce Complexity - Filter Graph
# Restrict the view to the most connected entities, ranked by degree
# (in + out edges).
TOP_N_ENTITIES = 100
entity_degree = dict(graph.degree())
top_entities = sorted(entity_degree, key=entity_degree.get, reverse=True)[:TOP_N_ENTITIES]

# Create a subgraph with only the top entities
subgraph = graph.subgraph(top_entities)

# Ensure all node IDs are strings (this also yields an independent copy)
subgraph = nx.relabel_nodes(subgraph, {node: str(node) for node in subgraph.nodes})

# Step 5: Interactive Graph Visualization
interactive_graph = net.Network(notebook=False, directed=True)  # Enable directed graph

# From NetworkX to Pyvis (this already creates every node and edge)
interactive_graph.from_nx(subgraph)

# Style the edges from_nx created instead of adding them a second time: on a
# directed pyvis Network add_edge always appends, so the extra pass produced a
# duplicate of every labelled edge. The loop also no longer reuses the name
# `data`, which used to overwrite the DataFrame.
for edge_options in interactive_graph.get_edges():
    relationship = subgraph[edge_options['from']][edge_options['to']].get('relationship', '')
    # Only real, non-empty text is used as a label (NaN is truthy, so a plain
    # truthiness check would let a missing value through).
    if isinstance(relationship, str) and relationship:
        edge_options.update(
            label=relationship, title=relationship,
            width=2, color='#cccccc'  # Lighter edge color for clarity
        )


# Global vis.js options: node/edge appearance, physics spacing and a fixed
# layout seed.
interactive_graph.set_options("""
var options = {
  "nodes": {
    "size": 10,
    "font": {
      "size": 14,
      "face": "Arial"
    },
    "borderWidth": 2,
    "borderWidthSelected": 4
  },
  "edges": {
    "width": 1,
    "color": {"inherit": true},
    "smooth": {
      "enabled": true,
      "type": "continuous", 
      "forceDirection": "none",
      "roundness": 0.5
    }
  },
  "physics": {
    "enabled": true,
    "barnesHut": {
      "gravitationalConstant": -2000,
      "springLength": 300
    },
    "repulsion": {
      "nodeDistance": 600,
      "edgeDistance": 500
    }
  },
  "layout": {
    "randomSeed": 4
  }
}
""")

# Save the graph as an HTML file for interactive viewing
interactive_graph.write_html("interactive_graph_spaced_out_advanced.html")
print("HTML file saved.")

File loaded successfully!
HTML file saved.
