In [None]:
import pandas as pd
import numpy as np
import networkx as nx

df = pd.read_csv("data/filtered_triplets.csv")

# Getting vocab set: every distinct entity that occurs as a head or a tail, sorted
# so that matrix positions are stable from run to run.
nodes = sorted(set(df['head']).union(df['tail']))
# Entity name -> row/column position; a dict lookup replaces a linear list scan per row.
node_index = {name: position for position, name in enumerate(nodes)}

# Creating adjacency matrix.
# The matrix is numeric, so it only records that an edge exists (1.0); the relation
# strings from the 'type' column cannot be stored in a float array and are attached
# to the graph edges further down instead.
N = len(nodes)
adjacency_matrix = np.zeros((N, N))
for head, tail in zip(df['head'], df['tail']):
    adjacency_matrix[node_index[head], node_index[tail]] = 1

# Creating networkx graph object.
G = nx.from_numpy_array(adjacency_matrix, create_using=nx.DiGraph())
label_mapping = dict(enumerate(nodes))
G = nx.relabel_nodes(G, label_mapping)

# Attach the relation of each triplet to its edge. When the same (head, tail) pair
# occurs more than once, the last row wins.
for head, relation, tail in zip(df['head'], df['type'], df['tail']):
    G[head][tail]['relation'] = relation

In [39]:
import pandas as pd
import numpy as np
import networkx as nx

# Load the filtered (head, type, tail) triplets.
df = pd.read_csv("data/filtered_triplets.csv")

# Build a directed graph straight from the edge list; the 'type' column is exposed
# on every edge under the attribute name 'relation'.
df = df.rename(columns={'type': 'relation'})
G = nx.from_pandas_edgelist(
    df,
    source='head',
    target='tail',
    edge_attr='relation',
    create_using=nx.DiGraph(),
)

# Mirror each edge's 'relation' value into a 'label' attribute.
nx.set_edge_attributes(G, nx.get_edge_attributes(G, 'relation'), 'label')

# Export the full network as GEXF.
nx.write_gexf(G, "big_triplet_network.gexf")

In [50]:
import random

def filter_edges_by_relation(G, relation_type):
    """Return a copy of ``G`` that keeps only edges whose 'relation' equals ``relation_type``.

    ``G`` itself is left untouched. All nodes are kept, including those that end
    up without any edge. Every edge is expected to carry a 'relation' attribute;
    an edge without one raises ``KeyError``.
    """
    filtered = G.copy()
    # Decide what to drop by reading the original graph, then mutate only the copy.
    mismatched = [
        (src, dst)
        for src, dst, attrs in G.edges(data=True)
        if attrs['relation'] != relation_type
    ]
    for src, dst in mismatched:
        filtered.remove_edge(src, dst)
    return filtered

def assign_edge_colors(G, edge_color_map, default_color="grey"):
    """Set a 'color' attribute on every edge of ``G`` according to its 'relation'.

    The graph is modified in place and nothing is returned.

    Parameters
    ----------
    G : graph whose ``edges(data=True)`` yields ``(u, v, attribute_dict)`` triples.
        Every edge must carry a 'relation' attribute; a missing one raises ``KeyError``.
    edge_color_map : mapping from relation value to colour string.
    default_color : colour given to edges whose relation is not a key of
        ``edge_color_map``. Defaults to "grey".
    """
    for _src, _dst, data in G.edges(data=True):
        # The edge attribute is named 'relation'.
        data['color'] = edge_color_map.get(data['relation'], default_color)

# Relation value -> hex colour stored on matching edges. Relations that are not
# listed here get the fallback colour chosen inside assign_edge_colors.
edge_color_map = dict([
    ("subclass of", "#800080"),     # purple
    ("part of", "#008000"),         # green
    ("has part", "#00FFFF"),        # cyan
    ("instance of", "#FF00FF"),     # magenta
    ("opposite of", "#FFA500"),     # orange
    ("has effect", "#40E0D0"),      # turquoise
    ("different from", "#A52A2A"),  # brown
])

# Colour the edges of the full graph in place.
assign_edge_colors(G, edge_color_map)

def prune_graph(G, target_node, seed=None):
    """Return a copy of ``G`` in which reciprocal edge pairs are reduced to one edge.

    For every pair of nodes connected in both directions (u -> v and v -> u):

    * if neither endpoint is ``target_node``, one of the two edges is removed
      at random;
    * if one endpoint is ``target_node``, the edge pointing *into* the target is
      removed and the edge leaving it is kept. This is the same outcome as
      comparing hop distances to the target: the target is 0 hops from itself
      and the other endpoint is exactly 1 hop away, so no path search is needed.

    Side cases: a self-loop on a non-target node counts as its own reverse edge
    and is removed; a self-loop on ``target_node`` is kept.

    Parameters
    ----------
    G : directed graph providing ``copy()``, ``edges``, ``has_edge`` and
        ``remove_edge``. It is not modified.
    target_node : node whose outgoing edges are favoured.
    seed : optional seed for the random choices. With ``None`` (the default) the
        module-level ``random`` generator is used, so results differ from run to
        run unless ``random.seed`` was called beforehand.

    Returns
    -------
    The pruned copy of ``G``.
    """
    H = G.copy()
    # A private generator is only created when a seed is requested; otherwise the
    # shared module-level generator is used, exactly as an unseeded call always did.
    rng = random if seed is None else random.Random(seed)

    # Iterate over a snapshot of the edges, because edges are removed inside the loop.
    for u, v in list(H.edges):
        # Only pairs that are still connected in both directions need pruning. The
        # second visit of an already-pruned pair falls out here.
        if not (H.has_edge(u, v) and H.has_edge(v, u)):
            continue

        if u != target_node and v != target_node:
            # Neither endpoint is the target: drop one direction at random.
            if rng.choice([True, False]):
                H.remove_edge(u, v)
            else:
                H.remove_edge(v, u)
        elif u != v:
            # Exactly one endpoint is the target: keep the edge that leaves it.
            if u == target_node:
                H.remove_edge(v, u)
            else:
                H.remove_edge(u, v)

    return H


def generate_subgraph(G, target_node, radius, relation_type=None, output_dir='graphs'):
    """Write the pruned neighbourhood of ``target_node`` to a GEXF file.

    Steps, in order:

    1. prune reciprocal edge pairs with ``prune_graph`` (which works on a copy,
       so ``G`` is not modified);
    2. if ``relation_type`` is given, keep only edges with that relation;
    3. take the directed ego graph of ``target_node`` within ``radius`` hops,
       including the centre node;
    4. write it to ``<output_dir>/<target_node>_<radius>_<relation_type>.gexf``.

    Parameters
    ----------
    G : directed graph to extract from.
    target_node : centre node of the neighbourhood; also used in the file name.
    radius : maximum hop distance passed to ``nx.ego_graph``.
    relation_type : relation value to keep, or ``None`` to keep every relation
        (the file name then contains the text "None").
    output_dir : directory for the output file; created if it does not exist.
        Defaults to 'graphs'.

    Returns
    -------
    None. The result is the file written to disk.
    """
    import os  # local import so the function does not depend on another cell's imports

    H = prune_graph(G, target_node=target_node)
    if relation_type is not None:
        H = filter_edges_by_relation(H, relation_type)
    H = nx.ego_graph(H, target_node, radius=radius, center=True, undirected=False)

    # Make sure the destination exists, so a fresh checkout does not fail on the write.
    os.makedirs(output_dir, exist_ok=True)
    file_name = '{}_{}_{}.gexf'.format(target_node, radius, relation_type)
    nx.write_gexf(H, os.path.join(output_dir, file_name))

# Export two neighbourhoods of the 'COVID 19' node: a 3-hop view restricted to
# 'subclass of' edges, and a 4-hop view with every relation kept.
generate_subgraph(G, 'COVID 19', 3, 'subclass of')
generate_subgraph(G, 'COVID 19', 4)