Install required packages

In [None]:
#!pip install pandas scikit-learn sentence-transformers pyvis

The sentence-transformers package needs a backend (torch, tensorflow, or jax) to be available.
I used torch.

In [None]:
#!pip install torch

In [None]:
import pandas as pd

In [None]:
# Location of the CSV file that holds all issues.
ISSUES_FILE = './all_issues.csv'
# Parse the file into a DataFrame and preview its first rows.
issue_data = pd.read_csv(filepath_or_buffer=ISSUES_FILE)
issue_data.head(n=5)

In [None]:
# Build the two derived columns used by the rest of the notebook:
#   "name" - "<ID> - <WG>"
#   "text" - "<Title> - <Description>"
# Each part is converted to a string with missing values blanked first.
# Plain `+` concatenation would otherwise turn the whole result into NaN as
# soon as one part is missing (leaving a non-string value in "text"), and
# would raise if a part such as ID were parsed as a number.
def _as_text(column):
    """Return `column` with missing values replaced by '' and the rest cast to str."""
    return column.map(lambda value: "" if pd.isna(value) else str(value))


issue_data["name"] = _as_text(issue_data["ID"]) + " - " + _as_text(issue_data["WG"])
issue_data["text"] = _as_text(issue_data["Title"]) + " - " + _as_text(issue_data["Description"])
issue_data.head()

In [None]:
# Keep an untouched copy of the full table before removing duplicates.
issue_data_all = issue_data.copy()
# Retain the first row of every (ID, WG) pair and renumber the index 0..n-1.
issue_data = issue_data.drop_duplicates(subset=['ID', 'WG'], ignore_index=True)
issue_data.head()

In [None]:
from sentence_transformers import SentenceTransformer

# Instantiate the sentence-embedding model identified by its model name.
model = SentenceTransformer(model_name_or_path='all-MiniLM-L12-v1')

In [None]:
# Embed all issue texts with a single call: passing the whole list lets the
# model process the sentences in batches instead of being invoked once per
# row through `Series.apply`.
text_embeddings = model.encode(issue_data["text"].tolist())
# Iterating over the result yields one vector per input text, so the column
# still stores one embedding per issue row.
issue_data["Embedding"] = list(text_embeddings)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Gather the per-issue vectors as a list of rows and compute the pairwise
# cosine similarity between every pair of issues.
embedding_rows = list(issue_data["Embedding"])
similarity = cosine_similarity(embedding_rows)

In [None]:
# Show the distinct values found in the WG column.
pd.unique(issue_data['WG'])

In [None]:
# Node palette: the 6-class diverging "Spectral" scheme from
# https://colorbrewer2.org/#type=diverging&scheme=Spectral&n=6
# One hex color per WG value; keys are matched exactly against the WG column.
color_by_WG = {
    'technical': '#d53e4f',
    'social': '#fc8d59',
    'Ethics': '#fee08b',
    'ethics / healthcare': '#e6f598',
    'radiologists': '#99d594',
    'healthcare': '#3288bd',
}


def _wg_to_color(wg):
    """Return the hex color assigned to `wg`; raises KeyError if it is unmapped."""
    return color_by_WG[wg]


issue_data['color'] = issue_data['WG'].map(_wg_to_color)
issue_data.head()

In [None]:
from pyvis import network as net
import numpy as np

# Directed graph in which every issue points at its single most similar issue.
top1_graph = net.Network(notebook=True, height='900px', width='100%', directed=True)
top1_graph.barnes_hut(spring_length=100, spring_strength=0.1, central_gravity=8, overlap=1)
# Alternative layout: top1_graph.force_atlas_2based(spring_length=10)

# Node ids are row positions (0..n-1) because the edge loop below indexes the
# similarity matrix by position. Plain lists keep ids and attributes aligned
# even if the DataFrame index is not a clean 0..n-1 range.
node_ids = list(range(len(issue_data)))
top1_graph.add_nodes(
    node_ids,
    title=issue_data['text'].tolist(),
    label=issue_data['name'].tolist(),
    color=issue_data['color'].tolist(),
)

for src in node_ids:
    # Rank every issue by descending similarity to `src`, then remove `src`
    # itself explicitly: with tied scores (e.g. identical texts) it is not
    # guaranteed to be ranked first, so slicing from position 1 could keep a
    # self-loop and drop the real nearest neighbour.
    ranked = np.argsort(-similarity[src])
    nearest = ranked[ranked != src][:1]  # the single closest other issue
    for dest in nearest:
        top1_graph.add_edge(src, int(dest), value=float(similarity[src, dest]), arrowStrikethrough=False)

top1_graph.show("top-1.html")



In [None]:
# Directed graph in which every issue points at its two most similar issues.
top2_graph = net.Network(notebook=True, height='900px', width='100%', directed=True)
top2_graph.barnes_hut(spring_length=100, spring_strength=0.1, central_gravity=2, overlap=1)
# Alternative layout: top2_graph.force_atlas_2based(spring_length=10)

# Node ids are row positions (0..n-1) because the edge loop below indexes the
# similarity matrix by position. Plain lists keep ids and attributes aligned
# even if the DataFrame index is not a clean 0..n-1 range.
node_ids = list(range(len(issue_data)))
top2_graph.add_nodes(
    node_ids,
    title=issue_data['text'].tolist(),
    label=issue_data['name'].tolist(),
    color=issue_data['color'].tolist(),
)

for src in node_ids:
    # Rank every issue by descending similarity to `src`, then remove `src`
    # itself explicitly: with tied scores (e.g. identical texts) it is not
    # guaranteed to be ranked first, so slicing from position 1 could keep a
    # self-loop and drop a real neighbour.
    ranked = np.argsort(-similarity[src])
    nearest = ranked[ranked != src][:2]  # the two closest other issues
    for dest in nearest:
        top2_graph.add_edge(src, int(dest), value=float(similarity[src, dest]), arrowStrikethrough=False)

top2_graph.show("top-2.html")