In [1]:
import sys, os
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import pygraphviz as pgv
from pygraphviz import *
import numpy as np
import plotly.graph_objects as go

In [2]:
def ellipse(x_center=0, y_center=0, ax1=[1, 0], ax2=[0, 1], a=1, b=1, N=100):
    """Return N points on an ellipse as two coordinate arrays.

    Parameters
    ----------
    x_center, y_center : coordinates of the ellipse center.
    ax1, ax2 : two orthonormal 2-vectors giving the directions of the
        ellipse axes (neither default list is mutated).
    a, b : semi-axis lengths along ax1 and ax2 respectively.
    N : number of sample points; the parameter runs over [0, 2*pi]
        inclusive, so the first and last points coincide.

    Returns
    -------
    (x, y) : two arrays of length N with the point coordinates in the
        standard axes [1, 0], [0, 1] with origin (0, 0).

    Raises
    ------
    ValueError : if ax1/ax2 are not unit vectors or are not orthogonal.
    """
    ax1 = np.asarray(ax1, dtype=float)
    ax2 = np.asarray(ax2, dtype=float)
    # Compare norms with a tolerance: an exact `!= 1` test rejects valid
    # rotated axes such as [cos(t), sin(t)] whose norm is 1 only up to rounding.
    if not (np.isclose(np.linalg.norm(ax1), 1.0) and np.isclose(np.linalg.norm(ax2), 1.0)):
        raise ValueError('ax1, ax2 must be unit vectors')
    if abs(np.dot(ax1, ax2)) > 1e-06:
        raise ValueError('ax1, ax2 must be orthogonal vectors')
    t = np.linspace(0, 2 * np.pi, N)
    # ellipse parameterization with respect to the axes of directions ax1, ax2
    xs = a * np.cos(t)
    ys = b * np.sin(t)
    # rotation matrix whose columns are ax1 and ax2
    R = np.array([ax1, ax2]).T
    # rotate into the standard axes, then translate to the requested center
    xp, yp = np.dot(R, [xs, ys])
    return xp + x_center, yp + y_center

In [44]:
 # Build the word/group graph, lay it out with Graphviz and collect node geometry.
g = pgv.AGraph()

# word_data.csv: column 0 is used as the node name/label, column 1 as its hover text.
df = pd.read_csv("word_data.csv", engine='python')
arr = df.values
word_dict = {}
for row in arr:
    # Strip whitespace in place; only strings are stripped so that missing
    # values (NaN) or numeric cells do not raise AttributeError.
    for j, cell in enumerate(row):
        if isinstance(cell, str):
            row[j] = cell.strip()
    word_dict[row[0]] = row[1]
    g.add_node(row[0], label=row[0])

# groups.txt: one group per line, words separated by whitespace.
# Each line becomes a hub node n0, n1, ... connected to every word on the line.
groups_dict = {}
with open("groups.txt") as groups:  # context manager closes the file handle
    for line in groups:
        group_name = 'n' + str(len(groups_dict))
        g.add_node(group_name, label=group_name)
        groups_dict[group_name] = line.split()
        for word in groups_dict[group_name]:
            g.add_edge(word, group_name)

# Diagnostic: print nodes whose label is still the literal '\N'
# (presumably words in groups.txt that are absent from word_data.csv and were
# created implicitly by add_edge -- TODO confirm).
for n in g.nodes():
    if n.attr['label'] == '\\N':
        print(n)

# Compute node positions, then record label, position and size of every node.
g.layout()
layout = []
for n in g.nodes():
    attrs = n.attr
    pos_x, pos_y = attrs['pos'].split(",")[:2]
    label = attrs['label'].strip()
    layout.append({
        'label': label,
        'x': float(pos_x),
        'y': float(pos_y),
        'height': float(attrs['height']),
        'width': float(attrs['width']),
        # Look up the hover text with the same key that is displayed;
        # nodes without an entry (e.g. group hubs) get an empty string.
        'hover': word_dict.get(label, ""),
    })

g.draw("file.png")

In [45]:
# Render the Graphviz layout as an interactive plotly figure:
# node labels as text, node outlines as ellipses, edges as polylines.
df = pd.DataFrame.from_dict(layout)
df['label'] = df['label'].astype(str)
fig = go.Figure()
# Window of nodes whose labels are shown; defaults to all of them.
a = 0
b = len(df)

fig.add_trace(go.Scatter(
    x=df['x'].values[a:b],
    y=df['y'].values[a:b],
    mode="text",
    text=list(df['label'].values[a:b]),
    # Slice hovertext with the same window as x/y/text so entries stay aligned.
    hovertext=list(df['hover'].values[a:b]),
    hovertemplate="%{text}<br>(%{hovertext})",
    textfont=dict(
        family="sans serif",
        size=14,
        color="black"
    )
))

# Graphviz reports node width/height in inches but positions in points
# (72 points per inch), so the semi-axes are 72 * size / 2 for BOTH dimensions.
scale = 72
for _, r in df.iterrows():
    x, y = ellipse(r['x'], r['y'], a=scale * r['width'] / 2, b=scale * r['height'] / 2)
    fig.add_scatter(x=x, y=y, mode='lines', hoverinfo='skip')

# Edge 'pos' is a whitespace-separated list of "x,y" spline control points;
# they are joined with straight segments here. Tokens prefixed with "s," or
# "e," (arrowhead start/end points) are skipped so the array stays rectangular.
for edge in g.edges():
    points = [p.split(',') for p in edge.attr['pos'].split() if p[0] not in 'se']
    # Convert to float so plotly receives numeric coordinates, not strings.
    tmp_edge = np.array(points, dtype=float)
    fig.add_scatter(x=tmp_edge[:, 0], y=tmp_edge[:, 1], mode='lines', hoverinfo='skip')
fig.show()

In [46]:
# Show the layout rows whose label is the literal string '\N'.
df.loc[df['label'].eq('\\N')]

Unnamed: 0,label,x,y,height,width,hover
