In [1]:
import sys, os
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import pygraphviz as pgv
from pygraphviz import *
import numpy as np
import plotly.graph_objects as go

In [26]:
import plotly.io as pio
pio.renderers.default = "browser"

In [63]:
try:
    from nltk.corpus import wordnet
except:
    import nltk
    nltk.download('wordnet')
from nltk.corpus import wordnet

In [279]:
def ellipse(x_center=0, y_center=0, ax1=[1, 0], ax2=[0, 1], a=1, b=1, N=100):
    # x_center, y_center the coordinates of ellipse center
    # ax1 ax2 two orthonormal vectors representing the ellipse axis directions
    # a, b the ellipse parameters
    if np.linalg.norm(ax1) != 1 or np.linalg.norm(ax2) != 1:
        raise ValueError('ax1, ax2 must be unit vectors')
    if abs(np.dot(ax1, ax2)) > 1e-06:
        raise ValueError('ax1, ax2 must be orthogonal vectors')
    t = np.linspace(0, 2 * np.pi, N)
    # ellipse parameterization with respect to a system of axes of directions a1, a2
    xs = a * np.cos(t)
    ys = b * np.sin(t)
    # rotation matrix
    R = np.array([ax1, ax2]).T
    # coordinate of the  ellipse points with respect to the system of axes [1, 0], [0,1] with origin (0,0)
    xp, yp = np.dot(R, [xs, ys])
    x = xp + x_center
    y = yp + y_center
    return x, y

In [280]:
def isindict(w, d):
    for key in d.keys():
        for w2 in d[key]:
            if w.strip() == w2.strip():
                return key

In [281]:
df = pd.read_csv("word_data_detailed.csv", engine='python')
df = df.fillna('')
df['word'] = df['word'].map(lambda x: x.strip())
df = df.set_index('word')


In [282]:
groups = open("groups.txt")
groups_dict = {}
for line in groups.readlines():
    group_name = 'n'+str(len(groups_dict))
    
    groups_dict[group_name] = set()
    for word in line.strip().split():
        groups_dict[group_name].add(word)

# refine groups
for w in df.index:
    try:
        df.loc[w]['synonyms'].split(',')
    except:
        print(w)
        continue
    for s in df.loc[w]['synonyms'].split(','):
        s = s.strip()
        gs_key = isindict(s, groups_dict)
        
        if gs_key is not None: # s in group. add w to that group
            groups_dict[gs_key].add(w)
            print(w,s, gs_key)
    
    # s not in group dict
    for s in df.loc[w]['synonyms'].split(','):
        
        for w2 in df.index:
            if s == w2: # s in word list
                if w == w2:
                    continue
                gw_key = isindict(w, groups_dict)
                gs_key = isindict(s, groups_dict)
                if gw_key == gs_key == None:
                    print(w,w2,s, gw_key)
                    group_name = 'n'+str(len(groups_dict))
                    g.add_node(group_name, label=group_name)
                    groups_dict[group_name] = set([w, w2])
                    break

abound abound n35
austere spartan spartan None
capricious whimsical whimsical None
congenial congenial n1
conspicuous conspicuous n2
cursory cursory n10
cursory perfunctory n25
disseminate diffuse diffuse None
feasible viable viable None
humdrum humdrum n3
humdrum prosaic n3
insipid insipid n4
loquacious loquacious n5
loquacious garrulous n5
placate placate n15
placate assuage n14
placate appease n15
proclivity propensity propensity None
spendthrift spendthrift n6
spendthrift extravagant n31
spendthrift prodigal n6
taciturn taciturn n7
wary wary n8
adulterate adulterate n11
advocate advocate n12
aggrandize embellish embellish None
alacrity alacrity n13
ameliorate ameliorate n14
amenable tractable tractable None
audacious brazen n19
audacious intrepid n19
audacious audacious n19
banal banal n3
banal hackneyed n3
benign benign n15
brazen brazen n19
brazen audacious n19
calumny slander slander None
castigate castigate n16
castigate chastise n16
caustic mordant mordant None
convoluted tort

libertine profligate n6
manifest manifest n97
manifest patent n97
slander calumny n48
slander slander n48
terse laconic n86
terse terse n86
universal cosmopolitan n54
universal universal n54
animosity animosity n89
animosity animus n89
enthrall enthrall n92
enthrall enchant n92
fledgling neophyte n65
fledgling fledgling n65
render yield yield None
stinting scant scant None
heterodox dissident n100
heterodox heterodox n100
fervent fervid n13
fervent fervent n13
temper mollify mollify None
painstaking scrupulous n0
exonerate exculpate n77
exonerate exonerate n77
propensity propensity n45
propensity proclivity n45
embellish embellish n46
embellish aggrandize n46
arresting arresting n94
arresting sensational n94
obstinate obstinate n90
obstinate wayward n90
chastise castigate n16
chastise chastise n16
profound profound n98
profound sound n98
propagate disseminate n43
propagate diffuse n43
propagate propagate n43
curb inhibit n96
curb curb n96
curb bridle n96
precipitous precipitate n57
pre

In [283]:
groups_dict

{'n0': {'exacting',
  'exigent',
  'fastidious',
  'meticulous',
  'painstaking',
  'pedantic',
  'polemical',
  'pristine',
  'punctilious',
  'scrupulous'},
 'n1': {'congenial', 'convivial'},
 'n2': {'accentuate', 'conspicuous', 'disparate', 'flamboyant', 'florid'},
 'n3': {'banal',
  'hackneyed',
  'humdrum',
  'monotonous',
  'mundane',
  'pedestrian',
  'plodding',
  'prosaic',
  'soporific'},
 'n4': {'innocuous', 'insipid', 'vapid'},
 'n5': {'cacophonous',
  'clamorous',
  'garrulous',
  'loquacious',
  'pugnacious',
  'truculent',
  'vociferous'},
 'n6': {'extravagant',
  'libertine',
  'parsimonious',
  'prodigal',
  'profligate',
  'spendthrift'},
 'n7': {'aloof', 'diffident', 'distant', 'taciturn', 'timorous'},
 'n8': {'circumspect', 'discreet', 'wary'},
 'n9': {'byzantine', 'convoluted', 'tortuous'},
 'n10': {'cursory', 'perfunctory', 'superficial', 'trivial'},
 'n11': {'adulterate', 'impair', 'subvert', 'vitiate'},
 'n12': {'advocate', 'corroborate'},
 'n13': {'alacrity', '

In [284]:
g = pgv.AGraph()
for w in df.index:
    g.add_node(w, label=w)        
for key in groups_dict:
    g.add_node(key, label=key)
    for w in groups_dict[key]:
        g.add_edge(w, key)
for n in g.nodes():
    if n.attr['label'] == '\\N':
        print(n)
g.layout()

In [285]:
df

Unnamed: 0_level_0,meaning,synonyms,antonyms,examples,definitions
word,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abound,large numbers or amounts,"abound,bristle,burst",,None\n,be abundant or plentiful; exist in large quant...
amorphous,without a clearly defined shape or form,"amorphous,uncrystallized,uncrystallised,unstru...",,1. amorphous clouds of insects \n2. an aggrega...,having no definite form or distinct shape
austere,"strict in manner, having no comforts or luxuries","spartan,austere,stark,ascetical,ascetic,severe...",,1. a stark interior \n,severely simple
belie,"fail to give true impression of something, con...","negate,contradict,misrepresent,belie",,None\n,be in contradiction with
capricious,"sudden change of mood, unpredictable, fickle","capricious,whimsical,freakish,impulsive",,1. a capricious summer breeze \n2. freakish we...,changeable
...,...,...,...,...,...
squander,waste in a reckless manner,"waste,squander,consume,ware,blow",conserve,1. He wasted his inheritance on his insincere ...,spend thoughtlessly; throw away
cajole,persuade to do something by flattery,"wheedle,coax,sweet-talk,cajole,inveigle,blarne...",,1. He palavered her into going along \n,"influence or urge by gentle urging, caressing,..."
unalloyed,pure/ complete and unreserved,unalloyed,,1. unalloyed metal \n2. unalloyed pleasure \n,free from admixture
heady,intoxicating / having a strong or exhilarating...,"foolhardy,intoxicating,wise,rash,heady,judicio...",,1. judicious use of one's money \n2. a wise de...,marked by the exercise of good judgment or com...


In [286]:
layout = []
for n in g.nodes():
    w = g.get_node(n).attr['label'].strip()
    tmp_attr = {'label': w}
    tmp_attr['x'] = float(g.get_node(n).attr['pos'].split(",")[0])
    tmp_attr['y'] = float(g.get_node(n).attr['pos'].split(",")[1])
    tmp_attr['height'] = float(g.get_node(n).attr['height'])
    tmp_attr['width'] = float(g.get_node(n).attr['width'])

    if tmp_attr['label'] in df.index.values:
        tmp_attr['hover'] = "meaning#1: " + df.loc[w]['meaning']
        tmp_attr['hover'] += "\nmeaning#2: " + df.loc[w]['definitions']
        tmp_attr['hover'] += "\nsynonyms: " + df.loc[w]['synonyms']
        tmp_attr['hover'] += "\nantonyms: " + df.loc[w]['antonyms']
        tmp_attr['hover'] += "\nexamples: " + df.loc[w]['examples']
    else:
        tmp_attr['hover'] = ""
    layout.append(tmp_attr)
df2 = pd.DataFrame.from_dict(layout)
df2['label'] = df2['label'].astype(str)
df2 =df2.set_index('label')
g.draw("file.png")

In [287]:
df3 = df.join(df2, how='outer')

In [296]:
fig = go.Figure()
a=0
b=len(df3)
df3['examples'] = df3['examples'].fillna('').map(lambda x: x.replace('\n','<br>'))
fig.add_trace(go.Scatter(
    x=df3['x'].values[a:b],
    y=df3['y'].values[a:b],
    mode="text",
    text=list(df3.index.values[a:b]),
    customdata=df3[['meaning', 'definitions', 'synonyms','antonyms','examples']].values,
#     hovertext=df3['hover'],
    hovertemplate = "%{text}<br>meaning#1:%{customdata[0]}<br>meaning#2:%{customdata[1]}<br>synonyms:%{customdata[2]}<br>antonyms:%{customdata[3]}<br>examples:<br>%{customdata[4]}",
    textfont=dict(
        family="sans serif",
        size=14,
        color="black"
    ),
    showlegend=False,
))
# fig.update_traces(textposition='middle center', textfont_size=14)
# fig.update_layout(uniformtext_minsize=12, uniformtext_mode='hide')

scale=50
for idx, r in df3.iterrows():
    x, y = ellipse(r['x'], r['y'], a=scale*r['width'] / 2, b=scale*r['height'] )
    fig.add_scatter(x=x, y=y, mode='lines', hoverinfo='skip',showlegend=False)
for i in range(len(g.edges())):
    node1, node2 = g.get_node(g.edges()[i][0]),g.get_node(g.edges()[i][1])
    tmp_edge = np.array([list(map(float,node1.attr['pos'].split(','))), list(map(float,node2.attr['pos'].split(',')))])
    
#     tmp_edge = g.edges()[i].attr['pos'].split(" ")
# #     tmp_edge =[tmp_edge[0]] + [tmp_edge[3]]
#     tmp_edge = np.array([x.split(',') for x in tmp_edge])
    fig.add_scatter(x=tmp_edge[:,0], y=tmp_edge[:,1], mode='lines', hoverinfo='skip', showlegend=False)
fig.write_html('output.html', auto_open=True )
# fig.show()

In [297]:
df3[df3.index=='\\N']

Unnamed: 0,meaning,synonyms,antonyms,examples,definitions,x,y,height,width,hover


## Add details to word list

In [218]:
from PyDictionary import PyDictionary
dictionary=PyDictionary()
print (dictionary.meaning("indentation"))
# dictionary.synonym("everyone") # mow working

{'Noun': ['a concave cut into a surface or edge (as in a coastline', 'the formation of small pits in a surface as a consequence of corrosion', 'the space left between the margin and the start of an indented line', 'the act of cutting into an edge with toothlike notches or angular incisions']}


In [219]:
_df = pd.read_csv("word_data.csv", engine='python')
_df

Unnamed: 0,word,meaning,synonyms
0,abound,large numbers or amounts,
1,amorphous,without a clearly defined shape or form,
2,austere,"strict in manner, having no comforts or luxuries",
3,belie,"fail to give true impression of something, con...",
4,capricious,"sudden change of mood, unpredictable, fickle",
...,...,...,...
803,squander,waste in a reckless manner,
804,cajole,persuade to do something by flattery,
805,unalloyed,pure/ complete and unreserved,
806,heady,intoxicating / having a strong or exhilarating...,


In [220]:
syns[0].lemmas()[0].name()

'clamant'

In [221]:
synonyms = []
antonyms = []
examples = []
definitions = []
for word in _df['word']:
    word = word.strip()
    syns = wordnet.synsets(word)
#     print(word, end=' ')
#     print(syns[0].lemmas()[0].name())
    _examples = syns[0].examples()
    _str_examples = ""
    for idx,ex in enumerate(_examples):
        _str_examples += str(idx+1)+". "+ex+" \n"
    if len(_examples) == 0:
        _str_examples += "None\n"
    examples.append(_str_examples)
    definitions.append(syns[0].definition())
    
    _synonyms = []
    _antonyms = []
    for syn in wordnet.synsets(word):
        for l in syn.lemmas():
            _synonyms.append(l.name())
            if l.antonyms():
                _antonyms.append(l.antonyms()[0].name())
    synonyms.append(','.join(set(_synonyms)))
    antonyms.append(','.join(set(_antonyms)))
_df['synonyms'] = synonyms
_df['antonyms'] = antonyms
_df['examples'] = examples
_df['definitions'] = definitions


In [222]:
_df

Unnamed: 0,word,meaning,synonyms,antonyms,examples,definitions
0,abound,large numbers or amounts,"abound,bristle,burst",,None\n,be abundant or plentiful; exist in large quant...
1,amorphous,without a clearly defined shape or form,"amorphous,uncrystallized,uncrystallised,unstru...",,1. amorphous clouds of insects \n2. an aggrega...,having no definite form or distinct shape
2,austere,"strict in manner, having no comforts or luxuries","spartan,austere,stark,ascetical,ascetic,severe...",,1. a stark interior \n,severely simple
3,belie,"fail to give true impression of something, con...","negate,contradict,misrepresent,belie",,None\n,be in contradiction with
4,capricious,"sudden change of mood, unpredictable, fickle","capricious,whimsical,freakish,impulsive",,1. a capricious summer breeze \n2. freakish we...,changeable
...,...,...,...,...,...,...
803,squander,waste in a reckless manner,"waste,squander,consume,ware,blow",conserve,1. He wasted his inheritance on his insincere ...,spend thoughtlessly; throw away
804,cajole,persuade to do something by flattery,"wheedle,coax,sweet-talk,cajole,inveigle,blarne...",,1. He palavered her into going along \n,"influence or urge by gentle urging, caressing,..."
805,unalloyed,pure/ complete and unreserved,unalloyed,,1. unalloyed metal \n2. unalloyed pleasure \n,free from admixture
806,heady,intoxicating / having a strong or exhilarating...,"foolhardy,intoxicating,wise,rash,heady,judicio...",,1. judicious use of one's money \n2. a wise de...,marked by the exercise of good judgment or com...


In [223]:
_df.to_csv('word_data_detailed.csv', index=False)

In [234]:
_df.loc['esoteric']

KeyError: 'esoteric'