## Analysis of semantic networks generated from word2vec, GloVe and USF Norms datasets

In [1]:
import pandas as pd
import networkx as nx
import numpy as np
import os
import locale
import glob

from matplotlib.pyplot import *

def _format_float(x):
    """Render a float for DataFrame display.

    Values whose magnitude exceeds 1000 are shown without decimals and with
    the active locale's thousands grouping; all other values are shown with
    two decimals.
    """
    # locale.format() was deprecated in Python 3.7 and removed in 3.12;
    # locale.format_string() is the supported replacement with the same
    # behaviour for a single format specifier.
    if abs(x) > 1000:
        return locale.format_string('%.0f', x, grouping=True)
    return locale.format_string('%.2f', x)


pd.set_option('display.float_format', _format_float)

In [3]:
# Load the undirected and directed result tables (first CSV column is the
# index), place them side by side, then transpose so that each row of
# `results_all` corresponds to one column of the input files.
result_paths = (
    '../semnet_compare/results/undirected_results.csv',
    '../semnet_compare/results/directed_results.csv',
)
per_file_frames = [pd.read_csv(path, index_col=0) for path in result_paths]
results_all = pd.concat(per_file_frames, axis=1).transpose()

In [4]:
# NOTE: this cell rebinds `results_all`. Re-running it without first
# re-running the loading cell fails, because the columns have already been
# renamed and wrapped in `$...$`.

# Raw metric name -> symbol used as the table header.
to_rename = {
    'aspl': 'L',  # average shortest path length
    'avg_degree': '<k>',  # average node degree
    'cc': 'C',  # clustering coefficient
    'connectedness': 'C_k',  # connectedness (% of nodes in the largest connected component)
    'diameter': 'D',
    'n_edges': 'm',
    'n_nodes': 'n',
    'rnd_aspl': 'L_{rnd}',
    'rnd_cc': 'C_{rnd}',
}
results_all = results_all.rename(columns=to_rename)

# Adopt the locale configured in the environment; the display float
# formatter relies on `locale` for number formatting.
locale.setlocale(locale.LC_ALL, '')

# Convert the diameter column to integers.
# `np.int` was deprecated in NumPy 1.20 and removed in 1.24; it was only an
# alias of the builtin `int`, so a plain integer cast is equivalent.
results_all['D'] = results_all['D'].astype(int)

# Clustering coefficients are rendered as strings with three decimals.
to_prec3 = ['C', 'C_{rnd}']
for col in to_prec3:
    results_all[col] = results_all[col].map(lambda x: "%.3f" % x)

# Average degree is rendered as a string with one decimal.
results_all['<k>'] = results_all['<k>'].map(lambda x: "%.1f" % x)

# Wrap every column name in `$...$` so headers render as LaTeX math.
results_all.columns = ['$' + col + '$' for col in results_all.columns]
results_all

Unnamed: 0,$L$,$<k>$,$C$,$C_k$,$D$,$m$,$n$,$L_{rnd}$,$C_{rnd}$,$s$
USF undirected,3.04,22.0,0.186,100.0,5,55236,5018,3.03,0.004,0.44
word2vec-cos,4.24,21.3,0.325,99.84,12,52317,4902,3.05,0.004,0.44
word2vec-dot,4.19,22.1,0.356,96.35,13,44442,4022,2.92,0.006,0.55
glove-cos,4.61,22.1,0.373,98.88,12,51244,4632,2.98,0.005,0.48
glove-dot,4.28,22.0,0.395,98.16,15,46534,4229,2.96,0.005,0.52
USF directed,4.26,12.7,0.187,96.51,10,63619,5018,3.62,0.005,0.25
word2vec-cs-cos,4.79,12.6,0.238,99.32,11,62764,4977,3.64,0.005,0.25
word2vec-cs-dot,4.93,12.6,0.248,93.55,16,62775,4977,3.59,0.006,0.25
word2vec-knn-cos,4.77,12.7,0.232,99.32,12,63165,4977,3.63,0.005,0.26
word2vec-knn-dot,5.11,12.7,0.234,96.08,15,63165,4977,3.62,0.005,0.26


In [7]:
# Keep only the USF reference networks and the cosine-similarity variants.
kept_labels = [
    label for label in results_all.index
    if 'USF' in label or 'cos' in label
]
results = results_all.loc[kept_labels]

# Drop everything from the first '-cos' onward in the non-USF labels.
# `partition` returns the label unchanged when '-cos' is absent, whereas
# slicing with `find()` (which returns -1 in that case) would silently cut
# off the last character.
results.index = pd.Index([
    label if 'USF' in label else label.partition('-cos')[0]
    for label in results.index
])
results

Unnamed: 0,$L$,$<k>$,$C$,$C_k$,$D$,$m$,$n$,$L_{rnd}$,$C_{rnd}$,$s$
USF undirected,3.04,22.0,0.186,100.0,5,55236,5018,3.03,0.004,0.44
word2vec,4.24,21.3,0.325,99.84,12,52317,4902,3.05,0.004,0.44
glove,4.61,22.1,0.373,98.88,12,51244,4632,2.98,0.005,0.48
USF directed,4.26,12.7,0.187,96.51,10,63619,5018,3.62,0.005,0.25
word2vec-cs,4.79,12.6,0.238,99.32,11,62764,4977,3.64,0.005,0.25
word2vec-knn,4.77,12.7,0.232,99.32,12,63165,4977,3.63,0.005,0.26
glove-cs,5.05,12.4,0.266,97.07,13,61619,4988,3.64,0.005,0.25
glove-knn,5.03,12.7,0.259,97.91,13,63262,4988,3.62,0.005,0.25
