In [None]:
import urllib.request
import gensim.models as gnm
import networkx as nx
import os
from networkx.algorithms import community
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')

In [None]:
def take_model(model_name="ruscorpora_mystem_cbow_300_2_2015.bin.gz",
               base_url="http://rusvectores.org/static/models/rusvectores2/"):
    """Load the word-embedding model, downloading the file on first use.

    Parameters
    ----------
    model_name : str
        File name of the model. It is looked up in the current working
        directory and, when absent, fetched from ``base_url + model_name``.
    base_url : str
        URL prefix the file name is appended to.

    Returns
    -------
    The object returned by ``gnm.KeyedVectors.load_word2vec_format`` for
    ``.vec.gz`` (text) and ``.bin.gz`` (binary) files, or by
    ``gnm.KeyedVectors.load`` for any other file name.
    """
    if not os.path.isfile(model_name):
        # Download under a temporary name and rename only after the transfer
        # has finished: an interrupted download must not leave a truncated
        # file that the next run would mistake for a complete model.
        partial = model_name + ".part"
        try:
            urllib.request.urlretrieve(base_url + model_name, partial)
            os.replace(partial, model_name)
        finally:
            # Still present only when the download or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)

    # The loader is chosen from the file extension.
    if model_name.endswith('.vec.gz'):
        return gnm.KeyedVectors.load_word2vec_format(model_name, binary=False)
    if model_name.endswith('.bin.gz'):
        return gnm.KeyedVectors.load_word2vec_format(model_name, binary=True)
    return gnm.KeyedVectors.load(model_name)

In [None]:
def boundwords(model, word, stops, topn=15, threshold=0.5, pos_suffix='_S'):
    """Return weighted edges from ``word`` to its closest neighbours in the model.

    Parameters
    ----------
    model
        Object exposing ``most_similar(positive=[...], topn=...)`` that yields
        ``(neighbour, similarity)`` pairs.
    word : str
        The word whose neighbours are looked up.
    stops : container of str
        Neighbours found in this container are skipped.
    topn : int
        How many candidates to request from the model (default 15).
    threshold : float
        Minimum similarity, inclusive, for a candidate to be kept (default 0.5).
    pos_suffix : str
        Only candidates ending with this suffix are kept (default ``'_S'``).

    Returns
    -------
    list of tuple
        ``(word, neighbour, similarity)`` triples in the order the model
        returned them.
    """
    return [
        (word, neighbour, score)
        for neighbour, score in model.most_similar(positive=[word], topn=topn)
        if score >= threshold
        and neighbour.endswith(pos_suffix)
        and neighbour not in stops
    ]

In [None]:
def semantic_net(model, seeds=None):
    """Build a two-level semantic network around the seed words.

    Stage 1 links every seed to its neighbours (the seeds themselves are
    used as the stop list). Stage 2 expands each neighbour found in stage 1;
    every expanded word is added to the stop list first, so later expansions
    do not produce an edge pointing back to it.

    Parameters
    ----------
    model
        Passed through to ``boundwords``.
    seeds : iterable of str, optional
        Starting words. Defaults to ``['коммунизм_S', 'социализм_S']``.
        The iterable is copied and never modified.

    Returns
    -------
    list of tuple
        ``(source, target, similarity)`` triples: stage-1 edges first, then
        stage-2 edges in expansion order.
    """
    keys = list(seeds) if seeds is not None else ['коммунизм_S', 'социализм_S']
    edges = []
    for word in list(keys):
        edges.extend(boundwords(model, word, keys))

    # A word reachable from several seeds appears several times among the
    # stage-1 targets. Expanding it again would only repeat the model query
    # and return edges that are already in the list, so expand each word once.
    expanded = set(keys)
    for _, word, _ in list(edges):
        if word in expanded:
            continue
        expanded.add(word)
        keys.append(word)
        edges.extend(boundwords(model, word, keys))
    return edges

In [None]:
def words_only(found):
    """Collect the distinct non-float members of the given tuples.

    Every tuple in ``found`` is scanned left to right; ``float`` members
    (the similarity scores) are ignored and every other member is kept the
    first time it is seen.

    Returns a list in first-appearance order.
    """
    unique = []
    for edge in found:
        for member in edge:
            if isinstance(member, float) or member in unique:
                continue
            unique.append(member)
    return unique

In [None]:
def semantic_frame():
    """Run the whole pipeline: load the model, build the edges, list the words.

    Returns a ``(edges, words)`` pair, where ``edges`` is the result of
    ``semantic_net`` and ``words`` is ``words_only`` applied to those edges.
    """
    net = semantic_net(take_model())
    return net, words_only(net)

In [None]:
# Build the edge list and the vocabulary once; the graph cell below reuses both.
# The model file is downloaded here if it is not already in the working directory.
edges, words = semantic_frame()

In [None]:
def makeG(edges, words):
    """Turn the edge triples and the word list into an undirected graph.

    The last two characters of every word are dropped before it becomes a
    node name (the words built by this notebook end with the ``_S`` tag).
    The third member of each edge triple is stored as the edge ``weight``.
    """
    graph = nx.Graph()
    graph.add_nodes_from(tagged[:-2] for tagged in words)
    graph.add_weighted_edges_from(
        (edge[0][:-2], edge[1][:-2], edge[2]) for edge in edges
    )
    return graph

In [None]:
def drawG(G, figsize=(80, 60), seed=None):
    """Draw the graph with a spring layout and show it.

    Parameters
    ----------
    G
        The graph to draw.
    figsize : tuple of (width, height)
        Size of the figure in inches (default ``(80, 60)``).
    seed : optional
        Forwarded to ``nx.spring_layout``. Pass a fixed value to get the
        same picture on every run; the default leaves the layout random.
    """
    font = 'Helvetica'
    # The figure has to exist at its final size before anything is drawn.
    # Changing rcParams["figure.figsize"] after the drawing calls only
    # affects figures created later, so the first run would come out at the
    # default size.
    plt.figure(figsize=figsize)
    pos = nx.spring_layout(G, seed=seed)
    nx.draw_networkx_nodes(G, pos, node_color='red', node_size=20)
    nx.draw_networkx_edges(G, pos, edge_color='yellow')
    nx.draw_networkx_labels(G, pos, font_size=45, font_family=font)
    plt.axis('off')
    plt.show()

In [None]:
def paragraph4(G):
    """Print the top-ranked node for four centrality measures.

    The measures are degree, closeness, betweenness and eigenvector
    centrality. All four are computed before any result line is printed.
    When several nodes share the top score, the one that comes first in the
    score mapping is reported.
    """
    print('Центральность:\n')
    measures = [
        ('degree', nx.degree_centrality),
        ('closeness', nx.closeness_centrality),
        ('betweenness', nx.betweenness_centrality),
        ('eigen', nx.eigenvector_centrality),
    ]
    leaders = []
    for label, measure in measures:
        scores = measure(G)
        ranking = sorted(scores, key=scores.get, reverse=True)
        leaders.append((label, ranking[0]))
    for label, node in leaders:
        print(label + ' centrality:\t' + node)

In [None]:
def paragraph5(G):
    """Print five whole-graph statistics, one per line.

    Reported values: density, diameter, radius, average clustering
    coefficient and degree assortativity (Pearson). All five are computed
    before any result line is printed.

    NOTE(review): diameter and radius are distance-based; networkx is
    expected to raise for a disconnected graph - confirm before using this
    on graphs that may have several components.
    """
    print('Параметры графа:\n')
    metrics = (
        ('density', nx.density),
        ('diameter', nx.diameter),
        ('radius', nx.radius),
        ('clustering', nx.average_clustering),
        ('assortativity', nx.degree_pearson_correlation_coefficient),
    )
    measured = [(label, metric(G)) for label, metric in metrics]
    for label, value in measured:
        print(label + ':\t' + str(value))

In [None]:
def whole_graph(edges, words):
    """Build the graph, draw it, print both reports and return the graph.

    The steps run in this order: ``makeG``, ``drawG``, ``paragraph4``,
    ``paragraph5``.
    """
    graph = makeG(edges, words)
    for step in (drawG, paragraph4, paragraph5):
        step(graph)
    return graph

In [None]:
# Build, draw and summarise the graph; G is kept for the community-detection cell below.
G = whole_graph(edges, words)

In [None]:
def communities(G):
    """Print the communities found by greedy modularity maximisation.

    Each community is printed on its own line as its 1-based number
    followed by the list of its members.
    """
    partition = community.greedy_modularity_communities(G)
    number = 0
    for group in partition:
        number += 1
        print(number, ' ', list(group))

In [None]:
# Print the detected communities, one numbered line per community.
communities(G)

В графе выделено 8 сообществ. Слова в сообществе 1 в большей степени связаны с Октябрьским переворотом 1917 года; сообщество 2 связано с формами правления или же с номинальными агентами власти при разных формах правления. Сообщество 3 содержит в себе слова, которые являются обобщающими для большинства остальных слов. Сообщество 5 связано с теориями, не получившими практического воплощения (сюда же попало слово "атеизм"). Слова из сообщества 6 обозначают направления общественно-политической мысли в Европе XIX века. Сообщество 7 содержит разные типы общественных формаций (в марксистской терминологии).
Сообщества 4 и 8, как мне кажется, образованы по остаточному признаку.