In [1]:
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from network import *

In [3]:
from collections import Counter
import math
from scipy.stats import power_divergence


In [None]:
def plot_xy_line(x, y):
    """Plot ``y`` against ``x`` as a connected line and display the figure.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates.
    """
    # Draw on the current pyplot axes (the same target ``plt.plot`` uses), so
    # labels applied beforehand through pyplot land on the same figure.
    current_axes = plt.gca()
    current_axes.plot(x, y)
    plt.show()

In [None]:
def plot_xy_dot(x, y):
    """Plot ``y`` against ``x`` as small unconnected dots and display the figure.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates.
    """
    # Draw on the current pyplot axes (the same target ``plt.plot`` uses).
    current_axes = plt.gca()
    current_axes.plot(x, y, '.', markersize=2)
    plt.show()

In [None]:
def plot_xy_point(x, y):
    """Plot ``y`` against ``x`` as semi-transparent circles and display the figure.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates.
    """
    # alpha=0.5 lets overlapping points show up as darker areas.
    current_axes = plt.gca()
    current_axes.plot(x, y, 'o', markersize=3, alpha=0.5)
    plt.show()

In [None]:
def convert(data, do):
    """Optionally map values onto a ``log(x + 1)`` scale.

    Args:
        data: Iterable of numbers.
        do: When truthy, build a new list with ``math.log(x + 1)`` applied to
            every element; when falsy, hand back ``data`` itself untouched.

    Returns:
        A new list of floats, or the original ``data`` object.
    """
    if not do:
        return data
    # The +1 shift keeps zero values finite: log(0 + 1) == 0.
    return [math.log(value + 1) for value in data]

In [None]:
def plot_pairs(pairs, plot_foo, log = False):
    """Plot a sequence of ``(x, y)`` pairs and print a divergence statistic.

    Args:
        pairs: Sequence of indexable items; element 0 is used as x and
            element 1 as y.
        plot_foo: Callable ``plot_foo(x, y)`` that does the drawing.
        log: When true, the y values are passed through ``convert`` (log scale)
            before plotting.

    Note:
        The printed ``power_divergence`` result is computed on the raw y
        values, not on the log-scaled ones that are plotted.
    """
    ys = [pair[1] for pair in pairs]
    xs = [pair[0] for pair in pairs]
    plot_foo(xs, convert(ys, log))
    print(power_divergence(ys))

In [None]:
def plor_rank(y, plot_foo, log = False):
    """Plot values against their position (rank) and print a divergence statistic.

    Args:
        y: Sequence of values; the x axis is simply ``0 .. len(y) - 1``.
        plot_foo: Callable ``plot_foo(x, y)`` that does the drawing.
        log: When true, the values are passed through ``convert`` (log scale)
            before plotting.

    Note:
        The printed ``power_divergence`` result is computed on the raw values,
        not on the log-scaled ones that are plotted.
    """
    ranks = list(range(len(y)))
    scaled = convert(y, log)
    plot_foo(ranks, scaled)
    print(power_divergence(y))

In [None]:
def plot_dict(stat, plot_foo, log = False):
    """Plot a mapping with its keys on the x axis and its values on the y axis.

    Args:
        stat: Mapping exposing ``keys()`` and ``values()``.
        plot_foo: Callable ``plot_foo(x, y)`` that does the drawing.
        log: When true, the values are passed through ``convert`` (log scale)
            before plotting.
    """
    xs = list(stat.keys())
    ys = convert(list(stat.values()), log)
    plot_foo(xs, ys)


In [None]:
def plot_labels(xlabel, ylabel, title):
    """Set the axis labels and the title on the current pyplot axes.

    Args:
        xlabel: Text for the x axis.
        ylabel: Text for the y axis.
        title: Text for the axes title.
    """
    current_axes = plt.gca()
    current_axes.set_xlabel(xlabel)
    current_axes.set_ylabel(ylabel)
    current_axes.set_title(title)

In [4]:
# Build the network object used by every analysis cell below from the test
# dataset file. `CoauthorNetwork` and `parse_dataset_file` are not defined in
# this notebook -- presumably they come from `from network import *`.
coauthorNetwork = CoauthorNetwork.load_with_loader("./data/out_test.txt", parse_dataset_file);


Uniq authors: 42619


In [None]:
def neighbour_rank(graph, log = False):
    """Plot the per-node neighbour count, sorted from highest to lowest.

    Args:
        graph: Graph object exposing ``nodes()`` and ``neighbors(node)``.
        log: When true, counts are plotted on a ``log(x + 1)`` scale.
    """
    # list(...) makes the count work whether neighbors() returns a list or a
    # one-shot iterator; calling len() directly on an iterator raises TypeError.
    neighbour_counts = sorted(
        (len(list(graph.neighbors(author))) for author in graph.nodes()),
        reverse=True,
    )
    plor_rank(neighbour_counts, plot_xy_dot, log=log)

# Rank plot of neighbour counts in the coauthor graph (`gr`), linear scale.
plot_labels("Author rank", "Num. of coauthors", "Coauthors count graph")
neighbour_rank(coauthorNetwork.gr)

# Same rank plot for the citation graph (`cgr`), on a log(x + 1) scale.
plot_labels("Author rank", "Num. of cite (log)", "Cite count graph")
neighbour_rank(coauthorNetwork.cgr, log=True)

In [None]:
def neighbour_dist(graph, log = False):
    """Plot how many nodes have each neighbour count.

    Args:
        graph: Graph object exposing ``nodes()`` and ``neighbors(node)``.
        log: When true, the node counts (y axis) are plotted on a
            ``log(x + 1)`` scale.
    """
    # list(...) makes the count work whether neighbors() returns a list or a
    # one-shot iterator; calling len() directly on an iterator raises TypeError.
    counts_per_size = Counter(
        len(list(graph.neighbors(author))) for author in graph.nodes()
    )
    # (neighbour count, number of nodes) pairs, largest neighbour count first.
    distribution = sorted(counts_per_size.items(), reverse=True)
    plot_pairs(distribution, plot_xy_line, log)

# Distribution of neighbour counts in the coauthor graph (`gr`), linear scale.
plot_labels("Num. of coauthors", "Count of authors", "Num. of coauthors distribution")
neighbour_dist(coauthorNetwork.gr)

# Same distribution for the citation graph (`cgr`), y axis on a log(x + 1) scale.
plot_labels("Num. of cite", "Count of authors(log)", "Num. of cite distribution")
neighbour_dist(coauthorNetwork.cgr, log=True)


In [None]:
def in_degree_rank(graph, log=False):
    """Plot the in-degree of every node, sorted from highest to lowest.

    Args:
        graph: Graph object exposing ``nodes()`` and ``in_degree(node)``.
        log: When true, in-degrees are plotted on a ``log(x + 1)`` scale.
    """
    in_degrees = [graph.in_degree(node) for node in graph.nodes()]
    in_degrees.sort(reverse=True)
    plor_rank(in_degrees, plot_xy_line, log=log)

def in_degree_dist(graph, log=False):
    """Plot how many nodes have each in-degree.

    Args:
        graph: Graph object exposing ``nodes()`` and ``in_degree(node)``.
        log: When true, the node counts (y axis) are plotted on a
            ``log(x + 1)`` scale.
    """
    nodes_per_degree = Counter(graph.in_degree(node) for node in graph.nodes())
    # (in-degree, number of nodes) pairs, largest in-degree first.
    distribution = sorted(nodes_per_degree.items(), reverse=True)
    plot_pairs(distribution, plot_xy_line, log=log)
    

# Rank plot of in-degrees in the citation graph (`cgr`), linear scale.
plot_labels("Author rank", "Num. of citation from others", "Cited count graph")
in_degree_rank(coauthorNetwork.cgr)

# In-degree distribution for the same graph, y axis on a log(x + 1) scale.
plot_labels("Num. of citation from others", "Count of authors(log)", "Num. of cited distribution")
in_degree_dist(coauthorNetwork.cgr,log=True)


In [None]:
def coauth_cite_plot(network):
    """Scatter each author's citation in-degree against their coauthor count.

    Args:
        network: Object exposing ``cgr`` (citation graph), ``gr`` (coauthor
            graph) and ``author_to_article`` (mapping keyed by author).

    Note:
        The y value is read from the coauthor graph ``gr`` so that it matches
        the "Number of coauthors" axis label used with this plot; reading it
        from ``cgr`` would count citation-graph neighbours instead.
        Assumes every key of ``author_to_article`` is a node of both graphs --
        TODO confirm against the loader.
    """
    points = [
        (
            network.cgr.in_degree(author),
            # list(...) keeps len() valid when neighbors() returns an iterator.
            len(list(network.gr.neighbors(author))),
        )
        for author in network.author_to_article.keys()
    ]
    plot_pairs(points, plot_xy_point)

# Per-author scatter plot: citation in-degree on x, labelled coauthor count on y.
plot_labels("Num. of citation from others", "Number of coauthors", "Citation-coauthor distribution")
coauth_cite_plot(coauthorNetwork)

In [None]:
def pub_rank(network, log = False):
    """Plot the size of each author's article collection, highest first.

    Args:
        network: Object whose ``author_to_article`` mapping has sized values
            (one collection per author).
        log: When true, sizes are plotted on a ``log(x + 1)`` scale.
    """
    article_counts = [len(articles) for articles in network.author_to_article.values()]
    article_counts.sort(reverse=True)
    plor_rank(article_counts, plot_xy_dot, log = log)

def pub_dist(network, log = False):
    """Plot how many authors have each article-collection size.

    Args:
        network: Object whose ``author_to_article`` mapping has sized values
            (one collection per author).
        log: When true, the author counts (y axis) are plotted on a
            ``log(x + 1)`` scale.
    """
    authors_per_size = Counter(
        len(articles) for articles in network.author_to_article.values()
    )
    # (collection size, number of authors) pairs, largest size first.
    distribution = sorted(authors_per_size.items(), reverse=True)
    plot_pairs(distribution, plot_xy_line, log = log)
    
# Rank plot of per-author publication counts, linear scale.
plot_labels("Author rank", "Number of publications", "Publication count graph")
pub_rank(coauthorNetwork)


# Distribution of publication counts, y axis on a log(x + 1) scale.
plot_labels("Count of publications", "Number of authors(log)", "Publication distribution graph")
pub_dist(coauthorNetwork, log = True)

In [None]:
def coathorship_rank(network, log = False):
    """Plot the values of ``network.coauth_count``, sorted highest first.

    Args:
        network: Object exposing a ``coauth_count`` mapping with numeric
            values (presumably joint-publication counts per author pair --
            verify against the loader).
        log: When true, values are plotted on a ``log(x + 1)`` scale.
    """
    joint_counts = list(network.coauth_count.values())
    joint_counts.sort(reverse=True)
    plor_rank(joint_counts, plot_xy_dot, log=log)
    
def coathorship_dist(network, log = False):
    """Plot how often each value occurs in ``network.coauth_count``.

    Args:
        network: Object exposing a ``coauth_count`` mapping with hashable
            numeric values (presumably joint-publication counts per author
            pair -- verify against the loader).
        log: When true, the occurrence counts (y axis) are plotted on a
            ``log(x + 1)`` scale.
    """
    occurrences = Counter(network.coauth_count.values())
    # (value, number of occurrences) pairs, largest value first.
    distribution = sorted(occurrences.items(), reverse=True)
    plot_pairs(distribution, plot_xy_line, log=log)
    
# Rank plot of the joint-publication counts, linear scale.
plot_labels("Author couple rank", "Number of publications", "Publications of author duet")
coathorship_rank(coauthorNetwork)


# Distribution of the joint-publication counts, y axis on a log(x + 1) scale.
# (Axis label typo "pulications" corrected to "publications".)
plot_labels("Count of cooperative publications", "Number of authors(log)", "Publications of author duet distribution")
coathorship_dist(coauthorNetwork, log = True)
    

In [None]:
def publication_year_dist(network, log=False, after_year=1000):
    """Plot how many entries fall in each year, in ascending year order.

    Args:
        network: Object exposing an ``author_year`` mapping whose values are
            iterables of numeric years.
        log: When true, the per-year counts (y axis) are plotted on a
            ``log(x + 1)`` scale.
        after_year: Exclusive lower bound; only years strictly greater than
            this value are counted. The default of 1000 keeps the original
            filter (presumably there to drop placeholder or invalid years --
            verify against the dataset).
    """
    years = [
        year
        for author_years in network.author_year.values()
        for year in author_years
        if year > after_year
    ]
    # (year, count) pairs ordered chronologically.
    per_year = sorted(Counter(years).items(), key=lambda item: item[0])

    plot_pairs(per_year, plot_xy_line, log=log)

# Per-year publication counts, y axis on a log(x + 1) scale.
plot_labels("Year", "Number of publication(log)", "Publication distribution over years")

publication_year_dist(coauthorNetwork, log=True)