In [1]:
import xnet
import json
import glob
import util

import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict

from igraph import *

In [2]:
history_out_filename = 'data/authors_pac_out_2lvls_2multi.json'
history_in_filename = 'data/authors_pac_in_2lvls_2multi.json'
history_filename = 'data/authors_pac_2lvls_2multi.json'

In [3]:
data = xnet.xnet2igraph('data/citation_network_ge1985_pacs.xnet')

In [4]:
filenames = sorted(glob.glob('data/pacs/2lvls/*_delta4_multilevel2.xnet'))
pac_nets = []
for filename in filenames:
    net = xnet.xnet2igraph(filename)
    pac_nets.append(net)

In [14]:
for net in pac_nets:
    print(set(net.vs["community"]))

{'6', '5', '2', '7', '3', '1', '4'}
{'6', '5', '2', '7', '3', '1', '8', '4'}
{'6', '5', '2', '7', '3', '1', '8', '4'}
{'6', '5', '2', '7', '3', '1', '8', '4'}
{'6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'10', '6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'10', '6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'10', '6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'10', '6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'10', '6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'10', '6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'10', '6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'10', '6', '9', '5', '2', '7', '3', '1', '8', '4'}
{'10', '6', '9', '5', '2', '7', '3', '1', '8', '4'}


In [5]:
history_out = util.load(history_out_filename)
authors_out_div = util.authors_div(history_out) # diversidade das publicações citadas (author out)
history_in = util.load(history_in_filename)
authors_in_div = util.authors_div(history_in) # diversidade das publicações que citam o autor (author in)
valid_authors_by_area = util.get_area(data,pac_nets,util.get_pacs_paper_published,4)

current year 1986
current year 1987
current year 1988
current year 1989
current year 1990
current year 1991
current year 1992
current year 1993
current year 1994
current year 1995
current year 1996
current year 1997
current year 1998
current year 1999
current year 2000
current year 2001
current year 2002
current year 2003
current year 2004
current year 2005
current year 2006


In [6]:
def get_citations(data,year_begin,year_end,delta,delta_key):
    history = defaultdict(lambda:defaultdict(lambda:0))

    for i,year in enumerate(range(year_begin,year_end+1)):
#         print("current year %d" % year)
        subset = data.vs.select(year_ge=year,year_le=year+delta)
        
        for paper in subset:
            neighbors = data.neighbors(paper,mode=OUT)
            for vtx_idx in neighbors:
                vtx = data.vs[vtx_idx]
                authors_idxs = vtx['authors_idxs'].split(',')
                for a in authors_idxs:
                    history[year+delta_key][a] += 1
    
    return history

In [7]:
future_citations = get_citations(data,1990,2006,1,0)
past_citations = get_citations(data,1988,2004,1,2)

In [8]:
top_min25 = util.load('authors_ranking_min25_by_year.json')

In [9]:
def plot_future_past_div(past_citations,future_citations,authors_in_div,top_min25,cmap):
    for year,past in past_citations.items():
        future = future_citations[year]
        print(year)
        
        plt.figure(figsize=(4,3))
        freq = list(past.values())
        bin_edges = np.logspace(0, np.log10(max(freq)),10)
        hist0,bins0,_ = plt.hist(freq,bins=bin_edges)
        plt.title(year)
        plt.xlabel('$\Delta K_{P}$ - citações do passado (2 anos)')
        plt.ylabel('número de autores')
        plt.xscale("log")
        plt.yscale("log")
        plt.tight_layout()
        plt.savefig("hist_delta_K_P_%s.pdf" % year)
        plt.clf()

        authors_by_bin = defaultdict(lambda:[])
        for a,c in past.items():
            idx = np.searchsorted(bin_edges,c)
            if idx == 0:
                idx = 1
            elif idx == len(bin_edges):
                idx -= 1
            authors_by_bin[idx].append(a)

        N = len(bins0)
        f, axs = plt.subplots(N//3, 3, sharex=True, sharey=True, figsize=((N//3)*3,9))
        f.suptitle(year)
        i = 0
        to_plot = True
        IDXS = sorted(list(authors_by_bin.keys()))
        for idx in IDXS:
            group = authors_by_bin[idx]
            X = []
            Y = []
            for a in group:
                if a not in top_min25[str(year)]:
                    continue
                try:
                    x = authors_in_div[a][str(year-1)]
                    y = future[a]
                    X.append(x)
                    Y.append(y)
                except:
                    pass
            if len(X) == 0:
                to_plot = False
                plt.clf()
                continue
            axs[i//3,i%3].scatter(X,Y,alpha=0.6,c=[cmap(idx/N)]*len(X))
            axs[i//3,i%3].set_title('%.2f $ < \Delta K_{P} \leq $ %.2f' % (bins0[idx-1],bins0[idx]))
            i+=1
        if to_plot:
            f.text(0.5, 0.04, '$\Delta d$ - diversidade (5 anos do passados)', ha='center')
            f.text(0.04, 0.5, '$\Delta K_{F}$ - citações do futuro (2 anos)', va='center', rotation='vertical')
            plt.savefig("scatter_delta_d_delta_K_F_%s.pdf"%year)
            plt.clf()

In [10]:
cmaps = OrderedDict()
cmaps['Cyclic'] = ['twilight', 'twilight_shifted', 'hsv']
cmap = plt.get_cmap(cmaps['Cyclic'][-1])

In [11]:
plot_future_past_div(past_citations,future_citations,authors_in_div,top_min25,cmap)

1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000


  


2001


  


2002


  


2003


  


2004


  


2005


  


2006


  


<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>

<Figure size 288x216 with 0 Axes>

<Figure size 648x648 with 0 Axes>