## Create a graph of publications from a bibtex file

### Extract info from a BibTeX document (optionally, convert it to JSON)

In [1]:
# Extract the text, but with LaTeX encoding for the accents:

#import bibtexparser
#with open('BenjaminRicaud.bib') as bibtex_file:
#    bibtex_str = bibtex_file.read()

#bib_database = bibtexparser.loads(bibtex_str)
#print(bib_database.entries)

In [2]:
import bibtexparser
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import homogeneize_latex_encoding

In [3]:
from bibtexparser.customization import convert_to_unicode

# Set up a parser that applies bibtexparser's convert_to_unicode
# customization to the records it reads.
parser = BibTexParser()
parser.customization = convert_to_unicode

# Parse the .bib file; the handle is only needed while loading.
with open('BenjaminRicaud.bib') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file, parser=parser)

# Show the parsed entries (a list with one dict per BibTeX record).
print(bib_database.entries)

[{u'author': u'Shuman, David I and Ricaud, Benjamin and Vandergheynst, Pierre', u'booktitle': u'2012 IEEE Statistical Signal Processing Workshop (SSP)', u'title': u'A windowed graph Fourier transform', 'ENTRYTYPE': u'inproceedings', u'note': u'EPFL', u'year': u'2012', u'organization': u'Ieee', 'ID': 'shuman2012windowed', u'pages': u'133--136'}, {u'publisher': u'Elsevier', u'note': u'EPFL', u'author': u'Shuman, David I and Ricaud, Benjamin and Vandergheynst, Pierre', u'journal': u'Applied and Computational Harmonic Analysis', u'title': u'Vertex-frequency analysis on graphs', u'number': u'2', 'ENTRYTYPE': u'article', u'volume': u'40', u'year': u'2016', 'ID': 'shuman2016vertex', u'pages': u'260--291'}, {u'publisher': u'AIP Publishing', u'note': u'CPT-CEA', u'author': u'Clairet, F and Ricaud, B and Briolle, F and Heuraux, S and Bottereau, C', u'journal': u'Review of Scientific Instruments', u'title': u'New signal processing technique for density profile reconstruction using reflectometry',

In [4]:
import json
# Serialise the list of parsed entries and store it next to the notebook.
entries_json = json.dumps(bib_database.entries)
with open('BenjaminRicaud.json', 'w') as f:
    f.write(entries_json)

### Creating the graph

In [5]:
import networkx as nx

#### Add nodes

In [6]:
# Start from an empty undirected graph and add one node per publication,
# using the BibTeX entry ID as the node key.
G = nx.Graph()
G.add_nodes_from(entry['ID'] for entry in bib_database.entries)

In [7]:
# Attach every BibTeX field of a publication to its node as node attributes.
# add_nodes_from() with (node, attribute-dict) pairs updates nodes that already
# exist and is available in every NetworkX release, whereas the `G.node`
# mapping was removed in NetworkX 2.4.
# The loop variable keeps the name `node` because the example cell that
# follows reads it after the loop has finished.
for node in bib_database.entries:
    G.add_nodes_from([(node['ID'], node)])

In [8]:
# Example: display the attributes stored on one node. `node` is the loop
# variable left over from the attribute-assignment loop in the previous cell,
# so this shows the last entry that loop visited.
G.node[node['ID']]

{'ENTRYTYPE': u'inproceedings',
 'ID': 'ricaud2011detection',
 u'author': u'Ricaud, Benjamin and Briolle, Francoise and Leoncini, Xavier',
 u'booktitle': u'Chaos, Complexity and Transport',
 u'note': u'CPT-CEA',
 u'organization': u'world scientific',
 u'title': u'Detection and Characterization of L\xe9vy flights in chaotic advection phenomena',
 u'year': u'2011'}

#### Add edges

In [9]:
# List the raw author string of every publication.
# print is written as a call so the cell runs on Python 3 as well as Python 2
# (with a single argument the output is the same on both).
for node in bib_database.entries:
    print(node['author'])

Shuman, David I and Ricaud, Benjamin and Vandergheynst, Pierre
Shuman, David I and Ricaud, Benjamin and Vandergheynst, Pierre
Clairet, F and Ricaud, B and Briolle, F and Heuraux, S and Bottereau, C
Cornean, Horia D and Duclos, Pierre and Ricaud, Benjamin
Ricaud, Benjamin and Guillemain, Philippe and Kergomard, Jean and Silva, Fabrice and Vergez, Christophe
Ricaud, Benjamin and Torrésani, Bruno
Cornean, Horia D and Duclos, Pierre and Ricaud, Benjamin
Ricaud, Benjamin and Stempfel, Guillaume and Torrésani, Bruno and Wiesmeyr, Christoph and Lachambre, Hélène and Onchis, Darian
Feichtinger, Hans Georg and Onchis-Moaca, Darian and Ricaud, Benjamin and Torrésani, Bruno and Wiesmeyr, Christoph
Briolle, F and Man’ko, VI and Ricaud, B and Mendes, R Vilela
Ricaud, Benjamin and Shuman, David I and Vandergheynst, Pierre
Cornean, Horia D and Duclos, Pierre and Ricaud, Benjamin
Cornean, Horia D and Pedersen, Thomas G and Ricaud, Benjamin
Ricaud, Benjamin and Torrésani, Bruno
Perraudin, Nathanael and

In [10]:
#G = nx.convert_node_labels_to_integers(G, first_label=1, ordering='default', label_attribute='bib_id')

In [11]:
# Family names of B. Ricaud's co-authors. Each item is later looked up as a
# substring of a publication's author field, so the order of this list fixes
# the order of the feature-vector components.
coauthorslist = [
    'Shuman', 'Vandergheynst', 'Clairet', 'Briolle', 'Heuraux', 'Bottereau',
    'Cornean', 'Duclos', 'Guillemain', 'Kergomard', 'Silva', 'Vergez',
    'Torr', 'Stempfel', 'Wiesmeyr', 'Lachambre', 'Onchis', 'Feichtinger',
    'Man\'ko', 'Mendes', 'Pedersen', 'Perraudin', 'Smith', 'Shahid',
    'Rhodes', 'Starr', 'Ibanez', 'Parra', 'Escudero', 'Benzi',
    'Kalofolias', 'Rasmussen', 'Savoie', 'Leoncini',
]

In [12]:
# Display the co-author list defined in the previous cell.
coauthorslist

['Shuman',
 'Vandergheynst',
 'Clairet',
 'Briolle',
 'Heuraux',
 'Bottereau',
 'Cornean',
 'Duclos',
 'Guillemain',
 'Kergomard',
 'Silva',
 'Vergez',
 'Torr',
 'Stempfel',
 'Wiesmeyr',
 'Lachambre',
 'Onchis',
 'Feichtinger',
 "Man'ko",
 'Mendes',
 'Pedersen',
 'Perraudin',
 'Smith',
 'Shahid',
 'Rhodes',
 'Starr',
 'Ibanez',
 'Parra',
 'Escudero',
 'Benzi',
 'Kalofolias',
 'Rasmussen',
 'Savoie',
 'Leoncini']

In [13]:
# Convert the authors to a feature vector
import string
import numpy as np
# Build one binary feature vector per publication, keyed by entry ID:
# component i is 1 when coauthorslist[i] occurs as a substring of the entry's
# author field and 0 otherwise.
# The `in` operator replaces string.find(), which does not exist in Python 3.
coauthorindexdic = {}
for entries in bib_database.entries:
    # Fold the typographic apostrophe (U+2019) into the ASCII one so that a
    # name such as "Man'ko" matches whichever apostrophe the author field
    # uses. An entry without an author field matches nobody instead of
    # raising KeyError.
    authors = entries.get('author', u'').replace(u'\u2019', u"'")
    coauthorindexdic[entries['ID']] = np.array(
        [int(coauthor in authors) for coauthor in coauthorslist])
        

In [14]:
# Compare the feature vectors of every pair of distinct publications and link
# the pairs that share at least one co-author.
# The weight is the number of authors in common.
from itertools import combinations

# combinations() yields each unordered pair once and never pairs a node with
# itself, so no self-loops are created and no edge is computed twice.
for sourcenode, targetnode in combinations(list(G), 2):
    # The element-wise product of two 0/1 vectors is 1 exactly where a
    # co-author appears in both publications.
    shared = coauthorindexdic[sourcenode] * coauthorindexdic[targetnode]
    # Convert the NumPy scalar to a plain int so the weight can be written
    # out by the json module later on.
    weight = int(np.sum(shared))
    if weight:
        G.add_edge(sourcenode, targetnode, weight=weight)
        

In [15]:
# Optional: draw the graph with a spring layout.
# The drawing is switched off; set the flag to True to render the figure.
SHOW_GRAPH = False
if SHOW_GRAPH:
    import matplotlib.pyplot as plt
    nx.draw_spring(G)
    plt.show()

In [16]:
# Create the central node Benjamin

In [17]:
# Create the central "person" node with the same attribute keys the
# publication nodes use (note / ID / title). Passing them to add_node() avoids
# the `G.node` mapping, which was removed in NetworkX 2.4.
G.add_node('Benjamin', note='Person', ID='Benjamin', title='Benjamin')

# Connect the central node to every publication with unit weight.
for papernode in list(G):
    # Iterating over all nodes also yields 'Benjamin' itself; skip it so the
    # graph does not get a Benjamin-Benjamin self-loop.
    if papernode != 'Benjamin':
        G.add_edge('Benjamin', papernode, weight=1)

### Save the graph

In [18]:
from networkx.readwrite import json_graph
# Convert the graph to node-link form. The `attrs` mapping that used to be
# passed here only restated the default attribute names, and that keyword no
# longer exists in NetworkX 3, so the defaults are used instead.
data = json_graph.node_link_data(G)

# `s` is the compact JSON string (displayed in a later cell); the same data is
# written to disk with indentation.
s = json.dumps(data)
with open('GraphBRicaud.json', 'w') as f:
    json.dump(data, f, indent=4)

In [19]:
# Check the 'ID' attribute stored on the central 'Benjamin' node.
G.node['Benjamin']['ID']

'Benjamin'

In [20]:
# Inspect all attributes stored on one publication node.
G.node['cornean2007contemporary']

{'ENTRYTYPE': u'inproceedings',
 'ID': 'cornean2007contemporary',
 u'author': u'Cornean, Horia D and Pedersen, Thomas G and Ricaud, Benjamin',
 u'booktitle': u'Adventures in Mathematical Physics',
 u'note': u'CPT',
 u'organization': u'American Mathematical Soc.',
 u'pages': u'45',
 u'title': u'Perturbative vs. variational methods in the study of carbon nanotubes',
 u'volume': u'447',
 u'year': u'2007'}

In [21]:
# Display the compact JSON string built with json.dumps(data) in the save cell.
s

'{"directed": false, "graph": {}, "nodes": [{"note": "CPT", "author": "Cornean, Horia D and Pedersen, Thomas G and Ricaud, Benjamin", "ENTRYTYPE": "inproceedings", "booktitle": "Adventures in Mathematical Physics", "title": "Perturbative vs. variational methods in the study of carbon nanotubes", "pages": "45", "volume": "447", "year": "2007", "id": "cornean2007contemporary", "organization": "American Mathematical Soc.", "ID": "cornean2007contemporary"}, {"publisher": "Springer", "note": "LATP", "author": "Ricaud, Benjamin and Stempfel, Guillaume and Torr\\u00e9sani, Bruno and Wiesmeyr, Christoph and Lachambre, H\\u00e9l\\u00e8ne and Onchis, Darian", "ENTRYTYPE": "article", "journal": "Advances in Computational Mathematics", "title": "An optimally concentrated Gabor transform for localized time-frequency components", "number": "3", "pages": "683--702", "volume": "40", "year": "2014", "id": "ricaud2014optimally", "ID": "ricaud2014optimally"}, {"publisher": "Ieee", "note": "LATP", "author