In [1]:
import requests
import cv2
import sys
import warnings

import scholarly as slr
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

from PIL import Image
from io import BytesIO
from networkx.drawing.nx_pydot import write_dot

# warnings.filterwarnings('ignore')

%matplotlib inline

In [17]:
class ImageToFace:
    """Crop a PIL image down to the first face found by an OpenCV Haar cascade.

    Parameters
    ----------
    model_path : str, optional
        Path to a Haar cascade XML file. When omitted, the frontal-face
        cascade is looked up first in the data directory bundled with the
        ``cv2`` package (if this build exposes ``cv2.data.haarcascades``) and
        then in the Homebrew location this notebook originally hard-coded.

    Raises
    ------
    IOError
        If no candidate path yields a usable (non-empty) classifier.
    """

    # Frontal-face cascade; "haarcascade_profileface.xml" is the profile-face variant.
    _CASCADE_FILENAME = "haarcascade_frontalface_default.xml"
    # Location hard-coded by the original notebook, kept as a last-resort fallback.
    _LEGACY_CASCADE_DIR = "/usr/local/Cellar/opencv/2.4.13.2/share/OpenCV/haarcascades"

    def __init__(self, model_path=None):
        import os  # local import: the notebook's import cell does not provide `os`

        if model_path is not None:
            candidates = [model_path]
        else:
            candidates = []
            # Not every OpenCV build ships the `cv2.data` helper module.
            bundled_dir = getattr(getattr(cv2, "data", None), "haarcascades", None)
            if bundled_dir:
                candidates.append(os.path.join(bundled_dir, self._CASCADE_FILENAME))
            candidates.append(self._LEGACY_CASCADE_DIR + "/" + self._CASCADE_FILENAME)

        for path in candidates:
            if not os.path.isfile(path):
                continue
            clf = cv2.CascadeClassifier(path)
            # A classifier built from an unreadable file is silently empty and
            # would only fail later, inside detectMultiScale.
            if not clf.empty():
                self.clf = clf
                return

        raise IOError(
            "Could not load a Haar cascade from any of: " + ", ".join(candidates)
        )

    def get_face(self, img):
        """Return the first detected face as an RGB image.

        Parameters
        ----------
        img : PIL.Image.Image
            Picture to search for a face.

        Returns
        -------
        PIL.Image.Image
            The RGB crop of the first detection, or ``img`` itself (unchanged,
            in its original mode) when the cascade finds no face.
        """
        rgb = img.convert('RGB')
        gray = cv2.cvtColor(np.array(rgb), cv2.COLOR_RGB2GRAY)

        # Detect faces on image
        faces = self.clf.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )

        if len(faces) == 0:
            return img

        # Each detection is an (x, y, width, height) box; only the first is used.
        x, y, w, h = (int(v) for v in faces[0])
        return rgb.crop((x, y, x + w, y + h))
    
# One shared detector instance, so the classifier is created once for the notebook.
imtf = ImageToFace()

# Module-level alias; GoogleNetwork calls this when it stores a node's picture.
get_face = imtf.get_face

In [18]:
class FakeInfo:
    """Placeholder author record exposing the attributes the graph code reads.

    Attributes set on construction:
      * ``name``         -- the name passed in
      * ``id``           -- ``hash(name)``; note that Python 3 salts string
                            hashes per process, so this value is only stable
                            within one interpreter session
      * ``url_picture``  -- site-relative path of the default avatar image
      * ``publications`` -- a fresh empty list
    """

    def __init__(self, name):
        # No publications are known for a placeholder.
        self.publications = []
        self.url_picture = '/citations/images/avatar_scholar_128.jpg'
        self.name, self.id = name, hash(name)

In [None]:
class GoogleNetwork:
    """Build a co-authorship graph from Google Scholar author profiles.

    Author records are looked up through ``scholarly`` and cached on the
    instance, both by the name that was queried and by the canonical name
    reported by the profile.

    Parameters
    ----------
    request_timeout : float, optional
        Timeout in seconds for the profile-picture download (default 10).
    """

    def __init__(self, request_timeout=10):
        self.site_prefix = 'https://scholar.google.com'
        self.id_info = {}  # author id -> filled author record
        self.name_id = {}  # queried / canonical author name -> author id
        self.request_timeout = request_timeout

    def get_info(self, author_name):
        """Return the author record for ``author_name``.

        A cached record is returned when the name is already known. Otherwise
        the first ``scholarly`` search hit is filled and cached. If the lookup
        fails for any reason (including no hit at all), a ``FakeInfo``
        placeholder is returned instead; placeholders are not cached, so a
        later call performs the search again.
        """
        if author_name in self.name_id:
            return self.id_info[self.name_id[author_name]]

        try:
            author_info = next(slr.search_author(author_name)).fill()
            self.id_info[author_info.id] = author_info
            self.name_id[author_name] = author_info.id
            self.name_id[author_info.name] = author_info.id
        except Exception:
            # Best effort by design, but `Exception` (rather than a bare
            # `except:`) lets KeyboardInterrupt / SystemExit stop a long crawl.
            author_info = FakeInfo(author_name)

        return author_info

    def _fetch_face(self, author_info):
        """Download the author's profile picture and crop it with ``get_face``."""
        picture_url = author_info.url_picture
        # Only site-relative paths need the prefix; an already-absolute URL
        # would be corrupted by prepending it.
        if not picture_url.startswith(('http://', 'https://')):
            picture_url = self.site_prefix + picture_url

        response = requests.get(picture_url, timeout=self.request_timeout)
        # Surface HTTP errors here instead of handing an error page to PIL.
        response.raise_for_status()
        return get_face(Image.open(BytesIO(response.content)))

    @staticmethod
    def _coauthor_names(author_info):
        """Collect every author name listed on the author's publications.

        Each publication is filled first; its ``bib['author']`` field is split
        on ``' and '``. Publications without an author field are skipped. The
        result includes the author's own name as it appears in the listings.
        """
        names = set()
        for pub in author_info.publications:
            pub.fill()
            listed = pub.bib.get('author', '')
            names.update(part.strip() for part in listed.split(' and ') if part.strip())
        return names

    def add_node(self, G, author_name):
        """Add ``author_name`` to graph ``G`` and link it to known co-authors.

        The node key is the author id. The node gets an ``'image'`` attribute
        (the cropped profile picture) and a ``'co-authors'`` attribute (set of
        names). An edge is added to every co-author whose id is already a node
        of ``G``. Nothing happens if the author is already in the graph.

        All remote data is gathered before the graph is modified, so a failed
        download or publication fetch does not leave a half-initialised node
        behind.
        """
        author_info = self.get_info(author_name)
        author_id = author_info.id

        if author_id in G:
            return

        face = self._fetch_face(author_info)
        coauthors = self._coauthor_names(author_info)

        # Passing the attributes to add_node avoids the `G.node[...]` accessor,
        # which was removed in networkx 2.4.
        G.add_node(author_id, **{'image': face, 'co-authors': coauthors})

        for coauthor_name in coauthors:
            coauthor_id = self.get_info(coauthor_name).id
            # The author's own name is among the co-authors; without this
            # check the node would get an edge to itself.
            if coauthor_id != author_id and coauthor_id in G:
                G.add_edge(author_id, coauthor_id)

In [None]:
# Undirected graph that GoogleNetwork.add_node fills: nodes are author ids, edges join co-authors.
G = nx.Graph()

In [None]:
# Crawler instance; it keeps the per-session cache of author records.
gn = GoogleNetwork()

In [None]:
# Seed the graph with a first author (performs network requests).
gn.add_node(G, 'Arseny Alexeev')

In [None]:
# Lay out the graph and draw the edges; nodes are rendered as face thumbnails below.
pos = nx.spring_layout(G)
fig = plt.figure(figsize=(7,7))
ax = plt.subplot(111)
ax.set_aspect('equal')
nx.draw_networkx_edges(G, pos, ax=ax)

# Drawing edges alone does not guarantee that the data limits cover nodes
# without edges (e.g. a graph with a single node), so extend the limits to
# every node position before the data transform is used.
if pos:
    ax.update_datalim(np.array(list(pos.values())))
    ax.autoscale_view()

plt.axis('off')

trans = ax.transData.transform                 # data coords -> display (pixel) coords
trans2 = fig.transFigure.inverted().transform  # display coords -> figure-fraction coords

# Version-independent attribute lookup: `G.node[...]` was removed in networkx 2.4.
node_images = nx.get_node_attributes(G, 'image')

piesize = 0.1 # thumbnail width/height as a fraction of the figure
p2 = piesize / 2.0
for n in G:
    xx, yy = trans(pos[n]) # display coordinates
    xa, ya = trans2((xx, yy)) # figure-fraction coordinates
    # Small axes centred on the node position, holding the node's picture.
    a = plt.axes([xa - p2, ya - p2, piesize, piesize])
    a.set_aspect('equal')
    a.imshow(node_images[n])
    a.axis('off')