In [38]:
import itertools
import os
import sys
from itertools import combinations

import folium
import matplotlib
import matplotlib.pyplot as plt
import mercury
import networkx as nx
import pandas as pd
import requests
from dotenv import load_dotenv
from ipysigma import Sigma
from netgraph import Graph, InteractiveGraph
from pandas import json_normalize
from pyvis.network import Network

In [2]:
# Load app for json display
# Configure the Mercury app (title and static-notebook flag).
mercury.App(title="Display JSON", static_notebook=True)

# Load server environment
# The file read is `server/.env` under the parent directory of `sys.path[1]`.
env_file = os.path.dirname(sys.path[1]) + '/server/.env'
load_dotenv(env_file)

True

In [3]:
# Elastic search info
# Endpoint and token are read from environment variables; each is None when unset.
SCANR_API_URL, SCANR_API_TOKEN = (
    os.environ.get(name) for name in ('SCANR_API_URL', 'SCANR_API_TOKEN')
)
# The token is sent as-is in the Authorization header of the request.
header = dict(Authorization=SCANR_API_TOKEN)

In [4]:
# Query json
# Search body: at most 5000 hits, restricted to the listed `_source` fields and
# filtered on author role, publication year (2018-2023) and affiliation id.
json_query = {
    "size": 5000,
    "_source": [
        "id",
        "authors",
        "domains",
        "title",
        "year",
        "isOa",
        "type",
        "affiliations",
        "keywords",
        "summary",
        "alternativeSummary"
    ],
    "query": {
        "bool": {
            "filter": [
                {"terms": {"authors.role.keyword": ["author", "directeurthese"]}},
                {"range": {"year": {"gte": "2018", "lte": "2023"}}},
                {"terms": {"affiliations.id.keyword": ["196012231"]}},
            ],
        }
    }
}

# Request answer
# The timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() reports an HTTP error here rather than letting an error
# payload flow into the cells that read `json_answer`.
response = requests.post(SCANR_API_URL, json=json_query, headers=header, timeout=60)
response.raise_for_status()
json_answer = response.json()

In [None]:
# Display json
# Render the raw API answer with Mercury's JSON widget (last expression of the cell).
mercury.JSON(json_answer)

In [10]:
# Get publications data
# Each search hit wraps its publication record under the "_source" key.
hits = json_answer.get("hits").get("hits")
works = [hit.get("_source") for hit in hits]
print(f"Number of publications : {len(works)}")

Number of publications : 3510


In [14]:
# Filter publications
# Keep only publications with fewer than `max_affiliations` affiliations.
max_affiliations = 20
# `or []` covers records whose "affiliations" is missing or None; without it
# len() would raise TypeError on such a record.
works_filter = [
    work for work in works
    if len(work.get("affiliations") or []) < max_affiliations
]
print(f"Number of publications filtered : {len(works_filter)}/{len(works)}")

Number of publications filtered : 3405/3510


In [142]:
# Compute nodes (structures)
# One node per affiliation located in France with GPS coordinates. Each node
# stores the ids of the publications it appears on, plus its longitude ("x")
# and latitude ("y").

nodes_dict = {}
for work in works_filter:
    work_id = work.get("id")
    # Affiliations already counted for this work: a structure listed twice on
    # the same publication must not have that publication counted twice.
    counted_ids = set()
    for affiliation in work.get("affiliations") or []:
        affiliation_id = affiliation.get("id")
        # Only the first address is used; "address" may be absent, None or empty.
        addresses = affiliation.get("address") or [{}]
        address = addresses[0] or {}
        country = address.get("country")
        gps = address.get("gps")
        if not (affiliation_id and gps and country == "France"):
            continue
        if affiliation_id in counted_ids:
            continue
        counted_ids.add(affiliation_id)

        if affiliation_id in nodes_dict:
            nodes_dict[affiliation_id]["publications"].append(work_id)
        else:
            # Prefer the English label and fall back to the default one;
            # a missing label yields a None name instead of a crash.
            label = affiliation.get("label") or {}
            nodes_dict[affiliation_id] = {
                "id": affiliation_id,
                "name": label.get("en") or label.get("default"),
                "publications": [work_id],
                "x": gps.get("lon"),
                "y": gps.get("lat"),
            }

nodes = list(nodes_dict.values())
print(f"Number of nodes (structures) found : {len(nodes)}")

Number of nodes (structures) found : 978


In [143]:
# Compute edges (publications)
# Two structures are linked when they share at least one publication; the edge
# weight is the number of publications they have in common.

# Build each node's publication set once instead of once per pair: every node
# takes part in len(nodes) - 1 pairs.
publication_sets = [(node.get("id"), set(node.get("publications"))) for node in nodes]

edges = []
for (source_id, source_pubs), (target_id, target_pubs) in combinations(publication_sets, 2):
    similar_publications = source_pubs & target_pubs
    if similar_publications:
        edges.append({"source": source_id,
                      "target": target_id,
                      "weight": len(similar_publications)})

In [144]:
# Create graph
# Undirected graph: node weight is the node's number of publications, edge
# weight is the value stored on each edge record.
G = nx.Graph()

# Add nodes
G.add_nodes_from(
    (node.get("id"), {"label": node.get("name"), "weight": len(node.get("publications"))})
    for node in nodes
)

# Add edges
G.add_weighted_edges_from(
    (edge.get("source"), edge.get("target"), edge.get("weight")) for edge in edges
)

In [145]:
# Filter graph
# Raise the minimum node weight one step at a time until the graph holds at
# most `max_order` nodes.
max_order = 100
min_weight = 1

while G.order() > max_order:
    min_weight += 1
    heavy_enough = [
        name
        for name, attributes in G.nodes.items()
        if attributes.get("weight") >= min_weight
    ]
    G = G.subgraph(heavy_enough)

print(f"Graph filtered : {len(G.nodes) or 0} \nMinimum number of works required: {min_weight}")

Graph filtered : 96 
Minimum number of works required: 24


In [146]:
# Use sigma widget
# Interactive rendering of the filtered graph:
#   - node size is taken from the node degree,
#   - a "community" metric is computed with the louvain method and used for node color,
#   - node positions come from `nodes_dict`, whose entries carry "x" (longitude)
#     and "y" (latitude),
#   - edges are drawn curved and hidden while the view is being moved.
Sigma(G, node_size=G.degree, 
      node_metrics={"community": "louvain"}, 
      node_color="community",
      node_border_color_from="node",
      layout=nodes_dict,
      default_edge_type="curve",
      hide_edges_on_move=True)

Sigma(nx.Graph with 96 nodes and 2,019 edges)

In [103]:
# Create an empty folium map with default settings.
# folium is imported in the imports cell at the top of the notebook.
m = folium.Map()

In [104]:
# Display the map (last expression of the cell)
m