In [1]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv

In [4]:
# Read environment variables from the client's local env file
# (presumably where VITE_API_KEY, used below, is defined — confirm).
load_dotenv(dotenv_path="../client/.env.local")

True

In [20]:
# Search endpoint queried by every request in this notebook.
scanr_url = "https://cluster-production.elasticsearch.dataesr.ovh/scanr-publications/_search"

# The Authorization value is built from the VITE_API_KEY environment variable.
# NOTE: os.getenv returns None when the variable is unset, which yields the
# literal header value "Basic None".
scanr_key = "Basic {}".format(os.getenv("VITE_API_KEY"))
scanr_headers = dict(Authorization=scanr_key)


def scanr_ipcc(agg: str, timeout: float = 60):
    """Aggregate IPCC-labelled publications by the distinct values of a field.

    Posts a search with ``size: 0`` (aggregation results only, no hits),
    restricted to documents whose ``predict_teds.label.keyword`` is ``"ipcc"``,
    and asks for a ``terms`` aggregation named ``agg`` over ``<agg>.keyword``
    limited to 2000 buckets.

    Args:
        agg: Field name. Used as the aggregation name and, with ``.keyword``
            appended, as the field being aggregated.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout exception. Without a timeout a stalled connection would
            block the kernel indefinitely.

    Returns:
        The decoded JSON response on HTTP 200. On any other status the status
        code, reason and response text are printed and ``None`` is returned.
    """
    body = {
        "size": 0,
        "query": {
            "bool": {
                "must": [{"term": {"predict_teds.label.keyword": "ipcc"}}],
            },
        },
        "aggs": {
            agg: {
                "terms": {"field": f"{agg}.keyword", "size": 2000},
            },
        },
    }

    res = requests.post(scanr_url, json=body, headers=scanr_headers, timeout=timeout)
    if res.status_code == 200:
        return res.json()

    # Surface the failure details to the notebook reader; callers get None.
    print(f"error {res.status_code}: {res.reason}")
    print(res.text)
    return None

In [21]:
res = scanr_ipcc("co_authors")
# scanr_ipcc returns None when the response status is not 200; stop here with
# an explicit message instead of failing later with an AttributeError on None.
if res is None:
    raise RuntimeError("scanR request failed; see the error printed above")
# One bucket per distinct co_authors value, each with a "key" and "doc_count".
co_authors = res["aggregations"]["co_authors"]["buckets"]

In [24]:
# Turn each aggregation bucket into a link record.
# A bucket key holds two endpoints separated by "---"; within each endpoint the
# text after "###" is kept (author names in the recorded output — what precedes
# "###" is not used here).
links = []
for bucket in co_authors:
    endpoints = bucket["key"].split("---")
    source_name = endpoints[0].split("###")[1]
    target_name = endpoints[1].split("###")[1]
    links.append(
        dict(source_id=source_name, target_id=target_name, strength=bucket["doc_count"])
    )


links

[{'source_id': 'Matthieu Lengaigne',
  'target_id': 'Jérôme Vialard',
  'strength': 65},
 {'source_id': 'Philippe Ciais',
  'target_id': 'Frédéric Chevallier',
  'strength': 62},
 {'source_id': 'Henri Weimerskirch',
  'target_id': 'Christophe Barbraud',
  'strength': 54},
 {'source_id': 'Yann H. Kerr',
  'target_id': 'Jean-Pierre Wigneron',
  'strength': 54},
 {'source_id': 'Daniel Schertzer',
  'target_id': 'Auguste Gires',
  'strength': 46},
 {'source_id': 'Philippe Ciais', 'target_id': 'Nicolas Viovy', 'strength': 46},
 {'source_id': 'Jean-Claude Dauvin',
  'target_id': 'Jean-Philippe Pezy',
  'strength': 45},
 {'source_id': 'Yann H. Kerr', 'target_id': 'Arnaud Mialon', 'strength': 45},
 {'source_id': 'Cathy Clerbaux',
  'target_id': 'Martin Van Damme',
  'strength': 40},
 {'source_id': 'Jean-Luc Dupuy',
  'target_id': 'François Pimont',
  'strength': 38},
 {'source_id': 'Philippe Ciais', 'target_id': 'Ana Bastos', 'strength': 36},
 {'source_id': 'Serge Hercberg',
  'target_id': 'Em

In [26]:
# Tabulate the links and write them out as a JSON array with one object per row.
df = pd.DataFrame(data=links)
df.to_json(path_or_buf="teds_ipcc_authors.json", orient="records")