In [2]:
import requests

In [3]:
url = "https://cluster.elasticsearch.dataesr.ovh/bsso-publications/_search"
headers = { "Authorization": open("../../.env_es_bsso", "r").read().strip() }
AGG_SIZE = 15
LATEST_OBSERVATION_DATE = "2021Q1"

### Sommaire

* [1. Publications](#publications)
    * [1.1. Général](#publi_general)
        * [1.1.1. Quelle est la dynamique d’ouverture de la santé en France ?](#dynamique_ouverture)
        * [1.1.2. Quelles sont les voies d’ouverture choisies pour les publications en santé ?](#voie_ouverture)
        * [1.1.3. Quelles sont les genres les plus ouverts en santé ?](#genre)
        * [1.1.4. Quelles sont les langues des productions ouvertes en santé ?](#langue)
        * [1.1.5. Quel impact le financement a-t-il sur l’ouverture des publications en santé ?](#financement)
        * [1.1.6. Quelle transparence dans la déclaration des conflits d'intérêts en santé ?](#coi)
    * [1.2. Les disciplines](#publi_disciplines)
        * [1.2.1. Quelle est la dynamique d’ouverture de la santé en fonction de ses disciplines ?](#dynamique_ouverture_disciplines)
        * [1.2.2. Quelles sont les voies d’ouverture choisies par les publications en fonction des disciplines ?](#voie_ouverture_disciplines)
    * [1.3. Les éditeurs/plateformes](#publi_editeurs)
        * [1.3.1. Quelle est la dynamique d’ouverture de la santé chez les éditeurs/plateformes ?](#dynamique_ouverture_editeurs)
        * [1.3.2. Quel type d’ouverture est majoritaire chez les éditeurs/plateformes en santé ?](#voie_ouverture_editeurs)
        * [1.3.3. Quelles sont les politiques d’ouverture des éditeurs/plateformes en santé ?](#politique_ouverture_editeurs)
        * [1.3.4. Quel est le poids des revues prédatrices dans la dynamique de science ouverte en santé ?](#predateur)
        * [1.3.5. Quelle est la répartition des licences utilisées chez les éditeurs/plateformes en santé ?](#licence)
        * [1.3.6. Quels sont les coûts des publications chez les éditeurs/plateformes en santé ?](#apc)
    * [1.4. Les archives](#publications_archives)
        * [1.4.1. Quelle est la dynamique d’ouverture de la santé parmi les archives ?](#publications_archives_ouverture)
        * [1.4.2. Quelles archives ouvertes sont les plus utilisées en santé ?](#publications_archives_utilisation)
        * [1.4.3. Quelle est la dynamique de dépôt par archive ouverte en santé ?](#publications_archives_dynamique)
        * [1.4.4. Quelle place occupe HAL dans la dynamique des archives ouvertes en santé ?](#publications_archives_hal)
    * [1.5. Les affiliations](#publications_affiliations)
        * [1.5.1. Taux d’ouverture des publications françaises, dans le domaine de la santé, par millésime tous types d’établissements confondus](#publications_affiliations_ouverture)
        * [1.5.2. Evolution du taux d’ouverture des publications en santé par types d’établissement entre millésimes](#publications_affiliations_evolution)
        * [1.5.3. Quel impact le pays d’affiliation des auteurs a-t-il sur le taux d’ouverture en santé ?](#publications_affiliations_impact)
        * [1.5.4. Classement des 10 pays d'affiliation des auteurs de rang utile avec lesquels la France collabore le plus, selon le taux d’accès ouvert de leurs publications en santé](#publications_affiliations_classement)

# 1. Publications <a class="anchor" id="publications"></a>

## 1.1. Général <a class="anchor" id="publi_general"></a>

### 1.1.1. Quelle est la dynamique d’ouverture de la santé en France ? (barchart + linechart)<a class="anchor" id="dynamique_ouverture"></a>

In [3]:
json = {
  "size": 0,
  "aggs": {
    "by_observation_year": {
      "terms": {
        "field": "oa_details.observation_date.keyword"
      },
      "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": "oa_details.is_oa"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_observation_year']['buckets']

[{'key': '2021Q1',
  'doc_count': 343182,
  'by_publication_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 2020.0,
     'doc_count': 54356,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 32742},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 25466}]}},
    {'key': 2019.0,
     'doc_count': 45324,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 29877},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 24521}]}},
    {'key': 2017.0,
     'doc_count': 44348,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 27875},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 24938}]}},
    {'key': 2018.0,
     'doc_count': 4430

### 1.1.2. Quelles sont les voies d’ouverture choisies pour les publications en santé ? (stacked barchart + stacked area graph + treemap) <a class="anchor" id="voie_ouverture"></a>

In [4]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [{
        "term": {
          "oa_details.observation_date.keyword": LATEST_OBSERVATION_DATE
        }
      }]
    }
  },
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "by_oa_host_type": {
          "terms": {
            "field": "oa_details.oa_host_type.keyword"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54356,
  'by_oa_host_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'closed', 'doc_count': 25466},
    {'key': 'publisher;repository', 'doc_count': 22683},
    {'key': 'publisher', 'doc_count': 7127},
    {'key': 'repository', 'doc_count': 6161}]}},
 {'key': 2019.0,
  'doc_count': 45324,
  'by_oa_host_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'closed', 'doc_count': 24521},
    {'key': 'publisher;repository', 'doc_count': 19281},
    {'key': 'publisher', 'doc_count': 8259},
    {'key': 'repository', 'doc_count': 8059}]}},
 {'key': 2017.0,
  'doc_count': 44348,
  'by_oa_host_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'closed', 'doc_count': 24938},
    {'key': 'publisher;repository', 'doc_count': 17032},
    {'key': 'repository', 'doc_count': 8882},
    {'key': 'publisher', 'doc_count': 6170}]}},
 {'key': 2018

### 1.1.3. Quelles sont les genres les plus ouverts en santé ? <a class="anchor" id="genre"></a>

In [5]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [{
        "term": {
          "oa_details.observation_date.keyword": LATEST_OBSERVATION_DATE
        }
      }]
    }
  },
  "aggs": {
    "by_is_oa": {
      "terms": {
        "field": "oa_details.is_oa"
      },
      "aggs": {
        "by_publication_genre": {
          "terms": {
            "field": "genre.keyword"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_is_oa']['buckets']

[{'key': 1,
  'key_as_string': 'true',
  'doc_count': 207935,
  'by_publication_genre': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'journal-article', 'doc_count': 207138},
    {'key': 'book-chapter', 'doc_count': 716},
    {'key': 'proceedings-article', 'doc_count': 43},
    {'key': 'posted-content', 'doc_count': 22},
    {'key': 'journal-issue', 'doc_count': 8},
    {'key': 'book', 'doc_count': 5},
    {'key': 'reference-entry', 'doc_count': 2},
    {'key': 'report', 'doc_count': 1}]}},
 {'key': 0,
  'key_as_string': 'false',
  'doc_count': 184371,
  'by_publication_genre': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'journal-article', 'doc_count': 181530},
    {'key': 'book-chapter', 'doc_count': 2783},
    {'key': 'proceedings-article', 'doc_count': 36},
    {'key': 'reference-entry', 'doc_count': 10},
    {'key': 'journal-issue', 'doc_count': 6},
    {'key': 'other', 'doc_count': 4},
    {'key': 

### 1.1.4. Quelles sont les langues des productions ouvertes en santé ? <a class="anchor" id="langue"></a>

In [6]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [{
        "term": {
          "oa_details.observation_date.keyword": LATEST_OBSERVATION_DATE
        }
      }]
    }
  },
  "aggs": {
    "by_is_oa": {
      "terms": {
        "field": "oa_details.is_oa"
      },
      "aggs": {
        "by_publication_genre": {
          "terms": {
            "field": "language.keyword"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_is_oa']['buckets']

[{'key': 1,
  'key_as_string': 'true',
  'doc_count': 207935,
  'by_publication_genre': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'en', 'doc_count': 204696},
    {'key': 'fr', 'doc_count': 3104},
    {'key': 'sp', 'doc_count': 53},
    {'key': 'po', 'doc_count': 46},
    {'key': 'ge', 'doc_count': 22},
    {'key': 'tu', 'doc_count': 6},
    {'key': 'ru', 'doc_count': 4},
    {'key': 'gr', 'doc_count': 2},
    {'key': 'hu', 'doc_count': 1},
    {'key': 'ko', 'doc_count': 1}]}},
 {'key': 0,
  'key_as_string': 'false',
  'doc_count': 184371,
  'by_publication_genre': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'en', 'doc_count': 164565},
    {'key': 'fr', 'doc_count': 19682},
    {'key': 'ge', 'doc_count': 65},
    {'key': 'sp', 'doc_count': 42},
    {'key': 'ru', 'doc_count': 9},
    {'key': 'tu', 'doc_count': 4},
    {'key': 'po', 'doc_count': 2},
    {'key': 'gr', 'doc_count': 1},
    {'key': 'hu', 

### 1.1.5. Quel impact le financement a-t-il sur l’ouverture des publications en santé ? <a class="anchor" id="financement"></a>

#### 1.1.5.1. Taux d'ouverture des publications en santé par déclaration de financement par projet

In [4]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [{
        "term": {
          "oa_details.observation_date.keyword": LATEST_OBSERVATION_DATE
        }
      }]
    }
  },
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "by_is_oa": {
          "terms": {
            "field": "oa_details.is_oa"
          }
        }
      },
      "aggs": {
        "by_has_grant": {
          "terms": {
            "field": "has_grant"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": "oa_details.is_oa"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54356,
  'by_has_grant': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0,
     'key_as_string': 'false',
     'doc_count': 45239,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 25895},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 22462}]}},
    {'key': 1,
     'key_as_string': 'true',
     'doc_count': 9117,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 6847},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 3004}]}}]}},
 {'key': 2019.0,
  'doc_count': 45324,
  'by_has_grant': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0,
     'key_as_string': 'false',
     'doc_count': 37799,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_d

#### 1.1.5.2. Répartition par déclaration de financement par projet et par type d’hébergement en santé

In [6]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [{
        "term": {
          "oa_details.observation_date.keyword": LATEST_OBSERVATION_DATE
        }
      }]
    }
  },
  "aggs": {
    "by_is_oa": {
      "terms": {
        "field": "oa_details.is_oa"
      },
      "aggs": {
        "by_oa_host_type": {
          "terms": {
            "field": "oa_details.oa_host_type.keyword"
          },
          "aggs": {
            "by_grant_agency": {
              "terms": {
                "field": "grants.agency.keyword"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_is_oa']['buckets']

[{'key': 1,
  'key_as_string': 'true',
  'doc_count': 207935,
  'by_oa_host_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'publisher;repository',
     'doc_count': 129667,
     'by_grant_agency': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 29462,
      'buckets': [{'key': 'Medical Research Council', 'doc_count': 4143},
       {'key': 'Wellcome Trust', 'doc_count': 2702},
       {'key': 'NCI NIH HHS', 'doc_count': 2337},
       {'key': 'NIGMS NIH HHS', 'doc_count': 2056},
       {'key': 'NIAID NIH HHS', 'doc_count': 1798},
       {'key': 'European Research Council', 'doc_count': 1475},
       {'key': 'NHLBI NIH HHS', 'doc_count': 1431},
       {'key': 'NIDDK NIH HHS', 'doc_count': 1134},
       {'key': 'Biotechnology and Biological Sciences Research Council',
        'doc_count': 1133},
       {'key': 'NINDS NIH HHS', 'doc_count': 1041}]}},
    {'key': 'repository',
     'doc_count': 57763,
     'by_grant_agency': {'doc

### 1.1.6. Quelle transparence dans la déclaration des conflits d'intérêts en santé ? <a class="anchor" id="coi"></a>

In [9]:
json = {
  "query": {
    "bool": {
      "filter": [{
        "term": {
          "oa_details.observation_date.keyword": LATEST_OBSERVATION_DATE
        }
      }]
    }
  },
  "size": 0,
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "by_has_coi": {
          "terms": {
            "field": "has_coi"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54356,
  'by_has_coi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 52871},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 1485}]}},
 {'key': 2019.0,
  'doc_count': 45324,
  'by_has_coi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 44683},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 641}]}},
 {'key': 2017.0,
  'doc_count': 44348,
  'by_has_coi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 43995},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 353}]}},
 {'key': 2018.0,
  'doc_count': 44307,
  'by_has_coi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 43822},
    {'key': 1, 'key_as_string': 'true', 'doc_

## 1.2. Les disciplines <a class="anchor" id="publi_disciplines"></a>

### 1.2.1. Quelle est la dynamique d’ouverture de la santé en fonction de ses disciplines ?  <a class="anchor" id="dynamique_ouverture_disciplines"></a>

In [10]:
json = {
  "size": 0,
  "aggs": {
    "by_discipline": {
      "terms": {
        "field": "bsso_fields.keyword"
      },
      "aggs": {
        "by_observation_year": {
          "terms": {
            "field": "oa_details.observation_date.keyword"
          },
          "aggs": {
            "by_publication_year": {
              "terms": {
                "field": "publication_year"
              },
              "aggs": {
                "by_is_oa": {
                  "terms": {
                    "field": "oa_details.is_oa"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_discipline']['buckets']

[{'key': 'Clinical Sciences',
  'doc_count': 105101,
  'by_observation_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': '2021Q1',
     'doc_count': 105002,
     'by_publication_year': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 2020.0,
        'doc_count': 17577,
        'by_is_oa': {'doc_count_error_upper_bound': 0,
         'sum_other_doc_count': 0,
         'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 9899},
          {'key': 1, 'key_as_string': 'true', 'doc_count': 8669}]}},
       {'key': 2019.0,
        'doc_count': 13710,
        'by_is_oa': {'doc_count_error_upper_bound': 0,
         'sum_other_doc_count': 0,
         'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 9101},
          {'key': 1, 'key_as_string': 'true', 'doc_count': 7406}]}},
       {'key': 2018.0,
        'doc_count': 13324,
        'by_is_oa': {'doc_count_error_upper_bound': 0,
         's

### 1.2.2. Quelles sont les voies d’ouverture choisies par les publications en fonction des disciplines ?  <a class="anchor" id="voie_ouverture_disciplines"></a>

In [11]:
json = {
  "size": 0,
  "aggs": {
    "by_observation_year": {
      "terms": {
        "field": "oa_details.observation_date.keyword"
      },
      "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_oa_host_type": {
              "terms": {
                "field": "oa_details.oa_host_type.keyword"
              },
              "aggs": {
                "by_discipline": {
                  "terms": {
                    "field": "bsso_fields.keyword"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_observation_year']['buckets']

[{'key': '2021Q1',
  'doc_count': 343182,
  'by_publication_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 2020.0,
     'doc_count': 54356,
     'by_oa_host_type': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'closed',
        'doc_count': 25466,
        'by_discipline': {'doc_count_error_upper_bound': 0,
         'sum_other_doc_count': 3696,
         'buckets': [{'key': 'Clinical Sciences', 'doc_count': 9899},
          {'key': 'Multidisciplinary', 'doc_count': 3400},
          {'key': 'Biochemistry and Cell Biology', 'doc_count': 1874},
          {'key': 'Cardiorespiratory Medicine and Haematology',
           'doc_count': 1446},
          {'key': 'Medicinal and Biomolecular Chemistry', 'doc_count': 1361},
          {'key': 'Oncology and Carcinogenesis', 'doc_count': 1032},
          {'key': 'Microbiology', 'doc_count': 923},
          {'key': 'Pharmacology and Pharmaceutical Sciences',
   

## 1.3. Les éditeurs/plateformes <a class="anchor" id="publi_editeurs"></a>

### 1.3.1. Quelle est la dynamique d’ouverture de la santé chez les éditeurs/plateformes ?  <a class="anchor" id="dynamique_ouverture_editeurs"></a>

In [12]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { "oa_details.oa_host_type": "publisher" }}, # ATTENTION pas de .keyword ici pour avoir publisher et publisher;repository
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "by_observation_year": {
      "terms": {
        "field": "oa_details.observation_date.keyword"
      },
      "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": "oa_details.is_oa"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_observation_year']['buckets']

[{'key': '2021Q1',
  'doc_count': 163520,
  'by_publication_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 2020.0,
     'doc_count': 27240,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 27240},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 1584}]}},
    {'key': 2019.0,
     'doc_count': 23408,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 23408},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 4776}]}},
    {'key': 2018.0,
     'doc_count': 22720,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 22720},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 6682}]}},
    {'key': 2017.0,
     'doc_count': 21582,


### 1.3.2. Quel type d’ouverture est majoritaire chez les éditeurs/plateformes en santé ? <a class="anchor" id="voie_ouverture_editeurs"></a>


In [13]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { "oa_details.oa_host_type": "publisher" }}, # ATTENTION pas de .keyword ici pour avoir publisher et publisher;repository
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "by_observation_year": {
      "terms": {
        "field": "oa_details.observation_date.keyword"
      },
      "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_oa_colors": {
              "terms": {
                "field": "oa_details.oa_colors.keyword"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_observation_year']['buckets']

[{'key': '2021Q1',
  'doc_count': 163520,
  'by_publication_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 2020.0,
     'doc_count': 27240,
     'by_oa_colors': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'green', 'doc_count': 22758},
       {'key': 'gold', 'doc_count': 17179},
       {'key': 'hybrid', 'doc_count': 5657},
       {'key': 'bronze', 'doc_count': 4752},
       {'key': 'closed', 'doc_count': 1584}]}},
    {'key': 2019.0,
     'doc_count': 23408,
     'by_oa_colors': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'green', 'doc_count': 19491},
       {'key': 'gold', 'doc_count': 13565},
       {'key': 'bronze', 'doc_count': 6165},
       {'key': 'closed', 'doc_count': 4776},
       {'key': 'hybrid', 'doc_count': 4491}]}},
    {'key': 2018.0,
     'doc_count': 22720,
     'by_oa_colors': {'doc_count_error_upper_bound': 0,
      'sum_othe

### 1.3.3. Quelles sont les politiques d’ouverture des éditeurs/plateformes en santé ? <a class="anchor" id="politique_ouverture_editeurs"></a>

In [8]:
publication_year = 2020 # 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
       { "term": { "publication_year": publication_year }}
      ]
    }
  },
  "aggs": {
    "by_publisher": {
      "terms": {
        "field": "publisher.keyword"
      },
      "aggs": {
        "by_oa_color": {
          "terms": {
            "field": "oa_details.oa_colors.keyword"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publisher']['buckets']

[{'key': 'Elsevier BV',
  'doc_count': 15197,
  'by_oa_color': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'closed', 'doc_count': 10458},
    {'key': 'green', 'doc_count': 4350},
    {'key': 'gold', 'doc_count': 1456},
    {'key': 'hybrid', 'doc_count': 1451},
    {'key': 'bronze', 'doc_count': 1422}]}},
 {'key': 'Springer Science and Business Media LLC',
  'doc_count': 8741,
  'by_oa_color': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'green', 'doc_count': 5819},
    {'key': 'gold', 'doc_count': 4145},
    {'key': 'closed', 'doc_count': 2859},
    {'key': 'hybrid', 'doc_count': 813},
    {'key': 'bronze', 'doc_count': 708}]}},
 {'key': 'Wiley',
  'doc_count': 5738,
  'by_oa_color': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'closed', 'doc_count': 3458},
    {'key': 'green', 'doc_count': 2218},
    {'key': 'hybrid', 'doc_count': 964},
    {'key': 'gold', 'do

### 1.3.4. Quel est le poids des revues prédatrices dans la dynamique de science ouverte en santé ? <a class="anchor" id="predateur"></a>

In [15]:
json = {
  "size": 0,
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "by_predatory": {
          "terms": {
            "field": "predatory_journal"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54356,
  'by_predatory': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 54184},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 172}]}},
 {'key': 2019.0,
  'doc_count': 45324,
  'by_predatory': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 45144},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 180}]}},
 {'key': 2017.0,
  'doc_count': 44348,
  'by_predatory': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 43952},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 396}]}},
 {'key': 2018.0,
  'doc_count': 44307,
  'by_predatory': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 43956},
    {'key': 1, 'key_as_string': 'true'

### 1.3.5. Quelle est la répartition des licences utilisées chez les éditeurs/plateformes en santé ? <a class="anchor" id="licence"></a>


#### 1.3.5.1. Répartition des publications ouvertes en santé par licences utilisées chez les éditeurs/plateformes

In [19]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { "oa_details.oa_host_type": "publisher" }}, # ATTENTION pas de .keyword ici pour avoir publisher et publisher;repository
        { "term": { "publication_year": 2020 }},
        { "term": { "oa_details.observation_date.keyword": LATEST_OBSERVATION_DATE }},
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "by_licence": {
      "terms": {
        "field": "oa_details.licence_publisher.keyword",
        "missing": "N/A"
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_licence']['buckets']

[{'key': 'cc-by', 'doc_count': 16297},
 {'key': 'no license', 'doc_count': 5133},
 {'key': 'cc-by-nc-nd', 'doc_count': 4207},
 {'key': 'cc-by-nc', 'doc_count': 2280},
 {'key': 'publisher-specific', 'doc_count': 187},
 {'key': 'implied-oa', 'doc_count': 109},
 {'key': 'cc-by-nc-sa', 'doc_count': 77},
 {'key': 'cc-by-nd', 'doc_count': 9},
 {'key': 'cc-by-sa', 'doc_count': 3}]

#### 1.3.5.2. Classement des 10 éditeurs/plateformes les plus importants (en nombre de publications en santé) selon le type de licences utilisées

In [20]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { "oa_details.oa_host_type": "publisher" }}, # ATTENTION pas de .keyword ici pour avoir publisher et publisher;repository
        { "term": { "publication_year": 2020 }},
        { "term": { "oa_details.observation_date.keyword": LATEST_OBSERVATION_DATE }}
      ]
    }
  },
  "aggs": {
    "by_publisher": {
      "terms": {
        "field": "publisher.keyword"
      },
      "aggs": {
        "by_licence": {
          "terms": {
            "field": "oa_details.licence_publisher.keyword",
            "missing": "N/A"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publisher']['buckets']

[{'key': 'Springer Science and Business Media LLC',
  'doc_count': 5663,
  'by_licence': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'cc-by', 'doc_count': 4805},
    {'key': 'no license', 'doc_count': 712},
    {'key': 'cc-by-nc', 'doc_count': 134},
    {'key': 'cc-by-nc-nd', 'doc_count': 8},
    {'key': 'publisher-specific', 'doc_count': 4},
    {'key': 'cc-by-nc-sa', 'doc_count': 2}]}},
 {'key': 'Elsevier BV',
  'doc_count': 4253,
  'by_licence': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'cc-by-nc-nd', 'doc_count': 2398},
    {'key': 'no license', 'doc_count': 1431},
    {'key': 'cc-by', 'doc_count': 631},
    {'key': 'cc-by-nc', 'doc_count': 21},
    {'key': 'implied-oa', 'doc_count': 7},
    {'key': 'cc-by-nc-sa', 'doc_count': 2},
    {'key': 'publisher-specific', 'doc_count': 1}]}},
 {'key': 'MDPI AG',
  'doc_count': 4159,
  'by_licence': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_cou

### 1.3.6. Quels sont les coûts des publications chez les éditeurs/plateformes en santé ? <a class="anchor" id="apc"></a>


#### 1.3.6.1. Dépenses estimées pour la communauté scientifique, en APC pour la production francaise en santé

In [18]:
json = {
  "size": 0,
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "apc": {
          "sum": {
            "field": "amount_apc_EUR"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0, 'doc_count': 54356, 'apc': {'value': 46949741.24392891}},
 {'key': 2019.0, 'doc_count': 45324, 'apc': {'value': 62306758.61728668}},
 {'key': 2017.0, 'doc_count': 44348, 'apc': {'value': 72211356.74385071}},
 {'key': 2018.0, 'doc_count': 44307, 'apc': {'value': 69701714.83007431}},
 {'key': 2016.0, 'doc_count': 42385, 'apc': {'value': 63973595.62034178}},
 {'key': 2015.0, 'doc_count': 39370, 'apc': {'value': 59645934.527275085}},
 {'key': 2014.0, 'doc_count': 34699, 'apc': {'value': 44457259.95906067}},
 {'key': 2013.0, 'doc_count': 27667, 'apc': {'value': 20182620.695999146}},
 {'key': 2021.0, 'doc_count': 10288, 'apc': {'value': 6188013.423858643}},
 {'key': 2012.0, 'doc_count': 730, 'apc': {'value': 303922.67276000977}}]

#### 1.3.6.2. Distribution des tarifs unitaires des frais de publication par article dans le domaine de la santé

In [19]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "tarif": {
      "histogram": {
        "field": "amount_apc_EUR",
        "interval": 250
      }
    }
  }
}


requests.post(url, json=json, headers=headers).json()['aggregations']['tarif']['buckets']

[{'key': 0.0, 'doc_count': 746},
 {'key': 250.0, 'doc_count': 2226},
 {'key': 500.0, 'doc_count': 2841},
 {'key': 750.0, 'doc_count': 3607},
 {'key': 1000.0, 'doc_count': 9813},
 {'key': 1250.0, 'doc_count': 19041},
 {'key': 1500.0, 'doc_count': 23320},
 {'key': 1750.0, 'doc_count': 21910},
 {'key': 2000.0, 'doc_count': 21146},
 {'key': 2250.0, 'doc_count': 17825},
 {'key': 2500.0, 'doc_count': 21326},
 {'key': 2750.0, 'doc_count': 13447},
 {'key': 3000.0, 'doc_count': 12672},
 {'key': 3250.0, 'doc_count': 7038},
 {'key': 3500.0, 'doc_count': 5130},
 {'key': 3750.0, 'doc_count': 3282},
 {'key': 4000.0, 'doc_count': 3229},
 {'key': 4250.0, 'doc_count': 2666},
 {'key': 4500.0, 'doc_count': 1663},
 {'key': 4750.0, 'doc_count': 1121},
 {'key': 5000.0, 'doc_count': 858},
 {'key': 5250.0, 'doc_count': 568},
 {'key': 5500.0, 'doc_count': 118},
 {'key': 5750.0, 'doc_count': 11},
 {'key': 6000.0, 'doc_count': 4},
 {'key': 6250.0, 'doc_count': 0},
 {'key': 6500.0, 'doc_count': 1},
 {'key': 6750.

#### 1.3.6.3. Distribution des tarifs unitaires des frais de publication par article, par année de publication, dans la  santé

violin : 1. one request for density

In [20]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "tarif": {
          "histogram": {
            "field": "amount_apc_EUR",
            "interval": 250
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54356,
  'tarif': {'buckets': [{'key': 0.0, 'doc_count': 42},
    {'key': 250.0, 'doc_count': 159},
    {'key': 500.0, 'doc_count': 364},
    {'key': 750.0, 'doc_count': 414},
    {'key': 1000.0, 'doc_count': 659},
    {'key': 1250.0, 'doc_count': 1695},
    {'key': 1500.0, 'doc_count': 5062},
    {'key': 1750.0, 'doc_count': 3252},
    {'key': 2000.0, 'doc_count': 2610},
    {'key': 2250.0, 'doc_count': 1372},
    {'key': 2500.0, 'doc_count': 1127},
    {'key': 2750.0, 'doc_count': 1114},
    {'key': 3000.0, 'doc_count': 704},
    {'key': 3250.0, 'doc_count': 674},
    {'key': 3500.0, 'doc_count': 540},
    {'key': 3750.0, 'doc_count': 500},
    {'key': 4000.0, 'doc_count': 78},
    {'key': 4250.0, 'doc_count': 897},
    {'key': 4500.0, 'doc_count': 145},
    {'key': 4750.0, 'doc_count': 52},
    {'key': 5000.0, 'doc_count': 4},
    {'key': 5250.0, 'doc_count': 40},
    {'key': 5500.0, 'doc_count': 0},
    {'key': 5750.0, 'doc_count': 0},
    {'key': 60

violin : second request for percentiles

In [21]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "tarif_percentiles": {
          "percentiles": {
            "field": "amount_apc_EUR"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54356,
  'tarif_percentiles': {'values': {'1.0': 512.4931969233922,
    '5.0': 1039.4421074007764,
    '25.0': 1622.9219380908946,
    '50.0': 1904.9708958675988,
    '75.0': 2659.4614645640054,
    '95.0': 4392.723665090708,
    '99.0': 4537.6806640625}}},
 {'key': 2019.0,
  'doc_count': 45324,
  'tarif_percentiles': {'values': {'1.0': 506.64468624369306,
    '5.0': 1016.8347309112548,
    '25.0': 1664.2905925814284,
    '50.0': 2155.4761735536904,
    '75.0': 2798.978810104594,
    '95.0': 4139.791015625,
    '99.0': 4938.76220703125}}},
 {'key': 2017.0,
  'doc_count': 44348,
  'tarif_percentiles': {'values': {'1.0': 466.72127131870815,
    '5.0': 1137.868520114818,
    '25.0': 1713.8586382496471,
    '50.0': 2373.178778162223,
    '75.0': 2989.204499535326,
    '95.0': 4025.9164986529604,
    '99.0': 5008.313860150505}}},
 {'key': 2018.0,
  'doc_count': 44307,
  'tarif_percentiles': {'values': {'1.0': 402.9474175589425,
    '5.0': 981.8855662682577,
 

## 1.4. Les archives <a class="anchor" id="publications_archives"></a>

### 1.4.1. Quelle est la dynamique d’ouverture de la santé parmi les archives ? <a class="anchor" id="publications_archives_ouverture"></a>

In [22]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [{
        "term": {
          "oa_details.oa_host_type": "repository"
        }
      }]
    }
  },
  "aggs": {
    "by_observation_year": {
      "terms": {
        "field": "oa_details.observation_date.keyword"
      },
      "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_observation_year']['buckets']

[{'key': '2021Q1',
  'doc_count': 175896,
  'by_publication_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 2020.0, 'doc_count': 28290},
    {'key': 2019.0, 'doc_count': 25961},
    {'key': 2018.0, 'doc_count': 24479},
    {'key': 2017.0, 'doc_count': 23514},
    {'key': 2016.0, 'doc_count': 21938},
    {'key': 2015.0, 'doc_count': 20024},
    {'key': 2014.0, 'doc_count': 16485},
    {'key': 2013.0, 'doc_count': 11542},
    {'key': 2021.0, 'doc_count': 3333},
    {'key': 2012.0, 'doc_count': 330}]}},
 {'key': '2020',
  'doc_count': 166799,
  'by_publication_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 2019.0, 'doc_count': 25958},
    {'key': 2018.0, 'doc_count': 24477},
    {'key': 2017.0, 'doc_count': 23513},
    {'key': 2020.0, 'doc_count': 22473},
    {'key': 2016.0, 'doc_count': 21938},
    {'key': 2015.0, 'doc_count': 20021},
    {'key': 2014.0, 'doc_count': 16484},
    {'key': 2013.0, '

### 1.4.2. Quelles archives ouvertes sont les plus utilisées en santé ? <a class="anchor" id="publications_archives_utilisation"></a>

In [16]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [{
        "term": {
          "oa_details.oa_host_type": "repository"
        }
      }]
    }
  },
  "aggs": {
    "by_observation_year": {
      "terms": {
        "field": "oa_details.observation_date.keyword"
      },
      "aggs": {
        "by_repository": {
          "terms": {
            "field": "oa_details.repositories.keyword",
            "size": AGG_SIZE
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_observation_year']['buckets']

[{'key': '2021Q1',
  'doc_count': 175896,
  'by_repository': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 105922,
   'buckets': [{'key': 'www.ncbi.nlm.nih.gov', 'doc_count': 120265},
    {'key': 'europepmc.org', 'doc_count': 103390},
    {'key': 'pdfs.semanticscholar.org', 'doc_count': 68118},
    {'key': 'HAL', 'doc_count': 55465},
    {'key': 'arxiv.org', 'doc_count': 8274},
    {'key': 'prodinra.inra.fr', 'doc_count': 3184},
    {'key': 'discovery.ucl.ac.uk', 'doc_count': 3001},
    {'key': 'www.biorxiv.org', 'doc_count': 2309},
    {'key': 'www.pure.ed.ac.uk', 'doc_count': 2239},
    {'key': 'www.research.ed.ac.uk', 'doc_count': 2228},
    {'key': 'spiral.imperial.ac.uk', 'doc_count': 2108},
    {'key': 'digital.csic.es', 'doc_count': 1983},
    {'key': 'escholarship.org', 'doc_count': 1835},
    {'key': 'univoak.eu', 'doc_count': 1786},
    {'key': 'eprints.whiterose.ac.uk', 'doc_count': 1749}]}},
 {'key': '2020',
  'doc_count': 166799,
  'by_repository': {'doc_cou

### 1.4.3. Quelle est la dynamique de dépôt par archive ouverte en santé ? <a class="anchor" id="publications_archives_dynamique"></a>

In [17]:
json = {
  "size": 0,
  "aggs": {
    "by_repository": {
      "terms": {
        "field": "oa_details.repositories.keyword",
        "missing": "N/A",
        "size": 12
      },
      "aggs": {
        "by_observation_year": {
          "terms": {
            "field": "oa_details.observation_date.keyword"
          },
          "aggs": {
            "by_publication_year": {
              "terms": {
                "field": "publication_year"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_repository']['buckets']

[{'key': 'N/A',
  'doc_count': 167835,
  'by_observation_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': '2021Q1',
     'doc_count': 167543,
     'by_publication_year': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 2020.0, 'doc_count': 26176},
       {'key': 2017.0, 'doc_count': 20834},
       {'key': 2016.0, 'doc_count': 20447},
       {'key': 2018.0, 'doc_count': 19828},
       {'key': 2019.0, 'doc_count': 19364},
       {'key': 2015.0, 'doc_count': 19346},
       {'key': 2014.0, 'doc_count': 18214},
       {'key': 2013.0, 'doc_count': 16125},
       {'key': 2021.0, 'doc_count': 6809},
       {'key': 2012.0, 'doc_count': 400}]}},
    {'key': '2020',
     'doc_count': 154190,
     'by_publication_year': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 2017.0, 'doc_count': 20830},
       {'key': 2016.0, 'doc_count': 20446},
       {'key': 2018.0, 'doc

### 1.4.4. Quelle place occupe HAL dans la dynamique des archives ouvertes en santé ? <a class="anchor" id="publications_archives_hal"></a>

In [18]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { "oa_details.oa_host_type": "repository" }},
        { "term": { "oa_details.observation_date.keyword": LATEST_OBSERVATION_DATE }}
      ]
    }
  },
  "aggs": {
    "by_discipline": {
      "terms": {
        "field": "oa_details.repositories.keyword",
        "missing": "N/A"
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_discipline']

{'doc_count_error_upper_bound': 0,
 'sum_other_doc_count': 115383,
 'buckets': [{'key': 'www.ncbi.nlm.nih.gov', 'doc_count': 120265},
  {'key': 'europepmc.org', 'doc_count': 103390},
  {'key': 'pdfs.semanticscholar.org', 'doc_count': 68118},
  {'key': 'HAL', 'doc_count': 55465},
  {'key': 'arxiv.org', 'doc_count': 8274},
  {'key': 'prodinra.inra.fr', 'doc_count': 3184},
  {'key': 'discovery.ucl.ac.uk', 'doc_count': 3001},
  {'key': 'www.biorxiv.org', 'doc_count': 2309},
  {'key': 'www.pure.ed.ac.uk', 'doc_count': 2239},
  {'key': 'www.research.ed.ac.uk', 'doc_count': 2228}]}

_Idée : Should post process to group / sum all repositories other than 'HAL'_

## 1.5. Les affiliations <a class="anchor" id="publications_affiliations"></a>

### 1.5.1. Taux d’ouverture des publications françaises, dans le domaine de la santé, par millésime tous types d’établissements confondus <a class="anchor" id="publications_affiliations_ouverture"></a>

In [26]:
filter = "*" # *, university, hospital, cnrs, inserm
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "wildcard": { "french_affiliations_types": filter }}
      ]
    }
  },
  "aggs": {
    "by_observation_year": {
      "terms": {
        "field": "oa_details.observation_date.keyword"
      },
      "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": "oa_details.is_oa"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_observation_year']['buckets']

[{'key': '2021Q1',
  'doc_count': 306855,
  'by_publication_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 2020.0,
     'doc_count': 48983,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 29291},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 23281}]}},
    {'key': 2019.0,
     'doc_count': 40770,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 26893},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 22208}]}},
    {'key': 2017.0,
     'doc_count': 39868,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 25011},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 22597}]}},
    {'key': 2018.0,
     'doc_count': 3981

### 1.5.2. Evolution du taux d’ouverture des publications en santé par types d’établissement entre millésimes <a class="anchor" id="publications_affiliations_evolution"></a>

_Warning: Les données ne sont pas encore prêtes._

### 1.5.3. Quel impact le pays d’affiliation des auteurs a-t-il sur le taux d’ouverture en santé ? <a class="anchor" id="publications_affiliations_impact"></a>

In [27]:
json = {
  "size": 0,
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "by_author_useful_rank_fr": {
          "terms": {
            "field": "author_useful_rank_fr"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": "oa_details.is_oa"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54356,
  'by_author_useful_rank_fr': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1,
     'key_as_string': 'true',
     'doc_count': 38521,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 21761},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 19557}]}},
    {'key': 0,
     'key_as_string': 'false',
     'doc_count': 15835,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 10981},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 5909}]}}]}},
 {'key': 2019.0,
  'doc_count': 45324,
  'by_author_useful_rank_fr': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1,
     'key_as_string': 'true',
     'doc_count': 32228,
     'by_is_oa': {'doc_count_error_upper_boun

### 1.5.4. Classement des 10 pays d'affiliation des auteurs de rang utile avec lesquels la France collabore le plus, selon le taux d’accès ouvert de leurs publications en santé <a class="anchor" id="publications_affiliations_classement"></a>

In [28]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { "author_useful_rank_fr": "true" }}
      ]
    }
  },
  "aggs": {
    "by_country": {
      "terms": {
        "field": "affiliations.countries.keyword",
        "exclude": "FR"
      },
      "aggs": {
        "by_is_oa": {
          "terms": {
            "field": "oa_details.is_oa"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_country']['buckets']

[{'key': 'US',
  'doc_count': 33592,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 23742},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 15493}]}},
 {'key': 'GB',
  'doc_count': 20292,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 15678},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 8011}]}},
 {'key': 'DE',
  'doc_count': 13659,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 10055},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 5829}]}},
 {'key': 'IT',
  'doc_count': 12697,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 8533},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 6252}]