In [1]:
import requests

In [2]:
url = "https://cluster.elasticsearch.dataesr.ovh/bso-publications/_search"
headers = { "Authorization": open("../../.env_es_bsso", "r").read().strip() }
AGG_SIZE = 15
LATEST_OBSERVATION_DATE = "2021Q1"

In [3]:
json = {
  "size": 0,
  "aggs" : {
    "observation_dates" : {
      "terms" : { "field": "observation_dates.keyword" }
    }
  }
}
requests.post(url, json=json, headers=headers).json()["aggregations"]["observation_dates"]["buckets"]

[{'key': '2021Q2', 'doc_count': 1299496},
 {'key': '2021Q1', 'doc_count': 1276571},
 {'key': '2020', 'doc_count': 1208194},
 {'key': '2019', 'doc_count': 1027464},
 {'key': '2018', 'doc_count': 813320}]

### Sommaire

* [1. Publications](#publications)
    * [1.1. Général](#publi_general)
        * [1.1.1. Quelle est la dynamique d’ouverture de la santé en France ?](#dynamique_ouverture)
        * [1.1.2. Quelles sont les voies d’ouverture choisies pour les publications en santé ?](#voie_ouverture)
        * [1.1.3. Quelles sont les genres les plus ouverts en santé ?](#genre)
        * [1.1.4. Quelles sont les langues des productions ouvertes en santé ?](#langue)
        * [1.1.5. Quel impact le financement a-t-il sur l’ouverture des publications en santé ?](#financement)
        * [1.1.6. Quelle transparence dans la déclaration des conflits d'intérêts en santé ?](#coi)
    * [1.2. Les disciplines](#publi_disciplines)
        * [1.2.1. Quelle est la dynamique d’ouverture de la santé en fonction de ses disciplines ?](#dynamique_ouverture_disciplines)
        * [1.2.2. Quelles sont les voies d’ouverture choisies par les publications en fonction des disciplines ?](#voie_ouverture_disciplines)
    * [1.3. Les éditeurs/plateformes](#publi_editeurs)
        * [1.3.1. Quelle est la dynamique d’ouverture de la santé chez les éditeurs/plateformes ?](#dynamique_ouverture_editeurs)
        * [1.3.2. Quel type d’ouverture est majoritaire chez les éditeurs/plateformes en santé ?](#voie_ouverture_editeurs)
        * [1.3.3. Quelles sont les politiques d’ouverture des éditeurs/plateformes en santé ?](#politique_ouverture_editeurs)
        * [1.3.4. Quel est le poids des revues prédatrices dans la dynamique de science ouverte en santé ?](#predateur)
        * [1.3.5. Quelle est la répartition des licences utilisées chez les éditeurs/plateformes en santé ?](#licence)
        * [1.3.6. Quels sont les coûts des publications chez les éditeurs/plateformes en santé ?](#apc)
    * [1.4. Les archives](#publications_archives)
        * [1.4.1. Quelle est la dynamique d’ouverture de la santé parmi les archives ?](#publications_archives_ouverture)
        * [1.4.2. Quelles archives ouvertes sont les plus utilisées en santé ?](#publications_archives_utilisation)
        * [1.4.3. Quelle est la dynamique de dépôt par archive ouverte en santé ?](#publications_archives_dynamique)
        * [1.4.4. Quelle place occupe HAL dans la dynamique des archives ouvertes en santé ?](#publications_archives_hal)
    * [1.5. Les affiliations](#publications_affiliations)
        * [1.5.1. Taux d’ouverture des publications françaises, dans le domaine de la santé, par millésime tous types d’établissements confondus](#publications_affiliations_ouverture)
        * [1.5.2. Evolution du taux d’ouverture des publications en santé par types d’établissement entre millésimes](#publications_affiliations_evolution)
        * [1.5.3. Quel impact le pays d’affiliation des auteurs a-t-il sur le taux d’ouverture en santé ?](#publications_affiliations_impact)
        * [1.5.4. Classement des 10 pays d'affiliation des auteurs de rang utile avec lesquels la France collabore le plus, selon le taux d’accès ouvert de leurs publications en santé](#publications_affiliations_classement)

# 1. Publications <a class="anchor" id="publications"></a>

## 1.1. Général <a class="anchor" id="publi_general"></a>

### 1.1.1. Quelle est la dynamique d’ouverture de la santé en France ? (barchart + linechart)<a class="anchor" id="dynamique_ouverture"></a>

In [10]:
# Autant de requêtes que de observation date sont à faire
# exemple pour la dernière observation date
json = {
  "size": 0,
  "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": f"oa_details.2021Q1.is_oa"
              }
            }
          }
    }
  }
}

results = requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']
results

[{'key': 2020.0,
  'doc_count': 54356,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 32296},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 22060}]}},
 {'key': 2019.0,
  'doc_count': 45324,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 28839},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 16485}]}},
 {'key': 2017.0,
  'doc_count': 44348,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 26787},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 17561}]}},
 {'key': 2018.0,
  'doc_count': 44307,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 27657},
    {'key': 0, 'key_as_string': 'false', 'doc_cou

pour le 1er graphique (barre), pour chaque date d'observation, on calcule le ratio d'ouverture pour la publication_date de l'année précédente.
Donc, pour observation_date = 2021Q1, on calcule le ratio d'ouvertue de la publication_date 2020 : nb de oa true / total

In [23]:
tmp = [result for result in results if result['key'] == 2020][0]['by_is_oa']['buckets']
score_true = [result for result in tmp if result['key'] == 1][0]['doc_count']
score_false = [result for result in tmp if result['key'] == 0][0]['doc_count']
total = score_true + score_false
response = score_true / total * 100
response

59.415703878136725

### 1.1.2. Quelles sont les voies d’ouverture choisies pour les publications en santé ? (stacked barchart + stacked area graph + treemap) <a class="anchor" id="voie_ouverture"></a>

In [4]:
json = {
  "size": 0,
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "by_oa_host_type": {
          "terms": {
            "field": f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type.keyword",
            "missing": "N/A"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54356,
  'by_oa_host_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'publisher;repository', 'doc_count': 22423},
    {'key': 'closed', 'doc_count': 22060},
    {'key': 'repository', 'doc_count': 5816},
    {'key': 'publisher', 'doc_count': 4057}]}},
 {'key': 2019.0,
  'doc_count': 45324,
  'by_oa_host_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'publisher;repository', 'doc_count': 18962},
    {'key': 'closed', 'doc_count': 16485},
    {'key': 'repository', 'doc_count': 6636},
    {'key': 'publisher', 'doc_count': 3241}]}},
 {'key': 2017.0,
  'doc_count': 44348,
  'by_oa_host_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'closed', 'doc_count': 17561},
    {'key': 'publisher;repository', 'doc_count': 16409},
    {'key': 'repository', 'doc_count': 6215},
    {'key': 'publisher', 'doc_count': 4163}]}},
 {'key': 2018

### 1.1.3. Quelles sont les genres les plus ouverts en santé ? <a class="anchor" id="genre"></a>

In [24]:
json = {
  "size": 0,
  "aggs": {
    "by_is_oa": {
      "terms": {
        "field": f"oa_details.{LATEST_OBSERVATION_DATE}.is_oa"
      },
      "aggs": {
        "by_publication_genre": {
          "terms": {
            "field": "genre.keyword"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_is_oa']['buckets']

[{'key': 1,
  'key_as_string': 'true',
  'doc_count': 60941,
  'by_publication_genre': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'journal-article', 'doc_count': 60657},
    {'key': 'book-chapter', 'doc_count': 255},
    {'key': 'posted-content', 'doc_count': 21},
    {'key': 'proceedings-article', 'doc_count': 7},
    {'key': 'book', 'doc_count': 1}]}},
 {'key': 0,
  'key_as_string': 'false',
  'doc_count': 39767,
  'by_publication_genre': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'journal-article', 'doc_count': 39169},
    {'key': 'book-chapter', 'doc_count': 595},
    {'key': 'other', 'doc_count': 3}]}}]

### 1.1.4. Quelles sont les langues des productions ouvertes en santé ? <a class="anchor" id="langue"></a>

In [25]:
json = {
  "size": 0,
  "aggs": {
    "by_is_oa": {
      "terms": {
        "field": f"oa_details.{LATEST_OBSERVATION_DATE}.is_oa"
      },
      "aggs": {
        "by_publication_genre": {
          "terms": {
            "field": "language.keyword"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_is_oa']['buckets']

[{'key': 1,
  'key_as_string': 'true',
  'doc_count': 61570,
  'by_publication_genre': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'en', 'doc_count': 60519},
    {'key': 'fr', 'doc_count': 1009},
    {'key': 'sp', 'doc_count': 16},
    {'key': 'po', 'doc_count': 13},
    {'key': 'ge', 'doc_count': 10},
    {'key': 'ru', 'doc_count': 2},
    {'key': 'hu', 'doc_count': 1}]}},
 {'key': 0,
  'key_as_string': 'false',
  'doc_count': 40138,
  'by_publication_genre': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'en', 'doc_count': 36070},
    {'key': 'fr', 'doc_count': 4048},
    {'key': 'ge', 'doc_count': 12},
    {'key': 'ru', 'doc_count': 4},
    {'key': 'sp', 'doc_count': 4}]}}]


### 1.1.5. Quel impact le financement a-t-il sur l’ouverture des publications en santé ? <a class="anchor" id="financement"></a>

#### 1.1.5.1. Taux d'ouverture des publications en santé par déclaration de financement par projet

In [13]:
filter = "*" # Medical Research Council, NCI NIH HHS, Wellcome Trust, NIGMS NIH HHS ...

json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { "domains.keyword": "health" }},
        { "match": { "has_grant": "true" }},
        { "wildcard": { "grants.agency.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "by_is_oa": {
          "terms": {
            "field": f"oa_details.{LATEST_OBSERVATION_DATE}.is_oa"
          }
        },
        "by_has_grant": {
          "terms": {
            "field": "has_grant"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": f"oa_details.{LATEST_OBSERVATION_DATE}.is_oa"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020,
  'doc_count': 8496,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 6384},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 2098}]},
  'by_has_grant': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1,
     'key_as_string': 'true',
     'doc_count': 8496,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 6384},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 2098}]}}]}},
 {'key': 2019,
  'doc_count': 7153,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 5912},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 1234}]},
  'by_has_grant': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1,
     '

Pour retrouver la liste des agences :

In [9]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": { "term": { "domains.keyword": "health" }},
      "must": { "match": { "has_grant": "true" }}
    }
  },
  "aggs": {
    "by_agency": {
      "terms": {
        "field": "grants.agency.keyword"
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_agency']['buckets']

[{'key': 'Medical Research Council', 'doc_count': 4912},
 {'key': 'NCI NIH HHS', 'doc_count': 3744},
 {'key': 'Wellcome Trust', 'doc_count': 3061},
 {'key': 'NIGMS NIH HHS', 'doc_count': 2879},
 {'key': 'NIAID NIH HHS', 'doc_count': 2298},
 {'key': 'European Research Council', 'doc_count': 2182},
 {'key': 'NHLBI NIH HHS', 'doc_count': 2115},
 {'key': 'Agence Nationale de la Recherche', 'doc_count': 2105},
 {'key': 'NINDS NIH HHS', 'doc_count': 1587},
 {'key': 'NIDDK NIH HHS', 'doc_count': 1569}]

#### 1.1.5.2. Répartition par déclaration de financement par projet et par type d’hébergement en santé

In [27]:
json = {
  "size": 0,
  "aggs": {
    "by_is_oa": {
      "terms": {
        "field": f"oa_details.{LATEST_OBSERVATION_DATE}.is_oa"
      },
      "aggs": {
        "by_oa_host_type": {
          "terms": {
            "field": f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type.keyword"
          },
          "aggs": {
            "by_grant_agency": {
              "terms": {
                "field": "grants.agency.keyword"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_is_oa']['buckets']

[{'key': 1,
  'key_as_string': 'true',
  'doc_count': 77388,
  'by_oa_host_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'publisher;repository',
     'doc_count': 51524,
     'by_grant_agency': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 18207,
      'buckets': [{'key': 'Medical Research Council', 'doc_count': 1509},
       {'key': 'Wellcome Trust', 'doc_count': 991},
       {'key': 'Agence Nationale de la Recherche', 'doc_count': 944},
       {'key': 'NCI NIH HHS', 'doc_count': 697},
       {'key': 'NIGMS NIH HHS', 'doc_count': 654},
       {'key': 'NIAID NIH HHS', 'doc_count': 529},
       {'key': 'NHLBI NIH HHS', 'doc_count': 438},
       {'key': 'Department of Health', 'doc_count': 391},
       {'key': 'NIH HHS', 'doc_count': 377},
       {'key': 'Biotechnology and Biological Sciences Research Council',
        'doc_count': 357}]}},
    {'key': 'repository',
     'doc_count': 15176,
     'by_grant_agency': {'doc_co

### 1.1.6. Quelle transparence dans la déclaration des conflits d'intérêts en santé ? <a class="anchor" id="coi"></a>

In [28]:
json = {
  "size": 0,
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "by_has_coi": {
          "terms": {
            "field": "has_coi"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54271,
  'by_has_coi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 52786},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 1485}]}},
 {'key': 2019.0,
  'doc_count': 45218,
  'by_has_coi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 44577},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 641}]}},
 {'key': 2018.0,
  'doc_count': 21494,
  'by_has_coi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 21263},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 231}]}},
 {'key': 2021.0,
  'doc_count': 10275,
  'by_has_coi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 9616},
    {'key': 1, 'key_as_string': 'true', 'doc_c

## 1.2. Les disciplines <a class="anchor" id="publi_disciplines"></a>

### 1.2.1. Quelle est la dynamique d’ouverture de la santé en fonction de ses disciplines ?  <a class="anchor" id="dynamique_ouverture_disciplines"></a>

In [30]:
# pour chaque observation date

json = {
  "size": 0,
  "aggs": {
    "by_discipline": {
      "terms": {
        "field": "bsso_fields.keyword"
      },
      "aggs": {
        "by_observation_year": {
          "terms": {
            "field": f"oa_details.{LATEST_OBSERVATION_DATE}.observation_date.keyword"
          },
          "aggs": {
            "by_publication_year": {
              "terms": {
                "field": "publication_year"
              },
              "aggs": {
                "by_is_oa": {
                  "terms": {
                    "field": f"oa_details.{LATEST_OBSERVATION_DATE}.is_oa"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_discipline']['buckets']

[{'key': 'Clinical Sciences',
  'doc_count': 43878,
  'by_observation_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': []}},
 {'key': 'Multidisciplinary',
  'doc_count': 26898,
  'by_observation_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': []}},
 {'key': 'Biochemistry and Cell Biology',
  'doc_count': 12658,
  'by_observation_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': []}},
 {'key': 'Cardiorespiratory Medicine and Haematology',
  'doc_count': 7060,
  'by_observation_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': []}},
 {'key': 'Microbiology',
  'doc_count': 7048,
  'by_observation_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': []}},
 {'key': 'Medicinal and Biomolecular Chemistry',
  'doc_count': 6589,
  'by_observation_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'bu

### 1.2.2. Quelles sont les voies d’ouverture choisies par les publications en fonction des disciplines ?  <a class="anchor" id="voie_ouverture_disciplines"></a>

In [31]:
json = {
  "size": 0,
      "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_oa_host_type": {
              "terms": {
                "field": f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type.keyword"
              },
              "aggs": {
                "by_discipline": {
                  "terms": {
                    "field": "bsso_fields.keyword"
                  }
                }
              }
            }
          }
        }
      }
    }

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54299,
  'by_oa_host_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'publisher;repository',
     'doc_count': 22410,
     'by_discipline': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 2981,
      'buckets': [{'key': 'Clinical Sciences', 'doc_count': 5649},
       {'key': 'Multidisciplinary', 'doc_count': 5413},
       {'key': 'Biochemistry and Cell Biology', 'doc_count': 2365},
       {'key': 'Microbiology', 'doc_count': 1404},
       {'key': 'Genetics', 'doc_count': 1094},
       {'key': 'Cardiorespiratory Medicine and Haematology', 'doc_count': 926},
       {'key': 'Oncology and Carcinogenesis', 'doc_count': 837},
       {'key': 'Medicinal and Biomolecular Chemistry', 'doc_count': 793},
       {'key': 'Immunology', 'doc_count': 650},
       {'key': 'Public Health and Health Services', 'doc_count': 613}]}},
    {'key': 'closed',
     'doc_count': 22034,
     'by_discipline': {'doc_count_e

## 1.3. Les éditeurs/plateformes <a class="anchor" id="publi_editeurs"></a>

### 1.3.1. Quelle est la dynamique d’ouverture de la santé chez les éditeurs/plateformes ?  <a class="anchor" id="dynamique_ouverture_editeurs"></a>

In [34]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type": "publisher" }}, # ATTENTION pas de .keyword ici pour avoir publisher et publisher;repository
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": f"oa_details.{LATEST_OBSERVATION_DATE}.is_oa"
              }
            }
          }
        }
      }
    }

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 26468,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 26468}]}},
 {'key': 2019.0,
  'doc_count': 22153,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 22153}]}},
 {'key': 2018.0,
  'doc_count': 21317,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 21317}]}},
 {'key': 2021.0,
  'doc_count': 5091,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 5091}]}},
 {'key': 2017.0,
  'doc_count': 691,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 691}]}},
 {'key': 2016.0,
  'doc_count': 22,
  'by_is_o

### 1.3.2. Quel type d’ouverture est majoritaire chez les éditeurs/plateformes en santé ? <a class="anchor" id="voie_ouverture_editeurs"></a>


In [36]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type": "publisher" }}, # ATTENTION pas de .keyword ici pour avoir publisher et publisher;repository
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
      "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_oa_colors": {
              "terms": {
                "field": f"oa_details.{LATEST_OBSERVATION_DATE}.oa_colors.keyword"
              }
            }
          }
        }
      }
    }

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 26468,
  'by_oa_colors': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'green', 'doc_count': 22382},
    {'key': 'gold', 'doc_count': 17121},
    {'key': 'hybrid', 'doc_count': 5401},
    {'key': 'bronze', 'doc_count': 3946}]}},
 {'key': 2019.0,
  'doc_count': 22153,
  'by_oa_colors': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'green', 'doc_count': 18948},
    {'key': 'gold', 'doc_count': 13413},
    {'key': 'bronze', 'doc_count': 4641},
    {'key': 'hybrid', 'doc_count': 4099}]}},
 {'key': 2018.0,
  'doc_count': 21317,
  'by_oa_colors': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'green', 'doc_count': 17597},
    {'key': 'gold', 'doc_count': 11868},
    {'key': 'bronze', 'doc_count': 6021},
    {'key': 'hybrid', 'doc_count': 3428}]}},
 {'key': 2021.0,
  'doc_count': 5091,
  'by_oa_colors': {'doc_count_error_upper_bound': 0,
   

### 1.3.3. Quelles sont les politiques d’ouverture des éditeurs/plateformes en santé ? <a class="anchor" id="politique_ouverture_editeurs"></a>

In [37]:
publication_year = 2020 # 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
       { "term": { "publication_year": publication_year }}
      ]
    }
  },
  "aggs": {
    "by_publisher": {
      "terms": {
        "field": "publisher.keyword"
      },
      "aggs": {
        "by_oa_color": {
          "terms": {
            "field": f"oa_details.{LATEST_OBSERVATION_DATE}.oa_colors.keyword"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publisher']['buckets']

[{'key': 'Elsevier BV',
  'doc_count': 15190,
  'by_oa_color': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'closed', 'doc_count': 9415},
    {'key': 'green', 'doc_count': 4328},
    {'key': 'gold', 'doc_count': 1440},
    {'key': 'hybrid', 'doc_count': 1398},
    {'key': 'bronze', 'doc_count': 1097}]}},
 {'key': 'Springer Science and Business Media LLC',
  'doc_count': 8741,
  'by_oa_color': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'green', 'doc_count': 5817},
    {'key': 'gold', 'doc_count': 4142},
    {'key': 'closed', 'doc_count': 2547},
    {'key': 'hybrid', 'doc_count': 812},
    {'key': 'bronze', 'doc_count': 667}]}},
 {'key': 'Wiley',
  'doc_count': 5730,
  'by_oa_color': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'closed', 'doc_count': 2955},
    {'key': 'green', 'doc_count': 2210},
    {'key': 'hybrid', 'doc_count': 927},
    {'key': 'gold', 'doc

### 1.3.4. Quel est le poids des revues prédatrices dans la dynamique de science ouverte en santé ? <a class="anchor" id="predateur"></a>

In [38]:
json = {
  "size": 0,
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "by_predatory": {
          "terms": {
            "field": "predatory_journal"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54302,
  'by_predatory': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 54130},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 172}]}},
 {'key': 2019.0,
  'doc_count': 45251,
  'by_predatory': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 45071},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 180}]}},
 {'key': 2018.0,
  'doc_count': 43823,
  'by_predatory': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 43476},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 347}]}},
 {'key': 2021.0,
  'doc_count': 10276,
  'by_predatory': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 10230},
    {'key': 1, 'key_as_string': 'true'

### 1.3.5. Quelle est la répartition des licences utilisées chez les éditeurs/plateformes en santé ? <a class="anchor" id="licence"></a>


#### 1.3.5.1. Répartition des publications ouvertes en santé par licences utilisées chez les éditeurs/plateformes

In [39]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type": "publisher" }}, # ATTENTION pas de .keyword ici pour avoir publisher et publisher;repository
        { "term": { "publication_year": 2020 }},
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "by_licence": {
      "terms": {
        "field": f"oa_details.{LATEST_OBSERVATION_DATE}.licence_publisher.keyword",
        "missing": "N/A"
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_licence']['buckets']

[{'key': 'cc-by', 'doc_count': 15874},
 {'key': 'no license', 'doc_count': 4191},
 {'key': 'cc-by-nc-nd', 'doc_count': 4064},
 {'key': 'cc-by-nc', 'doc_count': 2008},
 {'key': 'publisher-specific', 'doc_count': 177},
 {'key': 'implied-oa', 'doc_count': 75},
 {'key': 'cc-by-nc-sa', 'doc_count': 73},
 {'key': 'cc-by-nd', 'doc_count': 4},
 {'key': 'cc-by-sa', 'doc_count': 2}]

#### 1.3.5.2. Classement des 10 éditeurs/plateformes les plus importants (en nombre de publications en santé) selon le type de licences utilisées

In [40]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type": "publisher" }}, # ATTENTION pas de .keyword ici pour avoir publisher et publisher;repository
        { "term": { "publication_year": 2020 }}
      ]
    }
  },
  "aggs": {
    "by_publisher": {
      "terms": {
        "field": "publisher.keyword"
      },
      "aggs": {
        "by_licence": {
          "terms": {
            "field": f"oa_details.{LATEST_OBSERVATION_DATE}.licence_publisher.keyword",
            "missing": "N/A"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publisher']['buckets']

[{'key': 'Springer Science and Business Media LLC',
  'doc_count': 5621,
  'by_licence': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'cc-by', 'doc_count': 4803},
    {'key': 'no license', 'doc_count': 671},
    {'key': 'cc-by-nc', 'doc_count': 133},
    {'key': 'cc-by-nc-nd', 'doc_count': 8},
    {'key': 'publisher-specific', 'doc_count': 4},
    {'key': 'cc-by-nc-sa', 'doc_count': 2}]}},
 {'key': 'MDPI AG',
  'doc_count': 4158,
  'by_licence': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'cc-by', 'doc_count': 4156},
    {'key': 'no license', 'doc_count': 2}]}},
 {'key': 'Elsevier BV',
  'doc_count': 3935,
  'by_licence': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'cc-by-nc-nd', 'doc_count': 2287},
    {'key': 'no license', 'doc_count': 1103},
    {'key': 'cc-by', 'doc_count': 537},
    {'key': 'implied-oa', 'doc_count': 7},
    {'key': 'cc-by-nc-sa', 'doc_co

### 1.3.6. Quels sont les coûts des publications chez les éditeurs/plateformes en santé ? <a class="anchor" id="apc"></a>


#### 1.3.6.1. Dépenses estimées pour la communauté scientifique, en APC pour la production francaise en santé

In [41]:
json = {
  "size": 0,
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "apc": {
          "sum": {
            "field": "amount_apc_EUR"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0, 'doc_count': 54302, 'apc': {'value': 46912077.394807816}},
 {'key': 2019.0, 'doc_count': 45251, 'apc': {'value': 62282369.934425354}},
 {'key': 2018.0, 'doc_count': 43823, 'apc': {'value': 68827064.79177475}},
 {'key': 2021.0, 'doc_count': 10276, 'apc': {'value': 6182940.7159729}},
 {'key': 2017.0, 'doc_count': 1305, 'apc': {'value': 2109575.7909088135}},
 {'key': 2016.0, 'doc_count': 38, 'apc': {'value': 43556.57635498047}},
 {'key': 2015.0, 'doc_count': 3, 'apc': {'value': 3415.805908203125}},
 {'key': 2013.0, 'doc_count': 1, 'apc': {'value': 0.0}},
 {'key': 2014.0, 'doc_count': 1, 'apc': {'value': 0.0}}]

#### 1.3.6.2. Distribution des tarifs unitaires des frais de publication par article dans le domaine de la santé

In [42]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "tarif": {
      "histogram": {
        "field": "amount_apc_EUR",
        "interval": 250
      }
    }
  }
}


requests.post(url, json=json, headers=headers).json()['aggregations']['tarif']['buckets']

[{'key': 0.0, 'doc_count': 245},
 {'key': 250.0, 'doc_count': 723},
 {'key': 500.0, 'doc_count': 1501},
 {'key': 750.0, 'doc_count': 1455},
 {'key': 1000.0, 'doc_count': 2695},
 {'key': 1250.0, 'doc_count': 6611},
 {'key': 1500.0, 'doc_count': 13760},
 {'key': 1750.0, 'doc_count': 9455},
 {'key': 2000.0, 'doc_count': 9096},
 {'key': 2250.0, 'doc_count': 7067},
 {'key': 2500.0, 'doc_count': 6916},
 {'key': 2750.0, 'doc_count': 5446},
 {'key': 3000.0, 'doc_count': 4556},
 {'key': 3250.0, 'doc_count': 2766},
 {'key': 3500.0, 'doc_count': 2370},
 {'key': 3750.0, 'doc_count': 1492},
 {'key': 4000.0, 'doc_count': 1757},
 {'key': 4250.0, 'doc_count': 1588},
 {'key': 4500.0, 'doc_count': 642},
 {'key': 4750.0, 'doc_count': 509},
 {'key': 5000.0, 'doc_count': 341},
 {'key': 5250.0, 'doc_count': 283},
 {'key': 5500.0, 'doc_count': 56},
 {'key': 5750.0, 'doc_count': 4},
 {'key': 6000.0, 'doc_count': 2},
 {'key': 6250.0, 'doc_count': 0},
 {'key': 6500.0, 'doc_count': 1},
 {'key': 6750.0, 'doc_coun

#### 1.3.6.3. Distribution des tarifs unitaires des frais de publication par article, par année de publication, dans la  santé

violin : 1. one request for density

In [43]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "tarif": {
          "histogram": {
            "field": "amount_apc_EUR",
            "interval": 250
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54302,
  'tarif': {'buckets': [{'key': 0.0, 'doc_count': 42},
    {'key': 250.0, 'doc_count': 159},
    {'key': 500.0, 'doc_count': 364},
    {'key': 750.0, 'doc_count': 414},
    {'key': 1000.0, 'doc_count': 658},
    {'key': 1250.0, 'doc_count': 1694},
    {'key': 1500.0, 'doc_count': 5061},
    {'key': 1750.0, 'doc_count': 3250},
    {'key': 2000.0, 'doc_count': 2610},
    {'key': 2250.0, 'doc_count': 1365},
    {'key': 2500.0, 'doc_count': 1125},
    {'key': 2750.0, 'doc_count': 1114},
    {'key': 3000.0, 'doc_count': 704},
    {'key': 3250.0, 'doc_count': 674},
    {'key': 3500.0, 'doc_count': 539},
    {'key': 3750.0, 'doc_count': 499},
    {'key': 4000.0, 'doc_count': 78},
    {'key': 4250.0, 'doc_count': 897},
    {'key': 4500.0, 'doc_count': 145},
    {'key': 4750.0, 'doc_count': 52},
    {'key': 5000.0, 'doc_count': 4},
    {'key': 5250.0, 'doc_count': 40},
    {'key': 5500.0, 'doc_count': 0},
    {'key': 5750.0, 'doc_count': 0},
    {'key': 60

violin : second request for percentiles

In [44]:
filter = "*" # *, Elsevier BV, Springer Science and Business Media LLC, Wiley...
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "wildcard": { "publisher.keyword": filter }}
      ]
    }
  },
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "tarif_percentiles": {
          "percentiles": {
            "field": "amount_apc_EUR"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54302,
  'tarif_percentiles': {'values': {'1.0': 510.8770614746094,
    '5.0': 1038.800048828125,
    '25.0': 1623.1806565165289,
    '50.0': 1904.512995376615,
    '75.0': 2659.80976425974,
    '95.0': 4393.192173339844,
    '99.0': 4537.6806640625}}},
 {'key': 2019.0,
  'doc_count': 45251,
  'tarif_percentiles': {'values': {'1.0': 507.1207171122233,
    '5.0': 1016.8193654939497,
    '25.0': 1664.0688107512717,
    '50.0': 2154.255425721498,
    '75.0': 2799.4540843521404,
    '95.0': 4139.791015625,
    '99.0': 4938.76220703125}}},
 {'key': 2018.0,
  'doc_count': 43823,
  'tarif_percentiles': {'values': {'1.0': 398.63225753219047,
    '5.0': 982.2545824435167,
    '25.0': 1654.7897715871097,
    '50.0': 2337.501273092544,
    '75.0': 2987.9762242555566,
    '95.0': 4099.7890625,
    '99.0': 5147.755371093751}}},
 {'key': 2021.0,
  'doc_count': 10276,
  'tarif_percentiles': {'values': {'1.0': 500.0,
    '5.0': 1213.35498046875,
    '25.0': 1704.6048296

## 1.4. Les archives <a class="anchor" id="publications_archives"></a>

### 1.4.1. Quelle est la dynamique d’ouverture de la santé parmi les archives ? <a class="anchor" id="publications_archives_ouverture"></a>

In [53]:
# pour chaque observation year

# pour le calcul, il faut faire le ration (repository + "publisher;repository") / (doc_count)

json = {
  "size": 0,

 
      "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type.keyword"
              }
            }
          }
        }
      }
    }


requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54302,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'publisher;repository', 'doc_count': 22412},
    {'key': 'closed', 'doc_count': 22034},
    {'key': 'repository', 'doc_count': 5800},
    {'key': 'publisher', 'doc_count': 4056}]}},
 {'key': 2019.0,
  'doc_count': 45251,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'publisher;repository', 'doc_count': 18949},
    {'key': 'closed', 'doc_count': 16474},
    {'key': 'repository', 'doc_count': 6624},
    {'key': 'publisher', 'doc_count': 3204}]}},
 {'key': 2018.0,
  'doc_count': 43823,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'publisher;repository', 'doc_count': 17597},
    {'key': 'closed', 'doc_count': 16461},
    {'key': 'repository', 'doc_count': 6045},
    {'key': 'publisher', 'doc_count': 3720}]}},
 {'key': 2021.0,
  'doc_count': 10

### 1.4.2. Quelles archives ouvertes sont les plus utilisées en santé ? <a class="anchor" id="publications_archives_utilisation"></a>

In [61]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [{
        "term": {
          f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type": "repository"
        }
      }]
    }
  },

      "aggs": {
        "by_repository": {
          "terms": {
            "field": f"oa_details.{LATEST_OBSERVATION_DATE}.repositories.keyword",
            "size": AGG_SIZE
          }
        }
      }
    }

requests.post(url, json=json, headers=headers).json()['aggregations']['by_repository']['buckets']

[{'key': 'www.ncbi.nlm.nih.gov', 'doc_count': 20281},
 {'key': 'HAL', 'doc_count': 7310},
 {'key': 'europepmc.org', 'doc_count': 6158},
 {'key': 'arxiv.org', 'doc_count': 1244},
 {'key': 'www.biorxiv.org', 'doc_count': 697},
 {'key': 'discovery.ucl.ac.uk', 'doc_count': 411},
 {'key': 'www.researchsquare.com', 'doc_count': 399},
 {'key': 'www.pure.ed.ac.uk', 'doc_count': 233},
 {'key': 'www.research.ed.ac.uk', 'doc_count': 229},
 {'key': 'escholarship.org', 'doc_count': 216},
 {'key': 'dipot.ulb.ac.be', 'doc_count': 204},
 {'key': 'www.repository.cam.ac.uk', 'doc_count': 195},
 {'key': 'www.zora.uzh.ch', 'doc_count': 195},
 {'key': 'helda.helsinki.fi', 'doc_count': 192},
 {'key': 'repub.eur.nl', 'doc_count': 192}]

### 1.4.3. Quelle est la dynamique de dépôt par archive ouverte en santé ? <a class="anchor" id="publications_archives_dynamique"></a>

In [62]:
json = {
  "size": 0,
      "query": {
    "bool": {
      "filter": [{
        "term": {
          f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type": "repository"
        }
      }]
    }
  },
  "aggs": {
    "by_repository": {
      "terms": {
        "field": f"oa_details.{LATEST_OBSERVATION_DATE}.repositories.keyword",
        "missing": "N/A",
        "size": 12
      },
    
          "aggs": {
            "by_publication_year": {
              "terms": {
                "field": "publication_year"
              }
            }
          }
        }
      }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_repository']['buckets']

[{'key': 'www.ncbi.nlm.nih.gov',
  'doc_count': 21048,
  'by_publication_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 2020.0, 'doc_count': 18279},
    {'key': 2021.0, 'doc_count': 2557},
    {'key': 2019.0, 'doc_count': 194},
    {'key': 2018.0, 'doc_count': 13},
    {'key': 2016.0, 'doc_count': 4},
    {'key': 2014.0, 'doc_count': 1}]}},
 {'key': 'HAL',
  'doc_count': 7726,
  'by_publication_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 2020.0, 'doc_count': 7169},
    {'key': 2021.0, 'doc_count': 500},
    {'key': 2019.0, 'doc_count': 50},
    {'key': 2018.0, 'doc_count': 6},
    {'key': 2014.0, 'doc_count': 1}]}},
 {'key': 'europepmc.org',
  'doc_count': 6852,
  'by_publication_year': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 2020.0, 'doc_count': 6707},
    {'key': 2019.0, 'doc_count': 130},
    {'key': 2018.0, 'doc_count': 11},
    {'key': 201

### 1.4.4. Quelle place occupe HAL dans la dynamique des archives ouvertes en santé ? <a class="anchor" id="publications_archives_hal"></a>

In [63]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { f"oa_details.{LATEST_OBSERVATION_DATE}.oa_host_type": "repository" }}
      ]
    }
  },
  "aggs": {
    "by_discipline": {
      "terms": {
        "field": f"oa_details.{LATEST_OBSERVATION_DATE}.repositories.keyword",
        "missing": "N/A"
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_discipline']

{'doc_count_error_upper_bound': 0,
 'sum_other_doc_count': 13783,
 'buckets': [{'key': 'www.ncbi.nlm.nih.gov', 'doc_count': 24948},
  {'key': 'europepmc.org', 'doc_count': 10340},
  {'key': 'HAL', 'doc_count': 9953},
  {'key': 'arxiv.org', 'doc_count': 1521},
  {'key': 'www.biorxiv.org', 'doc_count': 844},
  {'key': 'discovery.ucl.ac.uk', 'doc_count': 532},
  {'key': 'www.researchsquare.com', 'doc_count': 456},
  {'key': 'www.pure.ed.ac.uk', 'doc_count': 326},
  {'key': 'www.research.ed.ac.uk', 'doc_count': 320},
  {'key': 'escholarship.org', 'doc_count': 290}]}

_Idée : Should post process to group / sum all repositories other than 'HAL'_

## 1.5. Les affiliations <a class="anchor" id="publications_affiliations"></a>

### 1.5.1. Taux d’ouverture des publications françaises, dans le domaine de la santé, par millésime tous types d’établissements confondus <a class="anchor" id="publications_affiliations_ouverture"></a>

In [65]:
filter = "*" # *, university, hospital, cnrs, inserm
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "wildcard": { "french_affiliations_types": filter }}
      ]
    }
  },
      "aggs": {
        "by_publication_year": {
          "terms": {
            "field": "publication_year"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": f"oa_details.{LATEST_OBSERVATION_DATE}.is_oa"
              }
            }
          }
        }
      }
    }

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 48869,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 28832},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 20037}]}},
 {'key': 2019.0,
  'doc_count': 30416,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 19377},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 11039}]}},
 {'key': 2021.0,
  'doc_count': 9236,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 4860},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 4121}]}},
 {'key': 2018.0,
  'doc_count': 231,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 185},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 46

### 1.5.2. Evolution du taux d’ouverture des publications en santé par types d’établissement entre millésimes <a class="anchor" id="publications_affiliations_evolution"></a>

_Warning: Les données ne sont pas encore prêtes._

### 1.5.3. Quel impact le pays d’affiliation des auteurs a-t-il sur le taux d’ouverture en santé ? <a class="anchor" id="publications_affiliations_impact"></a>

In [66]:
json = {
  "size": 0,
  "aggs": {
    "by_publication_year": {
      "terms": {
        "field": "publication_year"
      },
      "aggs": {
        "by_author_useful_rank_fr": {
          "terms": {
            "field": "author_useful_rank_fr"
          },
          "aggs": {
            "by_is_oa": {
              "terms": {
                "field": f"oa_details.{LATEST_OBSERVATION_DATE}.is_oa"
              }
            }
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_publication_year']['buckets']

[{'key': 2020.0,
  'doc_count': 54229,
  'by_author_useful_rank_fr': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1,
     'key_as_string': 'true',
     'doc_count': 38431,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 21391},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 17040}]}},
    {'key': 0,
     'key_as_string': 'false',
     'doc_count': 15798,
     'by_is_oa': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 10838},
       {'key': 0, 'key_as_string': 'false', 'doc_count': 4960}]}}]}},
 {'key': 2019.0,
  'doc_count': 34328,
  'by_author_useful_rank_fr': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1,
     'key_as_string': 'true',
     'doc_count': 24324,
     'by_is_oa': {'doc_count_error_upper_boun

### 1.5.4. Classement des 10 pays d'affiliation des auteurs de rang utile avec lesquels la France collabore le plus, selon le taux d’accès ouvert de leurs publications en santé <a class="anchor" id="publications_affiliations_classement"></a>

In [67]:
json = {
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "term": { "author_useful_rank_fr": "true" }}
      ]
    }
  },
  "aggs": {
    "by_country": {
      "terms": {
        "field": "affiliations.countries.keyword",
        "exclude": "FR"
      },
      "aggs": {
        "by_is_oa": {
          "terms": {
            "field": f"oa_details.{LATEST_OBSERVATION_DATE}.is_oa"
          }
        }
      }
    }
  }
}

requests.post(url, json=json, headers=headers).json()['aggregations']['by_country']['buckets']

[{'key': 'US',
  'doc_count': 10099,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 6557},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 3510}]}},
 {'key': 'GB',
  'doc_count': 6119,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 4392},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 1709}]}},
 {'key': 'DE',
  'doc_count': 4288,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 3051},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 1227}]}},
 {'key': 'IT',
  'doc_count': 4171,
  'by_is_oa': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 2593},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 1569}]}},
 {'