In [1]:
import requests

In [2]:
# Elasticsearch `_search` endpoint of the clinical-trials index; every query
# cell in this notebook posts its request body to this URL.
url = "https://cluster.elasticsearch.dataesr.ovh/bso-clinical-trials/_search"

# The Authorization header value is read from a local file so the credential
# is not written in the notebook. The context manager closes the file handle
# as soon as the value is read instead of leaving it open until garbage
# collection.
with open("../../.env_es_bsso", "r") as _credentials_file:
    headers = { "Authorization": _credentials_file.read().strip() }

# Bucket count for aggregations.
# NOTE(review): not referenced by any cell visible here — confirm whether the
# terms aggregations were meant to pass it as their `size`.
AGG_SIZE = 15

### Sommaire

* [2. Essais cliniques](#essais)
    * [2.1. Général](#essais_general)
        * [2.1.1. Quelle est la dynamique d'ouverture des essais ?](#dynamique_ouverture)
        * [2.1.2. Quelles sont les trajectoires des essais ?](#trajectoires)
    * [2.2. Caractéristiques](#carac)
        * [2.2.1. Quand a eu lieu la déclaration ?](#declaration)
        * [2.2.2. Combien de temps durent les essais ?](#temps)
        * [2.2.3. Combien de patients ?](#patients)
        * [2.2.4. Quels types d'essais ?](#types)
    * [2.3. Promoteurs](#promoteurs)
        * [2.3.1. Quelle est la dynamique d'ouverture des essais par promoteur ?](#dynamique_promoteur)
        * [2.3.2. Impact pays ?](#pays)
    * [2.4. Résultats](#resultats)
        * [2.4.1. Type de diffusion ?](#diffusion)
        * [2.4.2. Plan de partage ?](#ipd)
        * [2.4.3. Quel délai de diffusion ?](#diffusion_delai)
        * [2.4.4. Publications?](#publication)

# 2. Essais cliniques <a class="anchor" id="essais"></a>

## 2.1. Général <a class="anchor" id="essais_general"></a>

### 2.1.1. Quelle est la dynamique d’ouverture des essais ? <a class="anchor" id="dynamique_ouverture"></a>

In [49]:

# Interventional trials bucketed by start year, then by whether results or
# publications exist, then by lead-sponsor type.
# NOTE: none of the terms aggregations sets `size`, so Elasticsearch's default
# bucket limit applies at every level.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "by_has_result": {
                    "terms": {"field": "has_results_or_publications"},
                    "aggs": {
                        "by_sponsor_type": {
                            "terms": {"field": "lead_sponsor_type.keyword"},
                        },
                    },
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'by_has_result': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0,
     'key_as_string': 'false',
     'doc_count': 1543,
     'by_sponsor_type': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'academique', 'doc_count': 841},
       {'key': 'industriel', 'doc_count': 702}]}},
    {'key': 1,
     'key_as_string': 'true',
     'doc_count': 23,
     'by_sponsor_type': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'industriel', 'doc_count': 22},
       {'key': 'academique', 'doc_count': 1}]}}]}},
 {'key': 2018.0,
  'doc_count': 1512,
  'by_has_result': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0,
     'key_as_string': 'false',
     'doc_count': 1425,
     'by_sponsor_type': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'academiqu

### 2.1.2. Quelles sont les trajectoires des essais ? <a class="anchor" id="trajectoires"></a>

In [13]:
# Trial trajectories: interventional trials bucketed by status, then by
# has_results, has_publications_result and has_publication_oa.
# NOTE(review): the original note said the has_publications_result /
# has_publication_oa aggregation was missing; both levels are present in the
# request below, so that note looks outdated — TODO confirm.
# `"missing": False` counts documents lacking the field in the `false` bucket.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_status": {
            "terms": {"field": "status.keyword"},
            "aggs": {
                "by_has_results": {
                    "terms": {"field": "has_results", "missing": False},
                    "aggs": {
                        "by_has_publications_result": {
                            "terms": {
                                "field": "has_publications_result",
                                "missing": False,
                            },
                            "aggs": {
                                "by_has_publication_oa": {
                                    "terms": {
                                        "field": "has_publication_oa",
                                        "missing": False,
                                    },
                                },
                            },
                        },
                    },
                },
            },
        },
    },
}

# The whole response body is displayed; append
# ["aggregations"]["by_status"]["buckets"] to show only the buckets.
requests.post(url, json=json, headers=headers).json()

{'took': 73,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 10000, 'relation': 'gte'},
  'max_score': None,
  'hits': []},
 'aggregations': {'by_status': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 133,
   'buckets': [{'key': 'Completed',
     'doc_count': 10884,
     'by_has_results': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 0,
        'key_as_string': 'false',
        'doc_count': 7239,
        'by_has_publications_result': {'doc_count_error_upper_bound': 0,
         'sum_other_doc_count': 0,
         'buckets': [{'key': 0,
           'key_as_string': 'false',
           'doc_count': 7239,
           'by_has_publication_oa': {'doc_count_error_upper_bound': 0,
            'sum_other_doc_count': 0,
            'buckets': [{'key': 0,
              'key_as_string': 'false',
              'doc_count': 7113},
             {'key': 1, 'key_as_string': 

## 2.2. Caractéristiques <a class="anchor" id="carac"></a>

### 2.2.1. Quand a eu lieu la déclaration? <a class="anchor" id="declaration"></a>

Barres

In [14]:

# Bar-chart data: interventional trials per start year, split by the
# `submission_temporality` value of each trial.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "by_submission_temporality": {
                    "terms": {"field": "submission_temporality.keyword"},
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'by_submission_temporality': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'before_start', 'doc_count': 1315},
    {'key': 'during_study', 'doc_count': 236},
    {'key': 'after_completion', 'doc_count': 15}]}},
 {'key': 2018.0,
  'doc_count': 1512,
  'by_submission_temporality': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'before_start', 'doc_count': 1201},
    {'key': 'during_study', 'doc_count': 288},
    {'key': 'after_completion', 'doc_count': 23}]}},
 {'key': 2015.0,
  'doc_count': 1470,
  'by_submission_temporality': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'before_start', 'doc_count': 890},
    {'key': 'during_study', 'doc_count': 520},
    {'key': 'after_completion', 'doc_count': 38}]}},
 {'key': 2020.0,
  'doc_count': 1443,
  'by_submission_temporality': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_cou

histo

In [19]:

# Histogram of `delay_submission_start` over all interventional trials.
# The 30.5 bucket width presumably stands for one month in days — TODO confirm
# the unit of the field.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "delay_submission_start": {
            "histogram": {
                "field": "delay_submission_start",
                "interval": 30.5,
            },
        },
    },
}

# The histogram buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["delay_submission_start"]["buckets"]
)

[{'key': -7503.0, 'doc_count': 2},
 {'key': -7472.5, 'doc_count': 0},
 {'key': -7442.0, 'doc_count': 0},
 {'key': -7411.5, 'doc_count': 0},
 {'key': -7381.0, 'doc_count': 0},
 {'key': -7350.5, 'doc_count': 0},
 {'key': -7320.0, 'doc_count': 0},
 {'key': -7289.5, 'doc_count': 0},
 {'key': -7259.0, 'doc_count': 0},
 {'key': -7228.5, 'doc_count': 0},
 {'key': -7198.0, 'doc_count': 0},
 {'key': -7167.5, 'doc_count': 0},
 {'key': -7137.0, 'doc_count': 0},
 {'key': -7106.5, 'doc_count': 0},
 {'key': -7076.0, 'doc_count': 0},
 {'key': -7045.5, 'doc_count': 0},
 {'key': -7015.0, 'doc_count': 0},
 {'key': -6984.5, 'doc_count': 0},
 {'key': -6954.0, 'doc_count': 0},
 {'key': -6923.5, 'doc_count': 0},
 {'key': -6893.0, 'doc_count': 0},
 {'key': -6862.5, 'doc_count': 1},
 {'key': -6832.0, 'doc_count': 0},
 {'key': -6801.5, 'doc_count': 0},
 {'key': -6771.0, 'doc_count': 0},
 {'key': -6740.5, 'doc_count': 0},
 {'key': -6710.0, 'doc_count': 0},
 {'key': -6679.5, 'doc_count': 0},
 {'key': -6649.0, 'd

distribution - requete 1

In [23]:

# Distribution, request 1: one `delay_submission_start` histogram per start
# year, using the same 30.5 bucket width as the global histogram.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "delay_submission_start": {
                    "histogram": {
                        "field": "delay_submission_start",
                        "interval": 30.5,
                    },
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'delay_submission_start': {'buckets': [{'key': -793.0, 'doc_count': 1},
    {'key': -762.5, 'doc_count': 0},
    {'key': -732.0, 'doc_count': 1},
    {'key': -701.5, 'doc_count': 1},
    {'key': -671.0, 'doc_count': 1},
    {'key': -640.5, 'doc_count': 0},
    {'key': -610.0, 'doc_count': 2},
    {'key': -579.5, 'doc_count': 1},
    {'key': -549.0, 'doc_count': 1},
    {'key': -518.5, 'doc_count': 1},
    {'key': -488.0, 'doc_count': 2},
    {'key': -457.5, 'doc_count': 3},
    {'key': -427.0, 'doc_count': 4},
    {'key': -396.5, 'doc_count': 0},
    {'key': -366.0, 'doc_count': 2},
    {'key': -335.5, 'doc_count': 8},
    {'key': -305.0, 'doc_count': 7},
    {'key': -274.5, 'doc_count': 6},
    {'key': -244.0, 'doc_count': 6},
    {'key': -213.5, 'doc_count': 11},
    {'key': -183.0, 'doc_count': 11},
    {'key': -152.5, 'doc_count': 11},
    {'key': -122.0, 'doc_count': 16},
    {'key': -91.5, 'doc_count': 19},
    {'key': -61.0, 'doc_count': 3

distribution - requete 2

In [24]:

# Distribution, request 2: percentiles of `delay_submission_start` per start
# year. No `percents` list is given, so the aggregation's default percentiles
# are returned.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "delay_submission_start_perc": {
                    "percentiles": {"field": "delay_submission_start"},
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'delay_submission_start_perc': {'values': {'1.0': -414.59999999999997,
    '5.0': -133.99999999999994,
    '25.0': 21.0,
    '50.0': 77.81818181818181,
    '75.0': 172.28571428571428,
    '95.0': 492.39999999999964,
    '99.0': 912.3199999999981}}},
 {'key': 2018.0,
  'doc_count': 1512,
  'delay_submission_start_perc': {'values': {'1.0': -629.1199999999999,
    '5.0': -203.0,
    '25.0': 8.0,
    '50.0': 61.2,
    '75.0': 143.94285714285715,
    '95.0': 428.89999999999986,
    '99.0': 776.7599999999998}}},
 {'key': 2015.0,
  'doc_count': 1470,
  'delay_submission_start_perc': {'values': {'1.0': -1012.8,
    '5.0': -506.0,
    '25.0': -35.956043956043956,
    '50.0': 22.31578947368421,
    '75.0': 96.2909090909091,
    '95.0': 265.0,
    '99.0': 465.59999999999945}}},
 {'key': 2020.0,
  'doc_count': 1443,
  'delay_submission_start_perc': {'values': {'1.0': -223.20999999999998,
    '5.0': -83.0625,
    '25.0': 13.528070175438595,
    '50.0': 69.351

### 2.2.2. Combien de temps durent les essais? <a class="anchor" id="temps"></a>

In [27]:

# Trial duration: histogram of `delay_start_completion`, restricted to
# interventional trials whose status is "Completed".
# The 365 bucket width presumably stands for one year in days — TODO confirm.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
                {"term": {"status.keyword": "Completed"}},
            ],
        },
    },
    "aggs": {
        "delay_start_completion": {
            "histogram": {
                "field": "delay_start_completion",
                "interval": 365,
            },
        },
    },
}

# The histogram buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["delay_start_completion"]["buckets"]
)

[{'key': -365.0, 'doc_count': 1},
 {'key': 0.0, 'doc_count': 1308},
 {'key': 365.0, 'doc_count': 2010},
 {'key': 730.0, 'doc_count': 1971},
 {'key': 1095.0, 'doc_count': 1640},
 {'key': 1460.0, 'doc_count': 1073},
 {'key': 1825.0, 'doc_count': 662},
 {'key': 2190.0, 'doc_count': 433},
 {'key': 2555.0, 'doc_count': 276},
 {'key': 2920.0, 'doc_count': 167},
 {'key': 3285.0, 'doc_count': 98},
 {'key': 3650.0, 'doc_count': 53},
 {'key': 4015.0, 'doc_count': 32},
 {'key': 4380.0, 'doc_count': 22},
 {'key': 4745.0, 'doc_count': 23},
 {'key': 5110.0, 'doc_count': 6},
 {'key': 5475.0, 'doc_count': 6},
 {'key': 5840.0, 'doc_count': 2},
 {'key': 6205.0, 'doc_count': 2},
 {'key': 6570.0, 'doc_count': 4},
 {'key': 6935.0, 'doc_count': 0},
 {'key': 7300.0, 'doc_count': 0},
 {'key': 7665.0, 'doc_count': 0},
 {'key': 8030.0, 'doc_count': 1}]

### 2.2.3. Combien de patients? <a class="anchor" id="patients"></a>

In [35]:

# Enrollment size: histogram of `enrollment_count` over interventional trials,
# in buckets of width 50.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "enrollment": {
            "histogram": {
                "field": "enrollment_count",
                "interval": 50,
            },
        },
    },
}

# The histogram buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["enrollment"]["buckets"]
)

[{'key': 0.0, 'doc_count': 5776},
 {'key': 50.0, 'doc_count': 3776},
 {'key': 100.0, 'doc_count': 2420},
 {'key': 150.0, 'doc_count': 1362},
 {'key': 200.0, 'doc_count': 1165},
 {'key': 250.0, 'doc_count': 703},
 {'key': 300.0, 'doc_count': 741},
 {'key': 350.0, 'doc_count': 444},
 {'key': 400.0, 'doc_count': 483},
 {'key': 450.0, 'doc_count': 378},
 {'key': 500.0, 'doc_count': 360},
 {'key': 550.0, 'doc_count': 236},
 {'key': 600.0, 'doc_count': 293},
 {'key': 650.0, 'doc_count': 176},
 {'key': 700.0, 'doc_count': 201},
 {'key': 750.0, 'doc_count': 162},
 {'key': 800.0, 'doc_count': 185},
 {'key': 850.0, 'doc_count': 111},
 {'key': 900.0, 'doc_count': 124},
 {'key': 950.0, 'doc_count': 89},
 {'key': 1000.0, 'doc_count': 147},
 {'key': 1050.0, 'doc_count': 70},
 {'key': 1100.0, 'doc_count': 49},
 {'key': 1150.0, 'doc_count': 49},
 {'key': 1200.0, 'doc_count': 82},
 {'key': 1250.0, 'doc_count': 51},
 {'key': 1300.0, 'doc_count': 31},
 {'key': 1350.0, 'doc_count': 25},
 {'key': 1400.0, '

graphe 2

In [36]:

# Interventional trials per start year, split by design allocation.
# Documents without a `design_allocation` value are counted under "N/A".
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "by_design_allocation": {
                    "terms": {
                        "field": "design_allocation.keyword",
                        "missing": "N/A",
                    },
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'by_design_allocation': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'Randomized', 'doc_count': 823},
    {'key': 'N/A', 'doc_count': 521},
    {'key': 'Non-Randomized', 'doc_count': 222}]}},
 {'key': 2018.0,
  'doc_count': 1512,
  'by_design_allocation': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'Randomized', 'doc_count': 795},
    {'key': 'N/A', 'doc_count': 511},
    {'key': 'Non-Randomized', 'doc_count': 206}]}},
 {'key': 2015.0,
  'doc_count': 1470,
  'by_design_allocation': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'Randomized', 'doc_count': 766},
    {'key': 'N/A', 'doc_count': 498},
    {'key': 'Non-Randomized', 'doc_count': 206}]}},
 {'key': 2020.0,
  'doc_count': 1443,
  'by_design_allocation': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'Randomized', 'doc_count': 

### 2.2.4. Types d'essais? <a class="anchor" id="types"></a>

In [37]:

# Interventional trials per start year, split by intervention type.
# Documents without an `intervention_type` value are counted under "N/A".
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "by_intervention_type": {
                    "terms": {
                        "field": "intervention_type.keyword",
                        "missing": "N/A",
                    },
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'by_intervention_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'N/A', 'doc_count': 1334},
    {'key': 'Other', 'doc_count': 124},
    {'key': 'Biological', 'doc_count': 51},
    {'key': 'Radiation', 'doc_count': 18},
    {'key': 'Device', 'doc_count': 17},
    {'key': 'Drug', 'doc_count': 9},
    {'key': 'Combination Product', 'doc_count': 7},
    {'key': 'Behavioral', 'doc_count': 2},
    {'key': 'Genetic', 'doc_count': 2},
    {'key': 'Procedure', 'doc_count': 2}]}},
 {'key': 2018.0,
  'doc_count': 1512,
  'by_intervention_type': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'N/A', 'doc_count': 1296},
    {'key': 'Other', 'doc_count': 114},
    {'key': 'Biological', 'doc_count': 37},
    {'key': 'Device', 'doc_count': 22},
    {'key': 'Radiation', 'doc_count': 19},
    {'key': 'Drug', 'doc_count': 10},
    {'key': 'Combination Product', 'doc_count': 8},
   

## 2.3. Promoteurs <a class="anchor" id="promoteurs"></a>

### 2.3.1. Dynamique par promoteur ? <a class="anchor" id="dynamique_promoteur"></a>

In [51]:
# Sponsor pattern fed to the wildcard filter below; "*" matches every value of
# `lead_sponsor.keyword`.
promot = "*"

# Same breakdown as the global opening dynamics (start year, then results or
# publications, then sponsor type), restricted to trials whose lead sponsor
# matches `promot`.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
                {"wildcard": {"lead_sponsor.keyword": promot}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "by_has_result": {
                    "terms": {"field": "has_results_or_publications"},
                    "aggs": {
                        "by_sponsor_type": {
                            "terms": {"field": "lead_sponsor_type.keyword"},
                        },
                    },
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'by_has_result': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0,
     'key_as_string': 'false',
     'doc_count': 1543,
     'by_sponsor_type': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'academique', 'doc_count': 841},
       {'key': 'industriel', 'doc_count': 702}]}},
    {'key': 1,
     'key_as_string': 'true',
     'doc_count': 23,
     'by_sponsor_type': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'industriel', 'doc_count': 22},
       {'key': 'academique', 'doc_count': 1}]}}]}},
 {'key': 2018.0,
  'doc_count': 1512,
  'by_has_result': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0,
     'key_as_string': 'false',
     'doc_count': 1425,
     'by_sponsor_type': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'academiqu

In [56]:
# List the top sponsors: interventional trials bucketed by lead-sponsor type,
# then by lead-sponsor name within each type.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_type": {
            "terms": {"field": "lead_sponsor_type.keyword"},
            "aggs": {
                "by_sponsor": {
                    "terms": {"field": "lead_sponsor.keyword"},
                },
            },
        },
    },
}

# The per-type buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_type"]["buckets"]
)

[{'key': 'industriel',
  'doc_count': 13197,
  'by_sponsor': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 10127,
   'buckets': [{'key': 'Novartis Pharmaceuticals', 'doc_count': 515},
    {'key': 'Hoffmann-La Roche', 'doc_count': 508},
    {'key': 'Sanofi', 'doc_count': 313},
    {'key': 'Bristol-Myers Squibb', 'doc_count': 282},
    {'key': 'GlaxoSmithKline', 'doc_count': 270},
    {'key': 'Eli Lilly and Company', 'doc_count': 268},
    {'key': 'Pfizer', 'doc_count': 264},
    {'key': 'AstraZeneca', 'doc_count': 259},
    {'key': 'Boehringer Ingelheim', 'doc_count': 199},
    {'key': 'Bayer', 'doc_count': 192}]}},
 {'key': 'academique',
  'doc_count': 10754,
  'by_sponsor': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 5761,
   'buckets': [{'key': 'Assistance Publique - Hôpitaux de Paris',
     'doc_count': 1398},
    {'key': 'Hospices Civils de Lyon', 'doc_count': 724},
    {'key': 'Assistance Publique Hopitaux De Marseille', 'doc_count': 434},
    {'key

In [57]:
# Sponsor whose trials are counted; the value is passed to a wildcard query,
# so any `*` or `?` it contained would be interpreted as a pattern.
promot = "Novartis Pharmaceuticals"

# Interventional trials of that sponsor, bucketed by start year.
# NOTE: the terms aggregation sets no `size`, so Elasticsearch's default
# bucket limit applies.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
                {"wildcard": {"lead_sponsor.keyword": promot}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2012.0, 'doc_count': 49},
 {'key': 2013.0, 'doc_count': 45},
 {'key': 2009.0, 'doc_count': 40},
 {'key': 2010.0, 'doc_count': 40},
 {'key': 2015.0, 'doc_count': 37},
 {'key': 2011.0, 'doc_count': 32},
 {'key': 2014.0, 'doc_count': 32},
 {'key': 2019.0, 'doc_count': 30},
 {'key': 2016.0, 'doc_count': 29},
 {'key': 2017.0, 'doc_count': 28}]

### 2.3.2. Impact pays ? <a class="anchor" id="pays"></a>

graphe 1

In [73]:
# Chart 1: interventional trials per start year, split by the
# `french_location_only` flag, then by lead-sponsor type.
# Documents without a sponsor type are counted under "N/A".
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "by_fr_only": {
                    "terms": {"field": "french_location_only"},
                    "aggs": {
                        "by_sponsor_type": {
                            "terms": {
                                "field": "lead_sponsor_type.keyword",
                                "missing": "N/A",
                            },
                        },
                    },
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'by_fr_only': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1,
     'key_as_string': 'true',
     'doc_count': 1052,
     'by_sponsor_type': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'academique', 'doc_count': 827},
       {'key': 'industriel', 'doc_count': 225}]}},
    {'key': 0,
     'key_as_string': 'false',
     'doc_count': 514,
     'by_sponsor_type': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'industriel', 'doc_count': 499},
       {'key': 'academique', 'doc_count': 15}]}}]}},
 {'key': 2018.0,
  'doc_count': 1512,
  'by_fr_only': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1,
     'key_as_string': 'true',
     'doc_count': 964,
     'by_sponsor_type': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 'academique', '

graphe 2

In [74]:
# Chart 2: interventional trials bucketed by location country, each split by
# whether results or publications exist.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_country": {
            "terms": {"field": "location_country.keyword"},
            "aggs": {
                "by_has_result_or_publi": {
                    "terms": {"field": "has_results_or_publications"},
                },
            },
        },
    },
}

# The per-country buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_country"]["buckets"]
)

[{'key': 'France',
  'doc_count': 20369,
  'by_has_result_or_publi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 16212},
    {'key': 1, 'key_as_string': 'true', 'doc_count': 4157}]}},
 {'key': 'Germany',
  'doc_count': 5285,
  'by_has_result_or_publi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 2829},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 2456}]}},
 {'key': 'United States',
  'doc_count': 5199,
  'by_has_result_or_publi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 2877},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 2322}]}},
 {'key': 'Spain',
  'doc_count': 4972,
  'by_has_result_or_publi': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_co

## 2.4. Résultats <a class="anchor" id="resultats"></a>

### 2.4.1. Diffusions des résultats ? <a class="anchor" id="diffusion"></a>

In [61]:
# Reset the sponsor pattern; the request below does not filter on it.
promot = "*"

# Dissemination of results: interventional trials per start year, split by
# `has_results`, then by `has_publications_result`.
# `"missing": False` counts documents lacking the field in the `false` bucket.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "by_has_result": {
                    "terms": {"field": "has_results", "missing": False},
                    "aggs": {
                        "by_has_publications_result": {
                            "terms": {
                                "field": "has_publications_result",
                                "missing": False,
                            },
                        },
                    },
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'by_has_result': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0,
     'key_as_string': 'false',
     'doc_count': 1543,
     'by_has_publications_result': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 1543}]}},
    {'key': 1,
     'key_as_string': 'true',
     'doc_count': 23,
     'by_has_publications_result': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 23}]}}]}},
 {'key': 2018.0,
  'doc_count': 1512,
  'by_has_result': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0,
     'key_as_string': 'false',
     'doc_count': 1425,
     'by_has_publications_result': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count'

In [63]:
# Reset the sponsor pattern; the request below does not filter on it.
promot = "*"

# Dissemination of results by intervention type: each type is split by
# `has_results`, then by `has_publications_result`.
# Documents without an intervention type are counted under "N/A";
# `"missing": False` counts documents lacking a flag in the `false` bucket.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_intervention_type": {
            "terms": {
                "field": "intervention_type.keyword",
                "missing": "N/A",
            },
            "aggs": {
                "by_has_result": {
                    "terms": {"field": "has_results", "missing": False},
                    "aggs": {
                        "by_has_publications_result": {
                            "terms": {
                                "field": "has_publications_result",
                                "missing": False,
                            },
                        },
                    },
                },
            },
        },
    },
}

# The per-intervention-type buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_intervention_type"]["buckets"]
)

[{'key': 'N/A',
  'doc_count': 21382,
  'by_has_result': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0,
     'key_as_string': 'false',
     'doc_count': 16469,
     'by_has_publications_result': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 0, 'key_as_string': 'false', 'doc_count': 16469}]}},
    {'key': 1,
     'key_as_string': 'true',
     'doc_count': 4913,
     'by_has_publications_result': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 0,
        'key_as_string': 'false',
        'doc_count': 4913}]}}]}},
 {'key': 'Other',
  'doc_count': 1157,
  'by_has_result': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 0,
     'key_as_string': 'false',
     'doc_count': 923,
     'by_has_publications_result': {'doc_count_error_upper_bound': 0,
      'sum_other_doc_count': 0,
      'buckets': [{'key': 0, 'key_as_string'

### 2.4.2. Plan de partage ? <a class="anchor" id="ipd"></a>

In [64]:
# Reset the sponsor pattern; the request below does not filter on it.
promot = "*"

# Data-sharing plan: interventional trials per start year, split by the
# `ipd_sharing` value. Documents without one are counted under "N/A".
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "by_ipd": {
                    "terms": {
                        "field": "ipd_sharing.keyword",
                        "missing": "N/A",
                    },
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'by_ipd': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'No', 'doc_count': 603},
    {'key': 'N/A', 'doc_count': 540},
    {'key': 'Yes', 'doc_count': 273},
    {'key': 'Undecided', 'doc_count': 150}]}},
 {'key': 2018.0,
  'doc_count': 1512,
  'by_ipd': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'N/A', 'doc_count': 604},
    {'key': 'No', 'doc_count': 568},
    {'key': 'Undecided', 'doc_count': 174},
    {'key': 'Yes', 'doc_count': 166}]}},
 {'key': 2015.0,
  'doc_count': 1470,
  'by_ipd': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'N/A', 'doc_count': 1068},
    {'key': 'No', 'doc_count': 217},
    {'key': 'Yes', 'doc_count': 109},
    {'key': 'Undecided', 'doc_count': 76}]}},
 {'key': 2020.0,
  'doc_count': 1443,
  'by_ipd': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'No', 'd

### 2.4.3. Délai de diffusion ? <a class="anchor" id="diffusion_delai"></a>

histo

In [67]:

# Histogram of `delay_first_results_completion` over interventional trials.
# The 183 bucket width presumably stands for half a year in days — TODO
# confirm the unit of the field.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "delay_first_results_completion": {
            "histogram": {
                "field": "delay_first_results_completion",
                "interval": 183,
            },
        },
    },
}

# The histogram buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["delay_first_results_completion"]["buckets"]
)

[{'key': -5124.0, 'doc_count': 1},
 {'key': -4941.0, 'doc_count': 0},
 {'key': -4758.0, 'doc_count': 2},
 {'key': -4575.0, 'doc_count': 3},
 {'key': -4392.0, 'doc_count': 4},
 {'key': -4209.0, 'doc_count': 7},
 {'key': -4026.0, 'doc_count': 5},
 {'key': -3843.0, 'doc_count': 11},
 {'key': -3660.0, 'doc_count': 15},
 {'key': -3477.0, 'doc_count': 16},
 {'key': -3294.0, 'doc_count': 20},
 {'key': -3111.0, 'doc_count': 22},
 {'key': -2928.0, 'doc_count': 36},
 {'key': -2745.0, 'doc_count': 37},
 {'key': -2562.0, 'doc_count': 46},
 {'key': -2379.0, 'doc_count': 53},
 {'key': -2196.0, 'doc_count': 47},
 {'key': -2013.0, 'doc_count': 57},
 {'key': -1830.0, 'doc_count': 95},
 {'key': -1647.0, 'doc_count': 85},
 {'key': -1464.0, 'doc_count': 92},
 {'key': -1281.0, 'doc_count': 101},
 {'key': -1098.0, 'doc_count': 138},
 {'key': -915.0, 'doc_count': 171},
 {'key': -732.0, 'doc_count': 282},
 {'key': -549.0, 'doc_count': 813},
 {'key': -366.0, 'doc_count': 1010},
 {'key': -183.0, 'doc_count': 25

distribution - requete 1

In [68]:

# Distribution, request 1: one `delay_first_results_completion` histogram per
# start year, using the same 183 bucket width as the global histogram.
json = {
    "size": 0,  # aggregations only: no search hits are requested
    "query": {
        "bool": {
            "filter": [
                {"term": {"study_type.keyword": "Interventional"}},
            ],
        },
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "delay_first_results_completion": {
                    "histogram": {
                        "field": "delay_first_results_completion",
                        "interval": 183,
                    },
                },
            },
        },
    },
}

# The per-year buckets are the cell's displayed value.
(
    requests.post(url, json=json, headers=headers)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'delay_first_results_completion': {'buckets': [{'key': -549.0,
     'doc_count': 4},
    {'key': -366.0, 'doc_count': 11},
    {'key': -183.0, 'doc_count': 2},
    {'key': 0.0, 'doc_count': 0},
    {'key': 183.0, 'doc_count': 0},
    {'key': 366.0, 'doc_count': 0},
    {'key': 549.0, 'doc_count': 1},
    {'key': 732.0, 'doc_count': 1},
    {'key': 915.0, 'doc_count': 0},
    {'key': 1098.0, 'doc_count': 0},
    {'key': 1281.0, 'doc_count': 0},
    {'key': 1464.0, 'doc_count': 1},
    {'key': 1647.0, 'doc_count': 2},
    {'key': 1830.0, 'doc_count': 0},
    {'key': 2013.0, 'doc_count': 0},
    {'key': 2196.0, 'doc_count': 2},
    {'key': 2379.0, 'doc_count': 0},
    {'key': 2562.0, 'doc_count': 0},
    {'key': 2745.0, 'doc_count': 2},
    {'key': 2928.0, 'doc_count': 0},
    {'key': 3111.0, 'doc_count': 2},
    {'key': 3294.0, 'doc_count': 1},
    {'key': 3477.0, 'doc_count': 1},
    {'key': 3660.0, 'doc_count': 0},
    {'key': 3843.0, 'doc_count'

Requête 2 – quantiles : percentiles de `delay_first_results_completion` par année de début (`study_start_year`)

In [69]:

# Percentiles of `delay_first_results_completion` inside each
# `study_start_year` bucket (interventional studies only).
# No `percents` list is given, so the service decides which percentiles are returned.
# NOTE(review): the terms aggregation sets neither `size` nor `order`, so the
# Elasticsearch defaults apply — confirm that this is intended for a per-year view.
json = {
    "size": 0,  # no hits requested; only the aggregations are read below
    "query": {
        "bool": {
            "filter": [{"term": {"study_type.keyword": "Interventional"}}]
        }
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            # Sub-aggregation: percentile summary per start year.
            "aggs": {
                "delay_first_results_completion": {
                    "percentiles": {"field": "delay_first_results_completion"}
                }
            },
        }
    },
}

# Run the search and display the per-year buckets.
(
    requests.post(url, headers=headers, json=json)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'delay_first_results_completion': {'values': {'1.0': -452.0,
    '5.0': -378.6,
    '25.0': -352.25,
    '50.0': 905.0,
    '75.0': 3294.25,
    '95.0': 4818.9,
    '99.0': 5687.0}}},
 {'key': 2018.0,
  'doc_count': 1512,
  'delay_first_results_completion': {'values': {'1.0': -548.04,
    '5.0': -382.0,
    '25.0': -359.0,
    '50.0': -252.0,
    '75.0': 1105.0,
    '95.0': 4289.6,
    '99.0': 5264.56}}},
 {'key': 2015.0,
  'doc_count': 1470,
  'delay_first_results_completion': {'values': {'1.0': -1257.89,
    '5.0': -870.5,
    '25.0': -378.8333333333333,
    '50.0': -331.6666666666667,
    '75.0': 203.75,
    '95.0': 2336.35,
    '99.0': 4312.419999999994}}},
 {'key': 2020.0,
  'doc_count': 1443,
  'delay_first_results_completion': {'values': {'1.0': -238.0,
    '5.0': -204.4,
    '25.0': 669.0,
    '50.0': 2502.0,
    '75.0': 4441.0,
    '95.0': 6330.799999999998,
    '99.0': 8278.0}}},
 {'key': 2017.0,
  'doc_count': 1422,
  'delay_first_resu

### 2.4.4. Publications ? <a class="anchor" id="publication"></a>

In [71]:
# NOTE(review): `promot` is assigned here but not referenced anywhere in this
# cell — confirm whether another cell reads it or whether a filter is missing.
promot = "*"

# Breakdown of the `has_publication_oa` flag inside each `study_start_year`
# bucket (interventional studies only).
# NOTE(review): the sub-aggregation is still called `by_ipd` although it targets
# `has_publication_oa`; the name looks copied from the IPD query — confirm before renaming,
# since the response is keyed on it.
json = {
    "size": 0,  # no hits requested; only the aggregations are read below
    "query": {
        "bool": {
            "filter": [{"term": {"study_type.keyword": "Interventional"}}]
        }
    },
    "aggs": {
        "by_year": {
            "terms": {"field": "study_start_year"},
            "aggs": {
                "by_ipd": {"terms": {"field": "has_publication_oa"}}
            },
        }
    },
}

# Run the search and display the per-year buckets.
(
    requests.post(url, headers=headers, json=json)
    .json()["aggregations"]["by_year"]["buckets"]
)

[{'key': 2019.0,
  'doc_count': 1566,
  'by_ipd': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 13},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 6}]}},
 {'key': 2018.0,
  'doc_count': 1512,
  'by_ipd': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 18},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 5}]}},
 {'key': 2015.0,
  'doc_count': 1470,
  'by_ipd': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 36},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 9}]}},
 {'key': 2020.0,
  'doc_count': 1443,
  'by_ipd': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 1, 'key_as_string': 'true', 'doc_count': 19},
    {'key': 0, 'key_as_string': 'false', 'doc_count': 3}]}},
 {'key': 2017.0,
  'doc_