In [2]:
from dotenv import load_dotenv
from elasticsearch import Elasticsearch
import requests, os
import pandas as pd

In [3]:
# Connection settings are read from a local .env file, which must define
# ES_HOST, ES_USER and ES_PASSWORD.
load_dotenv()

ES_HOST = os.getenv("ES_HOST")
ES_PASSWORD = os.getenv("ES_PASSWORD")
ES_USER = os.getenv("ES_USER")

# Fail fast with a readable message instead of an obscure connection or
# authentication error on the first query.
_missing_settings = [
    name
    for name, value in (
        ("ES_HOST", ES_HOST),
        ("ES_USER", ES_USER),
        ("ES_PASSWORD", ES_PASSWORD),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing Elasticsearch settings in the environment/.env file: "
        + ", ".join(_missing_settings)
    )

# Shared client used by every query in this notebook.
es = Elasticsearch(ES_HOST, http_auth=(ES_USER, ES_PASSWORD))

# Plain string constants: nothing is interpolated, so no f-string prefix.
INDEX_PROJECTS = 'scanr-projects'
INDEX_ORGANIZATIONS = 'scanr-organizations'

# 1. First, get the id of the institution

In [4]:
# Free-text name of the institution to look up in the organizations index.
user_query = "le mans université"

In [5]:
# Full-text search on the organizations index, restricted to active entries.
payload = {
    'size': 10,
    # Only fetch the fields needed to recognise the organization.
    '_source': ['id', 'label.*', 'status', 'level'],
    # Best matches first; the id is used as a tie-breaker.
    'sort': [
        {'_score': {'order': 'desc'}},
        {'id.keyword': {'order': 'desc'}},
    ],
    'query': {
        'bool': {
            'must': [
                {'query_string': {'query': user_query}},
                {'terms': {'status.keyword': ['active']}},
            ],
        },
    },
}

res = es.search(
    index=INDEX_ORGANIZATIONS,
    body=payload,
    request_cache=False,
    request_timeout=6000,
)
# One row per hit, built from the returned `_source` documents.
pd.DataFrame([hit['_source'] for hit in res['hits']['hits']])

Unnamed: 0,id,status,label,level
0,9xlel,active,{'fr': 'Le Mans Université'},EPSCP
1,200815562K,active,"{'fr': 'Themis université du Mans', 'default':...",Unité de recherche
2,576450431,active,{'default': 'GRUAU LE MANS'},
3,202023718Y,active,{'fr': 'Atelier de Recherche en Gestion de l'U...,Unité de recherche
4,480449388,active,{'default': 'LE MANS PROCESS AGRO'},
5,200615275G,active,{'fr': 'Laboratoire d'informatique de l'univer...,Unité de recherche
6,199612385J,active,{'fr': 'Laboratoire d'acoustique de l'universi...,Unité de recherche
7,202123746Z,active,{'fr': 'Centre de recherche Humanités et Socié...,Unité de recherche
8,247200132,active,{'default': 'LE MANS METROPOLE COMMUNAUTE URBA...,
9,200810693T,active,{'fr': 'Fédération de recherche mathématiques ...,Structure fédérative


In [6]:
## From the list above, pick the id of the right organization

In [7]:
# Id of the organization picked from the search results above
# ('Le Mans Université' in the displayed table).
institution_id = "9xlel"

# 2. Get the grants linked to that institution_id
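## The institution can appear either directly in the list of participants (participants.structure.id), or among the supervising institutions of a participant (participants.structure.institutions.structure)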

In [22]:
# Projects linked to `institution_id`: the institution is either a participant
# itself, or listed among the institutions of a participating structure.
payload = {
    # NOTE(review): 10000 is presumably chosen to match Elasticsearch's default
    # result window; any hits beyond that would not be returned -- confirm.
    'size': 10000,
    # Fields to bring back for each project.
    '_source': [
        'label.*',
        'acronym.*',
        'project_domains',
        'participants.label',
        'participants.structure.id',
        'participants.structure.label.*',
        'participants.structure.mainAddress.*',
        'year',
        'type',
        'id',
        'keywords.*',
    ],
    'sort': [
        {'_score': {'order': 'desc'}},
        {'id.keyword': {'order': 'desc'}},
    ],
    'highlight': {
        'number_of_fragments': 3,
        'fragment_size': 125,
        'pre_tags': ['<strong>'],
        'post_tags': ['</strong>'],
        'fields': {
            'label.default': {},
            'description.default': {},
            'domains.label.default': {},
        },
    },
    'query': {
        'bool': {
            # At least one of the two clauses below has to match.
            'should': [
                {'terms': {'participants.structure.id.keyword': [institution_id]}},
                {'terms': {'participants.structure.institutions.structure.keyword': [institution_id]}},
            ],
            "minimum_should_match": 1,
        },
    },
}

In [23]:
# Run the projects query built in the previous cell.
res = es.search(
    index=INDEX_PROJECTS,
    body=payload,
    request_cache=False,
    request_timeout=6000,
)

In [24]:
# Keep only the document body (`_source`) of every hit.
hits = res['hits']['hits']
data = [hit['_source'] for hit in hits]

In [25]:
# One row per project, one column per returned top-level field.
df_results = pd.DataFrame(data=data)

In [26]:
# Number of projects retrieved for the institution.
df_results.shape[0]

285

In [27]:
# Number of projects per `type` value, most frequent first.
df_results['type'].value_counts()

ANR                  223
PIA ANR               22
Horizon Europe        16
Horizon 2020          13
DIM Ile-de-France      6
i-LAB                  4
ANSES                  1
Name: type, dtype: int64

In [28]:
# Display the projects table. `df_results` was already built from the same
# hits in the cells above, so it is reused rather than rebuilding an
# identical frame from `res`.
df_results

Unnamed: 0,type,year,id,acronym,label,participants,keywords,project_domains
0,Horizon Europe,2023,101168870,{'default': 'ESM'},{'default': 'Edible Soft Matter'},[{'label': {'default': 'Le Mans Université__-_...,"{'en': ['food sciences', 'food structure', 'ed...","[{'label': {'default': 'food sciences'}, 'code..."
1,Horizon Europe,2023,101148965,{'default': 'PRIMONPLANTS'},{'default': 'PeeRing into the mechanIsm of syn...,[{'label': {'default': 'University of Namur__-...,"{'en': ['bioengineering', 'foaming', 'plants',...","[{'label': {'default': 'foaming'}, 'code': 'Q2..."
2,Horizon Europe,2022,101119903,{'default': 'VAMOR'},{'default': 'Vibro-Acoustic Model Order Reduct...,[{'label': {'default': 'Tyrens sverige ab__-__...,"{'en': ['model order reduction', 'vibration an...",[{'label': {'default': 'model order reduction'...
3,Horizon Europe,2022,101094949,{'default': 'NEXUS'},{'default': 'Twinning Research and Innovation ...,[{'label': {'default': 'Bay Zoltán Foundation ...,"{'en': ['inclusiveness', 'gender equality plan...","[{'label': {'default': 'inclusiveness'}, 'code..."
4,Horizon Europe,2022,101081357,{'default': 'REWRITE'},{'default': 'REWilding and Restoration of Inte...,[{'label': {'default': 'Le Mans Université__-_...,"{'en': ['intertidal seascapes', 'soft sediment...","[{'label': {'default': 'intertidal'}, 'code': ..."
...,...,...,...,...,...,...,...,...
280,Horizon 2020,2020,101036048,{'default': 'VOYAGES'},{'default': 'Voyages'},[{'label': {'default': 'Université Marie et Lo...,"{'en': ['researchers', 'publics', 'meetings', ...","[{'label': {'default': 'voyage'}, 'code': 'Q18..."
281,Horizon 2020,2020,101025424,{'default': 'STSAW'},{'default': 'Sub-THz Surface Acoustic Waves'},[{'label': {'default': 'Le Mans Université__-_...,"{'en': ['surface acoustic waves', 'picosecond ...","[{'label': {'default': 'acoustic waves'}, 'cod..."
282,Horizon 2020,2020,101007851,{'default': 'DISCO2 STORE'},{'default': 'Discontinuities in CO2 Storage Re...,[{'label': {'default': 'Le Mans Université__-_...,"{'en': ['co2 storage', 'mechanical discontinui...","[{'label': {'default': 'discontinuities'}, 'co..."
283,Horizon 2020,2020,101007666,{'default': 'ESPERANTO'},{'default': 'Exchanges for SPEech ReseArch aNd...,[{'label': {'default': 'Université Grenoble Al...,"{'en': ['speech processing', 'neural networks'...","[{'label': {'default': 'speech processing'}, '..."
