In [5]:
import pandas as pd
import requests

input_data_file = 'fr-esr-structures-recherche-publiques-actives.csv'
output_data_file = 'fr-esr-structures-recherche-publiques-actives-with-grid.csv'
MATCHER_IP = 'http://**.***.***.***'
matcher_url = f'{MATCHER_IP}/match_api'

In [6]:
# 1. Load all structures
all_structures = pd.read_csv(input_data_file, sep=';')
print(f'Number of structures : {len(all_structures)}')

Number of structures : 4506


In [7]:
# 2. Filter to get the CNRS' structure
cnrs_structures = all_structures[all_structures.sigles_des_tutelles.str.contains('CNRS')]
print(f'Number of CNRS structures : {len(cnrs_structures)}')

Number of CNRS structures : 1640


In [8]:
# 3. Build the query and query the matcher
def get_value_from_field(row: object, field: str) -> str:
    value = row.get(field, '')
    value = '' if not isinstance(value, str) else value
    return value

def build_query(row: object) -> str:
    libelle = get_value_from_field(row=row, field='libelle')
    sigle = get_value_from_field(row=row, field='sigle')
    commune = get_value_from_field(row=row, field='commune')
    return f'{libelle} {sigle} {commune} France'

for index, row in cnrs_structures.iterrows():
    query = build_query(row=row)
    json = {'type': 'grid', 'query': query }
    results = requests.post(url=matcher_url, json=json).json().get('results', [])
    grid = results[0] if len(results) > 0 else ''
    # Add an alternativ strategy based on the structure's name, its acronym and its country
    if grid == '':
       strategies = [[['grid_name', 'grid_acronym', 'grid_country']]]
       json = {'type': 'grid', 'query': query, 'strategies': strategies }
       results = requests.post(url=matcher_url, json=json).json().get('results', [])
       grid = results[0] if len(results) > 0 else ''
    cnrs_structures.loc[index, 'grid'] = grid

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [9]:
# 4. Export results into a csv file
cnrs_structures.to_csv(output_data_file)
success_rate = len(cnrs_structures[cnrs_structures['grid'] != '']) / len(cnrs_structures) * 100
print(f'Sucess rate : {success_rate}')

Sucess rate : 43.109756097560975
