In [1]:
import pandas as pd
import requests

In [2]:
# Read the funder list that the following cells try to match against OpenAlex.
df = pd.read_csv(filepath_or_buffer='funders.csv')
df

Unnamed: 0,name,acronym,country,name_variant,type
0,National Institutes of Health,NIH,United States,,Funder
1,National Science Foundation,NSF,United States,,Funder
2,Natural Sciences and Engineering Research Council,NSERC,Canada,,Funder
3,Bill & Melinda Gates Foundation,Gates Foundation,United States,BMGF,Charity
4,Social Sciences and Humanities Research Council,SSHRC,Canada,,Funder
...,...,...,...,...,...
195,Prince Albert II of Monaco Foundation,,Moncao,,Charity
196,BNP Paribas Foundation,,France,,Funder
197,Jenny and Antti Wihuri Foundation,,,,Funder
198,W. M. Keck Foundation,,France,W. Keck Foundation,Funder


In [3]:
# Country label (as written in the funder list's `country` column) -> short
# upper-case code that is compared with the `country_code` of OpenAlex hits.
# NOTE(review): 'INT' and 'AFR' look like placeholders rather than real
# country codes, so they presumably never match an OpenAlex hit - confirm.
country_mapping = {
    'United States': 'US',
    'Canada': 'CA',
    'Netherlands': 'NL',
    'Australia': 'AU',
    'Japan': 'JP',
    'France': 'FR',
    'Sweden': 'SE',
    'Switzerland': 'CH',
    'United Kingdom': 'GB',
    'Germany': 'DE',
    'Brazil': 'BR',
    'European Union': 'EU',
    'Finland': 'FI',
    'Denmark': 'DK',
    'Portugal': 'PT',
    'Spain': 'ES',
    'Italy': 'IT',
    'Poland': 'PL',
    'Austria': 'AT',
    'Czech Republic': 'CZ',
    'China': 'CN',
    'South Korea': 'KR',
    'Taiwan': 'TW',
    'India': 'IN',
    'Argentina': 'AR',
    'Chile': 'CL',
    'Israel': 'IL',
    'Turkey': 'TR',
    'Colombia': 'CO',
    'Mexico': 'MX',
    'Peru': 'PE',
    'Kenya': 'KE',
    'Nigeria': 'NG',
    'Malaysia': 'MY',
    'Thailand': 'TH',
    'Vietnam': 'VN',
    'International': 'INT',
    'Qatar': 'QA',
    'Uruguay': 'UY',
    'South Africa': 'ZA',
    'Ecuador': 'EC',
    'Costa Rica': 'CR',
    'Uganda': 'UG',
    'Tanzania': 'TZ',
    'International (Africa)': 'AFR',
    'Indonesia': 'ID',
    'Philippines': 'PH',
    'Pakistan': 'PK',
    'Bangladesh': 'BD',
    'Sri Lanka': 'LK',
    'Nepal': 'NP',
    'Russia': 'RU',
}

# OpenAlex funder ids that must never be accepted as a match.
FORBIDDEN_IDS = ['https://openalex.org/F4320321693']


def search_openalex(query, country_code=None, verbose=False):
    """
    Search for a funder in the OpenAlex API.

    Hits whose id is listed in ``FORBIDDEN_IDS`` are never returned. Among
    the remaining hits, the first one whose ``country_code`` equals
    ``country_code`` (case-insensitive) wins; when no country code is given,
    or no hit matches it, the first remaining hit is returned.

    Args:
        query: Search term (name or acronym). ``None`` or a blank string
            returns None without calling the API.
        country_code: Optional country code used to prefer a hit from that
            country.
        verbose: If True, print the raw hits as a DataFrame before picking
            one.

    Returns:
        dict or None: The selected funder record, or None when no acceptable
        hit exists or the request failed (the error is printed, not raised).
    """
    # Without a search term the request would carry no usable filter and
    # whatever funder came back first would be taken as the match.
    if query is None or (isinstance(query, str) and not query.strip()):
        return None

    url = "https://api.openalex.org/funders"
    params = {
        'search': query,
        'per_page': 5,
        # Recommended by OpenAlex.
        # NOTE(review): this is a placeholder address - replace it.
        'mailto': 'your-email@example.com',
    }

    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        results = response.json().get('results')
        if not results:
            return None

        if verbose:
            print(pd.DataFrame(results))

        # With a country code, look for a hit from that country first.
        if country_code:
            wanted = country_code.upper()
            for hit in results:
                hit_country = hit.get('country_code')
                if (
                    isinstance(hit_country, str)
                    and hit_country.upper() == wanted
                    and hit['id'] not in FORBIDDEN_IDS
                ):
                    return hit

        # Otherwise take the first hit that is not blacklisted, so a
        # blacklisted top hit does not hide the acceptable hits behind it.
        return next(
            (hit for hit in results if hit['id'] not in FORBIDDEN_IDS),
            None,
        )

    except requests.exceptions.RequestException as e:
        print(f"  ⚠️  Erreur réseau: {e}")
        return None
    except Exception as e:
        # Deliberate best-effort: any other failure (e.g. a malformed
        # payload) is reported and treated as "not found".
        print(f"  ⚠️  Erreur: {e}")
        return None


In [4]:
# Turn each row into a plain dict so the lookup loop can attach new keys to it.
data = df.to_dict('records')
len(data)

200

In [5]:
# Enrich every funder record in place with the fields of its OpenAlex match.
for record in data:
    # Countries missing from the mapping give '', i.e. no country preference.
    iso_code = country_mapping.get(record['country'], '')
    match = search_openalex(f"{record['name']}", iso_code)
    if not match:
        continue  # nothing found: the record keeps only its original keys

    for field in ('id', 'display_name', 'alternate_titles', 'description',
                  'homepage_url', 'image_thumbnail_url'):
        record[f'openalex_{field}'] = match[field]

    # External identifiers are copied only when OpenAlex provides them.
    if 'ids' in match:
        external_ids = match['ids']
        for scheme in ('ror', 'wikidata'):
            if scheme in external_ids:
                record[scheme] = external_ids[scheme]

In [6]:
# Rebuild a table from the enriched records and save it next to the input file.
df2 = pd.DataFrame(data=data)
df2.to_csv(path_or_buf='funders_with_openalex_id.csv', index=False)

In [7]:
# Row count of the enriched table.
df2.shape[0]

200

In [8]:
# A count above 1 would mean several rows resolved to the same OpenAlex funder.
df2['openalex_id'].value_counts().head(10)

https://openalex.org/F4320332161    1
https://openalex.org/F4320326210    1
https://openalex.org/F4320322893    1
https://openalex.org/F4320327974    1
https://openalex.org/F4320327215    1
https://openalex.org/F4320326245    1
https://openalex.org/F4320328356    1
https://openalex.org/F4320325796    1
https://openalex.org/F4320326227    1
https://openalex.org/F4320329938    1
Name: openalex_id, dtype: int64

In [9]:
# Last 50 of the first 200 rows: source name next to the matched OpenAlex name.
df2.loc[:, ['name', 'openalex_display_name']].iloc[:200].iloc[-50:]

Unnamed: 0,name,openalex_display_name
150,Campus France,Campus France
151,BPIFrance,Bpifrance
152,European Space Agency,European Space Agency
153,Cancer Research UK,Cancer Research UK
154,Ministry of Economy and Competitiveness,Ministerio de Economía y Competitividad
155,European Social Fund,European Social Fund
156,Knut and Alice Wallenberg Foundation,Knut och Alice Wallenbergs Stiftelse
157,Science Foundation Ireland,Science Foundation Ireland
158,Irish Research Council,Irish Research Council
159,British Heart Foundation,British Heart Foundation


In [10]:
# Funders for which no OpenAlex match was stored.
df2[df2['openalex_id'].isna()]

Unnamed: 0,name,acronym,country,name_variant,type,openalex_id,openalex_display_name,openalex_alternate_titles,openalex_description,openalex_homepage_url,openalex_image_thumbnail_url,ror,wikidata
84,Ministry of Higher Education Malaysia Grants S...,MyGRANTS,Malaysia,MOHE,Funder,,,,,,,,
111,Hubert Curien Partnerships,PHC,France,Partenariat Hubert Curien,Funder,,,,,,,,
112,Hospital Program for Clinical Research,PHRC,France,Programme Hospitalier de Recherche Clinique,Funder,,,,,,,,
113,France 2030 Investment Plan,PIA,France,Programme d’Investissements d’Avenir|Investiss...,Funder,,,,,,,,
126,Région Ile de France,,France,Région IdF,Funder,,,,,,,,
129,Région Grand Est,,France,Région Alsace|Région Lorraine,Funder,,,,,,,,
180,Ville de Paris,,France,City of Paris,Funder,,,,,,,,
181,Rennes Métropole,,France,,Funder,,,,,,,,
182,Nantes Métropole,,France,,Funder,,,,,,,,
183,Métropole Européenne de Lille,,France,,Funder,,,,,,,,
