In [65]:
import requests
import math
import pandas as pd
import json
import os

In [66]:
# Create the local cache directory used by the data-loading helpers below.
# os.makedirs needs no shell (portable) and does nothing if the directory exists.
os.makedirs('cache', exist_ok=True)

0

In [67]:
import re
from tokenizers import normalizers
from tokenizers.normalizers import BertNormalizer, Sequence, Strip
from tokenizers import pre_tokenizers
from tokenizers.pre_tokenizers import Whitespace

# Text normalisation pipeline: a BertNormalizer configured with the options
# below, followed by a Strip() step.
_bert_options = dict(
    clean_text=True,
    handle_chinese_chars=True,
    strip_accents=True,
    lowercase=True,
)
normalizer = Sequence([BertNormalizer(**_bert_options), Strip()])

# Pre-tokenizer used by normalize() to split the normalised text into tokens.
pre_tokenizer = pre_tokenizers.Sequence([Whitespace()])

def normalize(x, min_length = 1):
    """Return a normalised, space-joined token string built from ``x``.

    Non-string inputs (for instance ``None``) yield the empty string.
    Otherwise the text goes through the module-level ``normalizer``, newlines
    become spaces, runs of spaces are collapsed, and the text is split by the
    module-level ``pre_tokenizer``.

    Only tokens whose length is strictly greater than ``min_length`` are kept,
    so with the default value one-character tokens are dropped.
    """
    if not isinstance(x, str):
        return ''
    text = normalizer.normalize_str(x).replace('\n', ' ')
    text = re.sub(' +', ' ', text)
    kept_tokens = []
    for piece in pre_tokenizer.pre_tokenize_str(text):
        token = piece[0]  # each piece starts with the token text
        if len(token) > min_length:
            kept_tokens.append(token)
    return " ".join(kept_tokens)

# Stats globales par pays

cf. page 25 du rapport OST sur les publications 2005-2018

In [68]:
def get_global_data():
    """Return the OpenAlex works grouped by institution country, for 2005-2022.

    The result is a list of the API's ``group_by`` entries, each one augmented
    with ``year`` (the publication year queried) and ``rank`` (its 0-based
    position in the API response for that year).

    The list is read from ``cache/global_data.json`` when that file exists and
    parses as JSON; otherwise it is fetched from the API (one request per
    year) and written to that file.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    cache_path = 'cache/global_data.json'
    try:
        with open(cache_path, 'r') as cache_file:
            return json.load(cache_file)
    except (OSError, ValueError):
        # Cache missing, unreadable or corrupt: rebuild it from the API below.
        pass

    global_data = []
    for year in range(2005, 2023):
        print(year, end=',')
        response = requests.get(
            'https://api.openalex.org/works'
            f'?filter=publication_year:{year}&group-by=institutions.country_code'
        )
        # Fail loudly on HTTP errors instead of hitting a KeyError on 'group_by'.
        response.raise_for_status()
        groups = response.json()['group_by']
        for ix, e in enumerate(groups):
            e['year'] = year
            e['rank'] = ix
        global_data += groups

    # Make sure the cache directory exists before writing into it.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    with open(cache_path, 'w') as cache_file:
        json.dump(global_data, cache_file)
    return global_data


In [70]:
# Load the per-country statistics and keep the ten first-listed countries of 2021.
global_data = get_global_data()
df_global = pd.DataFrame(global_data)
top_country = df_global.loc[df_global['year'] == 2021, 'key_display_name'].head(10).tolist()
df_global_top = df_global[df_global['key_display_name'].map(lambda name: name in top_country)]
# Rank of each of those countries per year (one column per country).
df_global_top.pivot_table(index='year', columns="key_display_name", values='rank')

key_display_name,Brazil,China,France,Germany,India,Indonesia,Japan,United Kingdom of Great Britain and Northern Ireland,United States of America,unknown
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2005,11,2,6,5,12,50,4,3,1,0
2006,11,2,6,5,13,50,4,3,1,0
2007,11,2,5,6,12,46,4,3,1,0
2008,10,2,5,4,12,45,6,3,1,0
2009,10,2,5,4,12,44,6,3,1,0
2010,10,2,5,4,12,39,6,3,1,0
2011,9,2,5,4,11,23,6,3,1,0
2012,8,2,5,4,9,25,6,3,1,0
2013,8,2,4,5,7,24,6,3,1,0
2014,8,2,5,4,7,23,6,3,1,0


In [71]:
# Rows of the global statistics that concern France only.
df_global.loc[df_global['key_display_name'] == "France"]

Unnamed: 0,key,key_display_name,count,year,rank
6,FR,France,95101,2005,6
206,FR,France,110161,2006,6
405,FR,France,118395,2007,5
605,FR,France,125229,2008,5
805,FR,France,133732,2009,5
1005,FR,France,140662,2010,5
1205,FR,France,148754,2011,5
1405,FR,France,161599,2012,5
1604,FR,France,173802,2013,4
1805,FR,France,179044,2014,5


# Exploration sur une requête (affiliation)

In [130]:
# First publication year of the affiliation queries; interpolated into BASE_URL.
first_year = 2016

In [349]:
# Affiliation types understood by the query helpers of this notebook.
ALLOWED_AFFILIATION_TYPES = ['raw_affiliation_string', 'institutions.country_code', 'institutions.ror']

# First affiliation criterion (always applied).
affiliation1_str = 'beta cnrs'
affiliation1_type = 'raw_affiliation_string'

# Optional second affiliation criterion; it is ignored while the string is None.
affiliation2_str = None
affiliation2_type = 'raw_affiliation_string'  # alternative: 'institutions.country_code'

# Optional search term (e.g. 'zoonosis' or 'military'); None means no thematic search.
thematic = None

# Validate both types, not only the first one, so a typo is caught here
# rather than as a malformed API query later.
assert affiliation1_type in ALLOWED_AFFILIATION_TYPES, f'unsupported affiliation1_type: {affiliation1_type!r}'
assert affiliation2_type in ALLOWED_AFFILIATION_TYPES, f'unsupported affiliation2_type: {affiliation2_type!r}'

def get_filename(affiliation1_str, affiliation1_type, affiliation2_str, affiliation2_type, thematic, random_size ):
    """Build the cache file path (under ``cache/``) for one query.

    Each textual component goes through ``normalize`` and every run of
    non-word characters is then replaced by ``_``, so a component can never
    contain a path separator (e.g. the ``://`` of a ROR URL).

    An affiliation whose type is not ``'raw_affiliation_string'`` gets its
    type appended to its component. This keeps the file names already produced
    for the default type unchanged, and stops two queries that differ only by
    their affiliation type from sharing a cache file.

    Returns:
        str: a path such as ``cache/beta_cnrs__sample1000_.json``.
    """
    def slug(value):
        # One safe path component; normalize() returns '' for None.
        return re.sub(r'\W+', '_', normalize(value))

    def affiliation_slug(value, kind):
        part = slug(value)
        if part and kind != 'raw_affiliation_string':
            part += '-' + re.sub(r'\W+', '_', str(kind))
        return part

    first = affiliation_slug(affiliation1_str, affiliation1_type)
    second = affiliation_slug(affiliation2_str, affiliation2_type)
    return f'cache/{first}_{second}_sample{random_size}_{slug(thematic)}.json'



In [350]:
# Common prefix of the OpenAlex works queries built by get_data(): a filter on
# the publication year starting at `first_year`. The f-string is evaluated when
# this cell runs, so re-run it after changing `first_year`.
BASE_URL = f"https://api.openalex.org/works?filter=publication_year:{first_year}-"

In [351]:
def get_search_field(f):
    """Map an affiliation type to the filter name used in the query URL.

    ``'raw_affiliation_string'`` becomes ``'raw_affiliation_string.search'``;
    any other value is returned unchanged.
    """
    return 'raw_affiliation_string.search' if f in ('raw_affiliation_string',) else f

def get_data(affiliation1_str, affiliation1_type, affiliation2_str, affiliation2_type, thematic ):
    """Return the OpenAlex works matching the affiliation / thematic criteria.

    The query URL is ``BASE_URL`` plus one filter per affiliation criterion
    (the second one only when ``affiliation2_str`` is truthy). With a
    ``thematic`` the term is sent as ``search``; without one, a seeded sample
    of 1000 works is requested instead.

    Results are cached as ``{'results': [...]}`` in the file named by
    ``get_filename``; the API is only called when that file is missing or does
    not parse as JSON. All result pages are fetched and concatenated.

    Returns:
        list: the work records (dicts) as returned by the API.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        AssertionError: if the number of fetched works differs from the
            count announced by the API.
    """
    url = BASE_URL + f",{get_search_field(affiliation1_type)}:{affiliation1_str}"
    if affiliation2_str:
        url += f",{get_search_field(affiliation2_type)}:{affiliation2_str}"
    if thematic:
        random_size = None
        url += f"&search={thematic}"
    else:
        random_size = 1000
        url += f"&sample={random_size}&seed=0"

    print(url)
    filename = get_filename(affiliation1_str, affiliation1_type, affiliation2_str, affiliation2_type,
                            thematic, random_size )
    print(filename)

    def fetch_page(page_url):
        # One API call; fail loudly on HTTP errors rather than on a later KeyError.
        response = requests.get(page_url)
        response.raise_for_status()
        return response.json()

    data = None
    try:
        with open(filename, 'r') as cache_file:
            data = json.load(cache_file)
    except (OSError, ValueError):
        # Cache missing, unreadable or corrupt: fall through to the API.
        data = None

    if data is None:
        res = fetch_page(url)
        nb_results = res['meta']['count']
        print(nb_results)
        nb_page = math.ceil(nb_results / res['meta']['per_page'])
        results = list(res['results'])
        for p in range(2, nb_page + 1):
            print(p, end=',')
            results += fetch_page(url + f"&page={p}")['results']
        assert len(results) == nb_results, f'expected {nb_results} works, fetched {len(results)}'
        data = {'results': results}
        with open(filename, 'w') as cache_file:
            json.dump(data, cache_file)
    return data['results']
        

In [352]:
# Fetch (or load from cache) the works for the configured criteria, then tabulate them.
x = get_data(
    affiliation1_str=affiliation1_str,
    affiliation1_type=affiliation1_type,
    affiliation2_str=affiliation2_str,
    affiliation2_type=affiliation2_type,
    thematic=thematic,
)
df = pd.DataFrame(data=x)

https://api.openalex.org/works?filter=publication_year:2016-,raw_affiliation_string.search:beta cnrs&sample=1000&seed=0
cache/beta_cnrs__sample1000_.json


In [353]:
# Display the works table built from `x` (one row per fetched work).
df

Unnamed: 0,id,doi,title,display_name,relevance_score,publication_year,publication_date,ids,primary_location,host_venue,...,best_oa_location,alternate_host_venues,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date
0,https://openalex.org/W2565997017,https://doi.org/10.1016/j.worlddev.2016.11.012,What has Driven Deforestation in Developing Co...,What has Driven Deforestation in Developing Co...,823.429440,2017,2017-04-01,{'openalex': 'https://openalex.org/W2565997017...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S85457386', 'issn...",...,,"[{'id': 'https://openalex.org/S85457386', 'dis...","[https://openalex.org/W182975065, https://open...","[https://openalex.org/W4083788, https://openal...",https://api.openalex.org/works/W2565997017/ngrams,"{'Summary': [0], 'Using': [3], 'newly-released...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 4}, {'year':...",2023-02-24T11:10:50.384717,2017-01-06
1,https://openalex.org/W2468747762,https://doi.org/10.1080/00036846.2018.1495820,The long-run impact of human capital on innova...,The long-run impact of human capital on innova...,683.878200,2019,2019-01-26,{'openalex': 'https://openalex.org/W2468747762...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S201533460', 'iss...",...,"{'is_oa': True, 'landing_page_url': 'https://d...","[{'id': 'https://openalex.org/S201533460', 'di...","[https://openalex.org/W1489749791, https://ope...","[https://openalex.org/W1980523875, https://ope...",https://api.openalex.org/works/W2468747762/ngrams,"{'Human': [0], 'capital': [1, 20, 58, 74], 'is...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 4}, {'year':...",2023-03-01T00:01:43.840780,2016-07-22
2,https://openalex.org/W2900558911,https://doi.org/10.1016/j.enpol.2017.10.060,Households energy consumption and transition t...,Households energy consumption and transition t...,537.262600,2018,2018-02-01,{'openalex': 'https://openalex.org/W2900558911...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S175056054', 'iss...",...,,"[{'id': 'https://openalex.org/S175056054', 'di...","[https://openalex.org/W1525696627, https://ope...","[https://openalex.org/W1975629292, https://ope...",https://api.openalex.org/works/W2900558911/ngrams,"{'The': [0], 'paper': [1], 'investigates': [2]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 5}, {'year':...",2023-02-27T06:27:12.858369,2018-11-29
3,https://openalex.org/W2287383446,https://doi.org/10.1126/scisignal.aad1884,TGF-β inhibits the activation and functions of...,TGF-β inhibits the activation and functions of...,486.682460,2016,2016-02-16,{'openalex': 'https://openalex.org/W2287383446...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S155937366', 'iss...",...,,"[{'id': 'https://openalex.org/S155937366', 'di...","[https://openalex.org/W1571796034, https://ope...","[https://openalex.org/W2002636911, https://ope...",https://api.openalex.org/works/W2287383446/ngrams,"{'Blocking': [0], 'TGF-β': [1], 'signaling': [...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 6}, {'year':...",2023-02-16T23:35:48.612635,2016-06-24
4,https://openalex.org/W2770644476,https://doi.org/10.1016/j.techfore.2017.11.021,Open or proprietary? Choosing the right crowds...,Open or proprietary? Choosing the right crowds...,452.894740,2019,2019-07-01,{'openalex': 'https://openalex.org/W2770644476...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S39307421', 'issn...",...,"{'is_oa': True, 'landing_page_url': 'https://d...","[{'id': 'https://openalex.org/S39307421', 'dis...","[https://openalex.org/W802374166, https://open...","[https://openalex.org/W1732992956, https://ope...",https://api.openalex.org/works/W2770644476/ngrams,"{'Abstract': [0], 'Numerous': [3], 'reports': ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 2}, {'year':...",2023-02-27T08:10:22.769470,2017-12-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380,https://openalex.org/W3122665512,,Endogenous heterogeneity in duopoly with deter...,Endogenous heterogeneity in duopoly with deter...,1.027424,2018,2018-01-01,{'openalex': 'https://openalex.org/W3122665512...,"{'is_oa': None, 'landing_page_url': 'https://i...","{'id': 'https://openalex.org/S4306401271', 'is...",...,,"[{'id': 'https://openalex.org/S4306401271', 'd...",[],"[https://openalex.org/W1856556253, https://ope...",https://api.openalex.org/works/W3122665512/ngrams,"{'This': [0], 'paper': [1], 'examines': [2], '...",https://api.openalex.org/works?filter=cites:W3...,[],2023-02-27T21:07:27.739134,2021-02-01
381,https://openalex.org/W3140319616,,Learning to hesitate,Learning to hesitate,1.025919,2021,2021-01-01,{'openalex': 'https://openalex.org/W3140319616...,"{'is_oa': None, 'landing_page_url': 'https://i...","{'id': 'https://openalex.org/S3006283864', 'is...",...,,"[{'id': 'https://openalex.org/S3006283864', 'd...",[],"[https://openalex.org/W266661261, https://open...",https://api.openalex.org/works/W3140319616/ngrams,"{'We': [0, 69], 'investigate': [1], 'how': [2]...",https://api.openalex.org/works?filter=cites:W3...,[],2023-02-12T15:24:11.194774,2021-04-13
382,https://openalex.org/W2949818075,https://doi.org/10.2139/ssrn.2989560,Structural Changes and Growth Regimes,Structural Changes and Growth Regimes,1.019030,2017,2017-06-14,{'openalex': 'https://openalex.org/W2949818075...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210172589', 'is...",...,"{'is_oa': True, 'landing_page_url': 'http://sr...","[{'id': 'https://openalex.org/S4210172589', 'd...",[],"[https://openalex.org/W30335328, https://opena...",https://api.openalex.org/works/W2949818075/ngrams,"{'We': [0, 67, 99, 209], 'study': [1, 77], 'th...",https://api.openalex.org/works?filter=cites:W2...,[],2023-02-17T04:43:48.091670,2019-06-27
383,https://openalex.org/W4313648003,https://doi.org/10.1017/s1744137422000510,Integrating the exploration-exploitation dilem...,Integrating the exploration-exploitation dilem...,1.014022,2023,2023-01-06,{'openalex': 'https://openalex.org/W4313648003...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S146268463', 'iss...",...,,"[{'id': 'https://openalex.org/S146268463', 'di...","[https://openalex.org/W126931506, https://open...","[https://openalex.org/W232077089, https://open...",https://api.openalex.org/works/W4313648003/ngrams,"{'Abstract': [0], 'Contemporary': [1], 'Austri...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-24T14:21:36.241449,2023-01-07


# Nombre de publications par an

In [354]:
# Number of fetched works per publication year.
df.groupby('publication_year')['id'].count()

publication_year
2016    51
2017    43
2018    41
2019    61
2020    59
2021    77
2022    49
2023     4
Name: id, dtype: int64

# Publications par domaines et par an

In [355]:
def _top_level0_concept(work):
    """Return the highest-scoring level-0 concept of ``work``, or None if it has none."""
    level0 = [c for c in (work.get('concepts') or []) if c.get('level') == 0]
    if not level0:
        return None
    # max() keeps the first of several equally scored concepts.
    return max(level0, key=lambda c: c.get('score') or 0)


concepts_year = []
for p in x:
    publication_year = p['publication_year']
    top_concept = _top_level0_concept(p)
    if top_concept is None:
        # Only works WITHOUT a level-0 concept get an 'N/A' row. The work id is
        # stored under 'id' so the count-based pivot below actually counts it.
        concepts_year.append({'publication_year': publication_year, 'display_name': 'N/A',
                              'id': p.get('id')})
    else:
        elt = {'publication_year': publication_year}
        elt.update(top_concept)
        concepts_year.append(elt)

df_concept = pd.DataFrame(concepts_year)
pd.pivot_table(df_concept, columns=['display_name'],
               index='publication_year', values='id', aggfunc='count', margins=True).fillna(0)

display_name,Art,Biology,Business,Chemistry,Computer science,Economics,Engineering,Environmental science,Geography,Geology,History,Mathematics,Medicine,N/A,Philosophy,Political science,Psychology,Sociology,All
publication_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2016,1.0,1.0,12.0,2.0,3.0,20.0,0.0,1.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,3.0,2.0,51
2017,0.0,2.0,7.0,0.0,0.0,20.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,3.0,6.0,1.0,1.0,43
2018,0.0,0.0,11.0,0.0,2.0,21.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,41
2019,1.0,4.0,13.0,0.0,4.0,28.0,0.0,3.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,1.0,2.0,0.0,61
2020,0.0,3.0,12.0,0.0,3.0,30.0,0.0,1.0,4.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,2.0,0.0,59
2021,0.0,0.0,24.0,0.0,6.0,24.0,0.0,4.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,7.0,4.0,3.0,77
2022,0.0,0.0,14.0,0.0,1.0,17.0,1.0,4.0,4.0,0.0,0.0,0.0,2.0,0.0,0.0,4.0,1.0,1.0,49
2023,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
All,2.0,10.0,96.0,2.0,19.0,161.0,1.0,16.0,17.0,1.0,1.0,7.0,4.0,0.0,5.0,23.0,13.0,7.0,385


# Top revues

In [356]:
# Twenty most frequent host venues, by display name.
df['host_venue'].map(lambda venue: venue['display_name']).value_counts().head(20)

RePEc: Research Papers in Economics                           50
Social Science Research Network                               17
HAL (Le Centre pour la Communication Scientifique Directe)    11
Applied Economics                                             10
SpringerBriefs in well-being and quality of life research      9
Management international                                       8
India studies in business and economics                        8
Ecological Economics                                           8
Springer International Publishing eBooks                       7
Research Policy                                                7
Journal of Evolutionary Economics                              6
Environmental Modeling & Assessment                            6
Journal of Public Economic Theory                              5
European Journal of Law and Economics                          5
Frontiers in economic history                                  5
Resource and Energy Econo

# Top auteurs

In [357]:
def input_in_string(input_str, aff_str):
    """Tell whether every token of ``input_str`` occurs in ``aff_str``.

    Both strings are passed through ``normalize`` first. The normalised input
    is split on spaces and each token must be a substring of the normalised
    affiliation string. Returns ``False`` when ``aff_str`` is ``None``.
    """
    if aff_str is None:
        return False
    haystack = normalize(aff_str)
    return all(token in haystack for token in normalize(input_str).split(' '))

def input_in_authorships(input_str, input_type, authorship):
    """Tell whether one authorship record matches the requested affiliation.

    For ``'raw_affiliation_string'`` the check is delegated to
    ``input_in_string`` on the authorship's raw affiliation string. For any
    other type, the part after the last ``.`` names an institution field
    (``'institutions.country_code'`` gives ``'country_code'``), and the
    normalised input must be a substring of that normalised field for at
    least one non-empty institution of the authorship.
    """
    if input_type == 'raw_affiliation_string':
        return input_in_string(input_str, authorship.get('raw_affiliation_string'))
    field = input_type.split('.')[-1]
    return any(
        normalize(input_str) in normalize(institution.get(field))
        for institution in authorship.get('institutions', [])
        if institution
    )
            

In [358]:
def _author_row(publication, authorship):
    """Build one author record: DOI, raw affiliation string and the authorship's author fields."""
    row = {'doi': publication['doi'], 'raw_affiliation_string': authorship['raw_affiliation_string']}
    row.update(authorship['author'])
    return row


def _print_top_authors(rows, top=10):
    """Print the ``top`` most frequent author names of ``rows``; print nothing when ``rows`` is empty."""
    if rows:
        print(pd.DataFrame(rows).display_name.value_counts().head(top))


auteurs_data_input1 = []
auteurs_data_input2 = []
auteurs_data_other = []
for p in x:
    # NOTE(review): only the FIRST authorship of each publication is classified.
    # This keeps the behaviour of the previous loop, which hit a `break` in
    # every branch. Confirm whether all authorships should be looked at.
    for a in (p.get('authorships') or [])[:1]:
        if input_in_authorships(affiliation1_str, affiliation1_type, a):
            auteurs_data_input1.append(_author_row(p, a))
        elif affiliation2_str and input_in_authorships(affiliation2_str, affiliation2_type, a):
            auteurs_data_input2.append(_author_row(p, a))
        else:
            auteurs_data_other.append(_author_row(p, a))

# An empty group prints nothing instead of failing on a column-less DataFrame.
print('auteurs input1')
print()
_print_top_authors(auteurs_data_input1)
print()
print('---')
print('auteur fr input2')
print()
_print_top_authors(auteurs_data_input2)
print('---')
print('auteur fr other')
print()
_print_top_authors(auteurs_data_other)

auteurs input1

Claude Diebolt         28
Blaise Gnimassoun      10
Olivier Damette         8
Marielle Brunette       7
Kene Boun My            6
Phu Nguyen-Van          5
Emilie Ruiz             5
Véronique Schaeffer     5
Laté Ayao Lawson        4
Derya Keles             4
Name: display_name, dtype: int64

---
auteur fr input2

---
auteur fr other

Stefano Bosi                17
Gaël Brulé                   9
Tommaso Ciarli               3
Patricia Laurens             3
Benjamin Ouvrard             3
Stefano Bianchini            3
Ambroise Descamps            3
Dramane Coulibaly            3
Valentin Bellassen           3
Stefano Horst Baruffaldi     2
Name: display_name, dtype: int64


# Top partenaires - données brutes

In [359]:
partenaires_data_brut = []
signature_data = []
# Each (publication, affiliation) pair is counted once, whether it has 1 or 20 authors.
# A set keeps the membership test cheap.
known_keys = set()
for p in x:
    doi = p.get('doi')
    if not doi:
        # Publications without a DOI cannot be keyed, so they are skipped.
        continue
    for a in p.get('authorships') or []:
        # A raw affiliation string may list several affiliations separated by ';'.
        for raw_part in (a.get('raw_affiliation_string') or '').split(';'):
            i = raw_part.strip()
            if not i:
                continue
            # The key uses the stripped text, like the stored value, so that
            # ' X' and 'X' count as the same affiliation.
            elt_key = doi + ';' + i
            if elt_key in known_keys:
                continue
            known_keys.add(elt_key)
            elt = {'doi': doi, 'raw_affiliation_string': i}
            if affiliation1_type == 'raw_affiliation_string':
                is_signature = input_in_string(affiliation1_str, i)
            else:
                is_signature = input_in_authorships(affiliation1_str, affiliation1_type, a)
            if is_signature:
                signature_data.append(elt)
            else:
                partenaires_data_brut.append(elt)

# Explicit columns keep the expression valid when no partner was found.
pd.DataFrame(partenaires_data_brut, columns=['doi', 'raw_affiliation_string']).raw_affiliation_string.value_counts().head(10)

Forward College, Université de Neuchâtel, Neuchâtel, Switzerland                               9
Southampton Business School, University of Southampton, Southampton, UK                        8
(Business School, University of Roehampton, London, UK)                                        8
Department for VET and Skills, Cedefop, Thessaloniki, Greece                                   7
University of Angers - Bureau of Economic Theory and Application (BETA)                        4
Groupe d'Analyse et de Théorie Economique (GATE), CNRS, University of Lyon                     4
Toulouse School of Economics, INRA, University of Toulouse Capitole, Toulouse, France.         3
Centre d'Economie et de Sociologie Rurales Appliquées à l'Agriculture et aux Espaces Ruraux    3
Europäische Kommission, Joint Research Centre (JRC), Ispra, Italien                            3
Universal Scientific Education and Research Network (USERN)                                    3
Name: raw_affiliation_string, 

In [360]:
# Ten most frequent raw affiliation strings among those matching the first affiliation.
pd.DataFrame(signature_data)['raw_affiliation_string'].value_counts().head(10)

BETA/CNRS, Faculty of Economics, University of Strasbourg, Strasbourg, France                              9
Faculté des Sciences Economiques et de Gestion BETA-CNRS, Université de Strasbourg, Strasbourg, France     9
Faculty of Economics, BETA/CNRS, Strasbourg, France                                                        8
Université de Strasbourg, Université de Lorraine, CNRS, BETA, Strasbourg, France                           8
Université de Lorraine, Université de Strasbourg, AgroParisTech, CNRS, INRAE, BETA, Nancy, France          7
Université de Lorraine, Université de Strasbourg, AgroParisTech, CNRS, INRA, BETA, 54000, Nancy, France    7
Université de Lorraine, Université de Strasbourg, CNRS, BETA, Nancy, France.                               6
BETA/CNRS Université de Strasbourg, Strasbourg, France                                                     4
Université de Lorraine, Université de Strasbourg,  AgroParisTech, CNRS, Inra, BETA, Nancy, France          4
BETA-CNRS, Universi

# Top partenaires - données normalisées par OpenAlex

In [361]:
INSTITUTION_COLUMNS = ['doi', 'display_name', 'country_code', 'concepts']


def _add_institutions(publication, authorship, rows, seen_keys):
    """Append to ``rows`` one record per not-yet-seen (DOI, institution name) pair of ``authorship``.

    Publications without a DOI and institutions without a display name are
    skipped. ``seen_keys`` is updated in place.
    """
    doi = publication.get('doi')
    if not doi:
        return
    for institution in authorship.get('institutions') or []:
        name = institution.get('display_name') if institution else None
        if not name:
            continue
        key = doi + ';' + name
        if key in seen_keys:
            continue
        seen_keys.add(key)
        rows.append({'doi': doi, 'display_name': name,
                     'country_code': institution.get('country_code'),
                     'concepts': publication.get('concepts') or []})


inst1, inst2, inst3 = [], [], []
known_keys1, known_keys2, known_keys3 = set(), set(), set()
for p in x:
    for a in p.get('authorships') or []:
        # Each authorship goes to exactly one group: affiliation 1, else affiliation 2, else others.
        if input_in_authorships(affiliation1_str, affiliation1_type, a):
            _add_institutions(p, a, inst1, known_keys1)
        elif affiliation2_str and input_in_authorships(affiliation2_str, affiliation2_type, a):
            _add_institutions(p, a, inst2, known_keys2)
        else:
            _add_institutions(p, a, inst3, known_keys3)

# Explicit columns keep the column accesses valid when a group is empty.
df_inst1 = pd.DataFrame(inst1, columns=INSTITUTION_COLUMNS)
print('instiutions affiliation 1')
print()
print(df_inst1.display_name.value_counts().head(10))
print()
print('-----')
df_inst2 = pd.DataFrame(inst2, columns=INSTITUTION_COLUMNS)
print('instiutions affiliation 2')
print()
if inst2:
    print(df_inst2.display_name.value_counts().head(10))
print()
print('-----')
df_inst3 = pd.DataFrame(inst3, columns=INSTITUTION_COLUMNS)
print('autres partenaires')
print()
print(df_inst3.display_name.value_counts().head(10))
print()
print('-----')

instiutions affiliation 1

University of Lorraine                                                                                     82
University of Strasbourg                                                                                   76
Beta                                                                                                       53
Bureau for Economic Theory and Applications                                                                11
BETA/CNRS, Faculty of Economics, University of Strasbourg, Strasbourg, France                               9
Agro ParisTech                                                                                              8
Université de Lorraine, Université de Strasbourg, AgroParisTech, CNRS, INRA, BETA, 54000, Nancy, France     7
Paris Dauphine University                                                                                   7
University of Upper Alsace                                                                   

# Top pays partenaires

In [362]:
# Ten most frequent country codes among the other (partner) institutions.
df_inst3['country_code'].value_counts().head(10)

FR    205
GB     49
IT     40
ES     40
DE     25
US     25
CH     17
DK     16
NL     16
CA     10
Name: country_code, dtype: int64

# Thématiques quand collab avec un pays

In [366]:
# Country code of the partner to analyse; compared with df_inst3.country_code below.
partner = 'IT'

In [367]:
concepts_with_partner = []
for row in df_inst3[df_inst3.country_code==partner].itertuples():
    for c in row.concepts or []:
        if c['level'] > 0:
            # Copy rather than mutate: the concept dicts are the very objects held
            # in `x`, and several rows of df_inst3 can share the same list.
            concepts_with_partner.append({**c, 'doi': row.doi})
# NOTE(review): a publication with several institutions in the partner country
# contributes its concepts once per institution - confirm this is intended.
# Building a Series from the names keeps the expression valid when nothing matches.
pd.Series([c.get('display_name') for c in concepts_with_partner],
          name='display_name', dtype='object').value_counts().head(10)

Ecology                    17
Finance                    14
Law                         8
Microeconomics              6
Macroeconomics              6
Sustainability              5
Industrial organization     5
Mathematical analysis       5
Epistemology                5
Machine learning            5
Name: display_name, dtype: int64

In [365]:
#pd.DataFrame(concepts_with_partner)