In [1]:
import requests
import math
import pandas as pd
import json
import os

In [2]:
os.system('mkdir -p cache')

0

In [3]:
import re
from tokenizers import normalizers
from tokenizers.normalizers import BertNormalizer, Sequence, Strip
from tokenizers import pre_tokenizers
from tokenizers.pre_tokenizers import Whitespace

normalizer = Sequence([BertNormalizer(clean_text=True,
        handle_chinese_chars=True,
        strip_accents=True,
        lowercase=True), Strip()])
pre_tokenizer = pre_tokenizers.Sequence([Whitespace()])

def normalize(x, min_length = 1):
    if not isinstance(x, str):
        return ''
    normalized = normalizer.normalize_str(x)
    normalized = normalized.replace('\n', ' ')
    normalized = re.sub(' +', ' ', normalized)
    return " ".join([e[0] for e in pre_tokenizer.pre_tokenize_str(normalized) if len(e[0]) > min_length])

# Stats globale par pays

cf page 25 rapport OST sur les publis 2005-2018

In [4]:
def get_global_data():
    try:
        global_data = json.load(open('cache/global_data.json', 'r'))
    except:
        global_data = []
        for year in range(2005, 2023):
            print(year, end=',')
            r = requests.get(f'https://api.openalex.org/works?filter=publication_year:{year}&group-by=institutions.country_code').json()['group_by']
            for ix, e in enumerate(r):
                e['year'] = year
                e['rank'] = ix
            global_data += r
        json.dump(global_data, open('cache/global_data.json', 'w'))
    return global_data


In [70]:
global_data = get_global_data()
df_global = pd.DataFrame(global_data)
top_country = df_global[df_global.year==2021].head(10).key_display_name.tolist()
df_global_top = df_global[df_global.key_display_name.apply(lambda x:x in top_country)]
pd.pivot_table(df_global_top, index='year', columns="key_display_name", values='rank')

key_display_name,Brazil,China,France,Germany,India,Indonesia,Japan,United Kingdom of Great Britain and Northern Ireland,United States of America,unknown
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2005,11,2,6,5,12,50,4,3,1,0
2006,11,2,6,5,13,50,4,3,1,0
2007,11,2,5,6,12,46,4,3,1,0
2008,10,2,5,4,12,45,6,3,1,0
2009,10,2,5,4,12,44,6,3,1,0
2010,10,2,5,4,12,39,6,3,1,0
2011,9,2,5,4,11,23,6,3,1,0
2012,8,2,5,4,9,25,6,3,1,0
2013,8,2,4,5,7,24,6,3,1,0
2014,8,2,5,4,7,23,6,3,1,0


In [71]:
df_global[df_global.key_display_name=="France"]

Unnamed: 0,key,key_display_name,count,year,rank
6,FR,France,95101,2005,6
206,FR,France,110161,2006,6
405,FR,France,118395,2007,5
605,FR,France,125229,2008,5
805,FR,France,133732,2009,5
1005,FR,France,140662,2010,5
1205,FR,France,148754,2011,5
1405,FR,France,161599,2012,5
1604,FR,France,173802,2013,4
1805,FR,France,179044,2014,5


# Exploration sur une requête (affiliation)

In [7]:
first_year = 2016

In [8]:
affiliation1_str = 'Huawei'
affiliation1_type = 'raw_affiliation_string' 


affiliation2_str = 'france'
affiliation2_type = 'raw_affiliation_string'
#'institutions.country_code' 

thematic = None #'zoonosis' #'military'

#user_input = 'microsoft'
#collab_fr = True


assert (affiliation1_type in ['raw_affiliation_string', 'institutions.country_code', 'institutions.ror'])

def get_filename(affiliation1_str, affiliation1_type, affiliation2_str, affiliation2_type, thematic, random_size ):
    filename = f'cache/{normalize(affiliation1_str)}_{normalize(affiliation2_str)}_sample{random_size}_{normalize(thematic)}.json'.replace(' ', '_')
    return filename



In [9]:
BASE_URL = f"https://api.openalex.org/works?filter=publication_year:{first_year}-"

In [10]:
def get_search_field(f):
    if f in ['raw_affiliation_string']:
        return 'raw_affiliation_string.search'
    return f

def get_data(affiliation1_str, affiliation1_type, affiliation2_str, affiliation2_type, thematic ):
    url = BASE_URL + f",{get_search_field(affiliation1_type)}:{affiliation1_str}"
    if affiliation2_str:
        url += f",{get_search_field(affiliation2_type)}:{affiliation2_str}"
    if thematic:
        random_size = None
        url += f"&search={thematic}"
    else:
        random_size = 1000
        url += f"&sample={random_size}&seed=0"
        
    print(url)
    filename = get_filename(affiliation1_str, affiliation1_type, affiliation2_str, affiliation2_type, 
                            thematic, random_size )
    print(filename)
    try:
        data = json.load(open(filename, 'r'))
    except:
        results = []
        res = requests.get(url).json()
        nb_results = res['meta']['count']
        print(nb_results)
        nb_page = math.ceil(nb_results / res['meta']['per_page'])
        results = res['results']
        for p in range(2, nb_page + 1):
            print(p, end=',')
            res = requests.get(url+f"&page={p}").json()
            results += res['results']
        assert(len(results) == nb_results)
        data = {'results': results}
        json.dump(data, open(filename, 'w'))
    return data['results']
        

In [11]:
x = get_data(affiliation1_str, affiliation1_type, affiliation2_str, affiliation2_type, thematic)
df = pd.DataFrame(x)

https://api.openalex.org/works?filter=publication_year:2016-,raw_affiliation_string.search:Huawei,raw_affiliation_string.search:france&sample=1000&seed=0
cache/huawei_france_sample1000_.json
928
2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,

In [12]:
df

Unnamed: 0,id,doi,title,display_name,relevance_score,publication_year,publication_date,ids,primary_location,host_venue,...,best_oa_location,alternate_host_venues,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date
0,https://openalex.org/W4283722879,https://doi.org/10.1109/codit55151.2022.9803922,High-level Colored Time Petri Nets for true co...,High-level Colored Time Petri Nets for true co...,0.999033,2022,2022-05-17,{'openalex': 'https://openalex.org/W4283722879...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4306402512', 'is...",...,"{'is_oa': True, 'landing_page_url': 'https://h...","[{'id': 'https://openalex.org/S4306402512', 'd...","[https://openalex.org/W1498228645, https://ope...","[https://openalex.org/W1506579115, https://ope...",https://api.openalex.org/works/W4283722879/ngrams,"{'The': [0], 'control': [1], 'of': [2, 54, 108...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2022, 'cited_by_count': 1}]",2023-02-28T03:16:45.177783,2022-07-01
1,https://openalex.org/W2952027799,,Quantized Compute-and-Forward for Limited Back...,Quantized Compute-and-Forward for Limited Back...,0.997492,2016,2016-05-21,{'openalex': 'https://openalex.org/W2952027799...,"{'is_oa': None, 'landing_page_url': None, 'pdf...","{'id': 'https://openalex.org/S4306402512', 'is...",...,,"[{'id': 'https://openalex.org/S4306402512', 'd...",[],"[https://openalex.org/W334746501, https://open...",https://api.openalex.org/works/W2952027799/ngrams,,https://api.openalex.org/works?filter=cites:W2...,[],2023-02-28T01:35:13.385691,2019-06-27
2,https://openalex.org/W3048362310,https://doi.org/10.1109/newcas49341.2020.9159799,77.8 GHz Standing-wave Oscillator Based on a T...,77.8 GHz Standing-wave Oscillator Based on a T...,0.995782,2020,2020-06-16,{'openalex': 'https://openalex.org/W3048362310...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4306420031', 'is...",...,,"[{'id': 'https://openalex.org/S4306420031', 'd...","[https://openalex.org/W1966715801, https://ope...","[https://openalex.org/W1506906738, https://ope...",https://api.openalex.org/works/W3048362310/ngrams,"{'This': [0], 'paper': [1], 'presents': [2], '...",https://api.openalex.org/works?filter=cites:W3...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-02-27T15:36:07.035830,2020-08-13
3,https://openalex.org/W2980604288,https://doi.org/10.1109/cleoe-eqec.2019.8873256,Probabilistic Shaping and its Applications for...,Probabilistic Shaping and its Applications for...,0.994932,2019,2019-06-23,{'openalex': 'https://openalex.org/W2980604288...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4306418370', 'is...",...,,"[{'id': 'https://openalex.org/S4306418370', 'd...","[https://openalex.org/W627952176, https://open...","[https://openalex.org/W1795443043, https://ope...",https://api.openalex.org/works/W2980604288/ngrams,"{'Communication': [0], 'channels': [1], 'often...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 1}]",2023-02-26T19:55:17.732519,2019-10-25
4,https://openalex.org/W3004202754,https://doi.org/10.1109/tgcn.2020.2969422,QoS- and Energy-Aware Optimal Resource Allocat...,QoS- and Energy-Aware Optimal Resource Allocat...,0.991647,2020,2020-01-27,{'openalex': 'https://openalex.org/W3004202754...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210192662', 'is...",...,,"[{'id': 'https://openalex.org/S4210192662', 'd...","[https://openalex.org/W1994112296, https://ope...","[https://openalex.org/W1530677187, https://ope...",https://api.openalex.org/works/W3004202754/ngrams,"{'Due': [0], 'to': [1, 21, 72, 170], 'the': [2...",https://api.openalex.org/works?filter=cites:W3...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-25T02:37:35.013342,2020-02-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
923,https://openalex.org/W4320351064,,Meta-learning of Black-box Solvers Using Deep ...,Meta-learning of Black-box Solvers Using Deep ...,0.006874,2022,2022-12-02,{'openalex': 'https://openalex.org/W4320351064'},"{'is_oa': True, 'landing_page_url': 'https://h...","{'id': 'https://openalex.org/S4306402512', 'is...",...,"{'is_oa': True, 'landing_page_url': 'https://h...","[{'id': 'https://openalex.org/S4306402512', 'd...",[],"[https://openalex.org/W3014300295, https://ope...",https://api.openalex.org/works/W4320351064/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2023-03-04T03:49:10.441768,2023-02-13
924,https://openalex.org/W4293057678,https://doi.org/10.1109/vtc2022-spring54318.20...,Enhancing the 5G-V2X Sidelink Autonomous Mode ...,Enhancing the 5G-V2X Sidelink Autonomous Mode ...,0.006684,2022,2022-06-01,{'openalex': 'https://openalex.org/W4293057678...,"{'is_oa': False, 'landing_page_url': 'https://...","{'id': None, 'issn_l': None, 'issn': None, 'di...",...,,"[{'id': None, 'display_name': '2022 IEEE 95th ...","[https://openalex.org/W40343472, https://opena...","[https://openalex.org/W1573141687, https://ope...",https://api.openalex.org/works/W4293057678/ngrams,"{'Efforts': [0], 'are': [1], 'underway': [2], ...",https://api.openalex.org/works?filter=cites:W4...,[],2023-03-05T19:24:49.635750,2022-08-25
925,https://openalex.org/W4211230541,https://doi.org/10.1109/icnsc52481.2021.9702248,Approximate solutions for a special pagination...,Approximate solutions for a special pagination...,0.002686,2021,2021-12-03,{'openalex': 'https://openalex.org/W4211230541...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': None, 'issn_l': None, 'issn': None, 'di...",...,,"[{'id': None, 'display_name': '2021 IEEE Inter...",[],"[https://openalex.org/W1973805410, https://ope...",https://api.openalex.org/works/W4211230541/ngrams,"{'In': [0], 'this': [1, 43], 'paper,': [2], 'w...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-19T17:16:38.699865,2022-02-13
926,https://openalex.org/W3183615332,,The Graph Neural Networking Challenge: A World...,The Graph Neural Networking Challenge: A World...,0.001893,2021,2021-07-26,{'openalex': 'https://openalex.org/W3183615332...,"{'is_oa': True, 'landing_page_url': 'http://ar...","{'id': 'https://openalex.org/S4306400194', 'is...",...,"{'is_oa': True, 'landing_page_url': 'http://ar...","[{'id': 'https://openalex.org/S4306400194', 'd...","[https://openalex.org/W2020634547, https://ope...","[https://openalex.org/W12204934, https://opena...",https://api.openalex.org/works/W3183615332/ngrams,"{'During': [0, 61], 'the': [1, 14, 40, 63, 70,...",https://api.openalex.org/works?filter=cites:W3...,"[{'year': 2021, 'cited_by_count': 1}]",2023-02-27T23:03:52.968932,2021-08-02


# Nombre de publications par an

In [13]:
df.groupby('publication_year').id.count()

publication_year
2016    165
2017    148
2018    162
2019    133
2020    116
2021    111
2022     86
2023      7
Name: id, dtype: int64

# Publications par domaines et par an

In [14]:
concepts_year = []
for p in x:
    publication_year = p['publication_year']
    has_concept = False
    concepts = p.get('concepts')
    sorted(concepts, key = lambda e: e['score'])
    for c in p.get('concepts'):
        if c.get('level') == 0:
            elt = {'publication_year': publication_year}
            elt.update(c)
            concepts_year.append(elt)
            break # on ne garde que le premier concept avec le score le plus élevé
    if has_concept is False:
        concepts_year.append({'publication_year': publication_year, 'display_name': 'N/A'})
        
df_concept = pd.DataFrame(concepts_year)
pd.pivot_table(df_concept, columns=['display_name'], 
               index='publication_year', values='id', aggfunc='count', margins=True).fillna(0)

display_name,Business,Computer science,Engineering,Environmental science,Materials science,Mathematics,N/A,Physics,Political science,All
publication_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016,0.0,155.0,0.0,0.0,1.0,6.0,0.0,3.0,0.0,165
2017,0.0,136.0,0.0,0.0,0.0,5.0,0.0,6.0,1.0,148
2018,0.0,147.0,0.0,0.0,1.0,8.0,0.0,6.0,0.0,162
2019,0.0,121.0,2.0,1.0,1.0,6.0,0.0,2.0,0.0,133
2020,0.0,100.0,1.0,0.0,4.0,8.0,0.0,3.0,0.0,116
2021,1.0,101.0,0.0,1.0,3.0,5.0,0.0,0.0,0.0,111
2022,0.0,81.0,0.0,0.0,0.0,3.0,0.0,1.0,1.0,86
2023,0.0,6.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,7
All,1.0,847.0,3.0,2.0,10.0,42.0,0.0,21.0,2.0,928


# Top revues

In [16]:
df.host_venue.apply(lambda x: x['display_name'] ).value_counts().head(20)

HAL (Le Centre pour la Communication Scientifique Directe)                         166
arXiv (Cornell University)                                                         142
International Conference on Communications                                          37
IEEE Transactions on Wireless Communications                                        24
IEEE Transactions on Communications                                                 18
IEEE Journal on Selected Areas in Communications                                    15
IEEE Transactions on Information Theory                                             14
Journal of Lightwave Technology                                                     13
IEEE Access                                                                         13
IEEE Communications Letters                                                         13
International Workshop on Signal Processing Advances in Wireless Communications     12
Personal, Indoor and Mobile Radio Communica

# Top auteurs

In [17]:
def input_in_string(input_str, aff_str):
    if aff_str is None:
        return False
    for a in normalize(input_str).split(' '):
        if a not in normalize(aff_str):
            return False
    return True

def input_in_authorships(input_str, input_type, authorship):
    if input_type == 'raw_affiliation_string':
        return input_in_string(input_str, authorship.get('raw_affiliation_string'))
    else:
        field = input_type.split('.')[-1]
        for i in authorship.get('institutions', []):
            if i and normalize(input_str) in normalize(i.get(field)):
                return True
    return False
            

In [18]:
auteurs_data_input1 = []
auteurs_data_input2 = []
auteurs_data_other = []
for p in x:
    for a in p.get('authorships'):
        if input_in_authorships(affiliation1_str, affiliation1_type, a):
            elt = {'doi': p['doi'], 'raw_affiliation_string': a['raw_affiliation_string']}
            elt.update(a['author'])
            auteurs_data_input1.append(elt)
            break
        elif affiliation2_str and input_in_authorships(affiliation2_str, affiliation2_type, a):
            elt = {'doi': p['doi'], 'raw_affiliation_string': a['raw_affiliation_string']}
            elt.update(a['author'])
            auteurs_data_input2.append(elt)
            break
        else:
            elt = {'doi': p['doi'], 'raw_affiliation_string': a['raw_affiliation_string']}
            elt.update(a['author'])
            auteurs_data_other.append(elt)
            break
print('auteurs input1')
print()
print(pd.DataFrame(auteurs_data_input1).display_name.value_counts().head(10))
print()
print('---')
print('auteur fr input2')
print()
if auteurs_data_input2:
    print(pd.DataFrame(auteurs_data_input2).display_name.value_counts().head(10))
print('---')
print('auteur fr other')
print()
print(pd.DataFrame(auteurs_data_other).display_name.value_counts().head(10))

auteurs input1

Inaki Estella Aguerri        16
Meryem Benammar              15
Apostolos Destounis          15
Valerio Bioglio              15
Italo Atzeni                 12
Paul Ferrand                 12
Georgios S. Paschos          11
George C. Alexandropoulos    10
Alexis Decurninge             9
Pierre Escamilla              8
Name: display_name, dtype: int64

---
auteur fr input2

Zheng Chen             6
Marco Di Renzo         4
A. S. Bemani           4
Ejder Bastug           4
Ali Mokh               3
Maialen Larranaga      3
Juan Antonio García    3
Zakaria Ye             3
Antonio Campello       3
George Dasoulas        3
Name: display_name, dtype: int64
---
auteur fr other

Mohammad Mozaffari       15
Deepak Mishra            10
Ahmed El Shafie           8
Alessio Zappone           7
Nikolaos I. Miridakis     7
Chongwen Huang            7
Asma Mabrouk              7
Qurrat-Ul-Ain Nadeem      6
Antonio Campello          6
Nikolaos Pappas           5
Name: display_name, dtyp

# Top partenaires - données brutes

In [19]:
partenaires_data_brut = []
signature_data = []
known_keys = [] # on ne compte qu'une fois un couple publi x affiliation qu'il y ait 1 ou 20 auteurs
for p in x:
    for a in p.get('authorships'):
        for i in a['raw_affiliation_string'].strip().split(';'):
            if i and p['doi']:
                elt_key = p['doi']+';'+i
                elt = {'doi': p['doi'], 'raw_affiliation_string': i.strip()}
                if elt_key in known_keys:
                    continue
                else:
                    known_keys.append(elt_key)
                if affiliation1_type == 'raw_affiliation_string':
                    if input_in_string(affiliation1_str, i):
                        signature_data.append(elt)
                    else:
                        partenaires_data_brut.append(elt)
                else:
                    if input_in_authorships(affiliation1_str, affiliation1_type, a):
                        signature_data.append(elt)
                    else:
                        partenaires_data_brut.append(elt)
                    
                
            
pd.DataFrame(partenaires_data_brut).raw_affiliation_string.value_counts().head(10)

[Centre for wireless communications, University of Oulu, Oulu, Finland]                                                 13
EURECOM, Sophia, Antipolis, France#TAB#                                                                                  7
Germany                                                                                                                  7
TELECOM ParisTech , Paris, France                                                                                        5
[Centre for Wireless Communications, University of Oulu, FINLAND]                                                        5
[HANA Research Laboratory, ENSI, Manouba University, Manouba, Tunisia]                                                   5
CERI/LIA, Univ. of Avignon, Avignon, France                                                                              4
Radboud University, Institute for Molecules and Materials, NL-6525 AJ Nijmegen, The Netherlands                          4
Electrical Engin

In [20]:
pd.DataFrame(signature_data).raw_affiliation_string.value_counts().head(10)

Mathematical and Algorithmic Sciences Lab, Huawei France R&D, Paris, France                                                                    23
HUAWEI Technologies France                                                                                                                     20
Mathematical and Algorithmic Sciences Lab, France Research Center, Huawei Technologies Co. Ltd                                                 20
Mathematical and Algorithmic Sciences Laboratory, Paris Research Center, Huawei Technologies France SASU, Boulogne-Billancourt, France         10
[Mathematical and Algorithmic Sciences Laboratory, France Research Center, Huawei Technologies Company, Ltd., Boulogne-Billancourt, France]    10
Mathematical and Algorithmic Sciences Lab, France Research Center, Huawei Technologies Co. Ltd., France#TAB#                                   10
Mathematical and Algorithmic Sciences Lab, France, Research Center, Huawei Technologies France SASU                         

# Top partenaires - données normalisées par OpenAlex

In [21]:
inst1, inst2, inst3 = [], [], []
known_keys1, known_keys2, known_keys3  = [], [], []
for p in x:
    for a in p.get('authorships'):
        if input_in_authorships(affiliation1_str, affiliation1_type, a):
            for i in a.get('institutions'):
                if p.get('doi') and i.get('display_name'):
                    elt_key = p['doi']+';'+i['display_name']
                    elt = {'doi': p['doi'], 'display_name': i['display_name'], 'country_code': i['country_code'], 
                          'concepts': p['concepts'] }
                    if elt_key not in known_keys1:
                        inst1.append(elt)
                        known_keys1.append(elt_key)
        elif affiliation2_str and input_in_authorships(affiliation2_str, affiliation2_type, a):
            for i in a.get('institutions'):
                if p.get('doi') and i.get('display_name'):
                    elt_key = p['doi']+';'+i['display_name']
                    elt = {'doi': p['doi'], 'display_name': i['display_name'], 'country_code': i['country_code'], 
                          'concepts': p['concepts'] }
                    if elt_key not in known_keys2:
                        inst2.append(elt)
                        known_keys2.append(elt_key)
        else:
            for i in a.get('institutions'):
                if p.get('doi') and i.get('display_name'):
                    elt_key = p['doi']+';'+i['display_name']
                    elt = {'doi': p['doi'], 'display_name': i['display_name'], 'country_code': i['country_code'], 
                          'concepts': p['concepts'] }
                    if elt_key not in known_keys3:
                        inst3.append(elt)
                        known_keys3.append(elt_key)

df_inst1 = pd.DataFrame(inst1)
print('instiutions affiliation 1')
print()
print(df_inst1.display_name.value_counts().head(10))
print()
print('-----')
df_inst2 = pd.DataFrame(inst2)
print('instiutions affiliation 2')
print()
if inst2:
    print(df_inst2.display_name.value_counts().head(10))
print()
print('-----')
df_inst3 = pd.DataFrame(inst3)
print('autres partenaires')
print()
print(df_inst3.display_name.value_counts().head(10))
print()
print('-----')

instiutions affiliation 1

Huawei Technologies                                                                                                                          438
HUAWEI Technologies France                                                                                                                    19
Huawei German Research Center                                                                                                                 16
Mathematical and Algorithmic Sciences Laboratory, Paris Research Center, Huawei Technologies France SASU, Boulogne-Billancourt, France        10
Mathematical and Algorithmic Sciences Lab, France Research Center, Huawei Technologies Co. Ltd., France#TAB#                                  10
Huawei Technologies Paris, France.                                                                                                             8
[Mathematical and Algorithmic Sciences Laboratory, France Research Center, Huawei Technologies France S

# Top pays partenaires

In [22]:
df_inst3.country_code.value_counts().head(10)

US    126
DE     90
CN     84
IT     76
GB     66
SE     50
GR     46
ES     45
FI     43
CA     36
Name: country_code, dtype: int64

# Thématiques quand collab avec un pays

In [366]:
partner = 'IT'

In [367]:
concepts_with_partner = []
for row in df_inst3[df_inst3.country_code==partner].itertuples():
    current_concepts =  [c for c in row.concepts if c['level'] > 0]
    for e in current_concepts:
        e['doi'] = row.doi
    concepts_with_partner += current_concepts
pd.DataFrame(concepts_with_partner).display_name.value_counts().head(10)

Ecology                    17
Finance                    14
Law                         8
Microeconomics              6
Macroeconomics              6
Sustainability              5
Industrial organization     5
Mathematical analysis       5
Epistemology                5
Machine learning            5
Name: display_name, dtype: int64

In [365]:
#pd.DataFrame(concepts_with_partner)