In [1]:
# The expected input file must have the following columns:
# "ID Lattes" containing the 16-digit number associated with a Lattes CV 
# "ID Scholar" containing the 12-character code associated with a Google Scholar profile
# Extra columns will be ignored.
# The order of the columns does not matter
# The file should use tabs as separator of columns
# The file should use comma as decimal separator
import pandas as pd
# Path of the tab-separated input file; the saving cell writes back to this same name.
file = 'professores.csv'

# "ID Lattes" is read as a generic object column instead of letting pandas infer
# a numeric dtype (presumably so the identifier stays textual -- confirm).
df = pd.read_csv(
    file,
    dtype={'ID Lattes': object},
    decimal=',',
    sep='\t',
)

In [2]:
from ipywidgets import FloatProgress
import bibliometrics

# One progress step per row of the input frame.
total = len(df)
progress = FloatProgress(min=0, max=total)
print("Processing", total, "researchers...")
display(progress)  # render the bar before the loop starts

# horizon=3 is forwarded as-is; its meaning is defined by the bibliometrics module.
bibliometrics.setup(horizon=3)

for idx, record in df.iterrows():
    # Start from the row's current values and merge in whatever each source returns.
    profile = record.to_dict()

    lattes_id = profile['ID Lattes']
    if pd.notnull(lattes_id):
        profile.update(bibliometrics.lattes(lattes_id))

    # Read after the Lattes merge, exactly as the lookups are ordered in the loop.
    scholar_id = profile['ID Scholar']
    if pd.notnull(scholar_id):
        profile.update(bibliometrics.scholar(scholar_id))

    profile.update(bibliometrics.normalized(profile))

    # Write every entry back into the frame; keys that are not yet columns
    # are added to the frame by the assignment.
    for column in profile:
        df.at[idx, column] = profile[column]

    progress.value = progress.value + 1  # advance the bar by one researcher

print("Finished.")
df

Processing 39 researchers...


A Jupyter Widget

Finished.


Unnamed: 0,Nome,ID Lattes,ID Scholar,Ano do Doutorado,Idade Acadêmica,Participações em Projetos (total),Projetos Coordenados (total),Projetos (total),Orientações de Mestrado (total),Orientações de Doutorado (total),...,Orientações (anual),Bancas de Mestrado (anual),Bancas de Doutorado (anual),Bancas (anual),Publicações em Congressos (anual),Publicações em Periódicos (anual),Publicações Indexadas JCR (anual),Publicações (anual),Citações (anual),H-Index (anual)
0,Alexandre Plastino de Carvalho,4985266524417261,axlvTZoAAAAJ,2000.0,17.0,2.0,5.0,7.0,23.0,7.0,...,1.764706,2.294118,1.117647,3.411765,3.941176,1.470588,0.764706,5.411765,39.0,0.764706
1,Aline Marins Paes Carvalho,506389215528790,PXVlXCEAAAAJ,2011.0,6.0,8.0,4.0,12.0,2.0,1.0,...,0.5,1.0,0.333333,1.333333,5.333333,0.833333,0.5,6.166667,25.333333,1.166667
2,Anselmo Antunes Montenegro,3518240071127311,Q_FnDrcAAAAJ,2003.0,14.0,10.0,3.0,13.0,13.0,3.0,...,1.142857,2.214286,1.428571,3.642857,4.357143,1.071429,0.357143,5.428571,36.571429,0.928571
3,Antonio Augusto de Aragao Rocha,5784860269030800,C_3SJ4oAAAAJ,2010.0,7.0,6.0,6.0,12.0,6.0,0.0,...,0.857143,1.857143,0.0,1.857143,5.285714,1.0,0.571429,6.285714,64.571429,1.428571
4,Aura Conci,5601388085745497,lojRGVgAAAAJ,1988.0,29.0,3.0,22.0,25.0,42.0,14.0,...,1.931034,1.758621,1.413793,3.172414,6.586207,1.758621,0.931034,8.344828,72.827586,0.862069
5,Bruno Lopes Vieira,7793315334001237,iWm5AjYAAAAJ,2014.0,3.0,11.0,2.0,13.0,0.0,0.0,...,0.0,0.666667,0.333333,1.0,5.333333,2.333333,0.333333,7.666667,21.0,1.333333
6,Carlos Alberto de Jesus Martinhon,2822582595834942,343VizIAAAAJ,1998.0,19.0,6.0,3.0,9.0,8.0,3.0,...,0.578947,1.157895,0.842105,2.0,1.473684,0.736842,0.578947,2.210526,16.105263,0.526316
7,Célio Vinicius Neves de Albuquerque,4641684220602580,cB78oMQAAAAJ,2000.0,17.0,5.0,9.0,14.0,24.0,7.0,...,1.823529,4.411765,2.117647,6.529412,6.058824,1.823529,1.176471,7.882353,111.117647,1.235294
8,Celso da Cruz Carneiro Ribeiro,3614186131432854,yGl1MwgAAAAJ,1983.0,34.0,0.0,0.0,0.0,36.0,25.0,...,1.794118,0.294118,0.529412,0.823529,1.676471,3.970588,2.647059,5.647059,269.176471,1.382353
9,Christiano de Oliveira Braga,535266455387139,zYumbMwAAAAJ,2001.0,16.0,20.0,3.0,23.0,5.0,1.0,...,0.375,0.5,0.5,1.0,2.25,0.9375,0.3125,3.1875,35.0,0.875


In [3]:
# Save the frame to the same path it was loaded from, using the same
# tab separator and comma decimal mark, without writing the index column.
df.to_csv(
    file,
    index=False,
    decimal=',',
    sep='\t',
)