# OpenAlex publications since 2019 by "Profesores de Cátedra 2023-1"

## UdeA articles in OpenAlex since 2019 with at most 10 authors

In [2]:
import requests
from IPython.display import JSON
import time
import pandas as pd
import unidecode as ud
from fuzzywuzzy import process
import getpass

In [2]:
def pagination(base_url,count_levels,results_key,page_key,per_page_key,per_page_value=100,sleep=0.1):
    '''
    Download every page of a paginated JSON API and return the concatenated results.

    Each request uses the url:

     `f"{base_url}&{page_key}={page}&{per_page_key}={per_page_value}"`

    so `base_url` must already contain the `?` and at least one query parameter.

    The JSON answer must follow this minimal data scheme:
    ```
     {'level 0 to count key': dict,
         ...
         {'level n to count key': int},
      results_key: list, # with the results
     }

    ```
    OpenAlex example
    ```
     {'meta':
         {'count': 1223,...},
      'results': [...]
     }
    ```
    → count_levels=[{'level':0,'to_count_key':'meta'},{'level':1,'to_count_key':'count'}]

    Parameters
    ----------
    base_url : str
        Endpoint plus the fixed part of the query string.
    count_levels : list of dict
        One `{'level': int, 'to_count_key': str}` entry per nesting level, describing
        the path from the top of the JSON answer to the total number of results.
    results_key : str
        First-level key of the JSON answer that holds the list of results.
    page_key, per_page_key : str
        Names of the query parameters for the page number and the page size.
    per_page_value : int
        Page size requested.
    sleep : float
        Seconds to wait after each request for pages 2..n.

    Returns
    -------
    list
        Results of all the pages that answered with status 200, in page order.
        Empty if the total count is missing, not an integer, or not positive.

    Raises
    ------
    requests.HTTPError
        If the first request answers with a 4xx/5xx status. Later pages that do not
        answer 200 are skipped (best effort), so the result can be incomplete.
    '''
    first=requests.get(f'{base_url}&{page_key}=1&{per_page_key}={per_page_value}')
    if first.status_code!=200:
        # Without the first page the total count is unknown: fail loudly on HTTP errors
        first.raise_for_status()
        return []

    payload=first.json()

    # Walk the JSON answer level by level (in increasing level order) down to the total count
    count=payload
    for level in sorted({d.get('level') for d in count_levels}):
        if not isinstance(count,dict):
            count=None
            break
        key=next(d.get('to_count_key') for d in count_levels if d.get('level')==level)
        count=count.get(key)

    if not isinstance(count,int) or count<=0:
        return []

    r=list(payload.get(results_key) or []) # First page
    npages=-(-count//per_page_value) # ceiling division

    for page in range(2,npages+1):
        print(page,end='\r')
        url=f'{base_url}&{page_key}={page}&{per_page_key}={per_page_value}'
        j=requests.get(url)
        time.sleep(sleep) # Avoid overloading the API
        if j.status_code==200:
            r.extend(j.json().get(results_key) or []) # Remaining pages

    return r

In [3]:
# OpenAlex works query: authorship at institution I35961687 (UdeA, per the section title),
# 1 to 10 authors, published between 2019 and 2023
base_url='https://api.openalex.org/works?filter=authorships.institutions.id:I35961687,authors_count:1-10,publication_year:2019-2023'
count_levels=[{'level':0,'to_count_key':'meta'},{'level':1,'to_count_key':'count'}] # data scheme for count in JSON
per_page_key='per-page' # query parameter that sets the page size
page_key='page' # query parameter that sets the page number
results_key='results' # Must be at the first level in the JSON output

In [4]:
# Download all the pages of the query; the number shown below is the progress output (last page requested)
r=pagination(base_url,count_levels,results_key,page_key,per_page_key,per_page_value=100,sleep=0.1)

81

In [5]:
# Number of works downloaded
len(r)

7933

## List of "Profesores de Cátedra 2023-1"

In [8]:
# Load the roster from the published Google Sheets document (xlsx export).
# The cells below read the columns 'Nombres', 'Primer apellido' and 'Segundo apellido'.
pc=pd.read_excel('https://docs.google.com/spreadsheets/d/e/2PACX-1vSLsgufZB-CrICillvQufKIFOJ0oILmrSflfud2MH6EoTn5sHsmklRq4N8LIo4SVPFjrWor0aUBSNWA/pub?output=xlsx')

### Fix wrong last names

In [9]:
# Take the first roster row whose second last name contains 'CARVAJAL' and append
# an alias row for it: 'DAVID' abbreviated to 'A' and the two last names swapped.
is_carvajal=pc['Segundo apellido'].fillna('').str.contains('CARVAJAL')
d=pc[is_carvajal].iloc[0].to_dict()
d['Nombres']=d['Nombres'].replace('DAVID','A')
d['Primer apellido'],d['Segundo apellido']=d['Segundo apellido'],d['Primer apellido']

# The original row is kept; the alias is added at the end with a fresh 0..n-1 index
pc=pd.concat([pc,pd.DataFrame([d])],ignore_index=True)

In [10]:
# Take the first roster row whose given names contain 'ANYERES' and append
# an alias row for it with the two last names swapped.
is_anyeres=pc['Nombres'].fillna('').str.contains('ANYERES')
d=pc[is_anyeres].iloc[0].to_dict()
d['Primer apellido'],d['Segundo apellido']=d['Segundo apellido'],d['Primer apellido']

# The original row is kept; the alias is added at the end with a fresh 0..n-1 index
pc=pd.concat([pc,pd.DataFrame([d])],ignore_index=True)

### Create auxiliary lists

In [11]:
# Split the given names once: first token → first name,
# first letter of the second token → initial of the second name (missing when there is no second token)
given_names=pc['Nombres'].str.split()
pc['Primer nombre']=given_names.str[0]
pc['Inicial segundo nombre']=given_names.str[1].str[0]

In [12]:
# "first name + first last name", lowercased and transliterated to ASCII; one entry per roster row
catedra_short=pc.fillna('').apply(
    lambda row: ud.unidecode(
        ' '.join([row['Nombres'].split()[0],row['Primer apellido'].strip()]).strip().lower()
    ),
    axis=1,
).tolist()

In [13]:
# "initials of every given name + both last names", lowercased and transliterated to ASCII
catedra_initials=pc.fillna('').apply(
    lambda row: ud.unidecode(
        ' '.join([
            ' '.join(n[0] for n in row['Nombres'].split()),
            row['Primer apellido'].strip(),
            row['Segundo apellido'],
        ]).strip().lower()
    ),
    axis=1,
).tolist()

In [14]:
# "first name + initial of second name + both last names"
catedra_name_initial=pc.fillna('').apply(
    lambda row: ud.unidecode(
        ' '.join([
            row['Primer nombre'].strip(),
            row['Inicial segundo nombre'].strip(),
            row['Primer apellido'].strip(),
            row['Segundo apellido'],
        ]).strip().lower()
    ),
    axis=1,
).tolist()
# "first name + initial of second name + first last name"
catedra_name_initial_partial=pc.fillna('').apply(
    lambda row: ud.unidecode(
        ' '.join([
            row['Primer nombre'].strip(),
            row['Inicial segundo nombre'].strip(),
            row['Primer apellido'].strip(),
        ]).strip().lower()
    ),
    axis=1,
).tolist()
# "first name + initial of second name + second last name"
catedra_name_initial_partial2=pc.fillna('').apply(
    lambda row: ud.unidecode(
        ' '.join([
            row['Primer nombre'].strip(),
            row['Inicial segundo nombre'].strip(),
            row['Segundo apellido'].strip(),
        ]).strip().lower()
    ),
    axis=1,
).tolist()

In [15]:
# Full names "given names + both last names", lowercased and transliterated to ASCII.
# This is the reference list: the other auxiliary lists are built from the same rows in the same order.
catedra=pc.fillna('').apply(
    lambda row: ud.unidecode(
        ' '.join([
            row['Nombres'].strip(),
            row['Primer apellido'].strip(),
            row['Segundo apellido'],
        ]).strip().lower()
    ),
    axis=1,
).tolist()

In [16]:
# First last name only, lowercased and transliterated to ASCII; one entry per roster row
catedra_first_last_name=pc.fillna('').apply(
    lambda row: ud.unidecode(row['Primer apellido'].strip().lower()),
    axis=1,
).tolist()

## Extract authors from the list "Profesores de Cátedra 2023-1"

In [17]:
# For every work, find the first author affiliated to `inst` that can be identified
# with a name of the roster, and store in the work:
#   'institution_authorships': normalized names of the authors affiliated to `inst`
#   'catedra_authorships'    : matched full name from `catedra` ('' if none)
#   'catedra_id'             : OpenAlex id of the matched author ('' if none)
inst='I35961687'

# Exact-match variants of the roster names, tried in this priority order.
# All of them are built row by row from the same table as `catedra`,
# so a hit at position i corresponds to catedra[i].
exact_lookups=[
    catedra,                       # Full names
    catedra_initials,              # Initial names and full last names
    catedra_short,                 # First name and first last name
    catedra_name_initial,          # First name - second initial and full last names
    catedra_name_initial_partial,  # First name - second initial and first last name
    catedra_name_initial_partial2, # First name - second initial and second last name
]

for w in r:
    aus_inst=[]
    aus_ids=[]
    for d in w.get('authorships') or []:
        author=d.get('author') or {}
        name=author.get('display_name')
        if not name:
            continue # Nothing to compare against the roster
        # One entry per matching institution record, so names and ids stay aligned
        for dd in d.get('institutions') or []:
            if dd.get('id') and dd.get('id').split('/')[-1]==inst:
                aus_inst.append(name)
                aus_ids.append(author.get('id'))

    # Same normalization as the roster lists, plus hyphens → spaces and dots removed
    aus_inst=[ud.unidecode(s.lower()).replace('-',' ').replace('.','') for s in aus_inst]
    ids=dict(zip(aus_inst,aus_ids))

    # Defaults, so that every work has the three keys even without institution authors
    w['institution_authorships']=aus_inst
    w['catedra_authorships']=''
    w['catedra_id']=''

    for au in aus_inst:
        if not au.split():
            continue # Name became empty after normalization

        # 1) Exact match against any of the roster name variants
        one=[]
        for lookup in exact_lookups:
            if au in lookup:
                one=[catedra[lookup.index(au)],100]
                break

        # 2) Fuzzy match against the full names, followed by consistency checks
        if not one:
            one=process.extractOne(au,catedra)
            # A last name must be shared and the first letter of `au` must be one of the candidate initials
            if one and set(au.split()).intersection(one[0].split()[-2:]) and au[0] in [s[0] for s in one[0].split()]:
                if len(au.split()[0])>1:
                    # First token is a full word (not an initial): a given name must be shared too
                    if not set(au.split()).intersection(one[0].split()[:-2]):
                        one=[]
                if one and len(au.split())==4 and len(one[0].split())==4: #last names
                    if au.split()[-2:]!=one[0].split()[-2:]:
                        one=[]
            else:
                one=[]
            #Quality check
            if one:
                #all initials must correspond
                if set([s[0] for s in au.split()]).difference([s[0] for s in one[0].split()]):
                    one=[]
            if one:
                #Last name must be included
                if catedra_first_last_name[catedra.index(one[0])] not in au.split():
                    one=[]

        if one:
            w['catedra_authorships']=one[0]
            w['catedra_id']=ids.get(au)
            break # Only the first matched author is recorded for each work
        

### Filter by [OpenAlex level 0 concepts](https://docs.openalex.org/api-entities/concepts): 
* Physics
* Computer science
* Materials science

Exclude level 0 concepts: Medicine and Psychology

In [18]:
# One row per work; the keys added to each work by the matching loop become columns
df=pd.DataFrame(r)

In [19]:
# Names of the level 0 concepts of each work
df['concepts_0']=df['concepts'].apply(lambda L: [d.get('display_name') for d in L if d.get('level')==0])

In [28]:
# Widen the notebook display: up to 500 characters per cell and 100 rows per table
pd.set_option('display.max_colwidth',500)
pd.set_option('display.max_rows', 100)

In [31]:
# Keep only the works with a matched roster author.
# A work that never received the key holds NaN in the column; it is treated as "no match".
has_catedra=df['catedra_authorships'].fillna('').astype(bool)
cdf=df[has_catedra].reset_index(drop=True)

# Level 0 concepts to keep (at least one present) and to discard (none present)
wanted_concepts={'Physics','Computer science','Materials science'}
excluded_concepts={'Medicine','Psychology'}

in_scope=cdf['concepts_0'].apply(lambda L: not wanted_concepts.isdisjoint(L)).astype(bool)
out_of_scope=cdf['concepts_0'].apply(lambda L: not excluded_concepts.isdisjoint(L)).astype(bool)

cdf=cdf[in_scope & ~out_of_scope].reset_index(drop=True)

### Total results:

In [33]:
# (number of selected works, number of columns)
cdf.shape

(61, 38)

In [34]:
# Journal = 'display_name' of the 'source' of the first entry in the work's 'locations' list
cdf['Journal']=cdf.locations.apply(lambda L: [d.get('source') for d in L]).str[0].str.get('display_name')

In [36]:
# Columns of the final report, in display order
report_columns=['title','Journal','doi','institution_authorships','publication_date',
                'catedra_authorships','catedra_id','concepts_0','cited_by_count']

# Reduce to the report columns, order by matched author and renumber the rows
cdf=cdf[report_columns]
cdf=cdf.sort_values('catedra_authorships')
cdf=cdf.reset_index(drop=True)
cdf

Unnamed: 0,title,Journal,doi,institution_authorships,publication_date,catedra_authorships,catedra_id,concepts_0,cited_by_count
0,Lepton dark matter portal in the inert Zee model,International Journal of Modern Physics A,https://doi.org/10.1142/s0217751x20501900,"[alexandra gaviria, robinson longas, andres rivera]",2020-11-10,alexandra gaviria norena,https://openalex.org/A2890818477,"[Physics, Philosophy, Mathematics]",0
1,Singlet-doublet Dirac dark matter and neutrino masses,Physical review,https://doi.org/10.1103/physrevd.100.035029,"[diego restrepo, andres rivera]",2019-08-26,andres felipe rivera romero,https://openalex.org/A4353121559,"[Physics, Philosophy]",16
2,Inert doublet as multicomponent dark matter,Nuclear Physics B,https://doi.org/10.1016/j.nuclphysb.2020.115276,[andres rivera],2021-01-01,andres felipe rivera romero,https://openalex.org/A4353121559,"[Physics, Mathematics]",9
3,Type-II two-Higgs-doublet model in noncommutative geometry,Nuclear Physics B,https://doi.org/10.1016/j.nuclphysb.2022.115923,"[jimenez, fredy, diego restrepo, andres rivera]",2022-08-01,andres felipe rivera romero,https://openalex.org/A2485835366,"[Physics, Mathematics, Philosophy, Engineering, History]",0
4,"Dirac dark matter, neutrino masses, and dark baryogenesis",Physical Review D,https://doi.org/10.1103/physrevd.106.055021,"[diego restrepo, andres rivera]",2022-09-16,andres felipe rivera romero,https://openalex.org/A4353121559,"[Physics, History]",0
5,Phenomenological consistency of the singlet-triplet scotogenic model,Journal of High Energy Physics,https://doi.org/10.1007/jhep04(2020)134,"[diego restrepo, andres rivera]",2020-04-01,andres felipe rivera romero,https://openalex.org/A4353121559,"[Physics, Mathematics, History]",7
6,Quantum phase transition and Berry phase in an extended Dicke model,European Physical Journal D,https://doi.org/10.1140/epjd/e2020-10332-0,"[camilo a estrada guerra, j mahecha gomez]",2020-10-01,camilo alberto estrada guerra,https://openalex.org/A3089404635,"[Physics, Biology]",2
7,Non-interferometric key recording applied to a joint transform cryptosystem,Optics Letters,https://doi.org/10.1364/ol.478132,"[carlos vargas castrillon, alejandro velez zea, john barrera ramirez]",2022-12-21,carlos andres vargas castrillon,https://openalex.org/A4316243382,"[Computer science, Physics, Engineering]",0
8,Orbital decay of short-period gas giants under evolving tides,Monthly Notices of the Royal Astronomical Society,https://doi.org/10.1093/mnras/stz1081,"[jaime a alvarado montes, carolina garcia carmona]",2019-07-01,carolina garcia carmona,https://openalex.org/A2943153277,"[Physics, Political science]",7
9,One-loop Dirac neutrino mass and mixed axion-WIMP dark matter,Physical review,https://doi.org/10.1103/physrevd.99.075009,"[cristian a carvajal, oscar zapata]",2019-04-09,cristian a carvajal ruiz,https://openalex.org/A4339934639,[Physics],19


In [37]:
# Save the report to an Excel file in the working directory, without the row index
cdf.to_excel('publicaciones_catedra.xlsx',index=False)

In [81]:
# Query the CvLAC endpoint of the Colav API (the JSON answer is parsed in the next cell)
# NOTE(review): plain http — switch to https if the service supports it
j=requests.get('http://apis.colav.co/openscienti/cvlac')

In [82]:
# Table with the records under the 'scrapped_data' key of the answer.
# Note: this reuses the name `df`, replacing the OpenAlex works table built above.
df=pd.DataFrame( j.json()['scrapped_data'] )

In [83]:
# 'url' of the first record whose 'Nombre' (lowercased, transliterated to ASCII) contains the given full name
df[df['Nombre'].str.lower().apply(ud.unidecode).str.contains('oscar antonio restrepo gutierrez')].iloc[0].dropna().get('url')

'https://scienti.minciencias.gov.co/cvlac/visualizador/generarCurriculoCv.do?cod_rh=0000199800'

In [84]:
# Ask for the API key interactively so that it is not stored in the notebook
apikey=getpass.getpass()

 ········


In [89]:
# Query the SIIU projects endpoint of the Colav API by participant name.
# The parameters are passed with `params=` so that requests URL-encodes them:
# an API key containing characters such as '&', '#' or '+' would otherwise corrupt the query string.
# NOTE(review): the API key travels over plain http — switch to https if the service supports it
j=requests.get(
    'http://apis.colav.co/siiu/project',
    params={'apikey':apikey,'participant_name':'andres felipe rivera romero'},
)

In [91]:
# 'CODIGO' field of the first project in the answer
j.json()[0].get('CODIGO')

'2018-22812'

In [88]:
# Ad-hoc ratio; the notebook does not record what these two figures are — TODO document or remove
41280/316300

0.1305090104331331