In [21]:
import numpy as np
import pandas as pd
import bibtexparser
import arxiv
from tqdm import tqdm
# Take first page from each PDF
from PyPDF2 import PdfFileReader, PdfFileWriter
import os

In [22]:
# Location of the BibTeX export that is opened and parsed below
# (relative path, so it is resolved against the notebook's working directory).
path = 'inspire.bib'

In [23]:
# Parse the BibTeX file into a bibtexparser database object.
# The encoding is pinned to UTF-8 so that non-ASCII characters (accented
# names, symbols in titles) decode the same way on every platform instead of
# depending on the locale's default text encoding.
with open(path, encoding='utf-8') as f:
    database = bibtexparser.load(f)

In [24]:
# Turn the parsed entries into a table: one row per BibTeX entry, with the
# entry fields (year, journal, eprint, ...) as columns.
my_df = pd.DataFrame(database.entries)

In [25]:
# Sanity check: (rows, columns) of the freshly parsed table.
my_df.shape

(215, 16)

In [26]:
# List the distinct journal abbreviations present in the data; these are the
# values that get looked up as keys in the `qualis` table.
my_df['journal'].unique()

array(['JHEP', 'Phys. Rev. D', 'Eur. Phys. J. C', 'Phys. Rev. Lett.',
       'Phys. Lett. B', 'Phys. Rev. C', 'JINST',
       'Comput. Softw. Big Sci.', 'Nature Phys.', 'PoS'], dtype=object)

In [27]:
# Lookup table keyed by the journal abbreviation found in the 'journal' column.
# Each value is a pair: (full journal name, grade string). The grade ends up
# in the 'factor' column and the full name in the 'journal name' column.
#
# NOTE(review): 'Comput. Softw. Big Sci.' appears among the journals in the
# data but has no entry here, so its rows receive NaN for both derived columns.
qualis = {
    'Phys. Rev. D': (
        'PHYSICAL REVIEW D (PARTICLES, FIELDS, GRAVITATION AND COSMOLOGY)',
        'A2',
    ),
    # NOTE(review): "Phys. Lett. B" normally abbreviates "Physics Letters B",
    # which is a different journal from "Modern Physics Letters B" - confirm
    # the intended name and grade. Flagged by the author as "not allow".
    'Phys. Lett. B': ('MODERN PHYSICS LETTERS B', 'B5'),
    # Grade '0' presumably means "no grade available" - TODO confirm.
    'Eur. Phys. J. C': ('The European Physical Journal C', '0'),
    'Phys. Rev. Lett.': ('PHYSICAL REVIEW LETTERS', 'A1'),
    'Phys. Rev. C': ('PHYSICAL REVIEW C', 'A2'),
    'Nature Phys.': ('NATURE PHYSICS (PRINT)', 'A1'),
    'JHEP': ('Journal of High Energy Physics', 'A1'),
    'JINST': ('Journal of Instrumentation', 'B1'),
    # Flagged by the author as "not allow".
    'PoS': ('Proceeding of science', '0'),
}

In [28]:
# Re-check the table dimensions before the derived columns are added.
my_df.shape

(215, 16)

In [29]:
def add_qualis_factor(row):
    """Return the grade recorded in `qualis` for the row's journal.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide a 'journal' entry holding the journal abbreviation.

    Returns
    -------
    The second element of the matching `qualis` value, or ``np.nan`` when the
    abbreviation is not a key of `qualis`.
    """
    entry = qualis.get(row['journal'])
    return np.nan if entry is None else entry[1]
def add_qualis_full_name(row):
    """Return the full journal name recorded in `qualis` for the row's journal.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide a 'journal' entry holding the journal abbreviation.

    Returns
    -------
    The first element of the matching `qualis` value, or ``np.nan`` when the
    abbreviation is not a key of `qualis`.
    """
    entry = qualis.get(row['journal'])
    if entry is None:
        # Unknown abbreviation: leave the cell empty rather than guessing.
        return np.nan
    return entry[0]
    

In [30]:
# Derive the two lookup columns row by row from the journal abbreviation.
my_df['factor'] = my_df.apply(add_qualis_factor, axis='columns')
my_df['journal name'] = my_df.apply(add_qualis_full_name, axis='columns')
# Placeholder column; it is filled in later by the arXiv lookup.
my_df['arxiv link'] = ''

In [42]:
# Keep only the entries whose archive prefix is arXiv; the lookup that follows
# needs an arXiv eprint id for every row.
# .copy() makes the filtered table an independent frame, so later column
# writes act on it unambiguously and do not raise SettingWithCopyWarning.
my_df = my_df.loc[my_df['archiveprefix'] == 'arXiv'].copy()

In [47]:

# Resolve every entry's arXiv eprint id to the URL reported by the arXiv API.
# The links are gathered in a list and attached as a whole column afterwards:
# the Series yielded by iterrows() may be a copy of the row, so assigning to
# `row['arxiv link']` is not guaranteed to reach `my_df`.
arxiv_links = []
for eprint in tqdm(my_df['eprint'], total=my_df.shape[0]):
    # One request per paper; results() is a generator, so take its first item.
    # The None default keeps an id with no match from aborting the whole
    # (slow, network-bound) run with StopIteration.
    paper = next(arxiv.Search(id_list=[eprint]).results(), None)
    # Unmatched ids keep the same empty-string placeholder the column started with.
    arxiv_links.append(paper.entry_id if paper is not None else '')

# assign() returns a new frame with the column replaced in its existing position.
my_df = my_df.assign(**{'arxiv link': arxiv_links})




100%|██████████| 214/214 [02:09<00:00,  1.65it/s]


In [48]:
# Preview the first rows to confirm that 'factor', 'journal name' and
# 'arxiv link' were filled in.
my_df.head()

Unnamed: 0,year,pages,volume,journal,doi,reportnumber,primaryclass,archiveprefix,eprint,title,collaboration,author,ENTRYTYPE,ID,number,note,factor,journal name,arxiv link
0,2022,87,8,JHEP,10.1007/JHEP08(2022)087,CERN-EP-2022-025,hep-ex,arXiv,2203.01808,{Study of $ {\mathrm{B}}_{\mathrm{c}}^{+}\to \...,ATLAS,"Aad, Georges and others",article,ATLAS:2022aiy,,,A1,Journal of High Energy Physics,http://arxiv.org/abs/2203.01808v2
1,2022,5,6,JHEP,10.1007/JHEP06(2022)005,CERN-EP-2022-002,hep-ex,arXiv,2203.01009,{Search for neutral long-lived particles in $p...,ATLAS,"Aad, Georges and others",article,ATLAS:2022zhj,,,A1,Journal of High Energy Physics,http://arxiv.org/abs/2203.01009v2
2,2022,32005,106,Phys. Rev. D,10.1103/PhysRevD.106.032005,CERN-EP-2021-195,hep-ex,arXiv,2203.00587,{Search for events with a pair of displaced ve...,ATLAS,"Aad, Georges and others",article,ATLAS:2022gbw,3.0,,A2,"PHYSICAL REVIEW D (PARTICLES, FIELDS, GRAVITAT...",http://arxiv.org/abs/2203.00587v2
3,2022,32008,106,Phys. Rev. D,10.1103/PhysRevD.106.032008,CERN-EP-2021-116,hep-ex,arXiv,2202.13901,{Measurements of jet observables sensitive to ...,ATLAS,"Aad, Georges and others",article,ATLAS:2022miz,3.0,,A2,"PHYSICAL REVIEW D (PARTICLES, FIELDS, GRAVITAT...",http://arxiv.org/abs/2202.13901v2
4,2022,63,6,JHEP,10.1007/JHEP06(2022)063,CERN-EP-2022-003,hep-ex,arXiv,2202.12134,{Measurements of differential cross-sections i...,ATLAS,"Aad, Georges and others",article,ATLAS:2022xfj,,,A1,Journal of High Energy Physics,http://arxiv.org/abs/2202.12134v2


In [49]:
# Narrow the table to the columns that go into the exported spreadsheet.
report_columns = ['year', 'journal name', 'factor', 'title', 'arxiv link']
my_articles = my_df.loc[:, report_columns]

In [50]:
# Preview the reduced table.
my_articles.head()

Unnamed: 0,year,journal name,factor,title,arxiv link
0,2022,Journal of High Energy Physics,A1,{Study of $ {\mathrm{B}}_{\mathrm{c}}^{+}\to \...,http://arxiv.org/abs/2203.01808v2
1,2022,Journal of High Energy Physics,A1,{Search for neutral long-lived particles in $p...,http://arxiv.org/abs/2203.01009v2
2,2022,"PHYSICAL REVIEW D (PARTICLES, FIELDS, GRAVITAT...",A2,{Search for events with a pair of displaced ve...,http://arxiv.org/abs/2203.00587v2
3,2022,"PHYSICAL REVIEW D (PARTICLES, FIELDS, GRAVITAT...",A2,{Measurements of jet observables sensitive to ...,http://arxiv.org/abs/2202.13901v2
4,2022,Journal of High Energy Physics,A1,{Measurements of differential cross-sections i...,http://arxiv.org/abs/2202.12134v2


In [51]:
# Order the publications by the 'year' column, oldest first. With the default
# sort algorithm, rows sharing a year have no guaranteed relative order.
my_articles = my_articles.sort_values(by='year', ascending=True)

In [52]:
# Preview the sorted table; the earliest year should now come first.
my_articles.head()

Unnamed: 0,year,journal name,factor,title,arxiv link
214,2019,Journal of High Energy Physics,A1,{Measurement of the ratio of cross sections fo...,http://arxiv.org/abs/1901.10075v2
190,2019,Journal of High Energy Physics,A1,{Search for heavy neutral leptons in decays of...,http://arxiv.org/abs/1905.09787v2
187,2019,"PHYSICAL REVIEW D (PARTICLES, FIELDS, GRAVITAT...",A2,{Search for a heavy charged boson in events wi...,http://arxiv.org/abs/1906.05609v2
186,2019,Journal of High Energy Physics,A1,{Search for diboson resonances in hadronic fin...,http://arxiv.org/abs/1906.08589v3
185,2019,"PHYSICAL REVIEW D (PARTICLES, FIELDS, GRAVITAT...",A2,{Properties of jet fragmentation using charged...,http://arxiv.org/abs/1906.09254v2


In [55]:
# Write the sorted table to an Excel workbook in the working directory.
# `index` is left at its default, so the DataFrame index (the row labels
# carried over from the parsed table) is written as the first column.
my_articles.to_excel('ATLAS Publications (João Victor da Fonseca Pinto).xlsx')