# Map journal titles to Frontiers Rejection Rate dataset

In [1]:
import pandas

In [2]:
# Load the JCR 2014 impact factor spreadsheet. Impact factors are parsed
# as strings here (they are only cast to float in a later cell).
url = 'https://www.researchgate.net/file.PostFileLoader.html?id=558730995e9d9735688b4631&assetKey=AS%3A273803718922244%401442291301717'
renamer = {'Full Journal Title': 'JCR_title', 'Journal Impact Factor': 'IF_2014'}
jcr_2014_df = pandas.read_excel(
    url,
    skiprows=2,
    converters={'Journal Impact Factor': str},
)
# Rename the two columns of interest, then discard every other column
jcr_2014_df = jcr_2014_df.rename(columns=renamer)
jcr_2014_df = jcr_2014_df[list(renamer.values())]

# Keep only journals whose impact factor value occurs exactly once
counts = jcr_2014_df['IF_2014'].value_counts()
unique_IFs = set(counts.loc[counts == 1].index)
jcr_2014_df = jcr_2014_df[jcr_2014_df['IF_2014'].isin(unique_IFs)]

jcr_2014_df.head(2)

Unnamed: 0,JCR_title,IF_2014
0,CA-A CANCER JOURNAL FOR CLINICIANS,115.84
1,NEW ENGLAND JOURNAL OF MEDICINE,55.873


In [3]:
# Read rejection rates compiled by Frontiers
# https://dx.doi.org/10.6084/m9.figshare.2060589.v1
url = 'https://ndownloader.figshare.com/files/3661236'
# `sheet_name` is the current spelling of the keyword; the older `sheetname`
# form was deprecated in pandas 0.21 and is rejected by later releases.
# Index 1 selects the second sheet of the workbook (sheet indexes are zero-based).
# The impact factor is read as a string rather than a float.
reject_df = pandas.read_excel(url, sheet_name=1, converters={'Impact Factor (2014)': str})
reject_df = reject_df.rename(columns={'Rejection rate': 'rejection_rate', 'Impact Factor (2014)': 'IF_2014'})
# Remember the number of rows before any merging, for the mapping-rate calculation
n_rr = len(reject_df)
reject_df.tail(2)

Unnamed: 0,rejection_rate,IF_2014
568,0,1.919
569,0,5.84


In [4]:
# Attach a JCR title to each rejection-rate row. No join key is passed, so
# pandas joins on the column(s) the two frames have in common, and the
# default inner join drops rows that have no matching title.
reject_df = pandas.merge(reject_df, jcr_2014_df)
reject_df.tail()

Unnamed: 0,rejection_rate,IF_2014,JCR_title
112,0.133333,3.544,Frontiers in Neuroanatomy
113,0.13,7.037,FRONTIERS IN NEUROENDOCRINOLOGY
114,0.11,4.084,Frontiers in Molecular Neuroscience
115,0.03125,3.802,Frontiers in Pharmacology
116,0.01,7.117,CURRENT OPINION IN BIOTECHNOLOGY


In [5]:
# Share of the original rejection-rate rows that remain after the merge,
# i.e. rows whose impact factor matched a JCR title, shown as a percentage
format(len(reject_df) / n_rr, '.2%')

'20.53%'

In [6]:
# Impact factors were read as strings; cast them to floats, then order
# the rows from highest to lowest rejection rate
reject_df['IF_2014'] = reject_df['IF_2014'].astype(float)
reject_df = reject_df.sort_values(by='rejection_rate', ascending=False)

In [7]:
# Write the table as tab-separated text, leaving out the row index
reject_df.to_csv(path_or_buf='data/rejection_rates.tsv', sep='\t', index=False)