<a href="https://colab.research.google.com/github/jlee2843/Peer-Review-22-23/blob/Albert-2022/API_Biorvix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import sys
print("version:", sys.version)

version: 3.8.15 (default, Oct 12 2022, 19:14:39) 
[GCC 7.5.0]


In [None]:
import pandas as pd
import numpy as np
import requests
import json
import urllib, urllib.request
from concurrent.futures import ThreadPoolExecutor

**Biorxiv API** information is found [here](https://api.biorxiv.org/details/medrxiv/help).<br>
**Final edit:** December 02, 2022.<br>
**NB:** Using Jenny's notebook  as a template


In [12]:
from typing import Union

# common helper function
def get_total (url) -> int:
    json_info = get_json_data(url)
    return json_info["messages"][0]["total"]

def get_json_data (url):
    request_API = requests.get(url)
    return json.loads(request_API.text)

def get_data(file, url, loop_range):
    print(f"values: {list(loop_range)}")
    results = []
    with ThreadPoolExecutor(3) as exe:
        if file is not None:
            args = (((get_json_data(f'{url}/{cursor}'), file) for cursor in loop_range))
            print(args)
            exe.map (lambda p: json.dump(*p), args)

            file.close()
        else:
            results = exe.map(get_json_data, ((f'{url}/{cursor}') for cursor in loop_range))
    
    return results

freq_count = lambda x,y: x[y].value_counts()

flatten = lambda y: sorted([sublist for inner in y for sublist in inner],
                           key=lambda x:x[0])



In [None]:
# prepublish helper function
def process_prepublish_data(json_info, cursor):
    journal_list = []
    for entry, journal in enumerate(json_info["collection"]):
        journal_list.append([entry + cursor, journal["doi"], journal["title"], journal["authors"],
                             journal["author_corresponding"],
                             journal["author_corresponding_institution"],
                             journal["date"], journal["version"], journal["type"],
                             journal["category"], journal["jatsxml"], journal["published"]])

    return journal_list

def create_prepublish_df(results, loop_list):
    args = zip(results, loop_list)
    #print(f'results: {len(results)} loop_list: {len(list(loop_list))}')
    result_list = []
    with ThreadPoolExecutor(3) as exe:
        result_list = exe.map(lambda p: process_prepublish_data(*p), args)
    
    data = np.array(flatten(result_list))
    df = pd.DataFrame(data=data[:, 1:],
                      index=data[:, 0],
                      columns=["DOI", "Title", "Authors", "Corresponding_Authors",
                               "Institution",
                               "Date", "Version", "Type", "Category", "Xml", "Published"])
    df['Num_of_Authors'] = df.Authors.apply(lambda x: len(x.split(';')))
    df.DOI = df.DOI.astype('str')
    df.Title = df.Title.astype('str')
    df.Authors = df.Authors.astype('str')
    df.Corresponding_Authors = df.Corresponding_Authors.astype('str')
    df.Institution = df.Institution.astype('category')
    df.Date = pd.to_datetime(df.Date, format='%Y-%m-%d')
    df.Type = df.Type.astype('category')
    df.Xml = df.Xml.astype('str')

    return df

#variables
base_url: str = 'https://api.biorxiv.org'
query_type: str = '/details'
server: str = '/biorxiv'
start_interval: str = '/2020-03-01'
end_interval: str = '/2020-03-31'

#finding the number of records for a given time period
#tmp = f"{base_url}/{start_interval}/{end_interval}"
#tmp = 'https://api.biorxiv.org/details/biorxiv/2020-08-21/2020-08-28'
print(f'{base_url}{query_type}{server}{start_interval}{end_interval}')
results = get_data(None, 
         f'{base_url}{query_type}{server}{start_interval}{end_interval}', 
         range(0, get_total(f'{base_url}{query_type}{server}{start_interval}{end_interval}'), 100))

journal_df = create_prepublish_df([r for r in results], 
                                  range(0, get_total(f'{base_url}{query_type}{server}{start_interval}{end_interval}'), 100))

display(journal_df)


https://api.biorxiv.org/details/biorxiv/2020-03-01/2020-03-31
values: [0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900, 4000, 4100, 4200]


Unnamed: 0,DOI,Title,Authors,Corresponding_Authors,Institution,Date,Version,Type,Category,Xml,Published,Num_of_Authors
0,10.1101/099697,Glucocorticoids and cortical decoding in the p...,"Schwab, S.; Federspiel, A.; Morishima, Y.; Nak...",Simon Schwab,"University of Warwick, United Kingdom",2020-03-04,6,new results,neuroscience,https://www.biorxiv.org/content/early/2020/03/...,10.1016/j.pscychresns.2020.111066,9
1,10.1101/106542,Global cropland connectivity: A risk factor fo...,"Xing, Y.; Hernandez Nopsa, J.; Andersen, K. F....",Karen Garrett,University of Florida,2020-03-10,2,new results,ecology,https://www.biorxiv.org/content/early/2020/03/...,10.1093/biosci/biaa067,18
2,10.1101/121020,A Gaussian process model of human electrocorti...,"Owen, L. L. W.; Muntianu, T. A.; Heusser, A. C...",Jeremy R. Manning,Dartmouth College,2020-03-14,3,new results,neuroscience,https://www.biorxiv.org/content/early/2020/03/...,10.1093/cercor/bhaa115,6
3,10.1101/124305,A pleiotropic chemoreceptor facilitates the co...,"Zelle, K.; Vernier, C.; Liang, X.; Halloran, S...",Yehuda Ben-Shahar,Washington University in St. Louis,2020-03-09,5,new results,genetics,https://www.biorxiv.org/content/early/2020/03/...,,6
4,10.1101/124305,Chemoreceptor pleiotropy facilitates the funct...,"Zelle, K.; Vernier, C.; Leitner, N.; Liang, X....",Yehuda Ben-Shahar,Washington University in St. Louis,2020-03-21,6,new results,genetics,https://www.biorxiv.org/content/early/2020/03/...,,7
...,...,...,...,...,...,...,...,...,...,...,...,...
4199,10.1101/2020.03.31.018358,Echocardiography-guided percutaneous left vent...,"Nong, Y.; Guo, Y.; Tomlin, A.; Zhu, X.; Wysocz...",Yibing Nong,University of Louisville School of Medicine,2020-03-31,1,new results,physiology,https://www.biorxiv.org/content/early/2020/03/...,10.1007/s11010-021-04077-6,7
4200,10.1101/2020.03.31.018366,Laser capture microdissection in combination w...,"Roudnicky, P.; Potesil, D.; Zdrahal, Z.; Gelna...",Pavel Roudnický,Masarykova univerzita Prirodovedecka Fakulta,2020-03-31,1,new results,biochemistry,https://www.biorxiv.org/content/early/2020/03/...,10.1371/journal.pone.0231681,5
4201,10.1101/2020.03.31.007492,Specialisation and plasticity in a primitively...,"Patalano, S.; Alsina, A.; Gregorio-Rodriguez, ...",Steffen Rulands,Max Planck Institute for the Physics of Comple...,2020-03-31,1,new results,biophysics,https://www.biorxiv.org/content/early/2020/03/...,,11
4202,10.1101/2020.03.30.017046,The effects of transcranial direct current sti...,"Pellegrini, M.; Zoghi, M.; Jaberzadeh, S.",Michael Pellegrini,Monash University,2020-03-31,1,new results,neuroscience,https://www.biorxiv.org/content/early/2020/03/...,10.1016/j.neures.2020.06.002,3


In [None]:
#summary of table
journal_df.describe(include='all')


  journal_df.describe(include='all')


Unnamed: 0,DOI,Title,Authors,Corresponding_Authors,Institution,Date,Version,Type,Category,Xml,Published,Num_of_Authors
count,4204,4204,4204,4204,4204,4204,4204.0,4204,4204,4204,4204.0,4204.0
unique,3853,3903,3897,3627,2412,31,11.0,4,27,4167,2665.0,
top,10.1101/2020.02.28.970814,"Frequency dependent sexual selection, mating t...","Rogers, S.; Lew, V. L.",Rui Guo,University of Oxford,2020-03-05 00:00:00,1.0,new results,neuroscience,https://www.biorxiv.org/content/early/2020/03/...,,
freq,6,4,7,8,35,290,3038.0,4128,743,3,1305.0,
first,,,,,,2020-03-01 00:00:00,,,,,,
last,,,,,,2020-03-31 00:00:00,,,,,,
mean,,,,,,,,,,,,7.52902
std,,,,,,,,,,,,5.882493
min,,,,,,,,,,,,1.0
25%,,,,,,,,,,,,4.0


In [None]:
#freq count of Num_of_Authors
freq_count(journal_df,'Num_of_Authors')

3     489
4     473
5     453
6     419
2     364
7     353
8     292
9     237
10    192
11    153
12    122
1      99
13     78
14     70
15     68
16     49
17     48
18     39
19     33
21     24
20     22
24     16
25     15
22     13
27     10
26      9
31      8
29      7
23      7
34      5
47      5
35      4
45      4
32      3
30      3
28      2
46      2
33      2
44      2
43      2
39      1
40      1
41      1
36      1
37      1
53      1
38      1
49      1
Name: Num_of_Authors, dtype: int64

In [None]:
#freq count of Corresponding_Authors
freq_count(journal_df, 'Corresponding_Authors')

Rui  Guo               8
Virgilio Leon Lew      7
Zhongyang  Tan         6
Pierre  Morisse        5
Silvia  Argimon        5
                      ..
Valentyn  Oksenych     1
Shinji  Fukuda         1
Adam P Arkin           1
Tomasz W Turowski      1
Brijesh Kumar Singh    1
Name: Corresponding_Authors, Length: 3627, dtype: int64

In [None]:
#freq count of Instituion
freq_count(journal_df, 'Institution')

University of Oxford                                                                           35
Stanford University                                                                            29
University of Cambridge                                                                        28
University of Michigan                                                                         22
University of Washington                                                                       21
                                                                                               ..
Inria                                                                                           1
Indraprastha Institute of Information Technology                                                1
Indiana University Purdue University at Indianapolis, Indiana University School of Medicine     1
Indian Institute of Technology, Kharagpur-721302, India                                         1
École polytechnique 

In [None]:
#freq count of Date
freq_count(journal_df, 'Date')

2020-03-05    290
2020-03-25    282
2020-03-18    258
2020-03-20    236
2020-03-12    203
2020-03-31    193
2020-03-29    193
2020-03-11    190
2020-03-06    175
2020-03-03    155
2020-03-26    152
2020-03-02    143
2020-03-23    143
2020-03-09    142
2020-03-04    122
2020-03-13    119
2020-03-19    119
2020-03-08    112
2020-03-30    111
2020-03-10    100
2020-03-27     99
2020-03-15     98
2020-03-24     97
2020-03-14     96
2020-03-07     88
2020-03-16     67
2020-03-17     64
2020-03-21     58
2020-03-28     49
2020-03-22     35
2020-03-01     15
Name: Date, dtype: int64

In [None]:
#freq count of number of version
freq_count(journal_df, 'Version')

1     3038
2      833
3      236
4       52
5       24
6       10
7        6
8        2
13       1
19       1
9        1
Name: Version, dtype: int64

In [None]:
#freq count of Type
freq_count(journal_df, 'Type')

new results              4128
confirmatory results       45
contradictory results      24
withdrawn                   7
Name: Type, dtype: int64

In [None]:
#freq count Category
freq_count(journal_df, 'Category')

neuroscience                              743
microbiology                              437
bioinformatics                            390
cell biology                              233
genomics                                  225
evolutionary biology                      212
ecology                                   205
biochemistry                              196
biophysics                                176
molecular biology                         171
cancer biology                            170
immunology                                163
genetics                                  150
plant biology                             142
developmental biology                     119
bioengineering                            103
systems biology                            78
animal behavior and cognition              77
physiology                                 66
pharmacology and toxicology                39
synthetic biology                          35
pathology                         

In [None]:
assert num_entries == len(journal_df)
"Total number of papers submitted and the length of the dataframe does match."

'Total number of papers submitted and the length of the dataframe does match.'

The above code runs through all papers that are submitted within selected time frame. Error discussed during the previous meeting has been fixed. 

----

Query of Published Articles for a given timefame

In [None]:
# helper functions for published articles
def process_published_data(json_info, cursor) -> list:
    journal_list = []

    for entry, journal in enumerate(json_info["collection"]):
        journal_list.append([entry + cursor, journal["preprint_doi"], 
                             journal["published_doi"],
                             journal["preprint_title"], journal["preprint_authors"],
                             journal["preprint_author_corresponding"],
                             journal["preprint_author_corresponding_institution"],
                             journal["preprint_category"], journal["published_journal"],
                             journal["preprint_date"], journal["published_date"]])

    print(f"journal_list: {len(journal_list)}\n{journal_list}")
    return journal_list

def create_published_df(results, loop_list):
    args = zip(results, loop_list)
    #print(f'results: {len(results)} loop_list: {len(list(loop_list))}')
    result_list = []
    with ThreadPoolExecutor(3) as exe:
        result_list = exe.map(lambda p: process_published_data(*p), args)

    data = np.array(flatten(result_list))
    df = pd.DataFrame(data=data[:, 1:],
                      index=data[:, 0],
                      columns=["pre_DOI", "pub_DOI",
                               "Title", "Authors", "Corresponding_Authors",
                               "Institution",
                               "Category", "Journal", "Preprint_Date", "Published_Date"])
    df['Num_of_Authors'] = df.Authors.apply(lambda x: len(x.split(';')))
    df.pre_DOI = df.pre_DOI.astype('str')
    df.pub_DOI = df.pub_DOI.astype('str')
    df.Title = df.Title.astype('str')
    df.Authors = df.Authors.astype('str')
    df.Corresponding_Authors = df.Corresponding_Authors.astype('str')
    df.Institution = df.Institution.astype('category')
    df.Category = df.Category.astype('category')
    df.Journal = df.Journal.astype('category')
    df.Preprint_Date = pd.to_datetime(df.Preprint_Date, format='%Y-%m-%d')
    df.Published_Date = pd.to_datetime(df.Published_Date, format='%Y-%m-%d')

    return df

#variables
#https://api.biorxiv.org/pubs/biorvix/2018-08-21/2018-08-28
query_type: str = '/pubs'
start_interval: str = '/2020-03-01'
end_interval: str = '/2020-03-31'

#finding the number of records for a given time period

print(f'{base_url}{query_type}{server}{start_interval}{end_interval}')
results = get_data(None, 
                   f'{base_url}{query_type}{server}{start_interval}{end_interval}', 
                   range(0, get_total(f'{base_url}{query_type}{server}{start_interval}{end_interval}'), 100))

pubs_df = create_published_df([r for r in results], 
                              range(0, get_total(f'{base_url}{query_type}{server}{start_interval}{end_interval}'), 100))

display(pubs_df)

https://api.biorxiv.org/pubs/biorxiv/2020-03-01/2020-03-31
values: [0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500]
journal_list: 100
[[0, '10.1101/759530', '10.1101/gad.332643.119', 'Senolytic treatment targets aberrant p21-expression to restore liver regeneration in adult mice', 'Ritschka, B.; Knauer-Meyer, T.; Mas, A.; Plassat, J.-L.; Sampaio Goncalves, D.; Jacobs, H.; Pedone, E.; Di Vicino, U.; Cosma, M. P.; Keyes, W. M.', 'William M. Keyes', 'IGBMC', 'cell biology', 'Genes & Development', '2019-09-05', '2020-03-05'], [1, '10.1101/843441', '10.3389/fnins.2020.00182', 'Functional Connectivity Evoked by Orofacial Tactile Perception of Velocity', 'Wang, Y.; Sibaii, F.; Custead, R.; Oh, H.; Barlow, S. M.', 'Yingying  Wang', 'University of Nebraska-Lincoln', 'neuroscience', 'Frontiers in Neuroscience', '2019-11-16', '2020-03-06'], [2, '10.1101/681353', '10.1038/s41593-020-0603-0', 'Connecting gene regulatory relationships to neurobiological mechanisms

Unnamed: 0,pre_DOI,pub_DOI,Title,Authors,Corresponding_Authors,Institution,Category,Journal,Preprint_Date,Published_Date,Num_of_Authors
0,10.1101/759530,10.1101/gad.332643.119,Senolytic treatment targets aberrant p21-expre...,"Ritschka, B.; Knauer-Meyer, T.; Mas, A.; Plass...",William M. Keyes,IGBMC,cell biology,Genes & Development,2019-09-05,2020-03-05,10
1,10.1101/843441,10.3389/fnins.2020.00182,Functional Connectivity Evoked by Orofacial Ta...,"Wang, Y.; Sibaii, F.; Custead, R.; Oh, H.; Bar...",Yingying Wang,University of Nebraska-Lincoln,neuroscience,Frontiers in Neuroscience,2019-11-16,2020-03-06,5
2,10.1101/681353,10.1038/s41593-020-0603-0,Connecting gene regulatory relationships to ne...,"Sey, N. Y. A.; Fauni, H.; Ma, W.; Won, H.",Hyejung Won,University of North Carolina- Chapel Hill,neuroscience,Nature Neuroscience,2019-06-24,2020-03-09,4
3,10.1101/796433,10.1016/j.neuroimage.2020.116698,Intersubject consistent dynamic connectivity d...,"Di, X.; Biswal, B. B.",Bharat B Biswal,New Jersey Institute of Technology,neuroscience,NeuroImage,2019-10-07,2020-03-01,2
4,10.1101/838573,10.1111/1462-2920.14966,Genome Repository of Oiled Systems (GROS): an ...,"Karthikeyan, S.; Rodriguez-R, L. M.; Heritier-...",Konstantinos T Konstantinidis,Georgia Institute of Technology,genomics,Environmental Microbiology,2019-11-12,2020-03-02,7
...,...,...,...,...,...,...,...,...,...,...,...
1574,10.1101/588392,10.1007/s10815-020-01725-6,Ontogeny and expression profiles of steroid ho...,"Mishra, A.; Galvankar, M.; Singh, N.; Vaidya, ...",Deepak Modi,ICMR-National Institute for Research in Reprod...,pathology,Journal of Assisted Reproduction and Genetics,2019-03-25,2020-03-09,6
1575,10.1101/653675,10.1111/jeb.13608,The evolution of parasite host range in geneti...,"Gibson, A. K.; Baffoe-Bonnie, H. S.; Penley, M...",Amanda Kyle Gibson,University of Virginia,evolutionary biology,Journal of Evolutionary Biology,2019-05-29,2020-03-09,7
1576,10.1101/761106,10.15252/msb.20199083,KDML: a machine-learning framework for inferen...,"Sailem, H.; Rittscher, J.; Pelkmans, L.",Heba Sailem,University of Oxford,systems biology,Molecular Systems Biology,2019-09-08,2020-03-06,3
1577,10.1101/774265,10.1038/s41467-020-15457-9,Identification of gut microbiome markers for s...,"Zhu, F.; Ju, Y.; Wang, W.; Wang, Q.; Guo, R.; ...",Huijue Jia,BGI-Shenzhen,microbiology,Nature Communications,2019-09-19,2020-03-31,18


In [None]:
#summary of table
pubs_df.describe(include='all')


  pubs_df.describe(include='all')


Unnamed: 0,pre_DOI,pub_DOI,Title,Authors,Corresponding_Authors,Institution,Category,Journal,Preprint_Date,Published_Date,Num_of_Authors
count,1579,1579,1579,1579,1579,1579,1579,1579,1579,1579,1579.0
unique,1579,1578,1579,1579,1559,1119,27,540,492,31,
top,10.1101/759530,10.1534/g3.120.401151,Senolytic treatment targets aberrant p21-expre...,"Ritschka, B.; Knauer-Meyer, T.; Mas, A.; Plass...",Jackson Champer,University of Oxford,neuroscience,PLOS ONE,2019-09-19 00:00:00,2020-03-03 00:00:00,
freq,1,2,1,1,3,18,280,97,17,92,
first,,,,,,,,,2016-06-13 00:00:00,2020-03-01 00:00:00,
last,,,,,,,,,2020-03-29 00:00:00,2020-03-31 00:00:00,
mean,,,,,,,,,,,7.029132
std,,,,,,,,,,,5.18496
min,,,,,,,,,,,1.0
25%,,,,,,,,,,,4.0


In [None]:
#freq count of Num_of_Authors
freq_count(pubs_df,'Num_of_Authors')

3     192
5     186
4     185
6     176
7     148
2     141
8     118
9      87
10     64
11     53
13     35
12     29
1      26
14     25
15     24
16     19
17     13
18     11
21     10
22      6
19      4
26      4
30      3
24      2
31      2
20      2
46      1
43      1
41      1
34      1
28      1
32      1
40      1
25      1
45      1
29      1
27      1
33      1
44      1
38      1
Name: Num_of_Authors, dtype: int64

In [None]:
#freq count "Corresponding_Authors"
freq_count(pubs_df, 'Corresponding_Authors')

Jackson  Champer     3
Sudha  Rajamani      2
James A. Wells       2
Harshad  Ghodke      2
Kevin B Wood         2
                    ..
Natalia  Ninkina     1
Pranav  Danthi       1
Alexander  Lorenz    1
Chong  Zhang         1
Timothy M Healy      1
Name: Corresponding_Authors, Length: 1559, dtype: int64

In [None]:
#freq_count Institution
freq_count(pubs_df, 'Institution')

University of Oxford                                                                                                                                          18
Stanford University                                                                                                                                           16
University of Pennsylvania                                                                                                                                    12
University of California, Berkeley                                                                                                                            11
Harvard University                                                                                                                                            11
                                                                                                                                                              ..
Institut de Recherche Experimental

In [None]:
#freq_count Category
freq_count(pubs_df, 'Category')

neuroscience                              280
bioinformatics                            142
microbiology                              139
cell biology                               90
genomics                                   84
biophysics                                 83
evolutionary biology                       80
genetics                                   76
ecology                                    68
biochemistry                               62
plant biology                              61
cancer biology                             58
developmental biology                      51
immunology                                 48
molecular biology                          47
systems biology                            36
bioengineering                             26
physiology                                 26
animal behavior and cognition              25
pharmacology and toxicology                20
epidemiology                               19
synthetic biology                 

In [None]:
#freq count Jounral
freq_count(pubs_df, 'Journal')

PLOS ONE                                           97
Scientific Reports                                 74
eLife                                              64
Nature Communications                              61
Proceedings of the National Academy of Sciences    45
                                                   ..
Journal of Medical Genetics                         1
Journal of Mathematical Economics                   1
Journal of Mammalogy                                1
Chemical Communications                             1
ACS Applied Materials & Interfaces                  1
Name: Journal, Length: 540, dtype: int64

In [None]:
#freq count Preprint_Date
freq_count(pubs_df, 'Preprint_Date')

2019-09-19    17
2019-10-07    15
2019-11-29    15
2019-10-10    14
2019-09-23    14
              ..
2017-08-01     1
2019-05-18     1
2018-02-18     1
2017-12-29     1
2019-05-11     1
Name: Preprint_Date, Length: 492, dtype: int64

In [None]:
#freq count Published_date
freq_count(pubs_df, 'Published_Date')

2020-03-03    92
2020-03-09    91
2020-03-12    91
2020-03-19    79
2020-03-17    73
2020-03-05    72
2020-03-13    71
2020-03-06    69
2020-03-02    68
2020-03-10    68
2020-03-04    67
2020-03-11    65
2020-03-23    64
2020-03-18    63
2020-03-20    61
2020-03-31    60
2020-03-16    59
2020-03-25    59
2020-03-26    58
2020-03-30    57
2020-03-24    57
2020-03-27    40
2020-03-14    25
2020-03-28    20
2020-03-07    14
2020-03-29    12
2020-03-21    11
2020-03-01     6
2020-03-22     4
2020-03-15     2
2020-03-08     1
Name: Published_Date, dtype: int64

In [None]:
'''
                      columns=["pre_DOI", "pub_DOI",
                               "Title", "Authors", "Corresponding_Authors",
                               "Institution",
                               "Category", "Journal", "Preprint_Date", "Published_Date"])
'''
assert num_pubs == len(pubs_df)
"Total number of papers published and the length of the dataframe does match."

'Total number of papers published and the length of the dataframe does match.'

In [None]:
def get_publisher_prefix(doi: pd.Series) -> pd.Series:
    return doi.apply(lambda x: x.split("/")[0])

query_type='/publisher'

def get_publisher_data(url)
get_data(None,f'{base_url}{query_type}'
         get_publisher_prefix(pubs_df.pub_DOI).unique())

array(['10.1101', '10.3389', '10.1038', '10.1016', '10.1111', '10.1002',
       '10.1039', '10.3390', '10.1099', '10.1371', '10.1186', '10.1074',
       '10.1103', '10.7554', '10.1534', '10.1073', '10.1007', '10.1214',
       '10.1093', '10.1094', '10.15252', '10.1088', '10.1083', '10.1021',
       '10.1109', '10.1523', '10.1182', '10.5334', '10.1098', '10.1128',
       '10.1080', '10.26508', '10.1158', '10.1085', '10.1037', '10.1155',
       '10.1097', '10.1162', '10.1152', '10.1242', '10.1172', '10.1089',
       '10.1136', '10.1107', '10.1530', '10.1200', '10.1126', '10.7717',
       '10.2174', '10.1364', '10.1261', '10.3758', '10.1210', '10.1104',
       '10.2337', '10.1096', '10.1177', '10.1105', '10.7150', '10.12688',
       '10.1042', '10.18632', '10.1086', '10.3934', '10.4049', '10.1212',
       '10.1053', '10.34067', '10.1167', '10.1124', '10.1161', '10.1139',
       '10.1084', '10.1142', '10.2478', '10.4236', '10.1254', '10.4252',
       '10.2217', '10.5091', '10.3372', '10.21

**Journal API**<br>
https://www.nature.com/opensearch/<br>
https://www.biorxiv.org/content/10.1101/339747v4<br>
https://www.biorxiv.org/content/10.1101/339747v4.full.pdf<br>
https://api.biorxiv.org/details/biorxiv/10.1101/099697


----

### Junk Code
Please disregard the codes below.

In [None]:
journal_list = []

# `license`,`abstract`, and `server` are excluded from the metrics. 
for journal in json_info["collection"]:
    journal_list.append([journal["doi"], journal["title"], journal["authors"],
                         journal["author_corresponding"], 
                         journal["author_corresponding_institution"],
                         journal["date"], journal["version"], journal["type"],
                         journal["category"], journal["jatsxml"], journal["published"]])
    

In [None]:
journal_df = pd.DataFrame(data=journal_list,
                         columns=["DOI", "Title", "Authors", "Corresponding Authors",
                                  "Institution",
                                  "Date", "Version", "Type", "Category", "Xml", "Published"])
journal_df.head()

Unnamed: 0,DOI,Title,Authors,Corresponding Authors,Institution,Date,Version,Type,Category,Xml,Published
0,10.1101/402644,Resting shear elastic modulus as a marker of p...,"SIRACUSA, J.; CHARLOT, K.; MALGOYRE, A.; CONOR...",Julien SIRACUSA,IRBA,2018-08-28,1,new results,physiology,https://www.biorxiv.org/content/early/2018/08/...,
1,10.1101/402701,Enterotype-like microbiome stratification as e...,"Martin, M. A.",Miguel Angel Martin,Universidad Politecnica de Madrid Centro de Es...,2018-08-28,1,new results,systems biology,https://www.biorxiv.org/content/early/2018/08/...,10.1142/S0218348X21502108
2,10.1101/402560,An open-source software analysis package for M...,"Harink, B.; Nguyen, H.; Thorn, K.; Fordyce, P.",Polly Fordyce,Stanford University,2018-08-28,1,new results,bioengineering,https://www.biorxiv.org/content/early/2018/08/...,10.1371/journal.pone.0203725
3,10.1101/402586,Over-expression of the photoperiod response re...,"Stephenson, E.; Estrada, S.; Meng, X.; Ourada,...",Olga Danilevskaya,DuPont Pioneer,2018-08-28,1,new results,developmental biology,https://www.biorxiv.org/content/early/2018/08/...,10.1371/journal.pone.0203728
4,10.1101/402743,A natural history model for planning prostate ...,"Karlsson, A.; Jauhiainen, A.; Gulati, R.; Eklu...",Andreas Karlsson,Karolinska Institute,2018-08-28,1,new results,epidemiology,https://www.biorxiv.org/content/early/2018/08/...,10.1371/journal.pone.0211918
