In [1]:
import pandas as pd
import numpy as np
import requests
import json
import urllib, urllib.request

**bioRxiv API** documentation can be found [here](https://api.biorxiv.org/details/medrxiv/help).
<br>**Final edit:** November 21st, 2022. 

In [2]:
# Probe request: fetch a single page of the bioRxiv "details" endpoint for the
# 2018-08-21 .. 2018-08-28 window (cursor 101) so the response metadata can be
# inspected in the next cell.
request_API = requests.get(
    "https://api.biorxiv.org/details/biorxiv/2018-08-21/2018-08-28/101",
    timeout=30,  # never hang the kernel on an unresponsive server
)
# Surface HTTP errors here instead of as a JSON/KeyError further down.
request_API.raise_for_status()
data = request_API.text
json_info = json.loads(data)

In [3]:
# Read the `total` field from the first entry of the response's `messages`
# list and display it.
first_message = json_info["messages"][0]
total = first_message["total"]
total

630

In [4]:
# Page through the bioRxiv "details" endpoint for the 2018-08-21 .. 2018-08-28
# window and collect one row of metadata per returned record.
#
# `total` comes from the probe response fetched earlier; int() keeps the loop
# comparison valid even if the API serialises the count as a string.
total = int(json_info["messages"][0]["total"])
current_cursor = 0
journal_list = []

# The cursor is a zero-based start offset, so cursor == total is already past
# the last record: use `<` (the previous `<=` issued a wasted extra request
# whenever `total` was an exact multiple of the page size).
while current_cursor < total:
    request_API = requests.get(
        f"https://api.biorxiv.org/details/biorxiv/2018-08-21/2018-08-28/{current_cursor}",
        timeout=30,  # never hang the kernel on an unresponsive server
    )
    # Fail loudly on HTTP errors rather than on a confusing KeyError below.
    request_API.raise_for_status()
    json_info = request_API.json()

    page = json_info["collection"]
    if not page:
        # The server returned fewer records than it announced; stop instead of
        # requesting empty pages.
        break

    for journal in page:
        journal_list.append([journal["doi"], journal["title"], journal["authors"],
                             journal["author_corresponding"],
                             journal["author_corresponding_institution"],
                             journal["date"], journal["version"], journal["type"],
                             journal["category"], journal["jatsxml"], journal["published"]])

    # Advance by the number of records actually received so that a short page
    # cannot cause records to be skipped.
    current_cursor += len(page)

In [5]:
# Tabulate the collected rows. The labels are listed in the same order as the
# values stored in each row of `journal_list`.
column_names = [
    "DOI",
    "Title",
    "Authors",
    "Corresponding Authors",
    "Institution",
    "Date",
    "Version",
    "Type",
    "Category",
    "Xml",
    "Published",
]
journal_df = pd.DataFrame(journal_list, columns=column_names)
journal_df

Unnamed: 0,DOI,Title,Authors,Corresponding Authors,Institution,Date,Version,Type,Category,Xml,Published
0,10.1101/066688,Cross-scale dynamics and the evolutionary emer...,"Schreiber, S. J.; Ke, R.; Loverdo, C.; Park, M...",Sebastian J. Schreiber,"University of California, Davis",2018-08-21,2,new results,epidemiology,https://www.biorxiv.org/content/early/2018/08/...,10.1093/ve/veaa105
1,10.1101/079954,"iCARE: An R Package to Build, Validate and App...","Pal Choudhury, P.; Maas, P.; Wilcox, A.; Wheel...",Nilanjan Chatterjee,Johns Hopkins University,2018-08-23,2,new results,bioinformatics,https://www.biorxiv.org/content/early/2018/08/...,10.1371/journal.pone.0228198
2,10.1101/093120,Darwinian selection of host and bacteria suppo...,"Osmanovic, D.; Kessler, D. A.; Rabin, Y.; Soen...",Yoav Soen,Weizmann Institute of Science,2018-08-21,5,new results,evolutionary biology,https://www.biorxiv.org/content/early/2018/08/...,10.1186/s13062-018-0224-7
3,10.1101/097816,Voltage-gated Proton Channel Hv1 Supports Insu...,"Pang, H.; Wang, X.; Xi, W.; Zhao, Q.; Zhang, S...",Shu Jie Li,Nankai University,2018-08-24,3,new results,cell biology,https://www.biorxiv.org/content/early/2018/08/...,
4,10.1101/130989,On The Peopling Of The Americas: Molecular Evi...,"Yuan, D.; Huang, S.",Shi Huang,Central South University,2018-08-23,3,new results,evolutionary biology,https://www.biorxiv.org/content/early/2018/08/...,
...,...,...,...,...,...,...,...,...,...,...,...
625,10.1101/402404,Pat1 promotes processing body assembly by enha...,"Sachdev, R.; Hondele, M.; Linsenmeier, M.; Val...",Karsten Weis,ETH Zurich,2018-08-28,1,new results,biochemistry,https://www.biorxiv.org/content/early/2018/08/...,10.7554/elife.41415
626,10.1101/402420,Assessment of population differentiation and l...,"Lin, Y.-P.; Liu, C.-Y.; Chen, K.",Kaiyi Chen,National Taiwan University,2018-08-28,1,new results,genetics,https://www.biorxiv.org/content/early/2018/08/...,
627,10.1101/402909,Identification of Small Molecule Modulators of...,"Christen, M.; Kamischke, C.; Kulasekara, H. D....",Matthias Christen,Eidgenoessische Technische Hochschule (ETH) Zu...,2018-08-28,1,new results,biochemistry,https://www.biorxiv.org/content/early/2018/08/...,10.1002/cbic.201800593
628,10.1101/402362,Anti-angiogenic effects of VEGF stimulation on...,"Stratman, A. N.; Farrelly, O. M.; Mikelis, C. ...",Brant M Weinstein,NIH/NICHD,2018-08-28,1,new results,cell biology,https://www.biorxiv.org/content/early/2018/08/...,10.1038/s41467-020-14956-z


In [6]:
# Sanity check: the number of collected rows must equal the record count
# reported by the API. The message is wrapped in parentheses so it can sit on
# its own line; a bare line break after the comma is a SyntaxError.
assert total == len(journal_df), (
    "Total number of papers submitted and the length of the dataframe does not match."
)

The code above iterates over every paper submitted within the selected time frame. The error discussed during the previous meeting has been fixed. 

----

### Junk Code
Please disregard the code below.

In [7]:
# Keys copied from each record, in output-column order.
# `license`, `abstract`, and `server` are excluded from the metrics.
kept_fields = (
    "doi",
    "title",
    "authors",
    "author_corresponding",
    "author_corresponding_institution",
    "date",
    "version",
    "type",
    "category",
    "jatsxml",
    "published",
)

journal_list = []
for journal in json_info["collection"]:
    # One row per record, holding only the selected fields.
    journal_list.append([journal[field] for field in kept_fields])
    

In [9]:
# Build a DataFrame from the rows in `journal_list` and preview the first
# five entries.
preview_columns = (
    "DOI Title Authors|Corresponding Authors|Institution|"
    "Date Version Type Category Xml Published"
)
# Split on "|" first so the two-word label stays intact, then on spaces.
column_labels = []
for chunk in preview_columns.split("|"):
    if chunk in ("Corresponding Authors", "Institution"):
        column_labels.append(chunk)
    else:
        column_labels.extend(chunk.split())

journal_df = pd.DataFrame(journal_list, columns=column_labels)
journal_df.head()

Unnamed: 0,DOI,Title,Authors,Corresponding Authors,Institution,Date,Version,Type,Category,Xml,Published
0,10.1101/402644,Resting shear elastic modulus as a marker of p...,"SIRACUSA, J.; CHARLOT, K.; MALGOYRE, A.; CONOR...",Julien SIRACUSA,IRBA,2018-08-28,1,new results,physiology,https://www.biorxiv.org/content/early/2018/08/...,
1,10.1101/402701,Enterotype-like microbiome stratification as e...,"Martin, M. A.",Miguel Angel Martin,Universidad Politecnica de Madrid Centro de Es...,2018-08-28,1,new results,systems biology,https://www.biorxiv.org/content/early/2018/08/...,10.1142/S0218348X21502108
2,10.1101/402560,An open-source software analysis package for M...,"Harink, B.; Nguyen, H.; Thorn, K.; Fordyce, P.",Polly Fordyce,Stanford University,2018-08-28,1,new results,bioengineering,https://www.biorxiv.org/content/early/2018/08/...,10.1371/journal.pone.0203725
3,10.1101/402586,Over-expression of the photoperiod response re...,"Stephenson, E.; Estrada, S.; Meng, X.; Ourada,...",Olga Danilevskaya,DuPont Pioneer,2018-08-28,1,new results,developmental biology,https://www.biorxiv.org/content/early/2018/08/...,10.1371/journal.pone.0203728
4,10.1101/402743,A natural history model for planning prostate ...,"Karlsson, A.; Jauhiainen, A.; Gulati, R.; Eklu...",Andreas Karlsson,Karolinska Institute,2018-08-28,1,new results,epidemiology,https://www.biorxiv.org/content/early/2018/08/...,10.1371/journal.pone.0211918
