# Collecting the PubMed publications related to COVID-19

To collect the PubMed publications related to COVID-19, we used the "pymed" library, which is available on PyPI: [https://pypi.org/project/pymed/](https://pypi.org/project/pymed/).

In [1]:
# Uncomment the next line to install the pymed library into the kernel's environment.
# %pip install pymed

In [2]:
# Importing the required libraries.
import csv
from pathlib import Path

import pandas as pd
from pymed import PubMed

## 1. Getting the data with the "pymed" library

In [3]:
# Build the PubMed client used by every request in this notebook.
# `tool` and `email` are handed to the client unchanged; presumably they identify
# this script to the PubMed API -- confirm against the pymed documentation.
# NOTE(review): the contact address is hardcoded; consider moving it to configuration.
pubmed = PubMed(
    tool="Covid-19",
    email="breno_mtb@ufrn.edu.br",
)

In [4]:
# Search expression sent to PubMed, made of three AND-ed clauses:
#   1. COVID-19 / coronavirus terms, OR-ed together;
#   2. a publication-date range of 2019 to 2020;
#   3. a language filter for English.
# Adjacent string literals are concatenated into one single-line query string.
query = (
    "(covid-19 OR coronavirus disease 2019 OR 2019-ncov OR novel coronavirus"
    " OR sars-cov-2 OR novel coronavirus pneumonia OR coronavirus)"
    " AND (2019[Date - Publication]:2020[Date - Publication])"
    " AND (english[Language])"
)

In [5]:
# Ask PubMed how many records match the query; this count is later passed
# as `max_results` when the records are fetched.
num_max = pubmed.getTotalResultsCount(query)
print(f"Number of records to be returned: {num_max}.")

Number of records to be returned: 28818.


In [6]:
# Run the search, asking for as many records as the total count reported
# for this query (`num_max`).
results = pubmed.query(
    query,
    max_results=num_max,
)

In [7]:
# Turn every returned article object into a plain dictionary via its toDict() method.
# NOTE(review): if `results` is a one-shot iterator, re-running this cell without
# re-running the query cell would produce an empty list -- confirm.
data = [article.toDict() for article in results]

## 2. Saving the data collected

In [8]:
# Export the collected records to a CSV file (no index column, every field quoted).
output_path = Path("../../data/raw/pubmed_raw.csv")

# Create the target directory first: to_csv does not create missing parent
# directories, and failing here would happen only after the whole download.
output_path.parent.mkdir(parents=True, exist_ok=True)

# QUOTE_ALL wraps every field in quotes; presumably chosen because free-text
# fields can contain commas and line breaks -- confirm.
pd.DataFrame(data).to_csv(output_path, index=False, quoting=csv.QUOTE_ALL)