In [None]:
import arxiv
import numpy as np
import pandas

In [None]:
# Build the arXiv search: a paper matches when its title contains any of the
# keyword phrases below.
max_results = 600
keywords = ['crisis communication', 'emergency communication', 'handling emergencies', 'disaster communication']
# arXiv's query syntax uses double quotes to group several words into one
# phrase for a field prefix, so each multi-word keyword is quoted.
# NOTE(review): unquoted, only the first word appears to be bound to `ti:` --
# confirm against the arXiv API manual if the result count changes a lot.
titles = [f'ti:"{x}"' for x in keywords]
query = ' OR '.join(titles)
print("Using query\n", query)
search = arxiv.Search(
    query=query,
    max_results=max_results,
    sort_by=arxiv.SortCriterion.Relevance,
    sort_order=arxiv.SortOrder.Descending,
)

In [None]:
# Flatten every search result into a plain dict so pandas can ingest the list.
def _result_to_record(result):
    """Return the fields of one search result that end up in the table."""
    author_names = [author.name for author in result.authors]
    return {
        'title': result.title,
        'date': result.published,
        'article_id': result.entry_id,
        'url': result.pdf_url,
        'main_topic': result.primary_category,
        'all_topics': result.categories,
        'authors': author_names,
    }


# Iterating the results is what actually fetches them from the search.
data_points = [_result_to_record(result) for result in search.results()]

In [None]:
# Sanity check: how many records were collected, and what one looks like.
n_data_points = len(data_points)
print(n_data_points)
# Indexing happens after the count is printed, so an empty result list still
# reports 0 before failing here.
first_data_point = data_points[0]
print(first_data_point)

In [None]:
# Build the publications table from the collected records.
records_frame = pandas.DataFrame(data_points)

# Number the distinct article ids 0..n-1 in order of first appearance.
unique_article_ids = records_frame['article_id'].unique()
article_mapping = dict(zip(unique_article_ids, range(len(unique_article_ids))))

# `assign` keeps `article_id` in its original column position and appends
# `year` as the last column.
data = records_frame.assign(
    year=pandas.DatetimeIndex(records_frame['date']).year,
    article_id=records_frame['article_id'].map(article_mapping),
)

print(data)

In [None]:
# Persist the table (including its index column) to a CSV file in the
# current working directory.
output_csv = 'publications.csv'
data.to_csv(output_csv)