Skip to content
This repository has been archived by the owner on Apr 2, 2022. It is now read-only.

Commit

Permalink
Add v7 datasets in datasets.py (#15)
Browse files: browse the repository at this point in the history
  • Loading branch information
bmkramer committed Apr 14, 2020
1 parent ab11787 commit 35b0f83
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 20 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -1,3 +1,9 @@
+ # R
+
+ .Rhistory
+
+ # Python
+
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
38 changes: 18 additions & 20 deletions asreviewcontrib/covid19/datasets.py
Expand Up @@ -9,39 +9,37 @@ class Cord19Dataset(BaseDataSet):
topic = "Covid-19"
license = "Covid dataset license"
link = "https://pages.semanticscholar.org/coronavirus-research"
- last_update = "2020-04-03"
+ last_update = "2020-04-10"
description = "A free dataset on publications on the corona virus."
img_url = ("https://pages.semanticscholar.org/hs-fs/hubfs/"
"covid-image.png?width=300&name=covid-image.png")
link = "https://pages.semanticscholar.org/coronavirus-research"
year = 2020


- class Cord19DatasetV6(Cord19Dataset):
- dataset_id = "cord19-v6"
- title = "CORD-19 v6"
- date = "2020-04-03"
+ class Cord19DatasetV7(Cord19Dataset):
+ dataset_id = "cord19-v7"
+ title = "CORD-19 v7"
+ date = "2020-04-10"
statistics = {
- "n_papers": 47298,
+ "n_papers": 51078,
"n_missing_titles": 158,
- "n_missing_abstracts": 8250,
+ "n_missing_abstracts": 8726,
}
- url = ("https://ai2-semanticscholar-cord-19.s3-us-west-2.amazonaws.com/2020-04-03/metadata.csv") # noqa
- sha512 = ("5ba3738e603e2b23c403a46fb2620360415ba3419b09b071f5a5ca16a96422aa78a5456cba7abb18279b6510174273694961b72c28006620c7f28571125cfae2") # noqa
+ url = ("https://ai2-semanticscholar-cord-19.s3-us-west-2.amazonaws.com/2020-04-10/metadata.csv") # noqa


- class Cord19DatasetV6_Dec2019(Cord19Dataset):
- dataset_id = "cord19-v6-2020"
- title = "CORD-19 v5 since Dec. 2019"
- last_update = "2020-04-03"
+ class Cord19DatasetV7_Dec2019(Cord19Dataset):
+ dataset_id = "cord19-v7-2020"
+ title = "CORD-19 v7 since Dec. 2019"
+ last_update = "2020-04-10"
statistics = {
- "n_papers": 4774,
+ "n_papers": 5753,
"n_missing_titles": 2,
- "n_missing_abstracts": 1103,
+ "n_missing_abstracts": 1422,
}
- date = "2020-04-05"
- url = ("https://raw.githubusercontent.com/asreview/asreview-covid19/master/datasets/cord19_v6_20191201.csv") # noqa
- sha512 = ("390c04b690abff2f824ed837367048308c6573032b2e45301056c3cfa7b04e6ea5b00ebd691e1af2066a820335bea1bdad240df736bd69be0b3c436fca629759") # noqa
+ date = "2020-04-14"
+ url = ("https://raw.githubusercontent.com/asreview/asreview-covid19/master/datasets/cord19_v7_20191201.csv") # noqa


class Covid19DataGroup(BaseDataGroup):
Expand All @@ -50,6 +48,6 @@ class Covid19DataGroup(BaseDataGroup):

def __init__(self):
super(Covid19DataGroup, self).__init__(
- Cord19DatasetV6(),
- Cord19DatasetV6_Dec2019(),
+ Cord19DatasetV7(),
+ Cord19DatasetV7_Dec2019(),
)

0 comments on commit 35b0f83

Please sign in to comment.