diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index cf2712e..e905151 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -9,7 +9,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: [3.6, 3.7]
+        python-version: ["3.6", "3.7", "3.8"]  # quoted: a bare 3.10 would parse as 3.1
 
     steps:
     - uses: actions/checkout@v1
@@ -27,9 +27,6 @@ jobs:
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+        flake8 . --count --exit-zero --statistics
     - name: Test with pytest
-      run: |
-        pip install pytest
-        pip install -e .
-        pytest
+      run: python setup.py test
diff --git a/.gitignore b/.gitignore
index f2c6ad8..696b4cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -121,3 +121,4 @@ data/
 
 # json files
 *.json
+.testmondata
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 3659aa5..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Config file for automatic testing at travis-ci.org
-
-language: python
-python:
-    - 3.6
-    - 3.7
-
-# Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
-install: pip install -U tox-travis
-
-# Command to run tests, e.g. python setup.py test
-script: tox
-
-# Assuming you have installed the travis-ci CLI tool, after you
-# create the Github repo and add it to Travis, run the
-# following command to finish PyPI deployment setup:
-# $ travis encrypt --add deploy.password
-deploy:
-    provider: pypi
-    distributions: sdist bdist_wheel
-    user: odarbelaeze
-    password:
-        secure: w9KxhyHxZq2HbWsjA/TutcgCsBKhEJQaTLOmFS82e0zZOS5vTuLAQ5dS89CeSwH8vS/whJkgdQcSwCoHQw/g72o+G7ndw6moxQz4wZKUnhh9Ls4EjCDiHM/jDl3rnZzduNzZbMl0TFTG8nw62OvwJIzmjelkvsN+DXy0Oa4hob+I4pEEvhvubvLcemxz+BCIKB+v24mD+t1OFUfkB1bxygA9ekzfRFxOHYR/ZJbwZM7J1+hEhyhsLiVBYDYU5Abx46R34x3OxS98suoW1wGeZMPi4tqBy2hYVZjhJGMAP+aIle3RgS6ld69w/R76RWggW1TDj1UYhMo6LUD77+6A5nHmSab8PEAGiP1pfrU8LDesZyv11Xrfd62Pf+jgbV/1BXnHamOG2YV07dTRVClo9KEvpvhseQnpL0KL1tQDfFW9Wbk2zFFNuhNlLBd6ER0EIu0wpuEvBOCweNnFYyQKYellMPon3P6ljPgNy2Qn1YCRlWVzCoumBpy49ej69DhT0Kt0Bi4VBf7dKAp6ETQFHSiSsJiPm3qY+DQg9UZ5KKSy1wwWd8mo5DvbVjC67uSmF5N4ap+OFhjUQBrcaqsF0/wpO87bse8hScU8e8LAKDzl9UKcyN9USZl0BY2TTTlHqeYiP7FwfJPU421kcd/lNo/Hu6tJBYboQi5MnmkbFug=
-    on:
-        tags: true
-        repo: coreofscience/python-wostools
-        python: 3.6
diff --git a/.zenodo.json b/.zenodo.json
index 937d91b..24fc819 100644
--- a/.zenodo.json
+++ b/.zenodo.json
@@ -1,8 +1,8 @@
 {
     "description": "Translates isi web of knowledge files into python objects.",
     "license": "MIT",
-    "title": "coreofscience/python-wostools: Add citation graph support",
-    "version": "v0.2.0",
+    "title": "coreofscience/python-wostools",
+    "version": "v2.0.0",
     "upload_type": "software",
     "publication_date": "2018-08-13",
     "creators": [
@@ -11,6 +11,11 @@
             "affiliation": "Core of science",
             "name": "Oscar David Arbeláe1ez E."
         },
+        {
+            "orcid": "0000-0002-1249-7128",
+            "affiliation": "Core of science",
+            "name": "Juan David Alzate Cardona"
+        },
         {
             "name": "Daniel Stiven Valencia Hernandez",
             "affiliation": "Core of science"
@@ -20,7 +25,7 @@
     "related_identifiers": [
         {
             "scheme": "url",
-            "identifier": "https://github.com/coreofscience/python-wostools/tree/v1.1.0",
+            "identifier": "https://github.com/coreofscience/python-wostools/tree/v2.0.0",
             "relation": "isSupplementTo"
         },
         {
diff --git a/AUTHORS.md b/AUTHORS.md
new file mode 100644
index 0000000..2d1388d
--- /dev/null
+++ b/AUTHORS.md
@@ -0,0 +1,11 @@
+# Credits
+
+## Development Lead
+
+-   Core of Science \<<dev@coreofscience.com>\>
+
+## Contributors
+
+-   Oscar Arbeláez \<<odarbelaeze@gmail.com>\>
+-   Juan David Alzate Cardona \<<jdalzatec@gmail.com>\>
+-   Daniel Valencia \<<dsvalenciah@unal.edu.co>\>
diff --git a/AUTHORS.rst b/AUTHORS.rst
deleted file mode 100644
index e84601f..0000000
--- a/AUTHORS.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-=======
-Credits
-=======
-
-Development Lead
-----------------
-
-* Core of Science <dev@coreofscience.com>
-
-Contributors
-------------
-
-None yet. Why not be the first?
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..bbc149a
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,136 @@
+# Contributing
+
+Contributions are welcome, and they are greatly appreciated! Every
+little bit helps, and credit will always be given.
+
+You can contribute in many ways:
+
+## Types of Contributions
+
+### Report Bugs
+
+Report bugs at
+<https://github.com/coreofscience/python-wostools/issues>.
+
+If you are reporting a bug, please include:
+
+-   Your operating system name and version.
+-   Any details about your local setup that might be helpful in
+    troubleshooting.
+-   Detailed steps to reproduce the bug.
+
+### Fix Bugs
+
+Look through the GitHub issues for bugs. Anything tagged with \"bug\"
+and \"help wanted\" is open to whoever wants to implement it.
+
+### Implement Features
+
+Look through the GitHub issues for features. Anything tagged with
+\"enhancement\" and \"help wanted\" is open to whoever wants to
+implement it.
+
+### Write Documentation
+
+Python WoS tools could always use more documentation, whether as part of
+the official Python WoS tools docs, in docstrings, or even on the web in
+blog posts, articles, and such.
+
+### Submit Feedback
+
+The best way to send feedback is to file an issue at
+<https://github.com/coreofscience/python-wostools/issues>.
+
+If you are proposing a feature:
+
+-   Explain in detail how it would work.
+-   Keep the scope as narrow as possible, to make it easier to
+    implement.
+-   Remember that this is a volunteer-driven project, and that
+    contributions are welcome :)
+
+## Get Started!
+
+Ready to contribute? Here's how to set up `wostools` for
+local development.
+
+1.  Fork the `wostools` repo on GitHub.
+
+2.  Clone your fork locally:
+
+    ```bash
+    $ git clone git@github.com:your_name_here/python-wostools.git
+    ```
+
+3.  Install your local copy into a virtualenv. Assuming you have
+    virtualenvwrapper installed, this is how you set up your fork for
+    local development:
+
+    ```bash
+    $ mkvirtualenv wostools
+    $ cd wostools/
+    $ python setup.py develop
+    ```
+
+4.  Create a branch for local development:
+
+    ```bash
+    $ git checkout -b name-of-your-bugfix-or-feature
+    ```
+
+    Now you can make your changes locally.
+
+5.  When you\'re done making changes, check that your changes pass
+    flake8 and the tests, including testing other Python versions with
+    tox:
+
+    ```bash
+    $ flake8 wostools tests
+    $ python setup.py test or py.test
+    $ tox
+    ```
+
+    To get flake8 and tox, just pip install them into your virtualenv.
+
+6.  Commit your changes and push your branch to GitHub:
+
+    ```bash
+    $ git add .
+    $ git commit -m "Your detailed description of your changes."
+    $ git push origin name-of-your-bugfix-or-feature
+    ```
+
+7.  Submit a pull request through the GitHub website.
+
+## Pull Request Guidelines
+
+Before you submit a pull request, check that it meets these guidelines:
+
+1.  The pull request should include tests.
+2.  If the pull request adds functionality, the docs should be updated.
+    Put your new functionality into a function with a docstring, and add
+    the feature to the list in README.md.
+3.  The pull request should work for Python 3.6, and for PyPy. Check
+    <https://github.com/coreofscience/python-wostools/actions>
+    and make sure that the tests pass for all supported Python versions.
+
+## Tips
+
+To run a subset of tests:
+
+```bash
+$ py.test tests.test_wostools
+```
+
+## Deploying
+
+A reminder for the maintainers on how to deploy. Make sure all your
+changes are committed (including an entry in HISTORY.md). Then run:
+
+```bash
+$ bumpversion patch # possible: major / minor / patch
+$ git push
+$ git push --tags
+```
+
+Travis will then deploy to PyPI if tests pass.
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
deleted file mode 100644
index 13572cd..0000000
--- a/CONTRIBUTING.rst
+++ /dev/null
@@ -1,128 +0,0 @@
-.. highlight:: shell
-
-============
-Contributing
-============
-
-Contributions are welcome, and they are greatly appreciated! Every little bit
-helps, and credit will always be given.
-
-You can contribute in many ways:
-
-Types of Contributions
-----------------------
-
-Report Bugs
-~~~~~~~~~~~
-
-Report bugs at https://github.com/coreofscience/python-wostools/issues.
-
-If you are reporting a bug, please include:
-
-* Your operating system name and version.
-* Any details about your local setup that might be helpful in troubleshooting.
-* Detailed steps to reproduce the bug.
-
-Fix Bugs
-~~~~~~~~
-
-Look through the GitHub issues for bugs. Anything tagged with "bug" and "help
-wanted" is open to whoever wants to implement it.
-
-Implement Features
-~~~~~~~~~~~~~~~~~~
-
-Look through the GitHub issues for features. Anything tagged with "enhancement"
-and "help wanted" is open to whoever wants to implement it.
-
-Write Documentation
-~~~~~~~~~~~~~~~~~~~
-
-Python WoS tools could always use more documentation, whether as part of the
-official Python WoS tools docs, in docstrings, or even on the web in blog posts,
-articles, and such.
-
-Submit Feedback
-~~~~~~~~~~~~~~~
-
-The best way to send feedback is to file an issue at https://github.com/coreofscience/python-wostools/issues.
-
-If you are proposing a feature:
-
-* Explain in detail how it would work.
-* Keep the scope as narrow as possible, to make it easier to implement.
-* Remember that this is a volunteer-driven project, and that contributions
-  are welcome :)
-
-Get Started!
-------------
-
-Ready to contribute? Here's how to set up `wostools` for local development.
-
-1. Fork the `wostools` repo on GitHub.
-2. Clone your fork locally::
-
-    $ git clone git@github.com:your_name_here/python-wostools.git
-
-3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
-
-    $ mkvirtualenv wostools
-    $ cd wostools/
-    $ python setup.py develop
-
-4. Create a branch for local development::
-
-    $ git checkout -b name-of-your-bugfix-or-feature
-
-   Now you can make your changes locally.
-
-5. When you're done making changes, check that your changes pass flake8 and the
-   tests, including testing other Python versions with tox::
-
-    $ flake8 wostools tests
-    $ python setup.py test or py.test
-    $ tox
-
-   To get flake8 and tox, just pip install them into your virtualenv.
-
-6. Commit your changes and push your branch to GitHub::
-
-    $ git add .
-    $ git commit -m "Your detailed description of your changes."
-    $ git push origin name-of-your-bugfix-or-feature
-
-7. Submit a pull request through the GitHub website.
-
-Pull Request Guidelines
------------------------
-
-Before you submit a pull request, check that it meets these guidelines:
-
-1. The pull request should include tests.
-2. If the pull request adds functionality, the docs should be updated. Put
-   your new functionality into a function with a docstring, and add the
-   feature to the list in README.rst.
-3. The pull request should work for Python 3.6, and for PyPy. Check
-   https://travis-ci.org/coreofscience/python-wostools/pull_requests
-   and make sure that the tests pass for all supported Python versions.
-
-Tips
-----
-
-To run a subset of tests::
-
-$ py.test tests.test_wostools
-
-
-Deploying
----------
-
-A reminder for the maintainers on how to deploy.
-Make sure all your changes are committed (including an entry in HISTORY.rst).
-Then run::
-
-$ bumpversion patch # possible: major / minor / patch
-$ git push
-$ git push --tags
-
-Travis will then deploy to PyPI if tests pass.
diff --git a/HISTORY.md b/HISTORY.md
new file mode 100644
index 0000000..c389fd6
--- /dev/null
+++ b/HISTORY.md
@@ -0,0 +1,17 @@
+# History
+
+## 2.0.0 (2020-08-09)
+
+-   Make the article class more concrete
+-   Make collections iterable
+-   Add cached and lazy collections for different use cases
+
+## 0.2.0 (2018-08-12)
+
+-   Add support for all WOS fields.
+-   Add graph building support.
+-   Add a little cli for common tasks.
+
+## 0.1.1 (2018-05-10)
+
+-   First release on PyPI.
diff --git a/HISTORY.rst b/HISTORY.rst
deleted file mode 100644
index e5b3e00..0000000
--- a/HISTORY.rst
+++ /dev/null
@@ -1,15 +0,0 @@
-=======
-History
-=======
-
-0.2.0 (2018-08-12)
-------------------
-
-* Add support for all WOS fields.
-* Add graph building support.
-* Add a little cli for common tasks.
-
-0.1.1 (2018-05-10)
-------------------
-
-* First release on PyPI.
diff --git a/LICENSE b/LICENSE
index 40ceae2..8f2c7f5 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2018, Core of Science
+Copyright (c) 2018-2020, Core of Science
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/MANIFEST.in b/MANIFEST.in
index 965b2dd..a93c75c 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,11 +1,11 @@
-include AUTHORS.rst
-include CONTRIBUTING.rst
-include HISTORY.rst
+include AUTHORS.md
+include CONTRIBUTING.md
+include HISTORY.md
 include LICENSE
-include README.rst
+include README.md
 
 recursive-include tests *
 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
 
-recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
+recursive-include docs *.txt
diff --git a/Makefile b/Makefile
index 741b84b..915fa5a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,8 @@
-.PHONY: clean clean-test clean-pyc clean-build docs help
+.PHONY: clean clean-test clean-pyc clean-build docs help test-watch
 .DEFAULT_GOAL := help
 
+NOTIFY_FILE := /tmp/pytest-$$(pwd | md5sum | cut -d " " -f 1)
+
 define BROWSER_PYSCRIPT
 import os, webbrowser, sys
 
@@ -54,10 +56,15 @@ lint: ## check style with flake8
 	flake8 wostools tests
 
 test: ## run tests quickly with the default Python
-	py.test
+	python -m pytest
 
-test-all: ## run tests on every Python version with tox
-	tox
+test-watch: ## rerun the tests with coverage whenever a watched file changes
+	@ptw \
+		--ext "py,feature" \
+		--onpass "coverage report --skip-empty --skip-covered -m" \
+		--onfail "notify-send.sh -R $(NOTIFY_FILE) -i face-worried --hint int:transient:1 'Test failed' 'Ooops we have a problem, not all tests passed'" \
+		--onexit "notify-send.sh -R $(NOTIFY_FILE) -i media-playback-stop --hint int:transient:1 'Test runner stopped' 'Just so you know, the test runner stopped'" \
+		--runner "coverage run --source wostools -m pytest"
 
 coverage: ## check code coverage quickly with the default Python
 	coverage run --source wostools -m pytest
@@ -65,17 +72,6 @@ coverage: ## check code coverage quickly with the default Python
 	coverage html
 	$(BROWSER) htmlcov/index.html
 
-docs: ## generate Sphinx HTML documentation, including API docs
-	rm -f docs/wostools.rst
-	rm -f docs/modules.rst
-	sphinx-apidoc -o docs/ wostools
-	$(MAKE) -C docs clean
-	$(MAKE) -C docs html
-	$(BROWSER) docs/_build/html/index.html
-
-servedocs: docs ## compile the docs watching for changes
-	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
-
 release: dist ## package and upload a release
 	twine upload dist/*
 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4223c5b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,55 @@
+# Python WoS tools
+
+![Python package](https://github.com/coreofscience/python-wostools/workflows/Python%20package/badge.svg)
+[![image](https://img.shields.io/pypi/v/wostools.svg)](https://pypi.python.org/pypi/wostools)
+[![DOI: 10.5281/zenodo.1344261](https://zenodo.org/badge/94160457.svg)](https://zenodo.org/badge/latestdoi/94160457)
+
+Translates ISI Web of Knowledge files into python objects.
+
+## Quickstart
+
+Install the library by:
+
+```bash
+$ pip install wostools
+```
+
+Say you want to print the title of every article in an ISI file. You
+can start with [this example file](docs/examples/bit-pattern-savedrecs.txt).
+
+```python
+>>> from wostools import CachedCollection
+>>> collection = CachedCollection.from_filenames('docs/examples/bit-pattern-savedrecs.txt')
+>>> for article in collection:
+...     print(article.title)
+In situ grazing incidence small-angle X-ray scattering study of solvent vapor annealing in lamellae-forming block copolymer thin films: Trade-off of defects in deswelling
+Structural control of ultra-fine CoPt nanodot arrays via electrodeposition process
+Porphyrin-based Pt/Pd-containing metallopolymers: Synthesis, characterization, optical property and potential application in bioimaging
+Syntheses and Controllable Self-Assembly of Luminescence Platinum(II) Plane-Coil Diblock Copolymers
+# ...
+```
+
+Never fear, the wostools CLI is here to help you do some common tasks
+right from your terminal.
+
+```bash
+$ wostools --help
+$ # To extract all the properties in a json file
+$ wostools to-json docs/examples/bit-pattern-savedrecs.txt --output=document.json
+```
+
+## Features
+
+-   Free software: MIT license
+-   Just parses an ISI Web of Knowledge file and produces a native
+    python object.
+-   Through the `CollectionLazy` object it can do this using as little
+    memory as possible.
+-   It has a cli to extract documents and citation pairs for you :smile:
+
+## Credits
+
+This package was created with
+[Cookiecutter](https://github.com/audreyr/cookiecutter) and the
+[audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage)
+project template.
diff --git a/README.rst b/README.rst
deleted file mode 100644
index b090cab..0000000
--- a/README.rst
+++ /dev/null
@@ -1,80 +0,0 @@
-================
-Python WoS tools
-================
-
-
-.. image:: https://pyup.io/repos/github/coreofscience/python-wostools/shield.svg
-     :target: https://pyup.io/repos/github/coreofscience/python-wostools/
-     :alt: Updates
-
-.. image:: https://img.shields.io/pypi/v/wostools.svg
-    :target: https://pypi.python.org/pypi/wostools
-
-.. image:: https://img.shields.io/travis/coreofscience/python-wostools.svg
-    :target: https://travis-ci.org/coreofscience/python-wostools
-
-.. image:: https://readthedocs.org/projects/python-wostools/badge/?version=latest
-    :target: https://python-wostools.readthedocs.io/en/latest/?badge=latest
-    :alt: Documentation Status
-
-.. image:: https://zenodo.org/badge/94160457.svg
-   :target: https://zenodo.org/badge/latestdoi/94160457
-   :alt: DOI: 10.5281/zenodo.1344261
-
-Translates ISI Web of Knowledge files into python objects.
-
-
-
-* Free software: MIT license
-* Documentation: https://python-wostools.readthedocs.io.
-
-
-Quickstart
-----------
-
-Install the library by:
-
-.. code-block:: bash
-
-   $ pip install wostools
-
-Say you want to grab the title of all the articles in an ISI file, you can grab
-`this example file`_.
-
-.. code-block:: python
-
-   >>> from wostools import CollectionLazy
-   >>> collection = CollectionLazy.from_filenames('docs/examples/bit-pattern-savedrecs.txt')
-   >>> for article in collection.articles:
-   ...     print(article.TI)
-   In situ grazing incidence small-angle X-ray scattering study of solvent vapor annealing in lamellae-forming block copolymer thin films: Trade-off of defects in deswelling
-   Structural control of ultra-fine CoPt nanodot arrays via electrodeposition process
-   Porphyrin-based Pt/Pd-containing metallopolymers: Synthesis, characterization, optical property and potential application in bioimaging
-   Syntheses and Controllable Self-Assembly of Luminescence Platinum(II) Plane-Coil Diblock Copolymers
-   # ...
-
-Never fear wostools cli is here. To help you do some common tasks right from
-your terminal.
-
-.. code-block:: bash
-
-   $ wostools --help
-   $ # To extract all the properties in a json file
-   $ wostools to-json docs/examples/bit-pattern-savedrecs.txt --output=document.json
-
-Features
---------
-
-* Just parses an ISI Web of Knowledge file and produces a native python object.
-* Through the :code:`CollectionLazy` object it can do this using the minimum
-  amount of memory it can possibly do.
-* It has a cli to extract documents and citation pairs for you :smile:
-
-Credits
--------
-
-This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template.
-
-.. _Cookiecutter: https://github.com/audreyr/cookiecutter
-.. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage
-.. _`this example file`: docs/examples/bit-pattern-savedrecs.txt
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index ed385fe..0000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS    =
-SPHINXBUILD   = python -msphinx
-SPHINXPROJ    = wostools
-SOURCEDIR     = .
-BUILDDIR      = _build
-
-# Put it first so that "make" without argument is like "make help".
-help:
-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/docs/authors.rst b/docs/authors.rst
deleted file mode 100644
index e122f91..0000000
--- a/docs/authors.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. include:: ../AUTHORS.rst
diff --git a/docs/conf.py b/docs/conf.py
deleted file mode 100644
index a5e3a47..0000000
--- a/docs/conf.py
+++ /dev/null
@@ -1,160 +0,0 @@
-#!/usr/bin/env python
-#
-# wostools documentation build configuration file, created by
-# sphinx-quickstart on Fri Jun  9 13:47:02 2017.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-# If extensions (or modules to document with autodoc) are in another
-# directory, add these directories to sys.path here. If the directory is
-# relative to the documentation root, use os.path.abspath to make it
-# absolute, like shown here.
-#
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(".."))
-
-import wostools
-
-# -- General configuration ---------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"]
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ["_templates"]
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = ".rst"
-
-# The master toctree document.
-master_doc = "index"
-
-# General information about the project.
-project = u"Python WoS tools"
-copyright = u"2018, Core of Science"
-author = u"Core of Science"
-
-# The version info for the project you're documenting, acts as replacement
-# for |version| and |release|, also used in various other places throughout
-# the built documents.
-#
-# The short X.Y version.
-version = wostools.__version__
-# The full version, including alpha/beta/rc tags.
-release = wostools.__version__
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = "sphinx"
-
-# If true, `todo` and `todoList` produce output, else they produce nothing.
-todo_include_todos = False
-
-
-# -- Options for HTML output -------------------------------------------
-
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-#
-html_theme = "alabaster"
-
-# Theme options are theme-specific and customize the look and feel of a
-# theme further.  For a list of options available for each theme, see the
-# documentation.
-#
-# html_theme_options = {}
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ["_static"]
-
-
-# -- Options for HTMLHelp output ---------------------------------------
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = "wostoolsdoc"
-
-
-# -- Options for LaTeX output ------------------------------------------
-
-latex_elements = {
-    # The paper size ('letterpaper' or 'a4paper').
-    #
-    # 'papersize': 'letterpaper',
-    # The font size ('10pt', '11pt' or '12pt').
-    #
-    # 'pointsize': '10pt',
-    # Additional stuff for the LaTeX preamble.
-    #
-    # 'preamble': '',
-    # Latex figure (float) alignment
-    #
-    # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title, author, documentclass
-# [howto, manual, or own class]).
-latex_documents = [
-    (
-        master_doc,
-        "wostools.tex",
-        u"Python WoS tools Documentation",
-        u"Core of Science",
-        "manual",
-    )
-]
-
-
-# -- Options for manual page output ------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [(master_doc, "wostools", u"Python WoS tools Documentation", [author], 1)]
-
-
-# -- Options for Texinfo output ----------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-#  dir menu entry, description, category)
-texinfo_documents = [
-    (
-        master_doc,
-        "wostools",
-        u"Python WoS tools Documentation",
-        author,
-        "wostools",
-        "One line description of project.",
-        "Miscellaneous",
-    )
-]
diff --git a/docs/contributing.rst b/docs/contributing.rst
deleted file mode 100644
index e582053..0000000
--- a/docs/contributing.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. include:: ../CONTRIBUTING.rst
diff --git a/docs/history.rst b/docs/history.rst
deleted file mode 100644
index 2506499..0000000
--- a/docs/history.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. include:: ../HISTORY.rst
diff --git a/docs/index.rst b/docs/index.rst
deleted file mode 100644
index a157044..0000000
--- a/docs/index.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-Welcome to Python WoS tools's documentation!
-============================================
-
-.. toctree::
-   :maxdepth: 2
-   :caption: Contents:
-
-   readme
-   installation
-   usage
-   modules
-   contributing
-   authors
-   history
-
-Indices and tables
-==================
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/docs/installation.rst b/docs/installation.rst
deleted file mode 100644
index 64f416c..0000000
--- a/docs/installation.rst
+++ /dev/null
@@ -1,51 +0,0 @@
-.. highlight:: shell
-
-============
-Installation
-============
-
-
-Stable release
---------------
-
-To install Python WoS tools, run this command in your terminal:
-
-.. code-block:: console
-
-    $ pip install wostools
-
-This is the preferred method to install Python WoS tools, as it will always install the most recent stable release.
-
-If you don't have `pip`_ installed, this `Python installation guide`_ can guide
-you through the process.
-
-.. _pip: https://pip.pypa.io
-.. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/
-
-
-From sources
-------------
-
-The sources for Python WoS tools can be downloaded from the `Github repo`_.
-
-You can either clone the public repository:
-
-.. code-block:: console
-
-    $ git clone git://github.com/coreofscience/python-wostools
-
-Or download the `tarball`_:
-
-.. code-block:: console
-
-    $ curl  -OL https://github.com/coreofscience/python-wostools/tarball/master
-
-Once you have a copy of the source, you can install it with:
-
-.. code-block:: console
-
-    $ python setup.py install
-
-
-.. _Github repo: https://github.com/coreofscience/python-wostools
-.. _tarball: https://github.com/coreofscience/python-wostools/tarball/master
diff --git a/docs/make.bat b/docs/make.bat
deleted file mode 100644
index d9d9628..0000000
--- a/docs/make.bat
+++ /dev/null
@@ -1,36 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=python -msphinx
-)
-set SOURCEDIR=.
-set BUILDDIR=_build
-set SPHINXPROJ=wostools
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-	echo.
-	echo.The Sphinx module was not found. Make sure you have Sphinx installed,
-	echo.then set the SPHINXBUILD environment variable to point to the full
-	echo.path of the 'sphinx-build' executable. Alternatively you may add the
-	echo.Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
-	exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
-
-:end
-popd
diff --git a/docs/modules.rst b/docs/modules.rst
deleted file mode 100644
index e69dabc..0000000
--- a/docs/modules.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-wostools
-========
-
-.. toctree::
-   :maxdepth: 4
-
-   wostools
diff --git a/docs/readme.rst b/docs/readme.rst
deleted file mode 100644
index 72a3355..0000000
--- a/docs/readme.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. include:: ../README.rst
diff --git a/docs/usage.rst b/docs/usage.rst
deleted file mode 100644
index 2a60eb4..0000000
--- a/docs/usage.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-=====
-Usage
-=====
-
-To use Python WoS tools in a project::
-
-    import wostools
diff --git a/docs/wostools.rst b/docs/wostools.rst
deleted file mode 100644
index 29a30be..0000000
--- a/docs/wostools.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-wostools package
-================
-
-Submodules
-----------
-
-wostools.cli module
--------------------
-
-.. automodule:: wostools.cli
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-wostools.fields module
-----------------------
-
-.. automodule:: wostools.fields
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-wostools.wostools module
-------------------------
-
-.. automodule:: wostools.wostools
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: wostools
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 0e244dd..a77f5e9 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -1,7 +1,10 @@
-bumpversion==0.6.0
 flake8==3.8.3
 coverage==5.2.1
 Sphinx==3.1.2
 
 pytest==6.0.1
 pytest-runner==5.2
+pytest-watch==4.2.0
+pytest-bdd==3.4.0
+
+dataclasses==0.7; python_version < "3.7"
diff --git a/setup.cfg b/setup.cfg
index ae3380f..aefb114 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,29 +1,15 @@
-[bumpversion]
-current_version = 1.1.0
-commit = True
-tag = True
-
-[bumpversion:file:setup.py]
-search = version='{current_version}'
-replace = {new_version}
-
-[bumpversion:file:.zenodo.json]
-search = v{current_version}
-replace = v{new_version}
-
-[bumpversion:file:wostools/__init__.py]
-search = __version__ = '{current_version}'
-replace = {new_version}
-
 [bdist_wheel]
 universal = 1
 
 [flake8]
 exclude = docs
+ignore = E203, E266, E501, W503
+max-line-length = 89
+max-complexity = 18
+select = B,C,E,F,W,T4,B9
 
 [aliases]
 test = pytest
 
 [tool:pytest]
 collect_ignore = ['setup.py']
-
diff --git a/setup.py b/setup.py
index 178c384..9d07288 100644
--- a/setup.py
+++ b/setup.py
@@ -2,19 +2,20 @@
 
 """The setup script."""
 
-from setuptools import setup, find_packages
+from setuptools import find_packages, setup
 
-with open("README.rst") as readme_file:
+with open("README.md") as readme_file:
     readme = readme_file.read()
 
-with open("HISTORY.rst") as history_file:
+with open("HISTORY.md") as history_file:
     history = history_file.read()
 
 requirements = ["Click>=7.0"]
 
 setup_requirements = ["pytest-runner"]
 
-test_requirements = ["pytest"]
+
+test_requirements = ["pytest", "pytest-bdd", 'dataclasses; python_version<"3.7"']
 
 setup(
     author="Core of Science",
@@ -36,11 +37,12 @@
     include_package_data=True,
     keywords="wostools",
     name="wostools",
-    packages=find_packages(include=["wostools"]),
+    packages=find_packages(include=["wostools", "wostools.*"]),
     setup_requires=setup_requirements,
     test_suite="tests",
     tests_require=test_requirements,
     url="https://github.com/coreofscience/python-wostools",
-    version="1.1.0",
+    version="2.0.0",
     zip_safe=False,
+    long_description_content_type="text/markdown",
 )
diff --git a/tests/conftest.py b/tests/conftest.py
index 7f64296..64a4386 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,7 +2,7 @@
 Configuration file for python-wostools tests.
 """
 
-from wostools import Article, CollectionLazy
+from wostools import Article, LazyCollection, CachedCollection
 
 import pytest
 import io
@@ -91,7 +91,7 @@ def article():
         "ER"
     )
     article_text = file.read()
-    return Article(article_text)
+    return Article.from_isi_text(article_text)
 
 
 @pytest.fixture
@@ -104,11 +104,11 @@ def filename_many_documents():
     return "docs/examples/bit-pattern-savedrecs.txt"
 
 
-@pytest.fixture
-def collection_single_document(filename_single_document):
-    return CollectionLazy.from_filenames(filename_single_document)
+@pytest.fixture(params=[CachedCollection, LazyCollection])
+def collection_single_document(request, filename_single_document):
+    return request.param.from_filenames(filename_single_document)
 
 
-@pytest.fixture
-def collection_many_documents(filename_many_documents):
-    return CollectionLazy.from_filenames(filename_many_documents)
+@pytest.fixture(params=[CachedCollection, LazyCollection])
+def collection_many_documents(request, filename_many_documents):
+    return request.param.from_filenames(filename_many_documents)
diff --git a/tests/features/article.feature b/tests/features/article.feature
new file mode 100644
index 0000000..d156351
--- /dev/null
+++ b/tests/features/article.feature
@@ -0,0 +1,57 @@
+Feature: Article manager class
+
+   Allows the user to parse, merge and dump articles
+
+   Scenario: Computing an article's label
+      Given an article with authors, year and journal
+      When I compute the label for the article
+      Then the label is a proper string
+
+   Scenario Outline: Fail to compute a label
+      Given a complete article missing <field>
+      When I try to compute the label for the article
+      Then There's an error computing the label
+
+      Examples:
+         | field   |
+         | year    |
+         | authors |
+         | journal |
+
+   Scenario: Merge two articles
+      Given a complete article
+      And theres a similar article that includes a doi
+
+      When I merge the two articles
+      And I try to compute the label for the article
+
+      Then the article's doi matches the other
+      And there's no error computing the label
+      And the label contains the doi of the other
+
+   Scenario: Parse article from isi text
+      Given some valid isi text
+      When I create an article from the isi text
+      Then the values in the isi text are part of the article
+      And the isi text itself is part of the articles sources
+
+   Scenario: Parse article from invalid isi text
+      Given some isi text with invalid lines
+      When I create an article from the isi text
+      Then an invalid line error is risen
+
+   Scenario: Turn an article to dict
+      Given a reference article
+      When I turn the article into a dict
+      Then I get a reference dict of values
+
+   Scenario: Parse article from citation
+      Given some valid isi citation
+      When I create an article from the citation
+      Then the values of the citation are part of the article
+      And the citation itself is part of the articles sources
+
+   Scenario: Parse article from an invalid citation
+      Given some invalid isi citation
+      When I create an article from the citation
+      Then an invalid reference error is risen
\ No newline at end of file
diff --git a/tests/features/cached.feature b/tests/features/cached.feature
new file mode 100644
index 0000000..7aebb9a
--- /dev/null
+++ b/tests/features/cached.feature
@@ -0,0 +1,50 @@
+Feature: cached collection
+
+   We want this kind of collection to avoid duplication at all costs
+
+   Scenario: preheat cache
+
+      Given some valid isi text
+      When I create a collection from that text
+      Then the collection's cache is preheated
+
+   Scenario: collection list articles and references
+
+      Given a valid collection
+      When I iterate over the collection
+      Then all articles and references are present
+
+   Scenario: list authors
+
+      Given a valid collection
+      When I iterate over the collection authors
+      Then all authors are included
+      And the author list include duplicates
+
+   Scenario: list coauthors
+
+      Given a valid collection
+      When I iterate over the collection coauthors
+      Then all coauthor pairs are included
+      And the coauthor list include duplicates
+
+   Scenario: duplicated articles are removed
+
+      Given some valid isi text
+      When I create a collection from that text
+      And I create a collection from that text
+      Then both collections have the same number of articles
+
+   Scenario: citation pairs
+
+      Given a valid collection
+      When I list the collection's citation pairs
+      Then all citation pairs are included
+
+   Scenario: citation pairs include complete info from references
+
+      Given some valid isi text
+      And a diferent isi record that references the former
+      When I create a collection from that text
+      And I list the collection's citation pairs
+      Then the citation always include all the available data
\ No newline at end of file
diff --git a/tests/test_article.py b/tests/test_article.py
new file mode 100644
index 0000000..ec5a7ff
--- /dev/null
+++ b/tests/test_article.py
@@ -0,0 +1,353 @@
+from copy import deepcopy
+from dataclasses import dataclass
+from typing import Dict, Optional
+
+from pytest import fixture
+from pytest_bdd import given, parsers, scenarios, then, when
+
+from wostools.article import Article
+from wostools.exceptions import InvalidIsiLine, InvalidReference
+
+from wostools._testutils import Context
+
+ISI_TEMPLATE = """
+PT J
+AU {author}
+   {second_author}
+AF {author}
+   {second_author}
+TI {title}
+SO JOURNAL OF MAGNETISM AND MAGNETIC MATERIALS
+LA English
+DT Article
+DE Electrodeposition; Structural control; Nanodot array; Bit-patterned
+   media; CoPt alloy
+ID BIT-PATTERNED MEDIA; ELECTRON-BEAM LITHOGRAPHY; RECORDING MEDIA;
+   MAGNETIC MEDIA; DENSITY; FILMS; ANISOTROPY; STORAGE
+AB CoPt nanodot arrays were fabricated by combining electrodeposition and electron beam lithography (EBL) for the use of bit-patterned media (BPM). To achieve precise control of deposition uniformity and coercivity of the CoPt nanodot arrays, their crystal structure and magnetic properties were controlled by controlling the diffusion state of metal ions from the initial deposition stage with the application of bath agitation. Following bath agitation, the composition gradient of the CoPt alloy with thickness was mitigated to have a near-ideal alloy composition of Co:Pt =80:20, which induces epitaxial-like growth from Ru substrate, thus resulting in the improvement of the crystal orientation of the hcp (002) structure from its initial deposition stages. Furthermore, the cross-sectional transmission electron microscope (TEM) analysis of the nanodots deposited with bath agitation showed CoPt growth along its c-axis oriented in the perpendicular direction, having uniform lattice fringes on the hcp (002) plane from the Ru underlayer interface, which is a significant factor to induce perpendicular magnetic anisotropy. Magnetic characterization of the CoPt nanodot arrays showed increase in the perpendicular coercivity and squareness of the hysteresis loops from 2.0 kOe and 0.64 (without agitation) to 4.0 kOe and 0.87 with bath agitation. Based on the detailed characterization of nanodot arrays, the precise crystal structure control of the nanodot arrays with ultra-high recording density by electrochemical process was successfully demonstrated.
+C1 [Wodarz, Siggi; Homma, Takayuki] Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan.
+   [Hasegawa, Takashi; Ishio, Shunji] Akita Univ, Dept Mat Sci, Akita 0108502, Japan.
+RP Homma, T (reprint author), Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan.
+EM t.homma@waseda.jp
+OI Hasegawa, Takashi/0000-0002-8178-4980
+FU JSPS KAKENHI Grant [25249104]
+FX This work was supported in part by JSPS KAKENHI Grant Number 25249104.
+CR Albrecht TR, 2013, IEEE T MAGN, V49, P773, DOI 10.1109/TMAG.2012.2227303
+   BUSCHOW KHJ, 1983, J MAGN MAGN MATER, V38, P1, DOI 10.1016/0304-8853(83)90097-5
+   Gapin AI, 2006, J APPL PHYS, V99, DOI 10.1063/1.2163289
+   Homma Takayuki, 2015, ECS Transactions, V64, P1, DOI 10.1149/06431.0001ecst
+   Kryder MH, 2008, P IEEE, V96, P1810, DOI 10.1109/JPROC.2008.2004315
+   Kubo T, 2005, J APPL PHYS, V97, DOI 10.1063/1.1855572
+   Lodder JC, 2004, J MAGN MAGN MATER, V272, P1692, DOI 10.1016/j.jmmm.2003.12.259
+   Mitsuzuka K, 2007, IEEE T MAGN, V43, P2160, DOI 10.1109/TMAG.2007.893129
+   Ouchi T, 2010, ELECTROCHIM ACTA, V55, P8081, DOI 10.1016/j.electacta.2010.02.073
+   Pattanaik G, 2006, J APPL PHYS, V99, DOI 10.1063/1.2150805
+   Pattanaik G, 2007, ELECTROCHIM ACTA, V52, P2755, DOI 10.1016/j.electacta.2006.07.062
+   Piramanayagam SN, 2009, J MAGN MAGN MATER, V321, P485, DOI 10.1016/j.jmmm.2008.05.007
+   Ross CA, 2008, MRS BULL, V33, P838, DOI 10.1557/mrs2008.179
+   Shiroishi Y, 2009, IEEE T MAGN, V45, P3816, DOI 10.1109/TMAG.2009.2024879
+   Sirtori V, 2011, ACS APPL MATER INTER, V3, P1800, DOI 10.1021/am200267u
+   Sohn JS, 2009, NANOTECHNOLOGY, V20, DOI 10.1088/0957-4484/20/2/025302
+   Sun SH, 2000, SCIENCE, V287, P1989, DOI 10.1126/science.287.5460.1989
+   Terris BD, 2007, MICROSYST TECHNOL, V13, P189, DOI 10.1007/s00542-006-0144-9
+   Wang JP, 2008, P IEEE, V96, P1847, DOI 10.1109/JPROC.2008.2004318
+   Weller D, 1999, IEEE T MAGN, V35, P4423, DOI 10.1109/20.809134
+   Weller D, 2000, IEEE T MAGN, V36, P10, DOI 10.1109/20.824418
+   Wodarz S, 2016, ELECTROCHIM ACTA, V197, P330, DOI 10.1016/j.electacta.2015.11.136
+   Xu X, 2012, J ELECTROCHEM SOC, V159, pD240, DOI 10.1149/2.090204jes
+   Yang X, 2007, J VAC SCI TECHNOL B, V25, P2202, DOI 10.1116/1.2798711
+   Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r
+   Yasui N, 2003, APPL PHYS LETT, V83, P3347, DOI 10.1063/1.1622787
+   Yua H., 2009, J APPL PHYS, V105
+   Zhu JG, 2008, IEEE T MAGN, V44, P125, DOI 10.1109/TMAG.2007.911031
+NR 28
+TC 0
+Z9 0
+U1 21
+U2 21
+PU ELSEVIER SCIENCE BV
+PI AMSTERDAM
+PA PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS
+SN 0304-8853
+EI 1873-4766
+J9 {journal}
+JI J. Magn. Magn. Mater.
+PD MAY 15
+PY {year}
+VL {volume}
+BP {page}
+EP 58
+DI {doi}
+PG 7
+WC Materials Science, Multidisciplinary; Physics, Condensed Matter
+SC Materials Science; Physics
+GA EP2GP
+UT WOS:000397201600008
+ER
+""".strip()
+
+
+@dataclass
+class ArticleWrapper:
+    article: Optional[Article]
+    label: Optional[str] = None
+
+
+scenarios("features/article.feature")
+
+
+@fixture
+def attributes():
+    return {
+        "title": "some title",
+        "author": "John Doe",
+        "second_author": "Jane Doe",
+        "authors": ["John Doe", "Jane Doe"],
+        "year": 1994,
+        "page": "1330-5",
+        "journal": "J MAGN MAGN MATER",
+        "volume": "1000",
+        "doi": "10.1016/j.jmmm.2017.01.061",
+    }
+
+
+@fixture
+def citation_attributes():
+    # Kryder MH, 2008, P IEEE, V96, P1810, DOI 10.1109/JPROC.2008.2004315
+    return {
+        "author": "L Antuan",
+        "year": "2008",
+        "journal": "P IEEE",
+        "volume": "69",
+        "page": "1810",
+        "doi": "DOI 10.1109/JPROC.2008.2004315",
+    }
+
+
+@fixture
+def label_context() -> Context[str]:
+    return Context()
+
+
+@fixture
+def parse_context() -> Context[Article]:
+    return Context()
+
+
+@fixture
+def citation_parse_context() -> Context[Article]:
+    return Context()
+
+
+@fixture
+def to_dict_context() -> Context[Dict]:
+    return Context()
+
+
+@given("a complete article missing <field>", target_fixture="wrapper")
+def article_missing(field: str):
+    article = Article(
+        title=None, authors=["L, Robertson"], year=1999, journal="Science"
+    )
+    setattr(article, field, None)
+    return ArticleWrapper(article=article)
+
+
+@given("a complete article", target_fixture="wrapper")
+@given("an article with authors, year and journal", target_fixture="wrapper")
+def article_with_authors_year_and_journal():
+    return ArticleWrapper(
+        article=Article(
+            title=None, authors=["L, Robertson"], year=1999, journal="Science"
+        ),
+        label="L Robertson, 1999, Science",
+    )
+
+
+@given("theres a similar article that includes a doi", target_fixture="other")
+def similar_article_with_doi(wrapper: ArticleWrapper):
+    assert wrapper.article, "missing article to copy"
+    article = deepcopy(wrapper.article)
+    article.doi = "somedoi/123"
+    if wrapper.label:
+        return ArticleWrapper(
+            article=article, label=", ".join([wrapper.label, article.doi]),
+        )
+    return ArticleWrapper(article=article)
+
+
+@given("some valid isi text", target_fixture="isi_text")
+def valid_isi_text(attributes):
+    return ISI_TEMPLATE.format(**attributes)
+
+
+@given("some isi text with invalid lines", target_fixture="isi_text")
+def invalid_lines_in_isi_text(attributes):
+    return """
+    INVALIDKEY This value is going to die
+    """.strip()
+
+
+@given("some invalid isi citation", target_fixture="isi_citation")
+def invalid_isi_citation():
+    return "Da Lambert, Hello"
+
+
+@given("some valid isi citation", target_fixture="isi_citation")
+def valid_isi_citation(citation_attributes):
+    return "{author}, {year}, {journal}, V{volume}, P{page}, DOI {doi}".format(
+        **citation_attributes
+    )
+
+
+@given("a reference article", target_fixture="wrapper")
+def reference_article(attributes):
+    return ArticleWrapper(
+        article=Article(
+            title=attributes.get("title"),
+            authors=attributes.get("authors"),
+            year=attributes.get("year"),
+            journal=attributes.get("journal"),
+            volume=attributes.get("volume"),
+            page=attributes.get("page"),
+            doi=attributes.get("doi"),
+            references=attributes.get("references"),
+            keywords=attributes.get("keywords"),
+            sources=attributes.get("sources"),
+            extra=attributes.get("extra"),
+        )
+    )
+
+
+@when("I merge the two articles")
+def merge_articles(wrapper: ArticleWrapper, other: ArticleWrapper):
+    assert wrapper.article, "Missing article for this step"
+    assert other.article, "Missing other article for this step"
+    wrapper.article = wrapper.article.merge(other.article)
+    wrapper.label = None
+
+
+@when("I try to compute the label for the article")
+@when("I compute the label for the article")
+def try_to_compute_label(label_context: Context[str], wrapper: ArticleWrapper):
+    assert wrapper.article, "Missing article for this step"
+    with label_context.capture():
+        label_context.push(wrapper.article.label)
+
+
+@when("I turn the article into a dict")
+def try_to_go_to_dict(wrapper: ArticleWrapper, to_dict_context: Context[Dict]):
+    assert wrapper.article, "Missing article for this step"
+    with to_dict_context.capture():
+        to_dict_context.push(wrapper.article.to_dict())
+
+
+@when("I create an article from the isi text")
+def create_article_from_isi_text(isi_text, parse_context: Context[Article]):
+    assert isi_text, "define some isi text to parse"
+    with parse_context.capture():
+        parse_context.push(Article.from_isi_text(isi_text))
+
+
+@when("I create an article from the citation")
+def create_article_from_citation(
+    isi_citation, citation_parse_context: Context[Article]
+):
+    assert isi_citation, "define some isi citation to parse"
+    with citation_parse_context.capture():
+        citation_parse_context.push(Article.from_isi_citation(isi_citation))
+
+
+@then("the label is a proper string")
+def then_label_is_a_proper_string(label_context: Context[str], wrapper: ArticleWrapper):
+    with label_context.assert_data() as label:
+        assert label == wrapper.label
+
+
+@then("the label contains the doi of the other")
+def label_matches_other(label_context: Context[str], other: ArticleWrapper):
+    with label_context.assert_data() as label:
+        assert (
+            other.article and other.article.doi
+        ), "There's no doi in the other article"
+        assert other.article.doi in label
+
+
+@then("There's no error computing the label")
+@then("there's no error computing the label")
+def no_error_computing_label(label_context: Context[str]):
+    with label_context.assert_data():
+        pass
+
+
+@then("There's an error computing the label")
+def error_computing_label(label_context: Context[str]):
+    with label_context.assert_error() as error:
+        assert isinstance(error, ValueError)
+
+
+@then(parsers.parse("the article matches the {field:w} of the other"))
+@then(parsers.parse("the article's {field:w} matches the other"))
+def contais_others_field(wrapper: ArticleWrapper, other: ArticleWrapper, field: str):
+    assert wrapper.article
+    assert other.article
+    assert getattr(wrapper.article, field) == getattr(other.article, field)
+
+
+@then("the values in the isi text are part of the article")
+def values_make_it_to_the_article(parse_context: Context[Article], attributes: dict):
+    with parse_context.assert_data() as article:
+        for field in [
+            "title",
+            "authors",
+            "year",
+            "page",
+            "journal",
+            "volume",
+            "doi",
+        ]:
+            assert getattr(article, field)
+            assert getattr(article, field) == attributes[field]
+
+
+@then("the values of the citation are part of the article")
+def citation_values_make_it_to_article(
+    citation_parse_context: Context[Article], citation_attributes: dict
+):
+    with citation_parse_context.assert_data() as article:
+        assert article.authors == [citation_attributes["author"]]
+        for field in ["year", "journal", "page", "volume", "doi"]:
+            assert str(getattr(article, field)) == citation_attributes[field]
+
+
+@then("the isi text itself is part of the articles sources")
+def isi_text_in_sources(parse_context: Context[Article], isi_text: str):
+    assert parse_context.data, "no article parsed yet"
+    assert isi_text in parse_context.data.sources
+
+
+@then("the citation itself is part of the articles sources")
+def citation_in_sources(citation_parse_context: Context[Article], isi_citation: str):
+    with citation_parse_context.assert_data() as article:
+        assert isi_citation in article.sources
+
+
+@then("an invalid line error is risen")
+def invialid_isi_line_risen(parse_context: Context[Article]):
+    with parse_context.assert_error() as error:
+        assert isinstance(error, InvalidIsiLine)
+
+
+@then("an invalid reference error is risen")
+def invialid_reference_risen(citation_parse_context: Context[Article]):
+    with citation_parse_context.assert_error() as error:
+        assert isinstance(error, InvalidReference)
+
+
+@then("I get a reference dict of values")
+def get_a_reference_dict(to_dict_context: Context[Dict], attributes: Dict):
+    with to_dict_context.assert_data() as article_dict:
+        assert any(article_dict.values()), "your dict has no values son"
+        for key, value in article_dict.items():
+            assert not value or key in attributes
+            assert not value or value == attributes[key]
diff --git a/tests/test_collection_cached.py b/tests/test_collection_cached.py
new file mode 100644
index 0000000..0ef0530
--- /dev/null
+++ b/tests/test_collection_cached.py
@@ -0,0 +1,390 @@
+import io
+from typing import Collection, List, Dict, Tuple
+
+from pytest import fixture
+from pytest_bdd import scenarios, given, when, then
+
+from wostools import CachedCollection, Article
+from wostools._testutils import Context
+
+ISI_TEXT = """
+FN Thomson Reuters Web of Science™
+VR 1.0
+PT J
+AU Sun, ZW
+   Russell, TP
+AF Sun, Zhiwei
+   Russell, Thomas P.
+TI In situ grazing incidence small-angle X-ray scattering study of solvent
+   vapor annealing in lamellae-forming block copolymer thin films:
+   Trade-off of defects in deswelling
+SO JOURNAL OF POLYMER SCIENCE PART B-POLYMER PHYSICS
+LA English
+DT Article
+DE annealing; block copolymers; self-assembly; thin films; X-ray
+ID BIT-PATTERNED MEDIA; LITHOGRAPHY; GRAPHENE; ARRAYS; ORIENTATION;
+   NANOWIRES; PARALLEL; BEHAVIOR; INPLANE; DENSITY
+AB Solvent vapor annealing (SVA) is one route to prepare block copolymer (BCP) thin films with long-range lateral ordering. The lattice defects in the spin-coated BCP thin film can be effectively and rapidly reduced using SVA. The solvent evaporation after annealing was shown to have a significant impact on the in-plane ordering of BCP microdomains. However, the effect of solvent evaporation on the out-of-plane defects in BCPs has not been considered. Using grazing-incidence x-ray scattering, the morphology evolution of lamellae-forming poly(2-vinlypyridine)-b-polystyrene-b-poly(2vinylpyridine) triblock copolymers, having lamellar microdomains oriented normal to substrate surface during SVA, was studied in this work. A micelle to lamellae transformation was observed during solvent uptake. The influence of solvent swelling ratio and solvent removal rate on both the in-plane and out-of-plane defect density was studied. It shows that there is a trade-off between the in-plane and out-of-plane defect densities during solvent evaporation. (c) 2017 Wiley Periodicals, Inc. J. Polym. Sci., Part B: Polym. Phys. 2017, 55, 980-989
+C1 [Sun, Zhiwei; Russell, Thomas P.] Univ Massachusetts Amherst, Dept Polymer Sci & Engn, Amherst, MA 01003 USA.
+   [Russell, Thomas P.] Lawrence Berkeley Natl Lab, Div Mat Sci, Berkeley, CA 94720 USA.
+   [Russell, Thomas P.] Beijing Univ Chem Technol, Beijing Adv Innovat Ctr Soft Matter Sci & Engn, Beijing, Peoples R China.
+RP Russell, TP (reprint author), Univ Massachusetts Amherst, Dept Polymer Sci & Engn, Amherst, MA 01003 USA.; Russell, TP (reprint author), Lawrence Berkeley Natl Lab, Div Mat Sci, Berkeley, CA 94720 USA.; Russell, TP (reprint author), Beijing Univ Chem Technol, Beijing Adv Innovat Ctr Soft Matter Sci & Engn, Beijing, Peoples R China.
+EM russell@mail.pse.umass.edu
+FU U.S. Department of Energy BES [BES-DE-FG02-96ER45612]; Director of the
+   Office of Science, Office of Basic Energy Sciences, of the U.S.
+   Department of Energy [DE-AC02-05CH11231]; Office of Science, Office of
+   Basic Energy Sciences, of the U.S. Department of Energy
+   [DE-AC02-05CH11231]
+FX The authors acknowledge the facility support in Advanced Light Source
+   and Molecular Foundry in Lawrence Berkeley National Laboratory. This
+   work was supported by the U.S. Department of Energy BES under contract
+   BES-DE-FG02-96ER45612. The GISAXS characterization in beamline 7.3.3 of
+   the Advanced Light Source is supported by the Director of the Office of
+   Science, Office of Basic Energy Sciences, of the U.S. Department of
+   Energy under contract no. DE-AC02-05CH11231. The SEM and AFM
+   characterization in the Molecular Foundry was supported by the Office of
+   Science, Office of Basic Energy Sciences, of the U.S. Department of
+   Energy under contract no. DE-AC02-05CH11231.
+CR Bai W, 2015, MACROMOLECULES, V48, P8574, DOI 10.1021/acs.macromol.5b02174
+   Bosworth JK, 2011, MACROMOLECULES, V44, P9196, DOI 10.1021/ma201967a
+   Bosworth JK, 2010, J PHOTOPOLYM SCI TEC, V23, P145, DOI 10.2494/photopolymer.23.145
+   Chai J, 2008, ACS NANO, V2, P489, DOI 10.1021/nn700341s
+   Chai J, 2007, NAT NANOTECHNOL, V2, P500, DOI 10.1038/nnano.2007.227
+   Choi S, 2012, SOFT MATTER, V8, P3463, DOI 10.1039/c2sm07297a
+   Di ZY, 2012, MACROMOLECULES, V45, P5185, DOI 10.1021/ma3004136
+   Farrell RA, 2012, NANOSCALE, V4, P3228, DOI 10.1039/c2nr00018k
+   Gowd E. B., 2010, IOP C SER MAT SCI EN, V14
+   Gu XD, 2014, ADV MATER, V26, P273, DOI 10.1002/adma.201302562
+   Gunkel I, 2016, J POLYM SCI POL PHYS, V54, P331, DOI 10.1002/polb.23933
+   Ilavsky J, 2012, J APPL CRYSTALLOGR, V45, P324, DOI 10.1107/S0021889812004037
+   Jeong SJ, 2010, NANO LETT, V10, P3500, DOI 10.1021/nl101637f
+   Ji S, 2008, MACROMOLECULES, V41, P9098, DOI 10.1021/ma801861h
+   Khaira GS, 2014, ACS MACRO LETT, V3, P747, DOI 10.1021/mz5002349
+   Kikitsu A, 2013, IEEE T MAGN, V49, P693, DOI 10.1109/TMAG.2012.2226566
+   Kim BH, 2011, ADV MATER, V23, P5618, DOI 10.1002/adma.201103650
+   Kim BH, 2010, ACS NANO, V4, P5464, DOI 10.1021/nn101491g
+   Kurihara M, 2013, JPN J APPL PHYS, V52, DOI 10.7567/JJAP.52.086201
+   Liu GX, 2012, ACS NANO, V6, P6786, DOI 10.1021/nn301515a
+   Mahadevapuram N, 2016, J POLYM SCI POL PHYS, V54, P339, DOI 10.1002/polb.23937
+   Paik MY, 2010, MACROMOLECULES, V43, P4253, DOI 10.1021/ma902646t
+   Sinturel C, 2014, ACS APPL MATER INTER, V6, P12146, DOI 10.1021/am504086x
+   Sun ZW, 2015, ADV MATER, V27, P4364, DOI 10.1002/adma.201501585
+   Vu T, 2011, MACROMOLECULES, V44, P6121, DOI 10.1021/ma2009222
+   Thurn-Albrecht T, 2000, SCIENCE, V290, P2126, DOI 10.1126/science.290.5499.2126
+   Wan L., 2012, MOEMS, V11, P31405
+   Wang JY, 2008, LANGMUIR, V24, P3545, DOI 10.1021/la703559q
+   Xiao S., 2013, MOEMS, V12
+   Xiao SG, 2014, ACS NANO, V8, P11854, DOI 10.1021/nn505630t
+   Xiao SG, 2014, J POLYM SCI POL PHYS, V52, P361, DOI 10.1002/polb.23433
+   Yamamoto R, 2014, IEEE T MAGN, V50, DOI 10.1109/TMAG.2013.2284474
+   Yang X., 2014, MOEMS, V13
+   Yang X., 2013, J MATER RES, V2013, P1
+   Yang XM, 2014, NANOTECHNOLOGY, V25, DOI 10.1088/0957-4484/25/39/395301
+   Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r
+   Zhang JQ, 2014, MACROMOLECULES, V47, P5711, DOI 10.1021/ma500633b
+NR 37
+TC 0
+Z9 0
+U1 1
+U2 1
+PU WILEY
+PI HOBOKEN
+PA 111 RIVER ST, HOBOKEN 07030-5774, NJ USA
+SN 0887-6266
+EI 1099-0488
+J9 J POLYM SCI POL PHYS
+JI J. Polym. Sci. Pt. B-Polym. Phys.
+PD JUL 1
+PY 2017
+VL 55
+IS 13
+BP 980
+EP 989
+DI 10.1002/polb.24346
+PG 10
+WC Polymer Science
+SC Polymer Science
+GA EU7BQ
+UT WOS:000401190100002
+ER
+
+EF
+""".strip()
+
+ISI_TEXT_DIFFERENT_RECORD = """
+FN Thomson Reuters Web of Science™
+VR 1.0
+PT J
+AU Bosworth, JK
+   Dobisz, EA
+   Hellwig, O
+   Ruiz, R
+AF Bosworth, Joan K.
+   Dobisz, Elizabeth A.
+   Hellwig, Olav
+   Ruiz, Ricardo
+TI Impact of Out-of-Plane Translational Order in Block Copolymer
+   Lithography
+SO MACROMOLECULES
+LA English
+DT Article
+ID BIT-PATTERNED MEDIA; DENSITY MULTIPLICATION; TERNARY BLENDS; THIN-FILMS;
+   DIMENSIONS; ROUGHNESS; DOMAINS; SHAPES
+AB In block copolymer lithography, subtle distortions in the self-assembled domains, such as tilting or bending, have a strong impact on the quality of the lithographic features upon pattern transfer. We compared the feature size distribution observed at the top-surface of block copolymer thin films with the size distribution that the self-assembled structures project at the substrate interface, i.e., the lithographic image. We performed the comparison for films of perpendicularly oriented cylindrical block copolymer domains with various degrees of lateral order. We found that the size distribution of the projected image does not mimic the well-known Gaussian distribution observed at the top surface. Instead, the lithographic features display a skewed distribution with a long tail toward smaller feature dimensions, a shift of the median and a reduced number of transferred features. The distortions are more pronounced for films with shorter correlation lengths. We propose a simplified model that explains the observed shifts in the size distribution of the projected image by considering the tilting that cylinders undergo in the vicinity of dislocations. The presence of defects disrupting the in-plane orientational order riot only impacts the size distribution of the self-assembled features, but also induces nearby cylinder tilting and some general loss of out-of-plane translational order which, upon pattern transfer, is responsible for the observed distortions on the feature size distribution,
+C1 [Bosworth, Joan K.; Dobisz, Elizabeth A.; Hellwig, Olav; Ruiz, Ricardo] Hitachi Global Storage Technol, San Jose Res Ctr, San Jose, CA 95135 USA.
+RP Ruiz, R (reprint author), Hitachi Global Storage Technol, San Jose Res Ctr, 3403 Yerba Buena Rd, San Jose, CA 95135 USA.
+EM ricardo.ruiz@hitachigst.com
+OI Ruiz, Ricardo/0000-0002-1698-4281
+CR ALBRECHT T, 2009, NANOSCALE MAGNETIC M
+   BATES FS, 1990, ANNU REV PHYS CHEM, V41, P525, DOI 10.1146/annurev.pc.41.100190.002521
+   Black CT, 2007, IBM J RES DEV, V51, P605
+   Cheng JY, 2008, ADV MATER, V20, P3155, DOI 10.1002/adma.200800826
+   Cheng JY, 2010, ACS NANO, V4, P4815, DOI 10.1021/nn100686v
+   Detcheverry FA, 2010, MACROMOLECULES, V43, P3446, DOI 10.1021/ma902332h
+   Edwards EW, 2007, MACROMOLECULES, V40, P90, DOI 10.1021/ma0607564
+   Guarini KW, 2002, ADV MATER, V14, P1290, DOI 10.1002/1521-4095(20020916)14:18<1290::AID-ADMA1290>3.0.CO;2-N
+   Hammond MR, 2003, MACROMOLECULES, V36, P8712, DOI 10.1021/ma026001o
+   Harrison C, 2004, EUROPHYS LETT, V67, P800, DOI 10.1209/epl/i2004-10126-5
+   Harrison C, 2002, PHYS REV E, V66, DOI 10.1103/PhysRevE.66.011706
+   Hellwig O, 2010, APPL PHYS LETT, V96, DOI 10.1063/1.3293301
+   HO CS, 1983, IEEE T PATTERN ANAL, V5, P593
+   *INTRS, LITH
+   Ji SX, 2011, MACROMOLECULES, V44, P4291, DOI 10.1021/ma2005734
+   Kleman M., 2003, SOFT MATTER PHYS INT
+   LIU CC, 2010, J VAC SCI TECHNOL B, V34
+   Liu G, 2010, J VAC SCI TECHNOL B, V28
+   Nagpal U, 2011, ACS NANO, V5, P5673, DOI 10.1021/nn201335v
+   Ruiz R, 2008, PHYS REV B, V77, DOI 10.1103/PhysRevB.77.054204
+   Ruiz R, 2008, SCIENCE, V321, P936, DOI 10.1126/science.1157626
+   Segalman RA, 2005, MAT SCI ENG R, V48, P191, DOI 10.1016/j.mser.2004.12.003
+   Segalman RA, 2003, PHYS REV LETT, V91, DOI 10.1103/PhysRevLett.91.196101
+   Segalman RA, 2003, MACROMOLECULES, V36, P3272, DOI 10.1021/ma021367m
+   Stipe BC, 2010, NAT PHOTONICS, V4, P484, DOI 10.1038/nphoton.2010.90
+   Stoykovich MP, 2010, MACROMOLECULES, V43, P2334, DOI 10.1021/ma902494v
+   Stuen KO, 2009, MACROMOLECULES, V42, P5139, DOI 10.1021/ma900520v
+   Tada Y, 2009, POLYMER, V50, P4250, DOI 10.1016/j.polymer.2009.06.039
+   Welander AM, 2008, MACROMOLECULES, V41, P2759, DOI 10.1021/ma800056s
+   Welander AM, 2008, J VAC SCI TECHNOL B, V26, P2484, DOI 10.1116/1.2987963
+   Xiao SG, 2007, J VAC SCI TECHNOL B, V25, P1953, DOI 10.1116/1.2801860
+   Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r
+NR 32
+TC 11
+Z9 11
+U1 4
+U2 22
+PU AMER CHEMICAL SOC
+PI WASHINGTON
+PA 1155 16TH ST, NW, WASHINGTON, DC 20036 USA
+SN 0024-9297
+J9 MACROMOLECULES
+JI Macromolecules
+PD DEC 13
+PY 2011
+VL 44
+IS 23
+BP 9196
+EP 9204
+DI 10.1021/ma201967a
+PG 9
+WC Polymer Science
+SC Polymer Science
+GA 855ZG
+UT WOS:000297604200016
+ER
+
+EF
+""".strip()
+
+scenarios("features/cached.feature")
+
+
+@fixture
+def collection_context() -> Context[CachedCollection]:
+    return Context()  # starts empty; steps push the CachedCollection into it
+
+
+@fixture
+def iterate_collection_context() -> Context[List[Article]]:
+    return Context()  # filled by the "I iterate over the collection" step
+
+
+@fixture
+def iterate_authors_collection_context() -> Context[List[str]]:
+    return Context()  # filled by the "collection authors" iteration step
+
+
+@fixture
+def iterate_coauthors_collection_context() -> Context[List[Tuple[str, str]]]:
+    return Context()  # filled by the "collection coauthors" iteration step
+
+
+@fixture
+def iterate_citation_pairs_collection_context() -> Context[
+    List[Tuple[Article, Article]]
+]:
+    return Context()  # filled by the "citation pairs" listing step
+
+
+@given("some valid isi text", target_fixture="isi_text")
+def valid_isi_text():
+    return [ISI_TEXT]  # a list, so a later step can append further documents
+
+
+@given("a diferent isi record that references the former", target_fixture="isi_text")
+def isi_text_different_record(isi_text):
+    return list(isi_text) + [ISI_TEXT_DIFFERENT_RECORD]  # new list, input untouched
+
+
+@when("I create a collection from that text")
+def create_collection(isi_text, collection_context: Context[CachedCollection]):
+    with collection_context.capture():
+        streams = map(io.StringIO, isi_text)  # one in-memory file per document
+        collection_context.push(CachedCollection(*streams))
+    return collection_context
+
+
+@given("a valid collection")
+def context_valid_collection(collection_context):
+    stream = io.StringIO(ISI_TEXT)  # in-memory file holding the single record
+    collection_context.push(CachedCollection(stream))
+
+
+@then("the collection's cache is preheated")
+def the_collection_cache_is_preheated(collection_context: Context[CachedCollection]):
+    with collection_context.assert_data() as cached:
+        assert cached._cache  # private attribute; must already be truthy
+
+
+@when("I iterate over the collection")
+def iterate_over_collection(
+    collection_context: Context[CachedCollection],
+    iterate_collection_context: Context[List[Article]],
+):
+    with collection_context.assert_data() as source:
+        with iterate_collection_context.capture():  # iterate inside capture()
+            iterate_collection_context.push([*source])
+
+
+@then("all articles and references are present")
+def all_articles_and_references_are_present(
+    iterate_collection_context: Context[List[Article]],
+):
+    with iterate_collection_context.assert_data() as entries:
+        assert len(entries) == 38  # presumably the record plus its 37 CR lines
+        for entry in entries:
+            assert entry
+            assert entry.label
+
+
+@when("I iterate over the collection authors")
+def iterate_over_collection_authors(
+    collection_context: Context[CachedCollection],
+    iterate_authors_collection_context: Context[List[str]],
+):
+    with collection_context.assert_data() as source:
+        with iterate_authors_collection_context.capture():  # inside capture()
+            iterate_authors_collection_context.push([*source.authors])
+
+
+@then("all authors are included")
+@then("the author list include duplicates")
+def all_authors_included_even_duplicates(
+    iterate_authors_collection_context: Context[List[str]],
+):
+    with iterate_authors_collection_context.assert_data() as names:
+        assert names
+
+        tally: Dict[str, int] = {}
+        for name in names:
+            tally[name] = tally.get(name, 0) + 1
+            assert name
+
+        for name, seen in tally.items():
+            assert name in ISI_TEXT  # must occur verbatim in the fixture text
+            assert seen >= 1  # NOTE(review): always true; duplicates unchecked
+
+
+@when("I iterate over the collection coauthors")
+def iterate_over_collection_coauthors(
+    collection_context: Context[CachedCollection],
+    iterate_coauthors_collection_context: Context[List[Tuple[str, str]]],
+):
+    with collection_context.assert_data() as source:
+        with iterate_coauthors_collection_context.capture():  # inside capture()
+            iterate_coauthors_collection_context.push([*source.coauthors])
+
+
+@then("all coauthor pairs are included")
+@then("the coauthor list include duplicates")
+def all_coauthors_pairs_included_even_duplicates(
+    iterate_coauthors_collection_context: Context[List[Tuple[str, str]]],
+):
+    with iterate_coauthors_collection_context.assert_data() as pairs:
+        assert pairs
+
+        tally: Dict[Tuple[str, str], int] = {}
+        for pair in pairs:
+            tally[pair] = tally.get(pair, 0) + 1
+
+            first, second = pair
+            assert first
+            assert second
+
+        # every distinct name must occur verbatim in the fixture text
+        for (first, second), seen in tally.items():
+            assert first in ISI_TEXT
+            assert second in ISI_TEXT
+            assert seen >= 1  # NOTE(review): always true; duplicates unchecked
+
+
+@then("both collections have the same number of articles")
+def same_number_of_articles(collection_context: Context[CachedCollection]):
+    """Compare the current collection's length with the history entry's."""
+    with collection_context.assert_data() as collection:
+        with collection_context.assert_history(1) as latest:
+            # latest[0] is the first entry yielded by assert_history(1)
+            assert len(collection) == len(latest[0])
+
+
+@when("I list the collection's citation pairs")
+def list_collection_citation_pairs(
+    collection_context: Context[CachedCollection],
+    iterate_citation_pairs_collection_context: Context[List[Tuple[Article, Article]]],
+):
+    with collection_context.assert_data() as source:
+        with iterate_citation_pairs_collection_context.capture():
+            # materialise the pairs while still inside the capture() block
+            pairs = list(source.citation_pairs())
+            iterate_citation_pairs_collection_context.push(pairs)
+
+
+@then("all citation pairs are included")
+def all_citation_pairs_are_included(
+    iterate_citation_pairs_collection_context: Context[List[Tuple[Article, Article]]]
+):
+    with iterate_citation_pairs_collection_context.assert_data() as pairs:
+        assert len(pairs) == 37  # presumably one pair per CR line of ISI_TEXT
+        for citing, cited in pairs:
+            assert isinstance(citing, Article)
+            assert isinstance(cited, Article)
+
+
+@then("the citation always include all the available data")
+def iterate_over_citation_pairs_two_isi_files(
+    iterate_citation_pairs_collection_context: Context[List[Tuple[Article, Article]]]
+):
+    with iterate_citation_pairs_collection_context.assert_data() as pairs:
+        assert len(pairs) == 68
+
+        enriched = False
+        for citing, cited in pairs:
+            assert isinstance(citing, Article)
+            assert isinstance(cited, Article)
+
+            # the pair in which the ISI_TEXT record cites the second record
+            citing_doi = citing.to_dict()["doi"]
+            if citing_doi == "10.1002/polb.24346":
+                if cited.to_dict()["doi"] == "10.1021/ma201967a":
+                    enriched = bool(citing.keywords and cited.keywords)
+
+        assert enriched
diff --git a/tests/test_fields.py b/tests/test_fields.py
new file mode 100644
index 0000000..04097ed
--- /dev/null
+++ b/tests/test_fields.py
@@ -0,0 +1,39 @@
+import pytest
+
+from wostools.fields import joined, delimited, integer, parse
+
+
+def test_joined_joins_sequences():
+    assert joined(["hello", "world"]) == "hello world"  # joined by one space
+
+
+def test_delimited_split_strings():  # a trailing ";" yields no empty item
+    assert delimited(["key; word;", "more; words"]) == ["key", "word", "more", "words"]
+
+
+def test_delimited_split_strings_no_semi_at_the_end():  # no trailing ";"
+    assert delimited(["key; word", "more; words"]) == ["key", "word", "more", "words"]
+
+
+def test_integer_integer_makes_an_integer():
+    assert integer(["1"]) == 1  # one-item list of text becomes an int
+
+
+def test_integer_raises_if_more_than_one_value_is_passed():
+    with pytest.raises(ValueError):
+        integer(["", ""])  # two items where a single value is expected
+
+
+@pytest.mark.parametrize("header", ["VR", "FN"])
+def test_parse_ignores_headers(header):
+    assert parse(header, ["value", "value"]) == {}  # header tags yield nothing
+
+
+def test_parse_raises_on_unknown_fields():
+    with pytest.raises(ValueError):
+        parse("FG", ["value", "value"])  # only the raise matters; no result to compare
+
+
+def test_parse_raises_on_invalid_values():
+    with pytest.raises(ValueError):
+        parse("PY", ["1994b"])  # only the raise matters; no result to compare
diff --git a/tests/test_wostools.py b/tests/test_wostools.py
deleted file mode 100644
index 9db8f17..0000000
--- a/tests/test_wostools.py
+++ /dev/null
@@ -1,1007 +0,0 @@
-"""Tests for `wostools` package."""
-
-from click.testing import CliRunner
-
-from wostools import CollectionLazy
-from wostools import cli
-from wostools import Article
-import pytest
-import io
-
-
-def test_article_label(article):
-    """
-    Test label value of article.
-    """
-    assert article.label == (
-        "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061"
-    )
-
-
-def test_aliases(article):
-    if hasattr(article, "AB"):
-        assert article.AB == article.abstract
-    else:
-        with pytest.raises(AttributeError):
-            article.AB
-    if hasattr(article, "AF"):
-        assert article.AF == article.author_full_names
-    else:
-        with pytest.raises(AttributeError):
-            article.AF
-    if hasattr(article, "AR"):
-        assert article.AR == article.article_number
-    else:
-        with pytest.raises(AttributeError):
-            article.AR
-    if hasattr(article, "AU"):
-        assert article.AU == article.authors
-    else:
-        with pytest.raises(AttributeError):
-            article.AU
-    if hasattr(article, "BA"):
-        assert article.BA == article.book_authors
-    else:
-        with pytest.raises(AttributeError):
-            article.BA
-    if hasattr(article, "BE"):
-        assert article.BE == article.editors
-    else:
-        with pytest.raises(AttributeError):
-            article.BE
-    if hasattr(article, "BF"):
-        assert article.BF == article.book_authors_full_name
-    else:
-        with pytest.raises(AttributeError):
-            article.BF
-    if hasattr(article, "BN"):
-        assert article.BN == article.international_standard_book_number
-    else:
-        with pytest.raises(AttributeError):
-            article.BN
-    if hasattr(article, "BP"):
-        assert article.BP == article.beginning_page
-    else:
-        with pytest.raises(AttributeError):
-            article.BP
-    if hasattr(article, "BS"):
-        assert article.BS == article.book_series_subtitle
-    else:
-        with pytest.raises(AttributeError):
-            article.BS
-    if hasattr(article, "C1"):
-        assert article.C1 == article.author_address
-    else:
-        with pytest.raises(AttributeError):
-            article.C1
-    if hasattr(article, "CA"):
-        assert article.CA == article.group_authors
-    else:
-        with pytest.raises(AttributeError):
-            article.CA
-    if hasattr(article, "CL"):
-        assert article.CL == article.conference_location
-    else:
-        with pytest.raises(AttributeError):
-            article.CL
-    if hasattr(article, "CR"):
-        assert article.CR == article.cited_references
-    else:
-        with pytest.raises(AttributeError):
-            article.CR
-    if hasattr(article, "CR"):
-        assert article.CR == article.references
-    else:
-        with pytest.raises(AttributeError):
-            article.CR
-    if hasattr(article, "CR"):
-        assert article.CR == article.citations
-    else:
-        with pytest.raises(AttributeError):
-            article.CR
-    if hasattr(article, "CT"):
-        assert article.CT == article.conference_title
-    else:
-        with pytest.raises(AttributeError):
-            article.CT
-    if hasattr(article, "CY"):
-        assert article.CY == article.conference_date
-    else:
-        with pytest.raises(AttributeError):
-            article.CY
-    if hasattr(article, "DE"):
-        assert article.DE == article.author_keywords
-    else:
-        with pytest.raises(AttributeError):
-            article.DE
-    if hasattr(article, "DI"):
-        assert article.DI == article.digital_object_identifier
-    else:
-        with pytest.raises(AttributeError):
-            article.DI
-    if hasattr(article, "DT"):
-        assert article.DT == article.document_type
-    else:
-        with pytest.raises(AttributeError):
-            article.DT
-    if hasattr(article, "D2"):
-        assert article.D2 == article.book_digital_object_identifier
-    else:
-        with pytest.raises(AttributeError):
-            article.D2
-    if hasattr(article, "ED"):
-        assert article.ED == article.editors
-    else:
-        with pytest.raises(AttributeError):
-            article.ED
-    if hasattr(article, "EM"):
-        assert article.EM == article.email_address
-    else:
-        with pytest.raises(AttributeError):
-            article.EM
-    if hasattr(article, "EI"):
-        assert article.EI == article.eissn
-    else:
-        with pytest.raises(AttributeError):
-            article.EI
-    if hasattr(article, "EP"):
-        assert article.EP == article.ending_page
-    else:
-        with pytest.raises(AttributeError):
-            article.EP
-    if hasattr(article, "FU"):
-        assert article.FU == article.funding_agency_and_grant_number
-    else:
-        with pytest.raises(AttributeError):
-            article.FU
-    if hasattr(article, "FX"):
-        assert article.FX == article.funding_text
-    else:
-        with pytest.raises(AttributeError):
-            article.FX
-    if hasattr(article, "GA"):
-        assert article.GA == article.document_delivery_number
-    else:
-        with pytest.raises(AttributeError):
-            article.GA
-    if hasattr(article, "GP"):
-        assert article.GP == article.book_group_authors
-    else:
-        with pytest.raises(AttributeError):
-            article.GP
-    if hasattr(article, "HO"):
-        assert article.HO == article.conference_host
-    else:
-        with pytest.raises(AttributeError):
-            article.HO
-    if hasattr(article, "ID"):
-        assert article.ID == article.keywords_plus
-    else:
-        with pytest.raises(AttributeError):
-            article.ID
-    if hasattr(article, "ID"):
-        assert article.ID == article.keywords
-    else:
-        with pytest.raises(AttributeError):
-            article.ID
-    if hasattr(article, "IS"):
-        assert article.IS == article.issue
-    else:
-        with pytest.raises(AttributeError):
-            article.IS
-    if hasattr(article, "J9"):
-        assert article.J9 == article.source_abbreviation
-    else:
-        with pytest.raises(AttributeError):
-            article.J9
-    if hasattr(article, "JI"):
-        assert article.JI == article.iso_source_abbreviation
-    else:
-        with pytest.raises(AttributeError):
-            article.JI
-    if hasattr(article, "LA"):
-        assert article.LA == article.language
-    else:
-        with pytest.raises(AttributeError):
-            article.LA
-    if hasattr(article, "MA"):
-        assert article.MA == article.meeting_abstract
-    else:
-        with pytest.raises(AttributeError):
-            article.MA
-    if hasattr(article, "NR"):
-        assert article.NR == article.cited_reference_count
-    else:
-        with pytest.raises(AttributeError):
-            article.NR
-    if hasattr(article, "OI"):
-        assert article.OI == article.orcid_identifier
-    else:
-        with pytest.raises(AttributeError):
-            article.OI
-    if hasattr(article, "P2"):
-        assert article.P2 == article.chapter_count
-    else:
-        with pytest.raises(AttributeError):
-            article.P2
-    if hasattr(article, "PA"):
-        assert article.PA == article.publisher_address
-    else:
-        with pytest.raises(AttributeError):
-            article.PA
-    if hasattr(article, "PD"):
-        assert article.PD == article.publication_date
-    else:
-        with pytest.raises(AttributeError):
-            article.PD
-    if hasattr(article, "PG"):
-        assert article.PG == article.page_count
-    else:
-        with pytest.raises(AttributeError):
-            article.PG
-    if hasattr(article, "PI"):
-        assert article.PI == article.publisher_city
-    else:
-        with pytest.raises(AttributeError):
-            article.PI
-    if hasattr(article, "PM"):
-        assert article.PM == article.pubmed_id
-    else:
-        with pytest.raises(AttributeError):
-            article.PM
-    if hasattr(article, "PN"):
-        assert article.PN == article.part_number
-    else:
-        with pytest.raises(AttributeError):
-            article.PN
-    if hasattr(article, "PT"):
-        assert article.PT == article.publication_type
-    else:
-        with pytest.raises(AttributeError):
-            article.PT
-    if hasattr(article, "PU"):
-        assert article.PU == article.publisher
-    else:
-        with pytest.raises(AttributeError):
-            article.PU
-    if hasattr(article, "PY"):
-        assert article.PY == article.year_published
-    else:
-        with pytest.raises(AttributeError):
-            article.PY
-    if hasattr(article, "RI"):
-        assert article.RI == article.researcherid_number
-    else:
-        with pytest.raises(AttributeError):
-            article.RI
-    if hasattr(article, "RP"):
-        assert article.RP == article.reprint_address
-    else:
-        with pytest.raises(AttributeError):
-            article.RP
-    if hasattr(article, "SC"):
-        assert article.SC == article.research_areas
-    else:
-        with pytest.raises(AttributeError):
-            article.SC
-    if hasattr(article, "SE"):
-        assert article.SE == article.book_series_title
-    else:
-        with pytest.raises(AttributeError):
-            article.SE
-    if hasattr(article, "SI"):
-        assert article.SI == article.special_issue
-    else:
-        with pytest.raises(AttributeError):
-            article.SI
-    if hasattr(article, "SN"):
-        assert article.SN == article.issn
-    else:
-        with pytest.raises(AttributeError):
-            article.SN
-    if hasattr(article, "SP"):
-        assert article.SP == article.conference_sponsors
-    else:
-        with pytest.raises(AttributeError):
-            article.SP
-    if hasattr(article, "SU"):
-        assert article.SU == article.supplement
-    else:
-        with pytest.raises(AttributeError):
-            article.SU
-    if hasattr(article, "TC"):
-        assert article.TC == article.wos_times_cited_count
-    else:
-        with pytest.raises(AttributeError):
-            article.TC
-    if hasattr(article, "TC"):
-        assert article.TC == article.wos_times_cited
-    else:
-        with pytest.raises(AttributeError):
-            article.TC
-    if hasattr(article, "TI"):
-        assert article.TI == article.title
-    else:
-        with pytest.raises(AttributeError):
-            article.TI
-    if hasattr(article, "U1"):
-        assert article.U1 == article.usage_count
-    else:
-        with pytest.raises(AttributeError):
-            article.U1
-    if hasattr(article, "U2"):
-        assert article.U2 == article.usage_count
-    else:
-        with pytest.raises(AttributeError):
-            article.U2
-    if hasattr(article, "UT"):
-        assert article.UT == article.unique_article_identifier
-    else:
-        with pytest.raises(AttributeError):
-            article.UT
-    if hasattr(article, "VL"):
-        assert article.VL == article.volume
-    else:
-        with pytest.raises(AttributeError):
-            article.VL
-    if hasattr(article, "WC"):
-        assert article.WC == article.web_of_science_categories
-    else:
-        with pytest.raises(AttributeError):
-            article.WC
-    if hasattr(article, "Z9"):
-        assert article.Z9 == article.total_times_cited_count
-    else:
-        with pytest.raises(AttributeError):
-            article.Z9
-    if hasattr(article, "Z9"):
-        assert article.Z9 == article.times_cited
-    else:
-        with pytest.raises(AttributeError):
-            article.Z9
-
-
-def test_parsers(article):
-    assert article.PT == "J"
-    assert article.AU == ["Wodarz, S", "Hasegawa, T", "Ishio, S", "Homma, T"]
-    assert article.AF == [
-        "Wodarz, Siggi",
-        "Hasegawa, Takashi",
-        "Ishio, Shunji",
-        "Homma, Takayuki",
-    ]
-    assert (
-        article.TI
-        == "Structural control of ultra-fine CoPt nanodot arrays via electrodeposition process"
-    )
-    assert article.SO == "JOURNAL OF MAGNETISM AND MAGNETIC MATERIALS"
-    assert article.LA == "English"
-    assert article.DT == "Article"
-    assert article.DE == [
-        "Electrodeposition",
-        "Structural control",
-        "Nanodot array",
-        "Bit-patterned media",
-        "CoPt alloy",
-    ]
-    assert article.ID == [
-        "BIT-PATTERNED MEDIA",
-        "ELECTRON-BEAM LITHOGRAPHY",
-        "RECORDING MEDIA",
-        "MAGNETIC MEDIA",
-        "DENSITY",
-        "FILMS",
-        "ANISOTROPY",
-        "STORAGE",
-    ]
-    assert (
-        article.AB
-        == "CoPt nanodot arrays were fabricated by combining electrodeposition and electron beam lithography (EBL) for the use of bit-patterned media (BPM). To achieve precise control of deposition uniformity and coercivity of the CoPt nanodot arrays, their crystal structure and magnetic properties were controlled by controlling the diffusion state of metal ions from the initial deposition stage with the application of bath agitation. Following bath agitation, the composition gradient of the CoPt alloy with thickness was mitigated to have a near-ideal alloy composition of Co:Pt =80:20, which induces epitaxial-like growth from Ru substrate, thus resulting in the improvement of the crystal orientation of the hcp (002) structure from its initial deposition stages. Furthermore, the cross-sectional transmission electron microscope (TEM) analysis of the nanodots deposited with bath agitation showed CoPt growth along its c-axis oriented in the perpendicular direction, having uniform lattice fringes on the hcp (002) plane from the Ru underlayer interface, which is a significant factor to induce perpendicular magnetic anisotropy. Magnetic characterization of the CoPt nanodot arrays showed increase in the perpendicular coercivity and squareness of the hysteresis loops from 2.0 kOe and 0.64 (without agitation) to 4.0 kOe and 0.87 with bath agitation. Based on the detailed characterization of nanodot arrays, the precise crystal structure control of the nanodot arrays with ultra-high recording density by electrochemical process was successfully demonstrated."
-    )
-    assert article.C1 == [
-        "[Wodarz, Siggi; Homma, Takayuki] Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan.",
-        "[Hasegawa, Takashi; Ishio, Shunji] Akita Univ, Dept Mat Sci, Akita 0108502, Japan.",
-    ]
-    assert (
-        article.RP
-        == "Homma, T (reprint author), Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan."
-    )
-    assert article.EM == ["t.homma@waseda.jp"]
-    assert article.OI == ["Hasegawa, Takashi/0000-0002-8178-4980"]
-    assert article.FU == ["JSPS KAKENHI Grant [25249104]"]
-    assert (
-        article.FX
-        == "This work was supported in part by JSPS KAKENHI Grant Number 25249104."
-    )
-    assert article.CR == [
-        "Albrecht TR, 2013, IEEE T MAGN, V49, P773, DOI 10.1109/TMAG.2012.2227303",
-        "BUSCHOW KHJ, 1983, J MAGN MAGN MATER, V38, P1, DOI 10.1016/0304-8853(83)90097-5",
-        "Gapin AI, 2006, J APPL PHYS, V99, DOI 10.1063/1.2163289",
-        "Homma Takayuki, 2015, ECS Transactions, V64, P1, DOI 10.1149/06431.0001ecst",
-        "Kryder MH, 2008, P IEEE, V96, P1810, DOI 10.1109/JPROC.2008.2004315",
-        "Kubo T, 2005, J APPL PHYS, V97, DOI 10.1063/1.1855572",
-        "Lodder JC, 2004, J MAGN MAGN MATER, V272, P1692, DOI 10.1016/j.jmmm.2003.12.259",
-        "Mitsuzuka K, 2007, IEEE T MAGN, V43, P2160, DOI 10.1109/TMAG.2007.893129",
-        "Ouchi T, 2010, ELECTROCHIM ACTA, V55, P8081, DOI 10.1016/j.electacta.2010.02.073",
-        "Pattanaik G, 2006, J APPL PHYS, V99, DOI 10.1063/1.2150805",
-        "Pattanaik G, 2007, ELECTROCHIM ACTA, V52, P2755, DOI 10.1016/j.electacta.2006.07.062",
-        "Piramanayagam SN, 2009, J MAGN MAGN MATER, V321, P485, DOI 10.1016/j.jmmm.2008.05.007",
-        "Ross CA, 2008, MRS BULL, V33, P838, DOI 10.1557/mrs2008.179",
-        "Shiroishi Y, 2009, IEEE T MAGN, V45, P3816, DOI 10.1109/TMAG.2009.2024879",
-        "Sirtori V, 2011, ACS APPL MATER INTER, V3, P1800, DOI 10.1021/am200267u",
-        "Sohn JS, 2009, NANOTECHNOLOGY, V20, DOI 10.1088/0957-4484/20/2/025302",
-        "Sun SH, 2000, SCIENCE, V287, P1989, DOI 10.1126/science.287.5460.1989",
-        "Terris BD, 2007, MICROSYST TECHNOL, V13, P189, DOI 10.1007/s00542-006-0144-9",
-        "Wang JP, 2008, P IEEE, V96, P1847, DOI 10.1109/JPROC.2008.2004318",
-        "Weller D, 1999, IEEE T MAGN, V35, P4423, DOI 10.1109/20.809134",
-        "Weller D, 2000, IEEE T MAGN, V36, P10, DOI 10.1109/20.824418",
-        "Wodarz S, 2016, ELECTROCHIM ACTA, V197, P330, DOI 10.1016/j.electacta.2015.11.136",
-        "Xu X, 2012, J ELECTROCHEM SOC, V159, pD240, DOI 10.1149/2.090204jes",
-        "Yang X, 2007, J VAC SCI TECHNOL B, V25, P2202, DOI 10.1116/1.2798711",
-        "Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r",
-        "Yasui N, 2003, APPL PHYS LETT, V83, P3347, DOI 10.1063/1.1622787",
-        "Yua H., 2009, J APPL PHYS, V105",
-        "Zhu JG, 2008, IEEE T MAGN, V44, P125, DOI 10.1109/TMAG.2007.911031",
-    ]
-    assert article.NR == 28
-    assert article.TC == 0
-    assert article.Z9 == 0
-    assert article.U1 == 21
-    assert article.U2 == 21
-    assert article.PU == "ELSEVIER SCIENCE BV"
-    assert article.PI == "AMSTERDAM"
-    assert article.PA == "PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS"
-    assert article.SN == "0304-8853"
-    assert article.EI == "1873-4766"
-    assert article.J9 == "J MAGN MAGN MATER"
-    assert article.JI == "J. Magn. Magn. Mater."
-    assert article.PD == "MAY 15"
-    assert article.PY == 2017
-    assert article.VL == "430"
-    assert article.BP == "52"
-    assert article.EP == "58"
-    assert article.DI == "10.1016/j.jmmm.2017.01.061"
-    assert article.PG == 7
-    assert article.WC == [
-        "Materials Science, Multidisciplinary",
-        "Physics, Condensed Matter",
-    ]
-    assert article.SC == ["Materials Science", "Physics"]
-    assert article.GA == "EP2GP"
-    assert article.UT == "WOS:000397201600008"
-
-
-def test_article_attributes(article):
-    assert set(article.keys()) == {
-        "PT",
-        "AU",
-        "AF",
-        "TI",
-        "SO",
-        "LA",
-        "DT",
-        "DE",
-        "ID",
-        "AB",
-        "C1",
-        "RP",
-        "EM",
-        "OI",
-        "FU",
-        "FX",
-        "CR",
-        "NR",
-        "TC",
-        "Z9",
-        "U1",
-        "U2",
-        "PU",
-        "PI",
-        "PA",
-        "SN",
-        "EI",
-        "J9",
-        "JI",
-        "PD",
-        "PY",
-        "VL",
-        "BP",
-        "EP",
-        "DI",
-        "PG",
-        "WC",
-        "SC",
-        "GA",
-        "UT",
-    }
-
-
-def test_article_raw_data(article):
-    raw_data = article.raw_data
-    assert "ER" not in raw_data
-    assert raw_data["PT"] == ["J"]
-    assert raw_data["AU"] == ["Wodarz, S", "Hasegawa, T", "Ishio, S", "Homma, T"]
-    assert raw_data["AF"] == [
-        "Wodarz, Siggi",
-        "Hasegawa, Takashi",
-        "Ishio, Shunji",
-        "Homma, Takayuki",
-    ]
-    assert raw_data["TI"] == [
-        "Structural control of ultra-fine CoPt nanodot arrays via",
-        "electrodeposition process",
-    ]
-    assert raw_data["SO"] == ["JOURNAL OF MAGNETISM AND MAGNETIC MATERIALS"]
-    assert raw_data["LA"] == ["English"]
-    assert raw_data["DT"] == ["Article"]
-    assert raw_data["DE"] == [
-        "Electrodeposition; Structural control; Nanodot array; Bit-patterned",
-        "media; CoPt alloy",
-    ]
-    assert raw_data["ID"] == [
-        "BIT-PATTERNED MEDIA; ELECTRON-BEAM LITHOGRAPHY; RECORDING MEDIA;",
-        "MAGNETIC MEDIA; DENSITY; FILMS; ANISOTROPY; STORAGE",
-    ]
-    assert raw_data["AB"] == [
-        "CoPt nanodot arrays were fabricated by combining electrodeposition and electron beam lithography (EBL) for the use of bit-patterned media (BPM). To achieve precise control of deposition uniformity and coercivity of the CoPt nanodot arrays, their crystal structure and magnetic properties were controlled by controlling the diffusion state of metal ions from the initial deposition stage with the application of bath agitation. Following bath agitation, the composition gradient of the CoPt alloy with thickness was mitigated to have a near-ideal alloy composition of Co:Pt =80:20, which induces epitaxial-like growth from Ru substrate, thus resulting in the improvement of the crystal orientation of the hcp (002) structure from its initial deposition stages. Furthermore, the cross-sectional transmission electron microscope (TEM) analysis of the nanodots deposited with bath agitation showed CoPt growth along its c-axis oriented in the perpendicular direction, having uniform lattice fringes on the hcp (002) plane from the Ru underlayer interface, which is a significant factor to induce perpendicular magnetic anisotropy. Magnetic characterization of the CoPt nanodot arrays showed increase in the perpendicular coercivity and squareness of the hysteresis loops from 2.0 kOe and 0.64 (without agitation) to 4.0 kOe and 0.87 with bath agitation. Based on the detailed characterization of nanodot arrays, the precise crystal structure control of the nanodot arrays with ultra-high recording density by electrochemical process was successfully demonstrated."
-    ]
-    assert raw_data["C1"] == [
-        "[Wodarz, Siggi; Homma, Takayuki] Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan.",
-        "[Hasegawa, Takashi; Ishio, Shunji] Akita Univ, Dept Mat Sci, Akita 0108502, Japan.",
-    ]
-    assert raw_data["RP"] == [
-        "Homma, T (reprint author), Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan."
-    ]
-    assert raw_data["EM"] == ["t.homma@waseda.jp"]
-    assert raw_data["OI"] == ["Hasegawa, Takashi/0000-0002-8178-4980"]
-    assert raw_data["FU"] == ["JSPS KAKENHI Grant [25249104]"]
-    assert raw_data["FX"] == [
-        "This work was supported in part by JSPS KAKENHI Grant Number 25249104."
-    ]
-    assert raw_data["CR"] == [
-        "Albrecht TR, 2013, IEEE T MAGN, V49, P773, DOI 10.1109/TMAG.2012.2227303",
-        "BUSCHOW KHJ, 1983, J MAGN MAGN MATER, V38, P1, DOI 10.1016/0304-8853(83)90097-5",
-        "Gapin AI, 2006, J APPL PHYS, V99, DOI 10.1063/1.2163289",
-        "Homma Takayuki, 2015, ECS Transactions, V64, P1, DOI 10.1149/06431.0001ecst",
-        "Kryder MH, 2008, P IEEE, V96, P1810, DOI 10.1109/JPROC.2008.2004315",
-        "Kubo T, 2005, J APPL PHYS, V97, DOI 10.1063/1.1855572",
-        "Lodder JC, 2004, J MAGN MAGN MATER, V272, P1692, DOI 10.1016/j.jmmm.2003.12.259",
-        "Mitsuzuka K, 2007, IEEE T MAGN, V43, P2160, DOI 10.1109/TMAG.2007.893129",
-        "Ouchi T, 2010, ELECTROCHIM ACTA, V55, P8081, DOI 10.1016/j.electacta.2010.02.073",
-        "Pattanaik G, 2006, J APPL PHYS, V99, DOI 10.1063/1.2150805",
-        "Pattanaik G, 2007, ELECTROCHIM ACTA, V52, P2755, DOI 10.1016/j.electacta.2006.07.062",
-        "Piramanayagam SN, 2009, J MAGN MAGN MATER, V321, P485, DOI 10.1016/j.jmmm.2008.05.007",
-        "Ross CA, 2008, MRS BULL, V33, P838, DOI 10.1557/mrs2008.179",
-        "Shiroishi Y, 2009, IEEE T MAGN, V45, P3816, DOI 10.1109/TMAG.2009.2024879",
-        "Sirtori V, 2011, ACS APPL MATER INTER, V3, P1800, DOI 10.1021/am200267u",
-        "Sohn JS, 2009, NANOTECHNOLOGY, V20, DOI 10.1088/0957-4484/20/2/025302",
-        "Sun SH, 2000, SCIENCE, V287, P1989, DOI 10.1126/science.287.5460.1989",
-        "Terris BD, 2007, MICROSYST TECHNOL, V13, P189, DOI 10.1007/s00542-006-0144-9",
-        "Wang JP, 2008, P IEEE, V96, P1847, DOI 10.1109/JPROC.2008.2004318",
-        "Weller D, 1999, IEEE T MAGN, V35, P4423, DOI 10.1109/20.809134",
-        "Weller D, 2000, IEEE T MAGN, V36, P10, DOI 10.1109/20.824418",
-        "Wodarz S, 2016, ELECTROCHIM ACTA, V197, P330, DOI 10.1016/j.electacta.2015.11.136",
-        "Xu X, 2012, J ELECTROCHEM SOC, V159, pD240, DOI 10.1149/2.090204jes",
-        "Yang X, 2007, J VAC SCI TECHNOL B, V25, P2202, DOI 10.1116/1.2798711",
-        "Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r",
-        "Yasui N, 2003, APPL PHYS LETT, V83, P3347, DOI 10.1063/1.1622787",
-        "Yua H., 2009, J APPL PHYS, V105",
-        "Zhu JG, 2008, IEEE T MAGN, V44, P125, DOI 10.1109/TMAG.2007.911031",
-    ]
-    assert raw_data["NR"] == ["28"]
-    assert raw_data["TC"] == ["0"]
-    assert raw_data["Z9"] == ["0"]
-    assert raw_data["U1"] == ["21"]
-    assert raw_data["U2"] == ["21"]
-    assert raw_data["PU"] == ["ELSEVIER SCIENCE BV"]
-    assert raw_data["PI"] == ["AMSTERDAM"]
-    assert raw_data["PA"] == ["PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS"]
-    assert raw_data["SN"] == ["0304-8853"]
-    assert raw_data["EI"] == ["1873-4766"]
-    assert raw_data["J9"] == ["J MAGN MAGN MATER"]
-    assert raw_data["JI"] == ["J. Magn. Magn. Mater."]
-    assert raw_data["PD"] == ["MAY 15"]
-    assert raw_data["PY"] == ["2017"]
-    assert raw_data["VL"] == ["430"]
-    assert raw_data["BP"] == ["52"]
-    assert raw_data["EP"] == ["58"]
-    assert raw_data["DI"] == ["10.1016/j.jmmm.2017.01.061"]
-    assert raw_data["PG"] == ["7"]
-    assert raw_data["WC"] == [
-        "Materials Science, Multidisciplinary; Physics, Condensed Matter"
-    ]
-    assert raw_data["SC"] == ["Materials Science; Physics"]
-    assert raw_data["GA"] == ["EP2GP"]
-    assert raw_data["UT"] == ["WOS:000397201600008"]
-
-
-def test_article_data(article):
-    data = article.data
-    assert data.get("AB") == data.get("abstract")
-    assert data.get("AF") == data.get("author_full_names")
-    assert data.get("AR") == data.get("article_number")
-    assert data.get("AU") == data.get("authors")
-    assert data.get("BA") == data.get("book_authors")
-    assert data.get("BE") == data.get("editors")
-    assert data.get("BF") == data.get("book_authors_full_name")
-    assert data.get("BN") == data.get("international_standard_book_number")
-    assert data.get("BP") == data.get("beginning_page")
-    assert data.get("BS") == data.get("book_series_subtitle")
-    assert data.get("C1") == data.get("author_address")
-    assert data.get("CA") == data.get("group_authors")
-    assert data.get("CL") == data.get("conference_location")
-    assert data.get("CR") == data.get("cited_references")
-    assert data.get("CR") == data.get("references")
-    assert data.get("CR") == data.get("citations")
-    assert data.get("CT") == data.get("conference_title")
-    assert data.get("CY") == data.get("conference_date")
-    assert data.get("DE") == data.get("author_keywords")
-    assert data.get("DI") == data.get("digital_object_identifier")
-    assert data.get("DT") == data.get("document_type")
-    assert data.get("D2") == data.get("book_digital_object_identifier")
-    assert data.get("ED") == data.get("editors")
-    assert data.get("EM") == data.get("email_address")
-    assert data.get("EI") == data.get("eissn")
-    assert data.get("EP") == data.get("ending_page")
-    assert data.get("FU") == data.get("funding_agency_and_grant_number")
-    assert data.get("FX") == data.get("funding_text")
-    assert data.get("GA") == data.get("document_delivery_number")
-    assert data.get("GP") == data.get("book_group_authors")
-    assert data.get("HO") == data.get("conference_host")
-    assert data.get("ID") == data.get("keywords_plus")
-    assert data.get("ID") == data.get("keywords")
-    assert data.get("IS") == data.get("issue")
-    assert data.get("J9") == data.get("source_abbreviation")
-    assert data.get("JI") == data.get("iso_source_abbreviation")
-    assert data.get("LA") == data.get("language")
-    assert data.get("MA") == data.get("meeting_abstract")
-    assert data.get("NR") == data.get("cited_reference_count")
-    assert data.get("OI") == data.get("orcid_identifier")
-    assert data.get("P2") == data.get("chapter_count")
-    assert data.get("PA") == data.get("publisher_address")
-    assert data.get("PD") == data.get("publication_date")
-    assert data.get("PG") == data.get("page_count")
-    assert data.get("PI") == data.get("publisher_city")
-    assert data.get("PM") == data.get("pubmed_id")
-    assert data.get("PN") == data.get("part_number")
-    assert data.get("PT") == data.get("publication_type")
-    assert data.get("PU") == data.get("publisher")
-    assert data.get("PY") == data.get("year_published")
-    assert data.get("RI") == data.get("researcherid_number")
-    assert data.get("RP") == data.get("reprint_address")
-    assert data.get("SC") == data.get("research_areas")
-    assert data.get("SE") == data.get("book_series_title")
-    assert data.get("SI") == data.get("special_issue")
-    assert data.get("SN") == data.get("issn")
-    assert data.get("SP") == data.get("conference_sponsors")
-    assert data.get("SU") == data.get("supplement")
-    assert data.get("TC") == data.get("wos_times_cited_count")
-    assert data.get("TC") == data.get("wos_times_cited")
-    assert data.get("TI") == data.get("title")
-    assert data.get("U1") == data.get("usage_count")
-    assert data.get("U2") == data.get("usage_count")
-    assert data.get("UT") == data.get("unique_article_identifier")
-    assert data.get("VL") == data.get("volume")
-    assert data.get("WC") == data.get("web_of_science_categories")
-    assert data.get("Z9") == data.get("total_times_cited_count")
-    assert data.get("Z9") == data.get("times_cited")
-
-
-def test_article_properties(article):
-    assert isinstance(article.text, str)
-    assert isinstance(article.raw_data, dict)
-    assert isinstance(article.data, dict)
-
-
-def test_collection_from_filenames(collection_many_documents):
-    for article in collection_many_documents.articles:
-        assert isinstance(article, Article)
-
-    for file in collection_many_documents.files:
-        assert hasattr(file, "read")
-        assert isinstance(file, (io.StringIO, io.TextIOWrapper))
-        assert file.tell() == 0
-
-
-def test_collection_from_glob():
-    collection = CollectionLazy.from_glob("docs/examples/*.txt")
-    for article in collection.articles:
-        assert isinstance(article, Article)
-
-    assert len(list(collection.articles)) == 500
-
-    for file in collection.files:
-        assert hasattr(file, "read")
-        assert isinstance(file, (io.StringIO, io.TextIOWrapper))
-        assert file.tell() == 0
-
-
-def test_collection_from_streams(filename_single_document):
-    with open(filename_single_document) as file:
-        _ = file.read()
-
-        collection = CollectionLazy(file)
-        for article in collection.articles:
-            assert isinstance(article, Article)
-
-        for file in collection.files:
-            assert hasattr(file, "read")
-            assert isinstance(file, (io.StringIO, io.TextIOWrapper))
-            assert file.tell() == 0
-
-
-def test_collection_with_duplicated(filename_single_document, filename_many_documents):
-    collection = CollectionLazy.from_filenames(
-        filename_single_document, filename_single_document, filename_single_document
-    )
-    assert len(list(collection.files)) == 3
-    assert len(list(collection.articles)) == 1
-
-    collection = CollectionLazy.from_filenames(
-        filename_many_documents, filename_many_documents, filename_many_documents
-    )
-    assert len(list(collection.files)) == 3
-    assert len(list(collection.articles)) == 500
-
-
-def test_collection_authors(collection_single_document):
-    authors = collection_single_document.authors
-    assert next(authors) == "Wodarz, Siggi"
-    assert next(authors) == "Hasegawa, Takashi"
-    assert next(authors) == "Ishio, Shunji"
-    assert next(authors) == "Homma, Takayuki"
-
-
-def test_collection_coauthors(collection_single_document):
-    coauthors = collection_single_document.coauthors
-    assert next(coauthors) == ("Hasegawa, Takashi", "Homma, Takayuki")
-    assert next(coauthors) == ("Hasegawa, Takashi", "Ishio, Shunji")
-    assert next(coauthors) == ("Hasegawa, Takashi", "Wodarz, Siggi")
-    assert next(coauthors) == ("Homma, Takayuki", "Ishio, Shunji")
-    assert next(coauthors) == ("Homma, Takayuki", "Wodarz, Siggi")
-    assert next(coauthors) == ("Ishio, Shunji", "Wodarz, Siggi")
-
-
-def test_collection_completeness_single_article(collection_single_document):
-    assert collection_single_document.completeness() == {
-        "PT": 1,
-        "AU": 1,
-        "AF": 1,
-        "TI": 1,
-        "SO": 1,
-        "LA": 1,
-        "DT": 1,
-        "DE": 1,
-        "ID": 1,
-        "AB": 1,
-        "C1": 1,
-        "RP": 1,
-        "EM": 1,
-        "OI": 1,
-        "FU": 1,
-        "FX": 1,
-        "CR": 1,
-        "NR": 1,
-        "TC": 1,
-        "Z9": 1,
-        "U1": 1,
-        "U2": 1,
-        "PU": 1,
-        "PI": 1,
-        "PA": 1,
-        "SN": 1,
-        "EI": 1,
-        "J9": 1,
-        "JI": 1,
-        "PD": 1,
-        "PY": 1,
-        "VL": 1,
-        "BP": 1,
-        "EP": 1,
-        "DI": 1,
-        "PG": 1,
-        "WC": 1,
-        "SC": 1,
-        "GA": 1,
-        "UT": 1,
-    }
-
-
-def test_collection_completeness_many_articles(collection_many_documents):
-    assert collection_many_documents.completeness() == {
-        "AB": 497 / 500,
-        "AF": 500 / 500,
-        "AR": 216 / 500,
-        "AU": 500 / 500,
-        "BP": 281 / 500,
-        "C1": 500 / 500,
-        "CL": 152 / 500,
-        "CR": 500 / 500,
-        "CT": 152 / 500,
-        "CY": 152 / 500,
-        "DE": 336 / 500,
-        "DI": 486 / 500,
-        "DT": 500 / 500,
-        "EI": 262 / 500,
-        "EM": 469 / 500,
-        "EP": 281 / 500,
-        "FU": 270 / 500,
-        "FX": 270 / 500,
-        "GA": 500 / 500,
-        "HO": 24 / 500,
-        "ID": 440 / 500,
-        "IS": 458 / 500,
-        "J9": 500 / 500,
-        "JI": 500 / 500,
-        "LA": 500 / 500,
-        "NR": 500 / 500,
-        "OI": 168 / 500,
-        "PA": 500 / 500,
-        "PD": 469 / 500,
-        "PG": 500 / 500,
-        "PI": 500 / 500,
-        "PM": 60 / 500,
-        "PN": 60 / 500,
-        "PT": 500 / 500,
-        "PU": 500 / 500,
-        "PY": 500 / 500,
-        "RI": 172 / 500,
-        "RP": 498 / 500,
-        "SC": 500 / 500,
-        "SI": 23 / 500,
-        "SN": 500 / 500,
-        "SO": 500 / 500,
-        "SP": 88 / 500,
-        "SU": 2 / 500,
-        "TC": 500 / 500,
-        "TI": 500 / 500,
-        "U1": 500 / 500,
-        "U2": 500 / 500,
-        "UT": 500 / 500,
-        "VL": 495 / 500,
-        "WC": 500 / 500,
-        "Z9": 500 / 500,
-    }
-
-
-def test_collection_citation_pairs(collection_single_document):
-    pairs = [
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Albrecht TR, 2013, IEEE T MAGN, V49, P773, DOI 10.1109/TMAG.2012.2227303",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "BUSCHOW KHJ, 1983, J MAGN MAGN MATER, V38, P1, DOI 10.1016/0304-8853(83)90097-5",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Gapin AI, 2006, J APPL PHYS, V99, DOI 10.1063/1.2163289",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Homma Takayuki, 2015, ECS Transactions, V64, P1, DOI 10.1149/06431.0001ecst",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Kryder MH, 2008, P IEEE, V96, P1810, DOI 10.1109/JPROC.2008.2004315",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Kubo T, 2005, J APPL PHYS, V97, DOI 10.1063/1.1855572",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Lodder JC, 2004, J MAGN MAGN MATER, V272, P1692, DOI 10.1016/j.jmmm.2003.12.259",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Mitsuzuka K, 2007, IEEE T MAGN, V43, P2160, DOI 10.1109/TMAG.2007.893129",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Ouchi T, 2010, ELECTROCHIM ACTA, V55, P8081, DOI 10.1016/j.electacta.2010.02.073",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Pattanaik G, 2006, J APPL PHYS, V99, DOI 10.1063/1.2150805",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Pattanaik G, 2007, ELECTROCHIM ACTA, V52, P2755, DOI 10.1016/j.electacta.2006.07.062",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Piramanayagam SN, 2009, J MAGN MAGN MATER, V321, P485, DOI 10.1016/j.jmmm.2008.05.007",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Ross CA, 2008, MRS BULL, V33, P838, DOI 10.1557/mrs2008.179",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Shiroishi Y, 2009, IEEE T MAGN, V45, P3816, DOI 10.1109/TMAG.2009.2024879",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Sirtori V, 2011, ACS APPL MATER INTER, V3, P1800, DOI 10.1021/am200267u",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Sohn JS, 2009, NANOTECHNOLOGY, V20, DOI 10.1088/0957-4484/20/2/025302",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Sun SH, 2000, SCIENCE, V287, P1989, DOI 10.1126/science.287.5460.1989",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Terris BD, 2007, MICROSYST TECHNOL, V13, P189, DOI 10.1007/s00542-006-0144-9",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Wang JP, 2008, P IEEE, V96, P1847, DOI 10.1109/JPROC.2008.2004318",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Weller D, 1999, IEEE T MAGN, V35, P4423, DOI 10.1109/20.809134",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Weller D, 2000, IEEE T MAGN, V36, P10, DOI 10.1109/20.824418",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Wodarz S, 2016, ELECTROCHIM ACTA, V197, P330, DOI 10.1016/j.electacta.2015.11.136",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Xu X, 2012, J ELECTROCHEM SOC, V159, pD240, DOI 10.1149/2.090204jes",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Yang X, 2007, J VAC SCI TECHNOL B, V25, P2202, DOI 10.1116/1.2798711",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Yasui N, 2003, APPL PHYS LETT, V83, P3347, DOI 10.1063/1.1622787",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Yua H., 2009, J APPL PHYS, V105",
-        ),
-        (
-            "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061",
-            "Zhu JG, 2008, IEEE T MAGN, V44, P125, DOI 10.1109/TMAG.2007.911031",
-        ),
-    ]
-
-    assert list(collection_single_document.citation_pairs()) == pairs
-
-
-def test_command_line_interface():
-    """Test the CLI."""
-    runner = CliRunner()
-    result = runner.invoke(cli.main)
-    assert result.exit_code == 0
-    assert "A little cli for wos tools" in result.output
-    help_result = runner.invoke(cli.main, ["--help"])
-    assert help_result.exit_code == 0
-    assert "--help  Show this message and exit." in help_result.output
-
-
-def test_command_line_interface_citation_pairs(filename_single_document):
-    runner = CliRunner()
-    result = runner.invoke(cli.citation_pairs)
-    assert result.exit_code == 0
-    assert "You should give at least a file with documents." in result.output
-
-    result = runner.invoke(cli.citation_pairs, filename_single_document)
-    assert (
-        "Wodarz S, 2017, J MAGN MAGN MATER, V430, P52, DOI 10.1016/j.jmmm.2017.01.061"
-        in result.output
-    )
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index bc28a7f..0000000
--- a/tox.ini
+++ /dev/null
@@ -1,24 +0,0 @@
-[tox]
-envlist = py36, py37, flake8
-
-[travis]
-python =
-    3.6: py36
-    3.7: py37
-
-[testenv:flake8]
-basepython = python
-deps = flake8
-commands = flake8 --ignore=E501 wostools
-
-[testenv]
-setenv =
-    PYTHONPATH = {toxinidir}
-deps =
-    -r{toxinidir}/requirements_dev.txt
-; If you want to make tox run the tests with the same versions, create a
-; requirements.txt with the pinned versions and uncomment the following line:
-;     -r{toxinidir}/requirements.txt
-commands =
-    pip install -U pip
-    py.test --basetemp={envtmpdir}
diff --git a/wostools/__init__.py b/wostools/__init__.py
index e73dcc4..5885702 100644
--- a/wostools/__init__.py
+++ b/wostools/__init__.py
@@ -2,8 +2,10 @@
 
 __author__ = """Core of Science"""
 __email__ = "dev@coreofscience.com"
-__version__ = "1.1.0"
+__version__ = "2.0.0"
 
-from wostools.wostools import CollectionLazy, WosToolsError, Article
+from wostools.article import Article
+from wostools.lazy import LazyCollection
+from wostools.cached import CachedCollection
 
-__all__ = ["CollectionLazy", "WosToolsError", "Article"]
+__all__ = ["CachedCollection", "LazyCollection", "Article"]
diff --git a/wostools/_testutils.py b/wostools/_testutils.py
new file mode 100644
index 0000000..0e4dc9b
--- /dev/null
+++ b/wostools/_testutils.py
@@ -0,0 +1,79 @@
+from contextlib import contextmanager
+from dataclasses import dataclass
+from typing import Generic, Iterator, List, Optional, TypeVar
+
+T = TypeVar("T")
+
+
+@dataclass
+class Context(Generic[T]):
+    """Mutable holder for the latest value or error produced by a test step.
+
+    ``push`` replaces the current value and archives the previous one in
+    ``history``; the ``assert_*`` context managers check that the expected
+    piece of state exists and hand it to the ``with`` body.
+    """
+
+    # Values replaced by later pushes, oldest first; created on first push.
+    history: Optional[List[T]] = None
+    # Error passed to the most recent push, or None.
+    error: Optional[Exception] = None
+    # Value passed to the most recent push, or None.
+    data: Optional[T] = None
+
+    def push(self, data: Optional[T], error: Optional[Exception] = None):
+        """Make ``data`` and ``error`` current, archiving the previous value.
+
+        The previous value is appended to ``history`` unless it is None, so
+        a push that follows a captured error adds no placeholder entry.
+        """
+        if self.history is None:
+            self.history = []
+        # Test against None, not truthiness: 0, "" or an empty collection
+        # are legitimate results and must not vanish from the history.
+        if self.data is not None:
+            self.history.append(self.data)
+        self.data = data
+        self.error = error
+
+    @contextmanager
+    def capture(self):
+        """Run the ``with`` body, recording any ``Exception`` it raises.
+
+        The exception is swallowed and stored through ``push(None, error=e)``;
+        nothing is pushed when the body finishes without raising.
+        """
+        try:
+            yield
+        except Exception as e:
+            self.push(None, error=e)
+
+    @contextmanager
+    def assert_data(self, name=None) -> Iterator[T]:
+        """Yield the current value, asserting that one has been pushed.
+
+        ``name`` only customizes the assertion message (defaults to "data").
+        """
+        if name is None:
+            name = "data"
+        # A falsy value still counts as computed; only None means "missing".
+        assert self.data is not None, f"No {name} computed yet"
+        yield self.data
+
+    @contextmanager
+    def assert_error(self) -> Iterator[Exception]:
+        """Yield the current error, asserting that one has been recorded."""
+        assert self.error is not None, "Expected an error and found none"
+        yield self.error
+
+    @contextmanager
+    def assert_history(self, count):
+        """Yield the ``count`` most recently archived values as a list.
+
+        Fails with an AssertionError when fewer than ``count`` are archived.
+        """
+        # history stays None until the first push; treat that as empty.
+        history = self.history or []
+        assert len(history) >= count, f"Expected at least {count} past values"
+        # history[-0:] would be the whole list, so handle count == 0 apart.
+        yield history[-count:] if count else []
diff --git a/wostools/article.py b/wostools/article.py
new file mode 100644
index 0000000..e66c11f
--- /dev/null
+++ b/wostools/article.py
@@ -0,0 +1,156 @@
+import collections
+import logging
+import re
+from typing import Any, List, Mapping, Optional, Set
+
+from wostools.exceptions import InvalidIsiLine, InvalidReference, MissingLabelFields
+from wostools.fields import parse_all
+
+logger = logging.getLogger(__name__)
+
+# The null part accounts for an ISI wok bug
+ISI_LINE_PATTERN = re.compile(r"^(null)?((?P<field>[A-Z0-9]{2})|  )( (?P<value>.*))?$")
+
+ISI_CITATION_PATTERN = re.compile(
+    r"""^(?P<AU>[^,]+)?,[ ]         # First author
+        (?P<PY>\d{4})?,[ ]          # Publication year
+        (?P<J9>[^,]+)?              # Journal
+        (,[ ]V(?P<VL>[\w\d-]+))?    # Volume
+        (,[ ][Pp](?P<BP>\d+))?      # Start page
+        (,[ ]DOI[ ](?P<DI>.+))?     # The all important DOI
+        """,
+    re.X,
+)
+
+
+class Article(object):
+    def __init__(
+        self,
+        title: Optional[str],
+        authors: List[str],
+        year: Optional[int],
+        journal: Optional[str],
+        volume: Optional[str] = None,
+        page: Optional[str] = None,
+        doi: Optional[str] = None,
+        references: Optional[List[str]] = None,
+        keywords: Optional[List[str]] = None,
+        sources: Optional[Set[str]] = None,
+        extra: Optional[Mapping] = None,
+    ):
+        self.title: Optional[str] = title
+        self.authors: List[str] = authors
+        self.keywords: List[str] = keywords or []
+        self.year: Optional[int] = year
+        self.journal: Optional[str] = journal
+        self.volume: Optional[str] = volume
+        self.page: Optional[str] = page
+        self.doi: Optional[str] = doi
+        self.references: List[str] = references or []
+        self.sources: Set[str] = sources or set()
+        self.extra: Mapping[str, Any] = extra or {}
+
+    @property
+    def label(self):
+        if not (self.authors and self.year and self.journal):
+            raise MissingLabelFields(self)
+        pieces = {
+            "AU": self.authors[0].replace(",", ""),
+            "PY": str(self.year),
+            "J9": str(self.journal),
+            "VL": f"V{self.volume}" if self.volume else None,
+            "BP": f"P{self.page}" if self.page else None,
+            "DI": f"DOI {self.doi}" if self.doi else None,
+        }
+        return ", ".join(value for value in pieces.values() if value)
+
+    def to_dict(self, simplified=True):
+        """
+        Transform the article into some key value pairs for easy transportation.
+        """
+        extra = (
+            {
+                "references": self.references,
+                "extra": self.extra,
+                "sources": list(self.sources),
+            }
+            if not simplified
+            else {}
+        )
+        return {
+            "title": self.title,
+            "authors": self.authors,
+            "keywords": self.keywords,
+            "year": self.year,
+            "journal": self.journal,
+            "volume": self.volume,
+            "page": self.page,
+            "doi": self.doi,
+            **extra,
+        }
+
+    def merge(self, other: "Article") -> "Article":
+        """Return a new Article combining both; scalar fields prefer ``self``."""
+        if self.label != other.label:
+            logger.warning("Mixing articles with different labels might result in tragedy")
+        # dict.fromkeys de-duplicates in first-seen order, so authors[0] (used by label) is stable.
+        return Article(
+            title=self.title or other.title,
+            authors=list(dict.fromkeys([*self.authors, *other.authors])),
+            year=self.year or other.year,
+            journal=self.journal or other.journal,
+            volume=self.volume or other.volume,
+            page=self.page or other.page,
+            doi=self.doi or other.doi,
+            sources={*self.sources, *other.sources},
+            extra={**self.extra, **other.extra},
+            references=list(dict.fromkeys([*self.references, *other.references])),
+            keywords=list(dict.fromkeys([*self.keywords, *other.keywords])),
+        )
+
+    @classmethod
+    def from_isi_text(cls, raw: str) -> "Article":
+        data = collections.defaultdict(list)
+        field = None
+        for line in raw.split("\n"):
+            match = ISI_LINE_PATTERN.match(line)
+            if not match:
+                raise InvalidIsiLine(line)
+            parsed = match.groupdict()
+            field = parsed.get("field") or field
+            if not field or "value" not in parsed or parsed["value"] is None:
+                continue
+            data[field].append(parsed["value"])
+        processed = parse_all(dict(data))
+        return cls(
+            title=processed.get("title"),
+            authors=processed.get("authors", []),
+            year=processed.get("year"),
+            journal=processed.get("source_abbreviation"),
+            volume=processed.get("volume"),
+            page=processed.get("beginning_page"),
+            doi=processed.get("DOI"),
+            references=processed.get("references"),
+            keywords=processed.get("keywords"),
+            extra=processed,
+            sources={raw},
+        )
+
+    @classmethod
+    def from_isi_citation(cls, reference: str) -> "Article":
+        match = ISI_CITATION_PATTERN.match(reference)
+        if not match:
+            raise InvalidReference(reference)
+        data = {key: [value] for key, value in match.groupdict().items() if value}
+        processed = parse_all(data)
+        return cls(
+            title=processed.get("title"),
+            authors=processed.get("authors", []),
+            year=processed.get("year"),
+            journal=processed.get("source_abbreviation"),
+            volume=processed.get("volume"),
+            page=processed.get("beginning_page"),
+            doi=processed.get("DOI"),
+            extra=processed,
+            sources={reference},
+        )
diff --git a/wostools/base.py b/wostools/base.py
new file mode 100644
index 0000000..ea48f56
--- /dev/null
+++ b/wostools/base.py
@@ -0,0 +1,125 @@
+"""
+Base collection for a shared API.
+"""
+
+import glob
+import logging
+from typing import Iterable, Iterator, Tuple
+
+from wostools.article import Article
+from wostools.exceptions import InvalidReference
+
+logger = logging.getLogger(__name__)
+
+
+class BaseCollection(object):
+    """
+    A collection of WOS text files.
+    """
+
+    def __init__(self, *files):
+        self._files = files
+        for file in self._files:
+            file.seek(0)
+
+    @classmethod
+    def from_glob(cls, pattern):
+        """Creates a new collection from a pattern using glob.
+
+        Args:
+            pattern (str): String with the pattern to be passed to glob.
+
+        Returns:
+            BaseCollection: An instance of `cls` built from the matching files.
+        """
+        return cls.from_filenames(*glob.glob(pattern))
+
+    @classmethod
+    def from_filenames(cls, *filenames):
+        """Creates a new collection from a list of filenames.
+
+        Args:
+            filenames (str): Paths of the files to read, one per argument.
+
+        Returns:
+            BaseCollection: An instance of `cls` holding the opened files
+                (opened as "utf-8-sig" text; not closed by this method).
+        """
+        files = [open(filename, encoding="utf-8-sig") for filename in filenames]
+        return cls(*files)
+
+    @property
+    def _article_texts(self) -> Iterable[str]:
+        """Iterates over all the single article texts in the collection.
+
+        Returns:
+            generator: The blank-line separated chunks of every file, except "EF".
+        """
+        for filehandle in self._files:
+            filehandle.seek(0)
+            data = filehandle.read()
+            filehandle.seek(0)  # rewind so the handle can be read again
+            for article_text in data.split("\n\n"):
+                if article_text != "EF":
+                    yield article_text
+
+    def _articles(self) -> Iterable[Article]:
+        """
+        Should iterate over all the articles in the ISI file, excluding references.
+        """
+        raise NotImplementedError(
+            "Sub classes should know how to iterate over articles"
+        )
+
+    def __iter__(self) -> Iterator[Article]:
+        """
+        Should iterate over all articles known in the collection.
+        """
+        for article in self._articles():
+            yield article
+            for reference in article.references:
+                try:
+                    yield Article.from_isi_citation(reference)
+                except InvalidReference:
+                    logger.info(
+                        f"Ignoring malformed reference '{reference}' from '{article.label}'"
+                    )
+
+    def __len__(self):
+        return sum(1 for _ in self)
+
+    @property
+    def authors(self) -> Iterable[str]:
+        """Iterates over all article authors, including duplicates
+
+        Returns:
+            generator: A generator with the authors (one by one) of the
+                articles in the collection.
+        """
+        raise NotImplementedError("Sub classes should know how to iterate over authors")
+
+    @property
+    def coauthors(self) -> Iterable[Tuple[str, str]]:
+        """Iterates over coauthor pairs.
+
+        Returns:
+            generator: A generator with the pair of coauthors of the articles
+            in the collections.
+        """
+        raise NotImplementedError(
+            "Sub classes should know how to iterate over coauthors"
+        )
+
+    # Plain method, not a property: subclasses define it and callers invoke it as a method.
+    def citation_pairs(self) -> Iterable[Tuple[Article, Article]]:
+        """
+        Computes the citation pairs for the articles in the collection.
+
+        Returns:
+            generator: A generator with the citation links: pairs of
+            articles, where the first element is the article which cites the
+            second element.
+        """
+        raise NotImplementedError(
+            "Sub classes should know how to iterate over citation pairs"
+        )
diff --git a/wostools/cached.py b/wostools/cached.py
new file mode 100644
index 0000000..ac72670
--- /dev/null
+++ b/wostools/cached.py
@@ -0,0 +1,98 @@
+"""
+Collection with a nice cache.
+"""
+
+import itertools
+import logging
+from typing import Dict, Iterable, Iterator, Tuple
+
+from wostools.article import Article
+from wostools.base import BaseCollection
+from wostools.exceptions import InvalidReference
+
+logger = logging.getLogger(__name__)
+
+
+class CachedCollection(BaseCollection):
+    """
+    A collection of WOS text files.
+    """
+
+    def __init__(self, *files):
+        super().__init__(*files)
+        self._cache_key = None
+        self._cache: Dict[str, Article] = {}
+        self._preheat()
+
+    def _articles(self) -> Iterable[Article]:
+        for article_text in self._article_texts:
+            yield Article.from_isi_text(article_text)
+
+    def _add_article(self, article):
+        label = article.label
+        if label in self._cache:
+            article = article.merge(self._cache[label])
+        self._cache[label] = article
+
+    def _preheat(self):
+        """Fill the label -> Article cache, once per set of file objects."""
+        key = ":".join(str(id(file)) for file in self._files)
+        if key == self._cache_key:
+            return
+        for article in self._articles():
+            self._add_article(article)
+            for reference in article.references:
+                try:
+                    self._add_article(Article.from_isi_citation(reference))
+                except InvalidReference:
+                    logger.info(
+                        f"Ignoring malformed reference '{reference}' from '{article.label}'"
+                    )
+        self._cache_key = key
+
+    def __iter__(self) -> Iterator[Article]:
+        """Iterates over all articles.
+
+        Returns:
+            generator: A generator of Articles according to the text articles.
+        """
+        self._preheat()
+        yield from self._cache.values()
+
+    @property
+    def authors(self) -> Iterable[str]:
+        """Iterates over all article authors, including duplicates
+
+        Returns:
+            generator: A generator with the authors (one by one) of the
+                articles in the collection.
+        """
+        for article in self:
+            yield from article.authors
+
+    @property
+    def coauthors(self) -> Iterable[Tuple[str, str]]:
+        """Iterates over coauthor pairs.
+
+        Returns:
+            generator: A generator with the pair of coauthors of the articles
+                in the collections.
+        """
+        for article in self._articles():
+            yield from (
+                (source, target)
+                for source, target in itertools.combinations(sorted(article.authors), 2)
+            )
+
+    def citation_pairs(self) -> Iterable[Tuple[Article, Article]]:
+        """Computes the citation pairs for the articles in the collection.
+
+        Returns:
+            generator: A generator with the citation links: pairs of
+            articles, where the first element is the article which cites the
+            second element.
+        """
+        for article in self._cache.values():
+            for reference in article.references:
+                if reference in self._cache:  # linked only when the text equals a cached label
+                    yield (article, self._cache[reference])
diff --git a/wostools/cli.py b/wostools/cli.py
index e7de372..2209f21 100644
--- a/wostools/cli.py
+++ b/wostools/cli.py
@@ -1,9 +1,9 @@
 import json
+import logging
 
 import click
 
-from wostools import CollectionLazy
-from wostools.fields import field_aliases, field_keys
+from wostools import CachedCollection
 
 
 @click.group()
@@ -11,6 +11,8 @@ def main():
     """
     A little cli for wos tools.
     """
+    logger = logging.getLogger("wostools")
+    logger.setLevel(logging.ERROR)
 
 
 @main.command("citation-pairs")
@@ -31,13 +33,15 @@ def citation_pairs(sources, output):
         click.secho("You should give at least a file with documents.", fg="red")
         return
 
-    collection = CollectionLazy.from_filenames(*[f.name for f in sources])
-    pairs = list(collection.citation_pairs())
+    collection = CachedCollection.from_filenames(*[f.name for f in sources])
+    pairs = [
+        (source.label, target.label) for source, target in collection.citation_pairs()
+    ]
 
     json.dump(pairs, output, indent=2)
 
 
-@main.command("to-json")
+@main.command("to-dict")
 @click.argument("sources", type=click.File("r"), nargs=-1)
 @click.option(
     "--output",
@@ -47,37 +51,25 @@ def citation_pairs(sources, output):
     help="File to save json otuput.",
 )
 @click.option(
-    "--raw",
-    default=False,
+    "-m",
+    "--more",
     is_flag=True,
     show_default=True,
-    help="Flag; If true, the fields are the field tags; If false, the fields are the aliases.",
+    default=False,
+    help="Add extra info to the output",
 )
-def to_json(sources, output, raw):
+def to_dict(sources, output, more):
     """
-    Build a collection by using the sources and print the entries converted to
-    to json format or dumps them in the `output`.
+    Build a collection by using the sources and print its articles as a json
+    list of dicts, or dump them in the `output`.
     """
     if not len(sources) > 0:
         click.secho("You should give at least a file with documents.", fg="red")
         return
 
-    collection = CollectionLazy.from_filenames(*[f.name for f in sources])
-    length = len(collection)
-    output.write("[\n")
-    for i, article in enumerate(collection.articles):
-        fields = field_keys() if raw else field_aliases()
-
-        text = json.dumps(
-            {field: article.data[field] for field in fields if field in article},
-            indent=2,
-        )
-        text = "  " + "\n  ".join(text.split("\n"))
-
-        output.write(text)
-
-        if i + 1 < length:
-            output.write(",\n")
-        else:
-            output.write("\n")
-    output.write("]")
+    collection = CachedCollection.from_filenames(*[f.name for f in sources])
+    json.dump(
+        [article.to_dict(simplified=not more) for article in collection],
+        output,
+        indent=2,
+    )
diff --git a/wostools/exceptions.py b/wostools/exceptions.py
new file mode 100644
index 0000000..cf059e3
--- /dev/null
+++ b/wostools/exceptions.py
@@ -0,0 +1,32 @@
+class WosToolsError(Exception):
+    """
+    Any exception known by wostools.
+    """
+
+
+class InvalidReference(WosToolsError, ValueError):
+    """
+    Raised when we try to create an article out of an invalid reference.
+    """
+
+    def __init__(self, reference: str):
+        super().__init__(f"{reference} does not look like an ISI citation")
+
+
+class InvalidIsiLine(WosToolsError, ValueError):
+    """
+    Raised when we encounter an invalid line when processing an ISI file.
+    """
+
+    def __init__(self, line: str):
+        super().__init__(f"'{line}' is not a valid ISI file line")
+
+
+class MissingLabelFields(WosToolsError, ValueError):
+    """
+    Raised when we don't have any of the required fields for an ISI reference.
+    """
+
+    def __init__(self, article, message: str = None):
+        self.article = article
+        super().__init__(message or "Missing required fields for label")
diff --git a/wostools/fields.py b/wostools/fields.py
index 0e7c21c..75411b9 100644
--- a/wostools/fields.py
+++ b/wostools/fields.py
@@ -4,7 +4,7 @@
 
 import collections
 import functools
-
+from typing import Any, Dict, List, Mapping
 
 IsiField = collections.namedtuple(
     "IsiField", ["key", "description", "parse", "aliases"]
@@ -16,15 +16,23 @@ def joined(seq, sep=" "):
 
 
 def ident(seq):
-    return list(s.strip() for s in seq)
+    return [s.strip() for s in seq]
 
 
 def delimited(seq, delimiter="; "):
-    return joined(seq).split(delimiter)
+    return [
+        word.replace(delimiter.strip(), "")
+        for words in seq
+        for word in words.split(delimiter)
+        if word
+    ]
 
 
 def integer(seq):
-    return int(joined(seq).strip())
+    if len(seq) > 1:
+        raise ValueError(f"Expected no more than one item and got {seq}")
+    (first,) = seq
+    return int(first.strip())
 
 
 FIELDS = {
@@ -58,7 +66,10 @@ def integer(seq):
     "CY": IsiField("CY", "Conference Date", joined, ["conference_date"]),
     "DE": IsiField("DE", "Author Keywords", delimited, ["author_keywords"]),
     "DI": IsiField(
-        "DI", "Digital Object Identifier (DOI)", joined, ["digital_object_identifier"]
+        "DI",
+        "Digital Object Identifier (DOI)",
+        joined,
+        ["digital_object_identifier", "DOI"],
     ),
     "DT": IsiField("DT", "Document Type", joined, ["document_type"]),
     "D2": IsiField(
@@ -126,7 +137,9 @@ def integer(seq):
         ["publication_type"],
     ),
     "PU": IsiField("PU", "Publisher", joined, ["publisher"]),
-    "PY": IsiField("PY", "Year Published", integer, ["year_published"]),
+    "PY": IsiField(
+        "PY", "Year Published", integer, ["year_published", "year", "publication_year"]
+    ),
     "RI": IsiField("RI", "ResearcherID Number", delimited, ["researcherid_number"]),
     "RP": IsiField("RP", "Reprint Address", joined, ["reprint_address"]),
     "SC": IsiField("SC", "Research Areas", delimited, ["research_areas"]),
@@ -168,17 +181,21 @@ def integer(seq):
 }
 
 
-def field_aliases():
-    for fields in FIELDS.values():
-        yield fields.aliases[-1]
-
-
-def field_keys():
-    for fields in FIELDS.values():
-        yield fields.key
+def parse(key: str, value: List) -> Dict:
+    """Parse one raw ISI field into a dict keyed by its tag and every alias."""
+    if key in {"FN", "VR"}:
+        return {}  # This disregards headers
+    if key not in FIELDS:
+        raise ValueError(f"{key} is not a known ISI field.")
+    field = FIELDS[key]
+    try:
+        parsed = field.parse(value)
+    except ValueError as e:
+        raise ValueError(f"Field {key}: {e}") from e
+    return {k: parsed for k in [key, *field.aliases]}
 
 
-def preprocess(raw_dict):
+def parse_all(raw_dict: Dict[str, List[str]]) -> Mapping[str, Any]:
     """Preprocesses a dictionary, with information about WoS field tags and its
         value according to a article, with some parser functions that depends on
         the field tag. If there is no a CR field, it adds one to the output with
@@ -200,12 +217,5 @@ def preprocess(raw_dict):
     processed_data = {}
     raw_dict.setdefault("CR", [])
     for key, seq in raw_dict.items():
-        if key in FIELDS:
-            field = FIELDS[key]
-            parsed = field.parse(seq)
-            processed_data[key] = parsed
-            for alias in field.aliases:
-                processed_data[alias] = parsed
-        else:
-            processed_data[key] = " ".join(seq)
+        processed_data.update(parse(key, seq))
     return processed_data
diff --git a/wostools/lazy.py b/wostools/lazy.py
new file mode 100644
index 0000000..36f53d1
--- /dev/null
+++ b/wostools/lazy.py
@@ -0,0 +1,83 @@
+"""
+The whole wostools thing.
+"""
+
+import itertools
+import logging
+from typing import Iterable, Tuple
+
+from wostools.article import Article
+from wostools.base import BaseCollection
+from wostools.exceptions import InvalidReference
+
+logger = logging.getLogger(__name__)
+
+
+class LazyCollection(BaseCollection):
+    """A collection of WOS text files.
+
+    Args:
+        filenames (str): Strings with the names of the files containing
+            articles.
+    """
+
+    @property
+    def _article_texts(self):
+        """Iterates over all the single article texts in the collection.
+
+        Returns:
+            generator: The blank-line separated chunks of every file, except "EF".
+        """
+        for filehandle in self._files:
+            filehandle.seek(0)
+            data = filehandle.read()
+            filehandle.seek(0)  # rewind so the handle can be read again
+            for article_text in data.split("\n\n"):
+                if article_text != "EF":
+                    yield article_text
+
+    def _articles(self) -> Iterable[Article]:
+        for article_text in self._article_texts:
+            yield Article.from_isi_text(article_text)
+
+    @property
+    def authors(self) -> Iterable[str]:
+        """Iterates over all article authors, including duplicates
+
+        Returns:
+            generator: A generator with the authors (one by one) of the
+                articles in the collection.
+        """
+        for article in self:
+            yield from article.authors
+
+    @property
+    def coauthors(self) -> Iterable[Tuple[str, str]]:
+        """Iterates over coauthor pairs.
+
+        Returns:
+            generator: A generator with the pair of coauthors of the articles
+                in the collections.
+        """
+        for article in self._articles():
+            yield from (
+                (source, target)
+                for source, target in itertools.combinations(sorted(article.authors), 2)
+            )
+
+    def citation_pairs(self) -> Iterable[Tuple[Article, Article]]:
+        """Computes the citation pairs for the articles in the collection.
+
+        Returns:
+            generator: A generator with the citation links: pairs of
+            articles, where the first element is the article which cites the
+            second element (references are parsed once, right here).
+        """
+        for article in self._articles():
+            for reference in article.references:
+                try:
+                    yield (article, Article.from_isi_citation(reference))
+                except InvalidReference:
+                    logger.info(
+                        f"Ignoring malformed reference '{reference}' from '{article.label}'"
+                    )
diff --git a/wostools/wostools.py b/wostools/wostools.py
deleted file mode 100644
index 537e924..0000000
--- a/wostools/wostools.py
+++ /dev/null
@@ -1,330 +0,0 @@
-"""
-The whole wostools thing.
-"""
-
-import collections
-import glob
-import itertools
-import re
-from typing import Dict, Callable, Optional, Tuple, TypeVar, Iterable
-
-from wostools.fields import preprocess
-
-
-LABEL_ATTRIBUTES = {
-    "AU": lambda au: au[0].replace(",", ""),
-    "PY": lambda py: py[0],
-    "J9": lambda j9: j9[0],
-    "VL": lambda vl: f"V{vl[0]}",
-    "BP": lambda bp: f"P{bp[0]}",
-    "DI": lambda di: f"DOI {di[0]}",
-}
-
-
-_T = TypeVar("T")
-_V = TypeVar("V")
-
-
-class WosToolsError(Exception):
-    """
-    All the errors go here.
-    """
-
-    pass
-
-
-def parse_label(label: str) -> Dict:
-    pattern = re.compile(
-        r"""^(?P<AU>[^,]+)?,[ ]         # First author
-            (?P<PY>\d{4})?,[ ]          # Publication year
-            (?P<J9>[^,]+)?              # Journal
-            (,[ ]V(?P<VL>[\w\d-]+))?    # Volume
-            (,[ ][Pp](?P<BP>\d+))?      # Start page
-            (,[ ]DOI[ ](?P<DI>.+))?     # The all important DOI
-            """,
-        re.X,
-    )
-
-    default_value = {attr: 0 if attr == "PY" else None for attr in LABEL_ATTRIBUTES}
-
-    match_result = pattern.match(label)
-    if match_result:
-        match_dict = match_result.groupdict()
-        match_dict["PY"] = int(match_dict["PY"] or 0)
-        return match_dict
-    else:
-        return default_value
-
-
-class Article(object):
-    """
-    Abstract a WoS article. It creates some structures to manage the data
-        related to an article. All the fields could be called as attributes.
-        Finally, it contains a method to return a sanitized (and hope unique)
-        label.
-
-    Args:
-        article_text (str): A string containing the record for a WoS article.
-    """
-
-    def __init__(self, article_text):
-        if article_text.startswith("FN"):
-            article_text = "\n".join(article_text.split("\n")[2:])
-
-        self.__article_text = article_text
-        self.__raw_data = Article.__article_text_to_dict(article_text)
-        self.__processed_data = preprocess(self.__raw_data)
-
-    def __getattr__(self, name):
-        if name not in self.__processed_data:
-            raise AttributeError(
-                f"{self.__class__.__name__} does not have an attribute {name}"
-            )
-        return self.__processed_data[name]
-
-    @property
-    def label_attrs(self):
-        return {attr: self.__processed_data.get(attr) for attr in LABEL_ATTRIBUTES}
-
-    @property
-    def label(self):
-        """Builds a label using the fields ["AU", "PY", "J9", "VL", "PG", "DI"].
-
-        Returns:
-            str: A label with those required fields separated by a comma.
-        """
-
-        normalized_fields = [
-            normalizer(self.__raw_data[field])
-            for field, normalizer in LABEL_ATTRIBUTES.items()
-            if self.__raw_data.get(field)
-        ]
-
-        label = ", ".join(normalized_fields)
-        return label
-
-    def __repr__(self):
-        return self.label
-
-    def keys(self):
-        return self.__raw_data.keys()
-
-    @property
-    def text(self):
-        return self.__article_text
-
-    @property
-    def raw_data(self):
-        return self.__raw_data
-
-    @property
-    def data(self):
-        return self.__processed_data
-
-    @staticmethod
-    def __article_text_to_dict(article_text: str):
-        """Translates an article text into a dict using the WoS field tags:
-                http://wos-resources.roblib.upei.ca/WOK46/help/WOK/hft_wos.html
-
-        Args:
-            article_text (str): String with the text of the record for an article.
-
-        Returns:
-            dict: A dict where the keys are the Web of Science Field Tags and the
-                values are the content of the passed article.
-        """
-
-        if article_text.startswith("FN"):
-            article_text = "\n".join(article_text.split("\n")[2:])
-
-        # Fix little bug with isi files
-        if article_text.startswith("null"):
-            article_text = article_text[4:]
-
-        data = collections.defaultdict(list)
-        field = ""
-        for line in re.split(r"\n+", article_text):
-            name = line[:2]
-            value = line[3:]
-
-            if not name.isspace():
-                field = name
-
-            if field != "ER":
-                data[field].append(value)
-        return dict(data)
-
-    def __contains__(self, value):
-        return value in self.__processed_data
-
-
-class CollectionLazy(object):
-    """A collection of WOS text files.
-
-    Args:
-        *filenames (str): Strings with the names of the files containing
-            articles.
-    """
-
-    def __init__(self, *files):
-        self.__files = files
-        for file in self.__files:
-            file.seek(0)
-
-    @classmethod
-    def from_glob(cls, pattern):
-        """Creates a new collection from a pattern using glob.
-
-        Args:
-            pattern (str): String with the pattern to be passed to glob.
-
-        Returns:
-            CollectionLazy: Collection with the articles by using the pattern.
-        """
-        return cls.from_filenames(*glob.glob(pattern))
-
-    @classmethod
-    def from_filenames(cls, *filenames):
-        """Creates a new collection from a list of filenames.
-
-        Args:
-            *filenames (str): String with the filename.
-
-        Returns:
-            CollectionLazy: Collection with the articles by reading the
-                filenames.
-        """
-        files = []
-        for filename in filenames:
-            try:
-                files.append(open(filename, encoding="utf-8-sig"))
-            except FileNotFoundError:
-                raise WosToolsError(f"The file {filename} was not found")
-        return cls(*files)
-
-    @property
-    def files(self):
-        """Iterates over all files in the collection
-
-        Returns:
-            generator: A generator of stream files.
-        """
-        for filehandle in self.__files:
-            yield filehandle
-
-    @property
-    def __article_texts(self):
-        """Iterates over all the single article texts in the colection.
-
-        Returns:
-            generator: A generator of strings with the text articles.
-        """
-        for filehandle in self.files:
-            filehandle.seek(0)
-            data = filehandle.read()
-            filehandle.seek(0)
-            for article_text in data.split("\n\n"):
-                if article_text != "EF":
-                    yield article_text
-
-    @property
-    def articles(self):
-        """Iterates over all articles.
-
-        Returns:
-            generator: A generator of Articles according to the text articles.
-        """
-        uniques = set()
-        for article_text in self.__article_texts:
-            article = Article(article_text)
-            if article.label not in uniques:
-                uniques.add(article.label)
-                yield article
-            else:
-                continue
-
-    def __len__(self):
-        count = 0
-        for _ in self.articles:
-            count += 1
-        return count
-
-    @property
-    def authors(self):
-        """Iterates over all article authors, including duplicates
-
-        Returns:
-            generator: A generator with the authors (one by one) of the
-                articles in the collection.
-        """
-        authors = (article.AF for article in self.articles if hasattr(article, "AF"))
-        return itertools.chain(*authors)
-
-    @property
-    def coauthors(self):
-        """Iterates over coauthor pairs.
-
-        Returns:
-            generator: A generator with the pair of coauthors of the articles
-                in the collections.
-        """
-        authors_by_article = (
-            article.AF for article in self.articles if hasattr(article, "AF")
-        )
-        return itertools.chain(
-            *(
-                itertools.combinations(sorted(authors), 2)
-                for authors in authors_by_article
-            )
-        )
-
-    def completeness(self):
-        """Computes the completeness of the collection by key.
-
-        Returns:
-            dict: A dictionary where the keys are strings corresponding to the
-                WoS field tags and the values are the ratio between the articles
-                containing that field and the total number of articles. E.g., if
-                all the articles contain the field AF, the completeness for the
-                tag AF is 1. On the other hand, e.g., if the half of the articles
-                contain the tag DI while the other half do not, the completeness
-                for the tag DI is 0.5.
-        """
-        counters = collections.defaultdict(int)
-        total = 0
-        for article in self.articles:
-            total += 1
-            for key in article.keys():
-                counters[key] += 1
-        return {key: val / total for key, val in counters.items()}
-
-    @staticmethod
-    def metadata_pair_parser(
-        article: Article, reference: str
-    ) -> Tuple[Tuple[str, Dict], Tuple[str, Dict]]:
-        """
-        Convenience function to pass to `citation_pairs` so that we get in 
-        each side of a citation the respective labels and attributes.
-        """
-        return (
-            (article.label, article.label_attrs),
-            (reference, parse_label(reference)),
-        )
-
-    def citation_pairs(
-        self, pair_parser: Optional[Callable[[Article, str], Tuple[_T, _V]]] = None
-    ) -> Iterable[Tuple[_T, _V]]:
-        """Computes the citation pairs for the articles in the collection.
-
-        Returns:
-            genertator: A generator with the citation links: pairs of article
-            labesl, where the firts element is the article which cites the
-            second element.
-        """
-        if pair_parser is None:
-            pair_parser = lambda a, r: (a.label, r)
-        yield from (
-            pair_parser(article, reference)
-            for article in self.articles
-            for reference in article.references
-        )