# Create a dataframe of DOIs

In [1]:
import os
import lzma
import csv

import pymongo

In [2]:
# Create a client for the MongoDB server on localhost:27017 and look up the
# `works` collection inside the `crossref` database.
client = pymongo.MongoClient(host='localhost', port=27017)
crossref_db = client['crossref']
works = crossref_db['works']

In [3]:
# Show the number of documents in the works collection with thousands
# separators. Collection.count() is deprecated since PyMongo 3.7 and removed
# in 4.0; estimated_document_count() is its replacement for an unfiltered
# count and reads the figure from collection metadata.
f'{works.estimated_document_count():,}'

'87,542,370'

In [4]:
def get_issued_date(work):
    """
    Return the issued date of a work as a hyphen-joined string.

    Crossref describes "issued" as the earliest of published-print and
    published-online (https://git.io/vSBPz). The work must carry exactly one
    entry under work['issued']['date-parts']; each of its integer parts is
    zero-padded to at least two digits and the parts are joined with '-', so
    the result can be as short as a year alone (e.g. '2015') or as long as
    '2015-03-07'. Returns None when the first date part is None.
    """
    # Single-element unpacking: raises ValueError unless there is exactly
    # one date-parts entry.
    [date_parts] = work['issued']['date-parts']
    if date_parts[0] is None:
        return None
    return '-'.join(f'{part:02d}' for part in date_parts)

In [5]:
# Write a DOI-to-ISSN mapping to an xz-compressed TSV file: one row per
# (work, ISSN) pair. Works without an 'ISSN' field contribute no rows.
path = os.path.join('data', 'doi-to-issn.tsv.xz')
# newline='' leaves line endings to the csv module, as its documentation
# requires (otherwise '\r\n' becomes '\r\r\n' on Windows); the encoding is
# pinned so the output does not depend on the machine's locale.
with lzma.open(path, 'wt', encoding='utf-8', newline='') as write_file:
    writer = csv.writer(write_file, delimiter='\t')
    writer.writerow(['doi', 'type', 'issued', 'issn'])
    for work in works.find():
        issued = get_issued_date(work)
        head = work['DOI'], work['type'], issued
        # Repeat the shared leading columns once for every ISSN of the work.
        writer.writerows(head + (issn,) for issn in work.get('ISSN', []))

In [6]:
# Close the MongoDB client created earlier in the notebook.
client.close()