# Export Crossref types to a TSV

In [1]:
import os
import lzma
import csv
import collections

import pymongo
import pandas

In [2]:
# Connect to the MongoDB server on localhost and grab a handle to the
# `types` collection inside the `crossref` database.
client = pymongo.MongoClient(host='localhost', port=27017)
crossref_db = client['crossref']
types = crossref_db['types']

In [3]:
# Number of documents in the types collection. Collection.count() was
# deprecated in PyMongo 3.7 and removed in 4.0; count_documents({}) with an
# empty filter is the supported way to count every document.
types.count_documents({})

26

In [4]:
# Maps field names in the stored type documents to the column names used in
# the exported table. Insertion order matters: it fixes the column order below.
renamer = dict(
    id='type_id',
    label='type_label',
)

In [5]:
# Load every document from the types collection into a frame, rename the
# fields per `renamer`, keep only the renamed columns, and order by type_id.
type_records = types.find()
kept_columns = list(renamer.values())
all_fields_df = pandas.DataFrame.from_records(type_records)
renamed_df = all_fields_df.rename(columns=renamer)
type_df = renamed_df[kept_columns].sort_values(by='type_id')
type_df.head(n=2)

Unnamed: 0,type_id,type_label
7,book,Book
14,book-chapter,Book Chapter


In [6]:
# Tally how many rows of the xz-compressed, tab-separated DOI file carry each
# value in their `type` column.
path = os.path.join('data', 'doi.tsv.xz')
# newline='' hands line-ending handling to the csv module, as the csv docs
# require; without it, newlines embedded in quoted fields can be misread.
with lzma.open(path, 'rt', newline='') as read_file:
    reader = csv.DictReader(read_file, delimiter='\t')
    counter = collections.Counter(row['type'] for row in reader)

# Counter returns 0 for keys it never saw, so a type_id that does not occur
# in the DOI file gets n_works == 0 rather than NaN.
type_df['n_works'] = type_df['type_id'].map(lambda x: counter[x])

In [7]:
# Show the types with the largest n_works first (top five rows).
by_n_works_desc = type_df.sort_values(by='n_works', ascending=False)
by_n_works_desc.head(n=5)

Unnamed: 0,type_id,type_label,n_works
4,journal-article,Journal Article,65424681
14,book-chapter,Book Chapter,10250455
11,proceedings-article,Proceedings Article,4763593
13,component,Component,2719953
22,dataset,Dataset,1483010


In [8]:
# Write the type table, including n_works, as a tab-separated file.
# index=False leaves the DataFrame index out of the output.
path = os.path.join('data', 'types.tsv')
type_df.to_csv(path_or_buf=path, index=False, sep='\t')