## Using DH Conference papers to extract citations

This notebook presents some basic results obtained by extracting citations from the abstracts of all (ADHO) DH conferences from 2015 to 2020 and from DHQ journal articles. The dataset is available [here](https://github.com/lehkost/ToolXtractor/).

### Preamble

In [1]:
import ast
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bs4 import BeautifulSoup

In [2]:
def read_tei(tei_file):
    """Parse a TEI file into a BeautifulSoup tree.

    Args:
        tei_file: Path of the file to read (anything ``open`` accepts).

    Returns:
        The ``BeautifulSoup`` object built from the file content with the
        ``lxml`` parser.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The former trailing ``raise RuntimeError(...)`` could never run: the
    # ``with`` body either returns the soup or propagates its own exception.
    with open(tei_file, 'r') as tei:
        return BeautifulSoup(tei, 'lxml')

In [None]:
def elem_to_text(elem, default=''):
    """Return the text of ``elem``, or ``default`` when ``elem`` is falsy.

    Args:
        elem: An object exposing ``getText(separator=..., strip=...)``, or a
            falsy value such as ``None`` when the element was not found.
        default: Value returned when ``elem`` is falsy.

    Returns:
        ``elem.getText(separator=' ', strip=True)`` for a truthy ``elem``,
        otherwise ``default``.
    """
    if not elem:
        return default
    return elem.getText(separator=' ', strip=True)

In [4]:
from dataclasses import dataclass

@dataclass
class Person:
    """Name parts of one author, each stored as a plain string."""

    # Text of the first <forename> element found for the author.
    firstname: str
    # Text of the <forename type="middle"> element.
    middlename: str
    # Text of the <surname> element.
    surname: str

In [5]:
class TEIFile:
    """Read-only accessor for the parts of one TEI document used in this notebook.

    The file is parsed once, in the constructor, by ``read_tei``; every property
    then queries the resulting soup. ``title``, ``abstract`` and ``text`` are
    computed on first access and cached on the instance.
    """

    def __init__(self, filename):
        """Parse ``filename`` and initialise the (empty) caches."""
        self.filename = filename
        self.soup = read_tei(filename)
        # None means "not computed yet", so an empty result is cached as well.
        self._text = None
        self._title = None
        self._abstract = None

    @property
    def doi(self):
        """Text of the first ``<idno type="DOI">`` element, or ``''`` if there is none."""
        idno_elem = self.soup.find('idno', type='DOI')
        if not idno_elem:
            return ''
        return idno_elem.getText()

    @property
    def title(self):
        """Text of the document's ``title`` element, or ``"na"`` if there is none."""
        if self._title is None:
            title_elem = self.soup.title
            self._title = title_elem.getText() if title_elem else "na"
        return self._title

    @property
    def abstract(self):
        """Whitespace-normalised text of the ``abstract`` element, or ``''`` if absent."""
        if self._abstract is None:
            abstract_elem = self.soup.abstract
            # A document without <abstract> used to raise AttributeError here.
            if abstract_elem is None:
                self._abstract = ''
            else:
                self._abstract = abstract_elem.getText(separator=' ', strip=True)
        return self._abstract

    @property
    def authors(self):
        """List of ``Person`` objects, one per ``author`` element containing a ``persname``.

        Authors without a ``persname`` element are skipped.
        """
        # NOTE(review): find_all() searches the whole document, so <author>
        # elements outside the header are included too; confirm this is intended.
        result = []
        for author in self.soup.find_all('author'):
            persname = author.persname
            if not persname:
                continue
            # First <forename> of any type; the middle name is looked up separately.
            firstname = elem_to_text(persname.find("forename"))
            middlename = elem_to_text(persname.find("forename", type="middle"))
            surname = elem_to_text(persname.surname)
            result.append(Person(firstname, middlename, surname))
        return result

    @property
    def bibliography(self):
        """List with the text of every ``bibl`` element found in the document."""
        result = []
        for bibl in self.soup.find_all('bibl'):
            if not bibl:
                continue
            result.append(elem_to_text(bibl))
        return result

    @property
    def text(self):
        """Text of all ``div`` elements of the body that carry no ``type`` attribute.

        The texts are joined with single spaces. Returns ``''`` when the
        document has no ``body`` element (this used to raise AttributeError).
        """
        if self._text is None:
            body = self.soup.body
            if body is None:
                self._text = ''
            else:
                # A div without a "type" is neither an appendix nor references,
                # just plain text.
                divs_text = [
                    div.get_text(separator=' ', strip=True)
                    for div in body.find_all("div")
                    if not div.get("type")
                ]
                self._text = " ".join(divs_text)
        return self._text

In [19]:
def get_citation_cn(df_dois_values, default=''):
    """Request a ``text/x-bibliography`` citation from doi.org for each DOI.

    Args:
        df_dois_values: Iterable of DOI strings. Entries equal to ``""`` or
            ``None`` are not requested.
        default: Unused; kept so existing calls keep working.

    Returns:
        A DataFrame with columns ``doi`` and ``cn_citation``, one row per
        input value. ``cn_citation`` holds the decoded response body,
        ``'Not Found'`` when the body contains "DOI Not Found", ``408`` on a
        connect/read timeout, ``503`` on any other connection error, and
        ``400`` (with ``doi`` set to ``'none'``) for empty/None inputs.
    """
    import requests

    headers_dict = {"Accept": "text/x-bibliography", "locale":"en-EN"}
    # Rows are collected in a list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    records = []
    # Iterate the argument; the previous code read the unrelated global
    # ``df_orig_dois_values`` and ignored what the caller passed in.
    for vard in df_dois_values:
        if vard is None or vard == "":
            records.append({'doi': 'none', 'cn_citation': 400})
            continue
        print(vard)
        try:
            rd = requests.get("http://doi.org/"+vard, headers=headers_dict, timeout=20)
        except requests.exceptions.Timeout:
            # Must come before ConnectionError: ConnectTimeout derives from
            # both, so the old ordering reported connect timeouts as 503.
            citation = 408
        except requests.exceptions.ConnectionError:
            citation = 503
        else:
            if 'DOI Not Found' in rd.text:
                citation = 'Not Found'
            else:
                # NOTE(review): latin-1 never fails to decode, but a UTF-8
                # response would come out garbled; confirm the encoding.
                citation = rd.content.decode("latin-1")
        records.append({'doi': vard, 'cn_citation': citation})
    return pd.DataFrame(records, columns=['doi', 'cn_citation'])

In [70]:
def get_citation_from_SSHOC(df_dois_values, default=''):
    """Request citation metadata for each DOI from the v4e-lab citation service.

    Args:
        df_dois_values: Iterable of DOI strings. Entries equal to ``""`` or
            ``None`` are not requested.
        default: Unused; kept so existing calls keep working.

    Returns:
        A DataFrame with columns ``doi`` and ``cn_citation``, one row per
        input value. ``cn_citation`` holds the decoded response body,
        ``'Not Found'`` when the body contains "DOI Not Found", ``408`` on a
        connect/read timeout, ``503`` on any other connection error, and
        ``400`` (with ``doi`` set to ``'none'``) for empty/None inputs.
    """
    import requests

    # Rows are collected in a list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    records = []
    for vard in df_dois_values:
        if vard is None or vard == "":
            records.append({'doi': 'none', 'cn_citation': 400})
            continue
        print(vard)
        try:
            # NOTE(review): the DOI is concatenated into the query string
            # unescaped; a DOI containing '&' or '#' would cut the pid
            # parameter short. Confirm whether the service expects it quoted.
            rd = requests.get("http://v4e-lab.isti.cnr.it/citationservice/citharvester/getmetadatahtml?pid=https://doi.org/"+vard+"&token=test", timeout=75)
        except requests.exceptions.Timeout:
            # Must come before ConnectionError: ConnectTimeout derives from
            # both, so the old ordering reported connect timeouts as 503.
            citation = 408
        except requests.exceptions.ConnectionError:
            citation = 503
        else:
            if 'DOI Not Found' in rd.text:
                citation = 'Not Found'
            else:
                citation = rd.content.decode("latin-1")
        records.append({'doi': vard, 'cn_citation': citation})
    return pd.DataFrame(records, columns=['doi', 'cn_citation'])

In [6]:
import multiprocessing

In [7]:
from os.path import basename, splitext

def basename_without_ext(path):
    """Return the file name of ``path`` without its extension.

    The last extension is always removed; if what remains still ends in
    ``.tei`` (as in ``paper.tei.xml``), that suffix is removed as well.
    """
    tei_suffix = '.tei'
    stem = splitext(basename(path))[0]
    if not stem.endswith(tei_suffix):
        return stem
    return stem[:-len(tei_suffix)]

In [8]:
def tei_to_csv_entry(tei_file):
    """Turn one TEI file into a row for the results table.

    Returns:
        A tuple ``(id, authors, title, bibliography)`` where ``id`` is the
        file name without extension and the other items come from the
        corresponding ``TEIFile`` properties. The abstract is not included.
    """
    document = TEIFile(tei_file)
    print(f"Handled {tei_file}")
    entry_id = basename_without_ext(tei_file)
    return (entry_id, document.authors, document.title, document.bibliography)

In [None]:
from multiprocessing.pool import Pool
# Worker pool created with the default number of processes; the cells below
# use it to parse the TEI files in parallel via pool.map.
pool = Pool()

### Import the DH conference abstracts (2015-2020) and DHQ articles

The dataset is downloaded from https://github.com/lehkost/ToolXtractor/

In [9]:
import glob
from pathlib import Path
# Root of the local ToolXtractor checkout. Edit this single line to run the
# notebook against a copy of the dataset stored somewhere else.
TOOLXTRACTOR_DATA = Path("/Users/cesare/git/SSHOCCitationService/dataset/ToolXtractor/data")


def list_xml_files(directory):
    """Return the ``*.xml`` files directly inside ``directory``, sorted by path."""
    return sorted(Path(directory).glob('*.xml'))


# One list of TEI files per DH conference year, plus the DHQ articles.
papers15 = list_xml_files(TOOLXTRACTOR_DATA / "DH" / "xml" / "2015")
papers16 = list_xml_files(TOOLXTRACTOR_DATA / "DH" / "xml" / "2016")
papers17 = list_xml_files(TOOLXTRACTOR_DATA / "DH" / "xml" / "2017")
papers18 = list_xml_files(TOOLXTRACTOR_DATA / "DH" / "xml" / "2018")
papers19 = list_xml_files(TOOLXTRACTOR_DATA / "DH" / "xml" / "2019")
papers20 = list_xml_files(TOOLXTRACTOR_DATA / "DH" / "xml" / "2020")
papersdhq = list_xml_files(TOOLXTRACTOR_DATA / "DHQ")

In [11]:
# Parse every corpus in parallel: one pool.map call per corpus, issued in the
# same order as the file lists (2015 ... 2020, then DHQ).
(
    csv_entries15,
    csv_entries16,
    csv_entries17,
    csv_entries18,
    csv_entries19,
    csv_entries20,
    csv_entriesdhq,
) = [
    pool.map(tei_to_csv_entry, corpus)
    for corpus in (papers15, papers16, papers17, papers18, papers19, papers20, papersdhq)
]

In [12]:
# Stack the per-corpus entries into one table with a single pd.concat call.
# The previous chain of DataFrame.append calls no longer runs on pandas >= 2.0,
# where DataFrame.append was removed. As before, the index is not reset, so
# each corpus keeps its own 0..n-1 labels.
csv_columns = ['ID', 'Authors', 'Title', 'Bibliography']
result_csv = pd.concat(
    [
        pd.DataFrame(entries, columns=csv_columns)
        for entries in (
            csv_entries15,
            csv_entries16,
            csv_entries17,
            csv_entries18,
            csv_entries19,
            csv_entries20,
            csv_entriesdhq,
        )
    ]
)
result_csv.count()

ID              2788
Authors         2788
Title           2788
Bibliography    2788
dtype: int64

### Select the papers having the TEI \<bibl\>   elements.

The dataset contains 2788 documents structured according to the TEI document model.

These documents are parsed to identify those containing the \<bibl\> (bibliographic citation) element. This element contains a loosely-structured bibliographic citation whose sub-components may or may not be explicitly tagged. There are 1624 documents containing this element.

In [13]:
# Boolean mask: True for the documents whose Bibliography list is not empty.
has_bibliography = result_csv['Bibliography'].str.len() > 0
test_csv = result_csv[has_bibliography]
test_csv.count()

ID              1624
Authors         1624
Title           1624
Bibliography    1624
dtype: int64

In [14]:
# All citations: explode() yields one row per entry of the Bibliography list,
# repeating the ID and Title of the document it belongs to.
citation_columns = ['ID', 'Title', 'Bibliography']
my_df = test_csv[citation_columns]
my_exp_df = my_df.explode('Bibliography')
my_exp_df.count()

ID              26028
Title           26028
Bibliography    26028
dtype: int64

The total number of citations identified in the dataset is 26028.

### Find citations having a DOI
Regular expressions are used to parse all citations in order to identify those containing a DOI: after removing duplicates, 1162 of the 26028 citations contain a DOI.

In [116]:
import re
# DOI pattern: "10." followed by at least four digits (optionally with dotted
# numeric sub-codes), a slash, then a run of non-space characters that excludes
# " & ' < and >. Group 1 is the DOI; the match must end at a word boundary.
regex = re.compile(r'\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])\S)+)\b', re.IGNORECASE)
df_refs = my_exp_df.Bibliography.values

# Search every citation once and keep only those in which the pattern matches.
doi_matches = [(reference, regex.search(reference)) for reference in df_refs]
references = [reference for reference, found in doi_matches if found]
DOIs = [found[1] for _, found in doi_matches if found]

# Identical (Reference, DOI) pairs are collapsed; the index is not reset.
df_refs_with_doi = pd.DataFrame({"Reference": references, "DOI": DOIs}).drop_duplicates()
df_refs_with_doi.count()

Reference    1162
DOI          1162
dtype: int64

In [27]:
# Example: five citations that have DOIs, shown with a wider Reference column.
# (A bare `df_refs_with_doi.head()` used to precede this line; its value was
# discarded because only the last expression of a cell is displayed.)
df_refs_with_doi.head().style.set_properties(subset=['Reference'], **{'width': '600px'})

Unnamed: 0,Reference,DOI
0,"Byrne, G., and Goddard, L. (2010). The Strongest Link: Libraries and Linked Data. D-Lib Magazine , 16 (11/12), doi:10.1045/november2010-byrne.",10.1045/november2010-byrne
1,"Lampert, C. K., and Southwick, S. B. (2013). Leading to Linking: Introducing Linked Data to Academic Library Digital Collections. Journal of Library Metadata , 13 (2–3): 230–53, doi:10.1080/19386389.2013.826095.",10.1080/19386389.2013.826095
2,"Singer, R. (2009). Linked Library Data Now! Journal of Electronic Resources Librarianship , 21 (2): 114–26, doi:10.1080/19411260903035809.",10.1080/19411260903035809
3,"Thomas, L. and Solomon, D. (2014). Active Users: Project Development and Digital Humanities Pedagogy. CEA Critic, 76 (2) (July): 211–20, DOI:10.1353/cea.2014.0014.",10.1353/cea.2014.0014
4,"Farquhar, A. and Baker, J. (2014). Interoperable Infrastructures for Digital Research: A Proposed Pathway for Enabling Transformation. Digital Humanities 2014, http://dx.doi.org/10.6084/m9.figshare.1092550%20.",10.6084/m9.figshare.1092550%20


### Check DOIs: HTTP return status

All DOIs are resolved through https://doi.org/ and the HTTP return status is captured.

In [None]:
import requests

df_urls = df_refs_with_doi.DOI.values

# Rows are collected in a list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
status_records = []
for var in df_urls:
    if var == "":
        # Nothing to resolve: recorded as a bad request.
        status_records.append({'DOI': var, 'status': 400})
        continue
    try:
        r = requests.get("https://doi.org/"+var, timeout=18)
        print("result: "+var+ " ",r.status_code)
        status = int(r.status_code)
    except requests.exceptions.Timeout:
        # Must come before ConnectionError: ConnectTimeout derives from both,
        # so the old ordering reported connect timeouts as 503.
        status = 408
    except requests.exceptions.ConnectionError:
        status = 503
    except requests.exceptions.RequestException:
        # Any other failure raised by requests.
        status = 500
    except TypeError:
        # Raised by the string concatenation when the value is not a string.
        status = 400
    status_records.append({'DOI': var, 'status': status})

df_http_status = pd.DataFrame(status_records, columns=['DOI', 'status'])
df_http_status.head()

### HTTP Return Status: 404 not found

There are 30 DOIs that return 404.

In [135]:
# Keep the DOIs that answered 404 and attach the citation text they were
# extracted from, looked up through the DOI.
is_not_found = df_http_status['status'].eq(404)
df_404 = df_http_status[is_not_found]
references_by_doi = df_refs_with_doi.set_index('DOI')
test = df_404.join(references_by_doi, on='DOI')
test.drop_duplicates(subset='DOI', keep='first').count()

DOI          30
status       30
Reference    30
dtype: int64

In [133]:
# Example of DOIs returning 404: first five distinct DOIs, renumbered from 0,
# with a wider Reference column.
# test.to_csv(path_or_buf='../data/notfound.csv', sep=';')
(
    test
    .drop_duplicates(subset='DOI', keep='first')
    .reset_index(drop=True)
    .head()
    .style
    .set_properties(subset=['Reference'], **{'width': '600px'})
)

Unnamed: 0,DOI,status,Reference
0,10.1093/llc/fqn03,404,"Lavagnino, J. (2009). Access. Literary and Linguistic Computing, 24 (1) (April), 10.1093/llc/fqn03, http://llc.oxfordjournals.org/content/24/1/63.full.pdf+html."
1,10.1093/llc/gqv010,404,"Eder, M. (2015a). Rolling stylometry. Digital Scholarship in the Humanities , 30 , first published online: 7 April 2015, doi: 10.1093/llc/gqv010."
2,10.4000/socio.1338,404,"Mounier, P. (2015). Une « utopie politique » pour les humanités numériques ?. Socio , 4 : 97–112. doi:10.4000/socio.1338."
3,10.3318/DRI.2012.1,404,"O’Carroll, Aileen and Webb, Sharon : Digital Archiving in Ireland: National Survey of the Humanities and Social Sciences. Maynooth University (2012) DOI: 10.3318/DRI.2012.1"
4,10.9776/13289,404,"Kou, Y. and Nardi, B. (2013). Regulating anti-social behavior on the Internet: The example of League of Legends. iConference 2013 Proceedings , Fort Worth, TX: February 12-15, 2013, pp. 616-22. 10.9776/13289."


### Retrieve citation DOIs using CrossRef API
The Crossref API allows querying the database with free-text strings that contain bibliographic references. The reference string does not necessarily need to be a well-formed reference: Crossref parses the input string using machine learning techniques and tries to match it against the metadata stored in its database.

An important feature of the Crossref API is the confidence score that it returns alongside each document’s metadata. For each request, the score indicates how confident Crossref is about the retrieved entities: if the score is high, the retrieved metadata are probably correct; if it is low, they might be wrong.

Here the Crossref query API is used to check whether citations whose DOI returns "404 Not Found" can be found in the Crossref database and, if so, what their DOI is. Only matches with a score above *110* are accepted.

In [104]:
import json
import urllib.error
import urllib.parse
import urllib.request

# A Crossref match is accepted only when its score is above this value.
MIN_CROSSREF_SCORE = 110
# The loop stops once more than this many queries have been answered.
MAX_CROSSREF_QUERIES = 1000

references = test['Reference']
originalCitations = []
crossrefCitations = []
crossrefCitationsAuthor = []
crossrefCitationsMetadata = []
DOIs = []
score = []
i = 0  # number of citations for which a match was accepted
j = 0  # number of queries answered by Crossref

for citenf in references:
    cit = urllib.parse.quote_plus(citenf)
    try:
        # NOTE(review): urlopen's timeout is in seconds, so 18000 is five
        # hours; the other cells use 18. Confirm which value is intended.
        with urllib.request.urlopen("https://api.crossref.org/works?query.bibliographic="+cit+"&sort=score&mailto=cesare.concordia@gmail.com#", timeout=18000) as url:
            data16 = json.loads(url.read().decode())
    except urllib.error.URLError:
        # HTTPError is a subclass of URLError, so this one handler covers both.
        print(cit)
        continue

    j = j + 1
    if j % 5 == 0:
        print(f"{j}, ({i})")

    items = data16["message"]["items"]
    if items and items[0]['score'] > MIN_CROSSREF_SCORE:
        best_match = items[0]
        originalCitations.append(citenf)
        crossrefCitationsMetadata.append(best_match)
        # 'title' and 'author' are missing from some Crossref records; an
        # empty list is stored for those, where a KeyError used to abort.
        crossrefCitations.append(best_match.get('title', []))
        crossrefCitationsAuthor.append(best_match.get('author', []))
        DOIs.append(best_match['DOI'])
        score.append(best_match['score'])
        i = i + 1

    if j > MAX_CROSSREF_QUERIES:
        break

df_citations404 = pd.DataFrame({
    "Publication_cit": originalCitations,
    "Crossref_cit_title": crossrefCitations,
    "Crossref_cit_author": crossrefCitationsAuthor,
    "Crossref_cit_metadata": crossrefCitationsMetadata,
    "Crossref_DOI": DOIs,
    "Score": score,
})
df_citations404.head()

5, (0)
10, (0)
15, (0)
20, (2)
25, (4)
30, (6)
35, (8)


Unnamed: 0,Publication_cit,Crossref_cit_title,Crossref_cit_author,Crossref_cit_metadata,Crossref_DOI,Score
0,"Griffiths Mary and Kim Barbour. ""‘Imagine If O...",['Imagine if our cities talked to us': Questio...,"[{'given': 'Mary', 'family': 'Griffiths', 'seq...","{'indexed': {'date-parts': [[2020, 3, 28]], 'd...",10.20851/publics-03,174.88257
1,"Underwood, T., Bamman, D. and Lee, S. (2018). ...",[The Transformation of Gender in English-Langu...,"[{'given': 'Ted', 'family': 'Underwood', 'sequ...","{'indexed': {'date-parts': [[2020, 10, 23]], '...",10.22148/16.019,116.7961
2,"Bates, M. (1998). Indexing and access for digi...",[Indexing and access for digital libraries and...,"[{'given': 'Marcia J.', 'family': 'Bates', 'se...","{'indexed': {'date-parts': [[2020, 12, 5]], 'd...",10.1002/(sici)1097-4571(1998110)49:13<1185::ai...,111.929085
3,"Spiro, L. (2012). “This Is Why We Fight”: Defi...",[“This Is Why We Fight”: Defining the Values o...,"[{'given': 'Lisa', 'family': 'Spiro', 'sequenc...","{'indexed': {'date-parts': [[2020, 3, 2]], 'da...",10.5749/minnesota/9780816677948.003.0003,136.29298
4,"Shapiro, L. A. (2012). Embodied Cognition. In ...",[6. Embodied Cognition],"[{'given': 'Lawrence A.', 'family': 'Shapiro',...","{'indexed': {'date-parts': [[2020, 12, 6]], 'd...",10.1093/oxfordhb/9780195309799.003.0006,114.83975


Using the Crossref API, 8 DOIs have been retrieved.

In [136]:
#df_citations404[['Publication_cit', 'Crossref_cit_title', 'Crossref_cit_author', 'Crossref_DOI']].head(10).style.set_properties(subset=['Publication_cit'], **{'width': '200px'})

In [141]:
# Put the DOI extracted from the publication ('DOI') next to the one returned
# by Crossref ('Crossref_DOI'), matching rows on the citation text.
test_by_reference = test.set_index('Reference')
retrieved404 = df_citations404.join(test_by_reference, on='Publication_cit', lsuffix='_in_publication')
comparison_columns = ['Publication_cit', 'Crossref_cit_title', 'Crossref_cit_author','Crossref_DOI', 'DOI']
(
    retrieved404[comparison_columns]
    .head(8)
    .style
    .set_properties(subset=['Publication_cit','Crossref_cit_title'], **{'width': '200px'})
)

Unnamed: 0,Publication_cit,Crossref_cit_title,Crossref_cit_author,Crossref_DOI,DOI
0,"Griffiths Mary and Kim Barbour. ""‘Imagine If Our Cities Talked to Us’: Questions about the Making of ‘responsive’ Places and Urban Publics."" In Making Publics, Making Places, 27-48. South Australia: University of Adelaide Press, 2016, http://www.jstor.org/stable/10.20851/j.ctt1t304qd.8","[""'Imagine if our cities talked to us': Questions about the making of 'responsive' places and urban publics""]","[{'given': 'Mary', 'family': 'Griffiths', 'sequence': 'first', 'affiliation': []}]",10.20851/publics-03,10.20851/j.ctt1t304qd.8
1,"Underwood, T., Bamman, D. and Lee, S. (2018). The transformation of Gender in English language fiction. Cultural Analytics, http://culturalanalytics.org/2018/02/thetransformation-of-gender-in-english-language-fiction/ (DOI: 10.7910/DVN/TEGMGI)",['The Transformation of Gender in English-Language Fiction'],"[{'given': 'Ted', 'family': 'Underwood', 'sequence': 'first', 'affiliation': []}, {'given': 'David', 'family': 'Bamman', 'sequence': 'additional', 'affiliation': []}, {'given': 'Sabrina', 'family': 'Lee', 'sequence': 'additional', 'affiliation': []}]",10.22148/16.019,10.7910/DVN/TEGMGI
2,"Bates, M. (1998). Indexing and access for digital libraries and the Internet: human, database, and domain factors. Journal of the American Society for Information Science , 49 (13) doi:10.1002/(SICI)1097-4571(1998110)49:13<1185::AID-ASI6>3.3.CO;2-M (accessed 23 March 2011).","['Indexing and access for digital libraries and the internet: Human, database, and domain factors']","[{'given': 'Marcia J.', 'family': 'Bates', 'sequence': 'first', 'affiliation': []}]",10.1002/(sici)1097-4571(1998110)49:13<1185::aid-asi6>3.0.co;2-v,10.1002/(SICI)1097-4571(1998110)49:13
3,"Spiro, L. (2012). “This Is Why We Fight”: Defining the Values of the Digital Humanities. In M. K. Gold (ed.), Debates in the Digital Humanities . University of Minnesota Press, pp. 16–35. Available at http://minnesota.universitypressscholarship.com/view/10.5749/minnesota/9780816677948.001.0001/upso-9780816677948-chapter-3",['“This Is Why We Fight”: Defining the Values of the Digital Humanities'],"[{'given': 'Lisa', 'family': 'Spiro', 'sequence': 'first', 'affiliation': []}]",10.5749/minnesota/9780816677948.003.0003,10.5749/minnesota/9780816677948.001.0001/upso-9780816677948-chapter-3
4,"Shapiro, L. A. (2012). Embodied Cognition. In Margolis, E., Samuels, R., and Stich, S. P. (eds.), The Oxford Handbook of Philosophy of Cognitive Science . Oxford, New York: Oxford University Press, pp. 118–46. http://www.oxfordhandbooks.com/view/10.1093/oxfordhb/9780195309799.001.0001/oxfordhb-9780195309799-e-6 (accessed 26 November 2018).",['6. Embodied Cognition'],"[{'given': 'Lawrence A.', 'family': 'Shapiro', 'sequence': 'first', 'affiliation': []}]",10.1093/oxfordhb/9780195309799.003.0006,10.1093/oxfordhb/9780195309799.001.0001/oxfordhb-9780195309799-e-6
5,"Hirsch, Brett D.,  2011. The Kingdom Has Been Digitized: Electronic Editions  of Renaissance Drama and the Long Shadows of Shakespeare and Print . Literature Compass [online] 8 (9): 568-591.  Available from: http://onlinelibrary.wiley.com/doi/10.1111/lico.2011.8.issue-9/issuetoc [Accessed 27 September 2012].",['The Kingdom has been Digitized: Electronic Editions of Renaissance Drama and the Long Shadows of Shakespeare and Print'],"[{'given': 'Brett D.', 'family': 'Hirsch', 'sequence': 'first', 'affiliation': []}]",10.1111/j.1741-4113.2011.00830.x,10.1111/lico.2011.8.issue-9/issuetoc
6,"Romanello, Matteo. 2013. Creating an Annotated Corpus for Extracting Canonical  Citations from Classics-Related Texts by Using Active Annotation. In Computational Linguistics and Intelligent Text  Processing. 14th International Conference, CICLing 2013, Samos, Greece,  March 24-30, 2013, Proceedings, Part I , edited by Alexander Gelbukh,  1:60–76. Lecture Notes in Computer Science / Theoretical Computer Science and  General Issues. Springer Berlin Heidelberg. doi:10.1007/978-3-642-37247-6\_6 .",['Creating an Annotated Corpus for Extracting Canonical Citations from Classics-Related Texts by Using Active Annotation'],"[{'given': 'Matteo', 'family': 'Romanello', 'sequence': 'first', 'affiliation': []}]",10.1007/978-3-642-37247-6_6,10.1007/978-3-642-37247-6\_6
7,"Zundert, van,  J.J., 2016. Screwmeneutics and Hermenumericals: the  Computationality of Hermeneutics. In S. Schreibman, R. Siemens,  & J. Unsworth, eds. A New Companion to Digital  Humanities . Malden (US), Oxford (UK), etc.: John Wiley & Sons,  Ltd, pp. 331–347. Available at: http://onlinelibrary.wiley.com/doi/10.1002/9781118680605.ch23/summary [Accessed July 26, 2016].",['Screwmeneutics and Hermenumericals'],"[{'given': 'Joris J.', 'family': 'van Zundert', 'sequence': 'first', 'affiliation': []}]",10.1002/9781118680605.ch23,10.1002/9781118680605.ch23/summary
