# Get annotations for annotated manuscripts

This notebook currently treats a CGType:General annotation as the signal that a manuscript's annotations should be pulled down. There is no PMID tag, so the PMID has to be extracted from the text of the CGType:General annotation.

**Note**: To use this notebook, you need a hypothes.is API token with permission to access the relevant hypothes.is group. Store the token as a single line in a file called "token" in the directory the notebook is run from.

In [1]:
import requests
import yaml
import os
import re
import json

In [2]:
# Read the hypothes.is API token from the single-line "token" file. The
# context manager closes the file handle instead of leaving it open.
with open('token') as token_file:
    token = token_file.read().rstrip()
# Base URL of the hypothes.is API.
api_endpoint = 'https://hypothes.is/api'
# ID of the hypothes.is group sent as the 'group' parameter of each search.
group = 'DRL6xW1v'

In [3]:
# Search the group for annotations tagged CGType:General, newest first,
# requesting up to 50 results in this single call.
# NOTE(review): the search is sent as a POST with form data - confirm the
# API still accepts that method for /search.
tagged_general_http_response = requests.post(
    api_endpoint + '/search',
    data={
        'group': group,
        'sort': 'created',
        'order': 'desc',
        'limit': 50,
        'tag': 'CGType:General'
    },
    headers={
        'Authorization': 'Bearer %s'%token
    },
    # Without a timeout, requests can wait forever on a stalled connection.
    timeout=30)
# Surface HTTP failures (e.g. a bad token) as an HTTPError here instead of a
# confusing KeyError on 'rows' below.
tagged_general_http_response.raise_for_status()
tagged_general_response = tagged_general_http_response.json()
annotations_tagged_general = tagged_general_response['rows']

In [4]:
# print(yaml.dump(annotations_tagged_general))

In [5]:
# [x['uri'] for x in annotations_tagged_general]

In [6]:
def find_pmid(cg_general_annotation):
    """Return the PMID found in a CGType:General annotation, or None.

    The annotation's 'text' field is searched for the markdown fragment
    ``**PMID**: <digits>``; the digits are returned as a string. None is
    returned when the text contains no such fragment.
    """
    annotation_text = cg_general_annotation['text']
    pmid_match = re.search(r'\*\*PMID\*\*: (\d+)', annotation_text)
    return pmid_match.group(1) if pmid_match else None

In [9]:
# For every CGType:General annotation that carries a PMID, fetch the
# annotations on the same URI and save the raw search response under
# downloaded_data/ as both PMID<pmid>.json and PMID<pmid>.yaml.
os.makedirs('downloaded_data', exist_ok=True)
for general_annotation in annotations_tagged_general:
    pmid = find_pmid(general_annotation)
    if pmid is None:
        # Without a PMID there is no file name to save the results under.
        continue
    # Oldest-first search on the manuscript's URI, requesting up to 80
    # results in this single call.
    manuscript_http_response = requests.post(
        api_endpoint + '/search',
        data={
            'group': group,
            'sort': 'created',
            'order': 'asc',
            'limit': 80,
            'uri': general_annotation['uri']
        },
        headers={
            'Authorization': 'Bearer %s'%token
        },
        # Without a timeout, requests can wait forever on a stalled connection.
        timeout=30)
    # Stop on an HTTP error rather than writing an error payload to disk as
    # if it were annotation data.
    manuscript_http_response.raise_for_status()
    manuscript_annotations_json = manuscript_http_response.json()
    with open(os.path.join('downloaded_data', 'PMID%s.json'%pmid), 'wt') as jsonf:
        json.dump(manuscript_annotations_json, jsonf, indent=2)
    with open(os.path.join('downloaded_data', 'PMID%s.yaml'%pmid), 'wt') as yamlf:
        yaml.dump(manuscript_annotations_json, yamlf)