forked from NLW-paulm/Welsh-Tribunal-annotations
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtypeImage.py
More file actions
executable file
·40 lines (29 loc) · 1.24 KB
/
typeImage.py
File metadata and controls
executable file
·40 lines (29 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/python3
import json
import sys
import requests
from pathlib import Path
def string2json(line):
    """Parse ``<p>``-separated ``name: value`` fields into a dict.

    The input is split on the literal marker ``<p>``.  Within each fragment
    everything before the first ``:`` is the field name and everything after
    it (including any further colons) is the value.

    Args:
        line: Text made of ``name: value`` fragments separated by ``<p>``.

    Returns:
        A dict mapping each field name to its value, both stripped of
        surrounding whitespace.  A fragment without a colon maps its whole
        text to an empty string.  If a name occurs more than once, the last
        occurrence wins.
    """
    data = {}
    for field in line.split('<p>'):
        # A leading "<p>" (or two adjacent markers) produces an empty
        # fragment; skip it instead of storing a meaningless '' key.
        if not field.strip():
            continue
        # partition() splits on the first colon only, so values that contain
        # colons themselves (URLs, times) are kept intact.
        name, _, value = field.partition(':')
        # Strip the name as well as the value so that lookups such as
        # data['Tag'] still work when the source has "Tag : x" or " Tag: x".
        data[name.strip()] = value.strip()
    return data
# Usage: typeImage.py <annotation-list.json> <output-directory>
#
# For every annotation in the list, download a thumbnail of the annotated
# image and store it under a sub-directory of the output directory that is
# named after the annotation's "Tag" field.

# JSON text is UTF-8; do not rely on the platform's default encoding.
with open(sys.argv[1], encoding="utf-8") as f:
    annolist = json.load(f)
print('Looking at {}'.format(sys.argv[1]))
outputDir = Path(sys.argv[2])

# Turn a tag into a directory name in a single pass:
# space -> "_", "/" -> "-", and ":" / "," are removed.
tagToDirName = str.maketrans({" ": "_", ":": None, "/": "-", ",": None})

for anno in annolist["resources"]:
    pageData = string2json(anno["resource"]["chars"])
    # The image identifier is taken from the eighth '/'-separated segment of
    # the annotation target URL, with anything after the first '.' dropped.
    imageId = anno["on"].split('/')[7].split('.')[0]
    subDir = pageData['Tag'].translate(tagToDirName)
    # "256," is the IIIF size parameter (scale to a width of 256 pixels).
    imageUrl = 'http://dams.llgc.org.uk/iiif/2.0/image/{}/full/256,/0/default.jpg'.format(imageId)
    imagePath = outputDir.joinpath(subDir, "{}.jpg".format(imageId))
    print('Downloading {} to {}'.format(imageUrl, imagePath))
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(imageUrl, timeout=30)
        # Treat HTTP errors as failures so that an error page is never
        # saved to disk under a .jpg name.
        response.raise_for_status()
    except requests.RequestException as err:
        # Best effort: report the problem and carry on with the next image.
        print('Failed to download {}: {}'.format(imageUrl, err), file=sys.stderr)
        continue
    imagePath.parent.mkdir(parents=True, exist_ok=True)
    # write_bytes opens, writes and closes the file in one call.
    imagePath.write_bytes(response.content)