This repository has been archived by the owner on Jul 29, 2020. It is now read-only.
/
ia-metadata-add-genre.py
58 lines (53 loc) · 2.24 KB
/
ia-metadata-add-genre.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2020 emijrp <emijrp@gmail.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import time
import internetarchive
def _add_genre_if_missing(item, project, genre):
    """Set ``genre`` on a single archive.org item unless it already has one.

    Args:
        item: An item object yielded by ``search_items(...).iter_as_items()``;
            only its ``item_metadata`` mapping is read.
        project: Project name (a key of the genre table in ``main``). It must
            appear, case-insensitively, in the item identifier for the item
            to be modified.
        genre: Genre string to write into the item's metadata.

    The outcome is reported on stdout; nothing is returned.
    """
    try:
        metadata = item.item_metadata['metadata']
        itemid = metadata['identifier']
    except (KeyError, TypeError):
        # The metadata record is missing or not shaped as expected: report
        # the item and skip it. Only lookup errors are caught here so that
        # Ctrl-C / SystemExit still stop the run.
        print('Error in %s' % (item,))
        return
    print(itemid)

    if 'genre' in metadata:
        # Never overwrite a genre that is already set.
        print('Already has genre: %s' % (metadata['genre'],))
        return

    if project.lower() not in itemid.lower():
        # The subject search matched, but the identifier does not mention
        # the project, so the item is left untouched.
        print('Unknown project')
        return

    r = internetarchive.modify_metadata(itemid, metadata=dict(genre=genre))
    if r.status_code == 200:
        print('Genre added: %s' % (genre,))
    else:
        print('Error (%s) adding genre: %s' % (r.status_code, genre))


def main():
    """Add a ``genre`` field to Kiwix ZIM items on archive.org.

    For every project in the table below, search for items whose subjects
    include "kiwix", "zim" and the lowercased project name, then set the
    mapped genre on each matching item that does not have one yet.
    Progress and errors are printed to stdout.
    """
    genres = {
        'Gutenberg': 'Literature',
        'Khan-academy-videos': 'Course',
        'Wikibooks': 'Course',
        'Wikinews': 'News',
        'Wikipedia': 'Encyclopedia',
        'Wikiquote': 'Quotes',
        'Wikisource': 'Literature',
        'Wikispecies': 'Encyclopedia',
        'Wikiversity': 'Course',
        'Wikivoyage': 'Travel',
        'Wiktionary': 'Dictionary',
    }
    for project, genre in genres.items():
        # https://archive.org/services/docs/api/internetarchive/quickstart.html#searching
        query = 'subject:"kiwix" AND subject:"zim" AND subject:"%s"' % (project.lower(),)
        for item in internetarchive.search_items(query).iter_as_items():
            _add_genre_if_missing(item, project, genre)
# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()